Dominance Analysis

Analysis in progress. Please wait...

Using Raw Data

Note: Input values must be separated by tabs. Copy and paste from Excel/Numbers.

Wait patiently. It will take a minute or so until you get to see the result.
Please make sure that your data includes the header (variable names) in the first row.
The criterion (dependent) variable should be placed in the first column.


                
                

Basic Statistics


                

Correlation


Regression Analysis


                

Dominance Analysis


                

Dominance Weight Plot with 95% CI


Just for Reference: Relative Weight Analysis

Note: Relative weight analysis is criticized and not recommended due to mathematical concerns (see this paper).


                

Feature Selection: Variable Importance (Random Forest)


                

R session info

              

Using Correlation Matrix

Note: Input values must be separated by tabs. Copy and paste from Excel/Numbers.

Please make sure that your data includes the header (variable names) in the first row.
The criterion (dependent) variable should be placed in the first column.


                
                

Check the Input Correlation Matrix


                

Regression Analysis


                

Dominance Analysis

Note: The 95% confidence interval (CI) cannot be calculated from the correlation matrix.


                

Dominance Weight Plot

Note: The 95% confidence interval (CI) cannot be calculated from the correlation matrix.


Just for Reference: Relative Weight Analysis

Note: Relative weight analysis is criticized and not recommended due to mathematical concerns (see this paper).


                

R session info

              
Note

This web application is developed with Shiny.


List of Packages Used
library(shiny)
library(shinyAce)
library(psych)
library(car)
library(rpsychi)
library(boot)
library(plyr)
library(ggplot2)
library(Boruta)
library(relaimpo)
library(MASS)
library(yhat)
library(parallel)

Code

Source code for this application is based on "RWA Web" (by Dr. Scott Tonidandel)

and "The handbook of Research in Foreign Language Learning and Teaching" (Takeuchi & Mizumoto, 2012).

The code for this web application is available at GitHub.


Citation in Publications

Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. https://langtest.jp


Article

Mizumoto, A., & Plonsky, L. (2016). R as a lingua franca: Advantages of using R for quantitative research in applied linguistics. Applied Linguistics, 37(2), 284–291. https://doi.org/10.1093/applin/amv025

Mizumoto, A. (2023). Calculating the relative importance of multiple regression predictor variables using dominance analysis and random forests. Language Learning, 73(1), 161–196. https://doi.org/10.1111/lang.12518


Recommended

To learn more about R, I suggest this excellent and free e-book (pdf), A Guide to Doing Statistics in Second Language Research Using R, written by Dr. Jenifer Larson-Hall.


Developer

Atsushi MIZUMOTO, Ph.D.
Professor of Applied Linguistics
Faculty of Foreign Language Studies /
Graduate School of Foreign Language Education and Research,
Kansai University, Osaka, Japan



Code for "Dominance Analysis"
by Atsushi Mizumoto

show with app
library(shiny)
library(shinyAce)
library(psych)
library(car)
library(rpsychi)
library(boot)
library(plyr)
library(ggplot2)
library(Boruta)
library(relaimpo)
library(MASS)
library(yhat)
library(parallel)

shinyServer(function(input, output, session) {

    observe({
        if (input$do > 0) {
            
#-----------------------------------------------------------------
# Relative Weight Analysis Using Raw Data
#-----------------------------------------------------------------

            # Basic statistics
                bs <- reactive({
                    x <- read.csv(text=input$text, sep="\t")
                    describe(x)[2:13]
                })
                
                output$textarea.out <- renderPrint({
                    bs()
                })

                
            # Correlation
                makecorPlot <- function(){
                    x <- read.csv(text=input$text, sep="\t")
                    pairs.panels(x)
                }
                
                output$corPlot <- renderPlot({
                    print(makecorPlot())
                })
         
         
            # Regression
                reg <- reactive({
                    dat <- read.csv(text=input$text, sep="\t")

                    colnames(dat) <- c("Criterion", c(colnames(dat)[2:ncol(dat)]))
                    result <- lm(Criterion ~., dat)

                    reslt <- summary(result)
                    print(reslt)

                    z <- scale(dat)     # standardize the data
                    z <- data.frame(z)
                    z.res <- summary(lm(Criterion ~ ., z))

                    stdb <- data.frame(round((z.res$coefficients[,1][-1]),3))
                    colnames(stdb)[1] <- "Standardized beta"

                    cat("\n", "---", "\n", "Standardized beta estimates:", "\n")
                    print(stdb)

                if (ncol(dat) >= 3) {

                    VIF <- vif(result)
                    Tolerance <- 1/VIF

                    vif.res <- round(data.frame(VIF, Tolerance),3)

                    cat("\n", "---", "\n", "VIF and tolerance statistic (1/VIF):", "\n")
                    print(vif.res)
                    cat("\n", "VIF should be smaller than 10 (clozer to 1 better);", "\n",
                    "tolerance statistic (1/VIF) should be greater than 0.2.", "\n")
                }
                })
                
                output$reg.out <- renderPrint({
                    reg()
                })
               
               
            # Dominance Analysis
                dominance <- reactive({
                    
                    dat <- read.csv(text=input$text, sep="\t")
                    colnames(dat) <- c("Criterion", c(colnames(dat)[2:ncol(dat)]))
                    
                    # Fit the regression model
                    lm.out <- lm(Criterion ~ ., dat)
                    
                    # Calculate dominance statistics
                    regrOut <- yhat::calc.yhat(lm.out)
                    
                    # Determine number of cores for parallel processing
                    # Use half of available cores if more than 2 cores available
                    num_cores <- parallel::detectCores()
                    num_cores <- ifelse(num_cores > 2, floor(num_cores/2), 1)
                    
                    # Set seed for reproducibility
                    set.seed(8888)
                    
                    # Bootstrap function for dominance analysis
                    boot.out <- NULL
                    tryCatch({
                        # Try to use parallel processing if available
                        if (num_cores > 1) {
                            boot.out <- boot::boot(dat, yhat::boot.yhat, 1000, 
                                                lmOut = lm.out, 
                                                regrout0 = regrOut, 
                                                parallel = "multicore", 
                                                ncpus = num_cores)
                        } else {
                            boot.out <- boot::boot(dat, yhat::boot.yhat, 1000, 
                                                lmOut = lm.out, 
                                                regrout0 = regrOut)
                        }
                    }, error = function(e) {
                        # If parallel fails, try sequential
                        message("Parallel processing failed, trying sequential...")
                        boot.out <<- boot::boot(dat, yhat::boot.yhat, 1000, 
                                            lmOut = lm.out, 
                                            regrout0 = regrOut)
                    })
                    
                    # Generate bootstrap results
                    if (!is.null(boot.out)) {
                        result <- yhat::booteval.yhat(regrOut, bty = "perc", boot.out)
                        
                        # safely extract GenDom column if it exists
                        if (is.null(result$combCIpm) || !("GenDom" %in% colnames(result$combCIpm))) {
                            # Fallback if bootstrap results don't have expected structure
                            da <- relaimpo::calc.relimp(lm.out)
                            da_res <- as.data.frame(round(da@lmg, 4))
                            colnames(da_res) <- "Weight"
                            da_res$CI_lower <- NA
                            da_res$CI_upper <- NA
                            
                            return(list(
                                da_ci = da_res,
                                pv_comparisons = data.frame(
                                    Comparison = "Bootstrap failed to produce expected structure", 
                                    Diff = NA, 
                                    CI_lower = NA, 
                                    CI_upper = NA, 
                                    Significant = ""
                                )
                            ))
                        }
                        
                        # DA weights with CI - use GenDom column which has the correct weights
                        da_weights <- result$combCIpm[, "GenDom", drop=FALSE]
                        
                        # Create a data frame with separate columns for weight, lower CI, and upper CI
                        # Parse the string format "0.176(0.097,0.260)"
                        weights_df <- data.frame(
                            Variable = rownames(da_weights),
                            GenDom_string = da_weights[,1],
                            stringsAsFactors = FALSE
                        )
                        
                        # Extract values with better parsing
                        parse_value_with_ci <- function(str) {
                            # Remove any * at the end
                            str <- sub("\\*$", "", str)
                            
                            # Extract main value (before parenthesis)
                            main_val <- as.numeric(sub("^([0-9.-]+)\\(.*", "\\1", str))
                            
                            # Extract lower CI (between parenthesis and comma)
                            lower_val <- as.numeric(sub(".*\\(([0-9.-]+),.*", "\\1", str))
                            
                            # Extract upper CI (between comma and closing parenthesis)
                            upper_val <- as.numeric(sub(".*,([0-9.-]+)\\).*", "\\1", str))
                            
                            # Check if original string had * at the end
                            significant <- grepl("\\*$", weights_df$GenDom_string[1])
                            
                            return(list(
                                main = main_val,
                                lower = lower_val,
                                upper = upper_val,
                                significant = significant
                            ))
                        }
                        
                        # Apply parsing to each row
                        parsed_values <- lapply(weights_df$GenDom_string, parse_value_with_ci)
                        
                        # Extract values
                        weights_df$Weight <- sapply(parsed_values, function(x) x$main)
                        weights_df$CI_lower <- sapply(parsed_values, function(x) x$lower)
                        weights_df$CI_upper <- sapply(parsed_values, function(x) x$upper)
                        
                        # Create final data frame with weight and CI columns
                        da_ci <- weights_df[, c("Weight", "CI_lower", "CI_upper")]
                        rownames(da_ci) <- weights_df$Variable
                        
                        # Comparisons of PVs - Format as separate columns
                        if (is.null(result$combCIpmDiff) || !("GenDom" %in% colnames(result$combCIpmDiff))) {
                            # If comparison data is not available in expected format
                            return(list(
                                da_ci = round(da_ci, 4),
                                pv_comparisons = data.frame(
                                    Comparison = "Comparisons not available in expected format", 
                                    Diff = NA, 
                                    CI_lower = NA, 
                                    CI_upper = NA, 
                                    Significant = ""
                                )
                            ))
                        }
                        
                        pv_raw_comparisons <- result$combCIpmDiff[, "GenDom", drop = FALSE]
                        
                        # 結果を保存する新しいデータフレームを作成
                        formatted_comparisons <- data.frame(
                            Comparison = rownames(pv_raw_comparisons),
                            Diff = NA_real_,
                            CI_lower = NA_real_,
                            CI_upper = NA_real_,
                            Significant = "",
                            stringsAsFactors = FALSE
                        )
                        
                        # 各行を処理
                        for (i in 1:nrow(pv_raw_comparisons)) {
                            comp_str <- as.character(pv_raw_comparisons[i, 1])
                            
                            # *マークがあるかどうかをチェック
                            is_significant <- grepl("\\*$", comp_str)
                            
                            # *マークを除去して数値を抽出
                            clean_str <- sub("\\*$", "", comp_str)
                            
                            # 数値を抽出
                            main_val <- as.numeric(sub("^([0-9.-]+)\\(.*", "\\1", clean_str))
                            lower_val <- as.numeric(sub(".*\\(([0-9.-]+),.*", "\\1", clean_str))
                            upper_val <- as.numeric(sub(".*,([0-9.-]+)\\).*", "\\1", clean_str))
                            
                            formatted_comparisons$Diff[i] <- main_val
                            formatted_comparisons$CI_lower[i] <- lower_val
                            formatted_comparisons$CI_upper[i] <- upper_val
                            
                            # 統計的有意性を示すマークを追加
                            if (is_significant) {
                                formatted_comparisons$Significant[i] <- "*"
                            }
                        }
                        
                        # 数値を丸める - NAをスキップ
                        formatted_comparisons$Diff <- round(formatted_comparisons$Diff, 4)
                        formatted_comparisons$CI_lower <- round(formatted_comparisons$CI_lower, 4)
                        formatted_comparisons$CI_upper <- round(formatted_comparisons$CI_upper, 4)
                        
                        # Return as a list
                        return(list(
                            da_ci = round(da_ci, 4),
                            pv_comparisons = formatted_comparisons
                        ))
                    } else {
                        # Fallback to simple dominance analysis if bootstrap fails
                        da <- relaimpo::calc.relimp(lm.out)
                        da_res <- as.data.frame(round(da@lmg, 4))
                        colnames(da_res) <- "Weight"
                        
                        # Add placeholder CI columns
                        da_res$CI_lower <- NA
                        da_res$CI_upper <- NA
                        
                        return(list(
                            da_ci = da_res,
                            pv_comparisons = data.frame(
                                Comparison = "Bootstrap failed", 
                                Diff = NA_real_, 
                                CI_lower = NA_real_, 
                                CI_upper = NA_real_, 
                                Significant = ""
                            )
                        ))
                    }
                })
                
                output$dominance.out <- renderPrint({
                    dominance()
                    dom_results <- dominance()

                    # Print weights with CI
                    cat("Dominance Analysis Results with 95% CI:\n\n")
                    print(dom_results$da_ci)
                    
                    cat("\n\n")
                    cat("Predictor Variable Comparisons:\n")
                    print(dom_results$pv_comparisons)
                    cat("\n")
                    cat("Positive values indicate greater predictive importance of the first variable;\n")
                    cat("negative values indicate greater importance of the second.\n")
                    cat("* indicates a statistically significant difference\n")
                    cat("(when the confidence interval does not include zero).\n\n")

                })
                
                
            # RWA
                rwacalc <- reactive({
                    dat <- read.csv(text=input$text, sep="\t")
                    thedata<-dat
                    Labels<-names(thedata)[2:length(thedata)]
                    multRegress<-function(mydata){
                      numVar<<-NCOL(mydata)
                      Variables<<- names(mydata)[2:numVar]
                      mydata<-cor(mydata, use="complete.obs")
                      RXX<-mydata[2:numVar,2:numVar]
                      RXY<-mydata[2:numVar,1]
                      RXX.eigen<-eigen(RXX)
                      D<-diag(RXX.eigen$val)
                      delta<-sqrt(D)
                      lambda<-RXX.eigen$vec%*%delta%*%t(RXX.eigen$vec)
                      lambdasq<-lambda^2
                      beta<-solve(lambda)%*%RXY
                      rsquare<<-sum(beta^2)
                      RawWgt<-lambdasq%*%beta^2
                      import<-(RawWgt/rsquare)*100
                      result<<-data.frame(Variables, Raw.RelWeight=RawWgt, Rescaled.RelWeight=import)
                    }
                    multBootstrap<-function(mydata, indices){
                      mydata<-mydata[indices,]
                      multWeights<-multRegress(mydata)
                      return(multWeights$Raw.RelWeight)
                    }
                    multBootrand<-function(mydata, indices){
                      mydata<-mydata[indices,]
                      multRWeights<-multRegress(mydata)
                      multReps<-multRWeights$Raw.RelWeight
                      randWeight<-multReps[length(multReps)]
                      randStat<-multReps[-(length(multReps))]-randWeight
                      return(randStat)
                    }
                    mybootci<-function(x){
                      boot.ci(multBoot,conf=0.95, type="bca", index=x)
                    }
                    runBoot<-function(num){
                      INDEX<-1:num
                      test<-lapply(INDEX, FUN=mybootci)
                      test2<-t(sapply(test,'[[',i=4)) #extracts confidence interval
                      CIresult<<-data.frame(Variables, CI.Lower.Bound=test2[,4],CI.Upper.Bound=test2[,5])
                    }
                    myRbootci<-function(x){
                      boot.ci(multRBoot,conf=0.95,type="bca",index=x)
                    }
                    runRBoot<-function(num){
                      INDEX<-1:num
                      test<-lapply(INDEX,FUN=myRbootci)
                      test2<-t(sapply(test,'[[',i=4))
                      CIresult<<-data.frame(Labels,CI.Lower.Bound=test2[,4],CI.Upper.Bound=test2[,5])
                    }
                    myCbootci<-function(x){
                      boot.ci(multC2Boot,conf=0.95,type="bca",index=x)
                    }
                    runCBoot<-function(num){
                      INDEX<-1:num
                      test<-lapply(INDEX,FUN=myCbootci)
                      test2<-t(sapply(test,'[[',i=4))
                      CIresult<<-data.frame(Labels2,CI.Lower.Bound=test2[,4],CI.Upper.Bound=test2[,5])
                    }
                    myGbootci<-function(x){
                      boot.ci(groupBoot,conf=0.95,type="bca",index=x)
                    }
                    runGBoot<-function(num){
                      INDEX<-1:num
                      test<-lapply(INDEX,FUN=myGbootci)
                      test2<-t(sapply(test,'[[',i=4))
                      CIresult<<-data.frame(Labels,CI.Lower.Bound=test2[,4],CI.Upper.Bound=test2[,5])
                    }
                    
                    multRegress(thedata)
                    RW.Results<-result
                    
                    RSQ.Results<-rsquare
                    
                    #Bootstrapped Confidence interval around the individual relative weights
                    multBoot<-boot(thedata, multBootstrap, 10000)
                    multci<-boot.ci(multBoot,conf=0.95, type="bca")
                    runBoot(length(thedata[,2:numVar]))
                    CI.Results<-CIresult
                    
                    #Bootstrapped Confidence interval tests of Significance
                    randVar<-rnorm(length(thedata[,1]),0,1)
                    randData<-cbind(thedata,randVar)
                    multRBoot<-boot(randData,multBootrand, 10000)
                    multRci<-boot.ci(multRBoot,conf=0.95, type="bca")
                    runRBoot(length(randData[,2:(numVar-1)]))
                    CI.Significance<-CIresult
                    
                    list(RSQ.Results=RSQ.Results, RW.Results = RW.Results, CI.Results = CI.Results, CI.Significance=CI.Significance) # 他で使うために定義
                    
                })


                rwa <- reactive({
                    cat("R-squared For the Model:", "\n")
                    RSQ.Results <- rwacalc()$RSQ.Results
                    print(RSQ.Results)
                    
                    cat("\n", "The Raw and Rescaled Weights:", "\n")
                    RW.Results <- rwacalc()$RW.Results
                    print(RW.Results)
                    
                    cat("\n", "BCa Confidence Intervals around the raw weights:", "\n")
                    CI.Results <- rwacalc()$CI.Results
                    print(CI.Results)
                    
                    cat("\n", "BCa Confidence Interval Tests of significance:", "\n")
                    cat("  (If 0 is not included, weight is significant at p < .05)", "\n")
                    CI.Significance <- rwacalc()$CI.Significance
                    print(CI.Significance)
                    
                })
                
                output$rwa.out <- renderPrint({
                    rwa()
                })
            
            
            # 95% CI Plot
                confPlot <- function(){
                    dat <- read.csv(text=input$text, sep="\t")
                    
                    dom_results <- dominance()
                    da_ci <- dom_results$da_ci
                    
                    Variables <- rownames(da_ci)
                    
                    plotdat <- data.frame(
                        Variables = Variables,
                        Weight = da_ci$Weight,
                        CI_lower = da_ci$CI_lower,
                        CI_upper = da_ci$CI_upper
                    )
                    
                    dat_model <- read.csv(text=input$text, sep="\t")
                    colnames(dat_model) <- c("Criterion", c(colnames(dat_model)[2:ncol(dat_model)]))
                    result <- lm(Criterion ~., dat_model)
                    model_summary <- summary(result)
                    RSQ.Results <- model_summary$r.squared
                    
                    ggplot(plotdat, aes(reorder(Variables, Weight), Weight)) +
                    geom_bar(stat="identity", fill="lightblue", colour = "blue") +
                    coord_flip() +
                    geom_text(aes(label = round(Weight, 3)), hjust = -0.1, vjust = -0.3) +
                    geom_errorbar(aes(ymin=CI_lower, ymax=CI_upper), width=.2, position=position_dodge(.9)) +
                    labs(title = "Variable Importance (Dominance Analysis)",
                        x = "Predictor Variables",
                        y = "Dominance Weights",
                        caption = paste0("Note: R-squared = ",
                                            round(RSQ.Results, 3),
                                            ". ",
                                            "Dominance weights sum to R-squared. n = ",
                                            nrow(dat), ". "
                        )) +
                    theme_classic()
                }
                    
                output$confPlot <- renderPlot({
                    print(confPlot())
                })
                
                
            # Feature Selection: Variable Importance (Random Forest)
                
                borutaCalc <- function(){
                    dat <- read.csv(text=input$text, sep="\t")
                    dat <- na.omit(dat) # listwise deletion
                    colnames(dat) <- c("Criterion", c(colnames(dat)[2:ncol(dat)]))
                    set.seed(456)
                    boruta <- Boruta(Criterion~., maxRuns = 200, data = dat, doTrace = 2)
                    list(boruta=boruta)
                    }
                
                randomForestPlot <- function(){
                    boruta <- borutaCalc()$boruta
                    plot(boruta)
                    }
                        
                    output$randomForestPlot <- renderPlot({
                        print(randomForestPlot())
                    })

                    
                borutaInfo <- function(){
                    boruta <- borutaCalc()$boruta
                    print(list(boruta, attStats(boruta)))
                    }
                
                    output$boruta.out <- renderPrint({
                        borutaInfo()
                    })

            
            # Info
                info1 <- reactive({
                    info1 <- paste("This analysis was conducted with ", strsplit(R.version$version.string, " \\(")[[1]][1], ".", sep = "")
                    info2 <- paste("It was executed on ", date(), ".", sep = "")
                    cat(sprintf(info1), "\n")
                    cat(sprintf(info2), "\n")
                })
                
                output$info1.out <- renderPrint({
                    info1()
                })

            }
        
        })
        
        


#-----------------------------------------------------------------
# Relative Weight Analysis Using Correlation Matrix
#-----------------------------------------------------------------

    # Check the Correlation Matrix
        inputData <- reactive({
            dat <- read.csv(text=input$text2, sep="", na.strings=c("","NA","."))
            print(dat)

        })

        output$inputData.out <- renderPrint({
            inputData()
        })


    # Multiple Regression analysis
        mra2.result <- reactive({
            n <- input$n
            dat <- read.csv(text=input$text2, sep="", na.strings=c("","NA","."))
            dat <- as.data.frame(dat)
            # source("http://mizumot.com/code/multreg.second2.R", encoding="UTF-8")
            multreg.second2 <-
            function (dat, n, m = NULL, sd = NULL, sig.level = 0.05,
                      digits = 3)
            {
              depname <- colnames(dat[1])
              indname <- colnames(dat[-1])
              x <- c(depname, indname)
              corr <- dat
              sd <- sd[x]
              m <- m[x]
              corr.lower <- tanh(atanh(corr) + qnorm(sig.level/2, lower.tail = TRUE)/sqrt(n -
                                                                                            3))
              corr.upper <- tanh(atanh(corr) + qnorm(sig.level/2, lower.tail = FALSE)/sqrt(n -
                                                                                             3))
              corr.conf <- corr.lower
              corr.conf[upper.tri(corr.conf)] <- corr.upper[upper.tri(corr.upper)]
              p <- ncol(corr)
              K <- solve(corr)
              a <- 1/sqrt(diag(K))
              K <- K * outer(a, a)
              partial.corr <- 2 * diag(p) - K
              dimnames(partial.corr) <- dimnames(partial.corr)
              cor.mat <- corr
              cor.mat[upper.tri(cor.mat)] <- partial.corr[upper.tri(partial.corr)]
              num <- which(depname == colnames(corr))
              rxy <- corr[, num][-num]
              Rind <- corr[-num, -num]
              bs <- solve(Rind) %*% rxy
              R.sq <- as.vector(t(as.matrix(rxy)) %*% solve(Rind) %*% as.matrix(rxy))
              k <- nrow(bs)
              bs.sem <- numeric(k)
              for (i in 1:k) {
                xname <- rownames(bs)[i]
                xdel <- setdiff(rownames(bs), xname)
                num <- which(xname == colnames(Rind))
                rxy <- Rind[, num][-num]
                Rind2 <- Rind[-num, -num]
                Ri <- as.vector(t(as.matrix(rxy)) %*% solve(Rind2) %*%
                                  as.matrix(rxy))
                bs.sem[i] <- sqrt((1 - R.sq)/(n - k - 1)) * sqrt(1/(1 -
                                                                      Ri))
              }
              standardized.estimates <- data.frame(matrix(NA, nrow = k,
                                                          ncol = 4))
              rownames(standardized.estimates) <- rownames(bs)
              colnames(standardized.estimates) <- c("estimates", "lower",
                                                    "upper", "std")
              standardized.estimates[, 1] <- as.numeric(bs)
              standardized.estimates[, 4] <- bs.sem
              standardized.estimates[, 2] <- standardized.estimates$estimates +
                qnorm(sig.level/2) * standardized.estimates$std
              standardized.estimates[, 3] <- standardized.estimates$estimates +
                qnorm(sig.level/2, lower.tail = FALSE) * standardized.estimates$std
              if (!is.null(sd)) {
                b <- sd[depname]/sd[rownames(bs)] * standardized.estimates$estimates
                b.sem <- sd[depname]/sd[rownames(bs)] * standardized.estimates$std
                b.lower <- b + qt(sig.level/2, n - k - 1) * b.sem
                b.upper <- b + qt(sig.level/2, n - k - 1, lower.tail = FALSE) *
                  b.sem
                Intercept <- m[depname] - sum(b * m[names(b)])
                raw.estimates <- data.frame(matrix(NA, nrow = k + 1,
                                                   ncol = 4))
                rownames(raw.estimates) <- c("Intercept", names(b))
                colnames(raw.estimates) <- c("estimates", "lower", "upper",
                                             "std")
                raw.estimates[, 1] <- c(Intercept, b)
                raw.estimates[, 4] <- c(NA, bs.sem)
                raw.estimates[, 2] <- c(NA, b.lower)
                raw.estimates[, 3] <- c(NA, b.upper)
              }
              u <- length(indname)
              nu <- n - u - 1
              f.sq <- R.sq/(1 - R.sq)
              f.value <- f.sq * (nu/u)
              delta.lower <- try(FNONCT(f.value, u, nu, prob = 1 - sig.level/2))
              delta.upper <- FNONCT(f.value, u, nu, prob = sig.level/2)
              if (is.character(delta.lower)) {
                delta.lower <- 0
              }
              R.sq.lower <- delta.lower/(delta.lower + u + nu + 1)
              R.sq.upper <- delta.upper/(delta.upper + u + nu + 1)
              omnibus.es <- c(Rsq = R.sq, lower = R.sq.lower, upper = R.sq.upper)
              criterion.power <- c(small = power.multi(n = n, n.ind = u,
                                                       delta = 0.02, sig.level = sig.level), medium = power.multi(n = n,
                                                                                                                  n.ind = u, delta = 0.15, sig.level = sig.level), large = power.multi(n = n,
                                                                                                                                                                                       n.ind = u, delta = 0.35, sig.level = sig.level))
              output <- list(corr.partial.corr = cor.mat, corr.confidence = corr.conf,
                             omnibus.es = omnibus.es, standardized.estimates = standardized.estimates,
                             power = criterion.power)
              if (!is.null(sd)) {
                output <- list(corr.partial.corr = cor.mat, corr.confidence = corr.conf,
                               omnibus.es = omnibus.es, raw.estimates = raw.estimates,
                               standardized.estimates = standardized.estimates,
                               power = criterion.power)
              }
              output <- sapply(output, round, digits)
              return(output)
            }

            multreg.second2(dat, n=n)

        })

        output$mra2.result.out <- renderPrint({
            mra2.result()
        })
        
        
    # Dominance Analysis
        dominance.cor <- reactive({
            dat <- read.csv(text=input$text2, sep="", na.strings=c("","NA","."))
            n <- input$n
            #dat2 <- dat
            #dat <- as.matrix(dat)
            #as.formula(paste(colnames(dat2[1]), " ~ ", paste(colnames(dat2[-1]), collapse="+")))
            #lm.cov <- lmWithCov(as.formula(paste(colnames(dat2[1]), " ~ ", paste(colnames(dat2[-1]), collapse="+"))), dat)
            #da <- dominanceAnalysis(lm.cov)
            #res <- as.data.frame(round(da$contribution.average[[1]], 4))
            #colnames(res) <- "Weight"
            #print(res)
            
            # Generating simulation dataset because dominanceanalysis package cannot be used with older version of R
            mu <- rep(c(0), times = length(colnames(dat)))
            simdat <- mvrnorm(n=n, mu=mu, Sigma=dat, empirical=TRUE)
            simdat <- as.data.frame(simdat)
            colnames(simdat) <- c("Criterion", c(colnames(simdat)[2:ncol(simdat)]))
            res <- lm(Criterion ~., simdat)
            da <- calc.relimp(res)
            da_res <- as.data.frame(round(da@lmg, 4))
            colnames(da_res) <- "Weight"
            da_res
            

        })
        
        output$dominance.cor.out <- renderPrint({
            dominance.cor()
        })
    
    
    # Relative Weight Analysis
        rwacalc.cor <- reactive({
            dat <- read.csv(text=input$text2, sep="", na.strings=c("","NA","."))
            thedata<- as.data.frame(dat)
            Labels <- names(thedata)[2:length(thedata)]
            multRegress <- function(mydata){
              numVar<<-ncol(mydata)
              Variables<<- names(mydata)[2:numVar]
              RXX<-mydata[2:numVar,2:numVar]
              RXY<-mydata[2:numVar,1]
              RXX.eigen<-eigen(RXX)
              D<-diag(RXX.eigen$val)
              delta<-sqrt(D)
              lambda<-RXX.eigen$vec%*%delta%*%t(RXX.eigen$vec)
              lambdasq<-lambda^2
              beta<-solve(lambda)%*%RXY
              rsquare<<-sum(beta^2)
              RawWgt<-lambdasq%*%beta^2
              import<-(RawWgt/rsquare)*100
              result<<-data.frame(Variables, Raw.RelWeight=RawWgt, Rescaled.RelWeight=import)
            }
            multRegress(thedata)
            RW.Results <- result
            RSQ.Results <- rsquare
            
            list(RSQ.Results2=RSQ.Results, RW.Results2 = RW.Results) # 他で使うために定義
        })
            
            
        wra.cor <- reactive({
            cat("R-squared For the Model:", "\n")
            RSQ.Results2 <- rwacalc.cor()$RSQ.Results2
            print(RSQ.Results2)
            
            cat("\n", "The Raw and Rescaled Weights:", "\n")
            RW.Results2 <- rwacalc.cor()$RW.Results2
            print(RW.Results2)
            
        })

        output$wra.cor.out <- renderPrint({
            wra.cor()
        })

        
    # Importance Plot
        imPlot <- function(){
            da_res <- dominance.cor()
            
            Variables <- rownames(da_res)
            
            plotdat <- data.frame(
                Variables = Variables,
                Weight = da_res$Weight
            )
            
            dat <- read.csv(text=input$text2, sep="", na.strings=c("","NA","."))
            RSQ.Results <- dat[1, 1]
            
            ggplot(plotdat, aes(reorder(Variables, Weight), Weight)) +
            geom_bar(stat="identity", fill="lightblue", colour = "blue") +
            coord_flip() +
            scale_y_continuous(limits = c(0, max(plotdat$Weight)*1.1)) +
            geom_text(aes(label = Weight), hjust = -0.1, vjust = -0.3) +
            labs(title = "Variable Importance (Dominance Analysis)",
                x = "Predictor Variables",
                y = "Dominance Weights",
                caption = paste0("Note: R-squared = ",
                                    round(RSQ.Results, 3),
                                    ". ",
                                    "Dominance weights sum to R-squared. n = ",
                                    input$n, ". "
                )) +
            theme_classic()
        }
                
        output$imPlot <- renderPlot({
            print(imPlot())
        })


    # Info
        info2 <- reactive({
            info1 <- paste("This analysis was conducted with ", strsplit(R.version$version.string, " \\(")[[1]][1], ".", sep = "")
            info2 <- paste("It was executed on ", date(), ".", sep = "")
            cat(sprintf(info1), "\n")
            cat(sprintf(info2), "\n")
        })

        output$info2.out <- renderPrint({
            info2()
        })

})
library(shiny)
library(shinyAce)




shinyUI(bootstrapPage(


    headerPanel("Dominance Analysis"),


########## loading message #######################################

    tags$head(
        tags$style(type="text/css", "
        #loadmessage {
            position: fixed;
            top: 0px;
            left: 0px;
            width: 100%;
            padding: 10px 0px 10px 0px;
            text-align: center;
            font-weight: bold;
            font-size: 100%;
            color: #000000;
            background-color: #CCFF66;
            z-index: 105;
        }
        "),
        
        tags$script("
            $(document).on('shiny:busy', function() {
                $('#loadmessage').show();
            });
            
            $(document).on('shiny:idle', function() {
                $('#loadmessage').hide();
            });
            
            $(document).ready(function() {
                $('#loadmessage').hide();
            });
        ")
    ),

    div(id="loadmessage", "Analysis in progress. Please wait..."),

###################################################################


    mainPanel(
        tabsetPanel(


# Raw Data

        tabPanel("Raw Data",

                h2("Using Raw Data"),

                p('Note: Input values must be separated by tabs. Copy and paste from Excel/Numbers.'),

                p(HTML("<b><div style='background-color:#FADDF2;border:1px solid black;'>
                    Wait patiently. It will take a minute or so until you get to see the result.<br>
                    Please make sure that your data includes the header (variable names) in the first row.<br>
                    The criterion (dependent) variable should be placed in the first column.</b></div>")),

                aceEditor("text", value="Proficiency\tVocabulary\tGrammar\tPronunciation\n275\t21\t35\t53\n175\t21\t29\t0\n535\t37\t56\t60\n10\t3\t4\t55\n385\t14\t36\t62\n655\t44\t47\t62\n335\t9\t2\t77\n630\t3\t46\t81\n605\t34\t44\t67\n530\t18\t35\t82\n645\t42\t45\t53\n440\t25\t35\t59\n750\t24\t57\t79\n610\t28\t52\t73\n830\t55\t44\t66\n600\t22\t50\t44\n760\t63\t30\t73\n300\t14\t33\t77\n340\t22\t0\t59\n585\t25\t73\t52\n475\t34\t32\t74\n215\t10\t32\t73\n150\t1\t31\t71\n655\t39\t17\t56\n710\t10\t73\t68\n360\t34\t12\t62\n350\t9\t37\t77\n540\t27\t55\t82\n710\t36\t52\t51\n310\t10\t19\t87\n655\t41\t53\t74\n500\t32\t59\t55\n115\t19\t29\t63\n615\t32\t58\t88\n400\t11\t16\t57\n440\t27\t28\t46\n555\t24\t30\t71\n625\t32\t51\t80\n345\t22\t31\t45\n670\t44\t7\t63\n565\t47\t32\t62\n595\t35\t43\t63\n585\t67\t61\t75\n415\t5\t40\t58\n620\t27\t80\t66\n175\t6\t64\t23\n790\t53\t64\t66\n450\t38\t18\t61\n750\t39\t41\t70\n665\t48\t62\t80\n700\t11\t82\t91\n585\t34\t53\t81\n380\t19\t48\t76\n680\t61\t50\t86\n315\t41\t45\t68\n330\t15\t32\t67\n645\t42\t56\t66\n180\t11\t4\t34\n835\t79\t64\t45\n550\t40\t45\t70\n340\t23\t40\t62\n805\t45\t85\t87\n615\t42\t92\t56\n815\t62\t44\t71\n610\t10\t59\t53\n720\t58\t55\t50\n525\t45\t43\t55\n575\t48\t49\t74\n570\t6\t43\t62\n420\t22\t22\t73\n395\t24\t44\t73\n280\t35\t33\t86\n945\t66\t56\t68\n880\t60\t73\t72\n290\t8\t13\t48\n420\t12\t2\t66\n950\t50\t59\t67\n435\t27\t46\t79\n605\t18\t60\t78\n170\t1\t47\t12\n940\t60\t70\t63\n260\t11\t35\t73\n335\t7\t34\t78\n270\t9\t60\t45\n670\t45\t39\t70\n860\t59\t100\t72\n835\t68\t36\t73\n850\t61\t58\t68\n405\t9\t55\t53\n500\t25\t34\t65\n490\t20\t32\t49\n515\t28\t72\t64\n950\t89\t47\t63\n425\t4\t37\t73\n430\t12\t28\t73\n695\t36\t55\t64\n150\t11\t28\t80\n355\t12\t41\t81\n785\t39\t62\t74\n250\t31\t26\t61\n560\t12\t45\t75\n425\t6\t14\t60\n455\t39\t20\t65\n815\t59\t62\t92\n715\t51\t85\t74\n270\t17\t44\t65\n395\t8\t30\t78\n210\t8\t33\t20\n705\t30\t50\t89\n250\t23\t1\t69\n665\t45\t54\t69\n440\t26\t29\t70\n805\t55\t74\t97\n180\t9\t21\t82\n795\t77\t53\t66\n605\t29\t47\t76\n280\t10\t19\t69\n605\t8\t57\t75\n250\t5\t11\t33\n525\t33\t63\t58\n570\t43\t29\t59\n535\t2\t64\t66\n425\t39\t12\t71\n330\t18\t82\t70\n555\t10\t43\t72\n550\t34\t29\t71\n870\t51\t59\t62\n550\t18\t17\t45\n840\t49\t66\t84\n665\t67\t41\t54\n210\t18\t31\t84\n405\t13\t41\t91\n585\t22\t31\t92\n520\t17\t67\t75\n585\t35\t55\t57\n390\t21\t31\t59\n830\t69\t37\t88\n400\t10\t23\t72\n240\t11\t31\t38\n275\t12\t29\t64\n340\t18\t51\t61\n105\t21\t15\t34\n615\t8\t39\t65\n175\t14\t33\t38\n425\t25\t56\t72\n275\t10\t35\t60\n475\t27\t28\t48\n765\t32\t53\t80\n810\t51\t84\t50\n825\t51\t64\t73\n795\t35\t54\t68\n585\t37\t56\t64\n610\t26\t47\t81\n595\t49\t28\t72\n490\t5\t75\t65\n880\t78\t49\t97\n605\t4\t59\t57\n765\t35\t31\t72\n650\t52\t35\t57\n430\t23\t50\t78\n625\t26\t52\t74\n700\t13\t61\t77\n585\t25\t59\t70\n895\t100\t70\t100\n190\t1\t57\t68\n460\t25\t34\t70\n680\t68\t17\t75\n915\t69\t69\t66\n120\t1\t20\t84\n150\t29\t30\t57\n265\t12\t37\t55\n275\t0\t3\t62\n935\t67\t58\t80\n700\t25\t51\t69\n750\t48\t52\t70\n430\t45\t19\t61\n240\t8\t6\t74\n660\t35\t66\t69\n405\t10\t40\t79\n950\t57\t65\t82\n660\t18\t37\t74\n425\t36\t15\t64\n615\t32\t81\t69\n355\t23\t24\t69\n550\t48\t21\t77\n635\t32\t73\t78\n190\t28\t29\t16\n170\t32\t16\t54\n125\t6\t41\t78\n410\t6\t31\t62\n715\t68\t44\t85\n990\t88\t55\t49\n785\t58\t40\t75\n320\t12\t51\t65\n890\t78\t53\t47\n565\t38\t38\t66\n525\t11\t73\t55\n730\t11\t67\t73\n750\t53\t49\t74\n565\t69\t51\t79", mode="r", theme="cobalt"),

                br(),

                tags$head(
                    tags$style(HTML('#do{background-color:Gainsboro}'))
                  ),
                actionButton("do", "Run Analysis"),

                h3("Basic Statistics"),
                verbatimTextOutput("textarea.out"),

                br(),

                h3("Correlation"),
                plotOutput("corPlot"),

                br(),

                h3("Regression Analysis"),
                verbatimTextOutput("reg.out"),

                br(),

                h3("Dominance Analysis"),
                verbatimTextOutput("dominance.out"),
                
                br(),

                h3("Dominance Weight Plot with 95% CI"),
                plotOutput("confPlot"),

                br(),

                h3("Just for Reference: Relative Weight Analysis"),
                p(HTML("Note: Relative weight analysis is criticized and not recommended due to mathematical concerns (see <a href='https://doi.org/10.1080/00273171.2014.905766' target='_blank'>this paper</a>).")),

                verbatimTextOutput("rwa.out"),
                


                br(),

                h3("Feature Selection: Variable Importance (Random Forest)"),
                plotOutput("randomForestPlot"),
                verbatimTextOutput("boruta.out"),

                br(),
                br(),

                strong('R session info'),
                verbatimTextOutput("info1.out")

        ),




# Correlation Matrix

        tabPanel("Correlation Matrix",

                h2("Using Correlation Matrix"),

                p('Note: Input values must be separated by tabs. Copy and paste from Excel/Numbers.'),

                p(HTML("<b><div style='background-color:#FADDF2;border:1px solid black;'>Please make sure that your data includes the header (variable names) in the first row.<br>
                    The criterion (dependent) variable should be placed in the first column.</b></div>")),

                aceEditor("text2",
                          value="\tScore\tWordcount\tCLI\tCommas\tStopwords\tLinking\tWordsSentence\nScore\t1\t0.67\t0.41\t0.02\t-0.35\t0.35\t0.45\nWordcount\t0.67\t1\t0.18\t0.05\t-0.28\t0.24\t0.31\nCLI\t0.41\t0.18\t1\t0.21\t-0.32\t0.34\t0.43\nCommas\t0.02\t0.05\t0.21\t1\t-0.03\t0.22\t0.47\nStopwords\t-0.35\t-0.28\t-0.32\t-0.03\t1\t-0.13\t-0.22\nLinking\t0.35\t0.24\t0.34\t0.22\t-0.13\t1\t0.33\nWordsSentence\t0.45\t0.31\t0.43\t0.47\t-0.22\t0.33\t1", mode="r", theme="cobalt"),

                br(),

                numericInput("n", " Sample Size", 200),

                h3("Check the Input Correlation Matrix"),
                verbatimTextOutput("inputData.out"),

                br(),

                h3("Regression Analysis"),
                verbatimTextOutput("mra2.result.out"),

                br(),

                h3("Dominance Analysis"),
                p('Note: The 95% confidence interval (CI) cannot be calculated from the correlation matrix.'),

                verbatimTextOutput("dominance.cor.out"),

                br(),

                h3("Dominance Weight Plot"),
                p('Note: The 95% confidence interval (CI) cannot be calculated from the correlation matrix.'),
                plotOutput("imPlot"),

                br(),

                h3("Just for Reference: Relative Weight Analysis"),
                p(HTML("Note: Relative weight analysis is criticized and not recommended due to mathematical concerns (see <a href='https://doi.org/10.1080/00273171.2014.905766' target='_blank'>this paper</a>).")),
                verbatimTextOutput("wra.cor.out"),


                br(),
                br(),

                strong('R session info'),
                verbatimTextOutput("info2.out")

        ),



# About

        tabPanel("About",

            strong('Note'),
            p('This web application is developed with',
            a("Shiny.", href="http://www.rstudio.com/shiny/", target="_blank"),
            ''),

            br(),

            strong('List of Packages Used'), br(),
            code('library(shiny)'),br(),
            code('library(shinyAce)'),br(),
            code('library(psych)'),br(),
            code('library(car)'),br(),
            code('library(rpsychi)'),br(),
            code('library(boot)'),br(),
            code('library(plyr)'),br(),
            code('library(ggplot2)'),br(),
            code('library(Boruta)'),br(),
            code('library(relaimpo)'),br(),
            code('library(MASS)'),br(),
            code('library(yhat)'),br(),
            code('library(parallel)'),br(),
            br(),

            strong('Code'),
            p('Source code for this application is based on',
            a('"RWA Web" (by Dr. Scott Tonidandel)', href='https://relativeimportance.davidson.edu/', target="_blank")),
            p('and', a('"The handbook of Research in Foreign Language Learning and Teaching" (Takeuchi & Mizumoto, 2012).', href='https://mizumot.com/handbook/', target="_blank")),

            p('The code for this web application is available at',
            a('GitHub.', href='https://github.com/mizumot/relimp', target="_blank")),

            br(),

            strong('Citation in Publications'),
                p('Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. https://langtest.jp'),

            br(),

            strong('Article'),
                p('Mizumoto, A., & Plonsky, L. (2016).', a("R as a lingua franca: Advantages of using R for quantitative research in applied linguistics.", href='https://doi.org/10.1093/applin/amv025', target="_blank"), em('Applied Linguistics,'), '37(2), 284–291. https://doi.org/10.1093/applin/amv025'),
                p('Mizumoto, A. (2023).', a("Calculating the relative importance of multiple regression predictor variables using dominance analysis and random forests.", href='https://doi.org/10.1111/lang.12518', target="_blank"), em('Language Learning,'), '73(1), 161–196. https://doi.org/10.1111/lang.12518'),
            br(),

            strong('Recommended'),
            p('To learn more about R, I suggest this excellent and free e-book (pdf),',
            a("A Guide to Doing Statistics in Second Language Research Using R,", href="https://www.tufs.ac.jp/ts/personal/corpuskun/pdf/2015/__A_Guide_to_Doing_Statistics_in_SLA_Using_R.pdf", target="_blank"),
            'written by Dr. Jenifer Larson-Hall.'),

            br(),

            strong('Developer'),
            p(a("Atsushi MIZUMOTO,", href="https://mizumot.com", target="_blank"),' Ph.D.',br(),
            'Professor of Applied Linguistics',br(),
            'Faculty of Foreign Language Studies /',br(),
            'Graduate School of Foreign Language Education and Research,',br(),
            'Kansai University, Osaka, Japan'),

            br(),

            a(img(src="https://creativecommons.jp/wp-content/uploads/2015/04/by-nc-sa.png?w=75"), target="_blank", href="https://creativecommons.org/licenses/by-nc-sa/4.0/"),

            p(br())

        )
    )
    )
))
Code license: GPL-3