Dominance AnalysisUsing Raw DataNote: Input values must be separated by tabs. Copy and paste from Excel/Numbers.
Basic StatisticsCorrelationRegression AnalysisDominance AnalysisDominance Weight Plot with 95% CIJust for Reference: Relative Weight AnalysisNote: Relative weight analysis is criticized and not recommended due to mathematical concerns (see this paper). Feature Selection: Variable Importance (Random Forest)R session info Using Correlation MatrixNote: Input values must be separated by tabs. Copy and paste from Excel/Numbers.
Check the Input Correlation MatrixRegression AnalysisDominance AnalysisNote: The 95% confidence interval (CI) cannot be calculated from the correlation matrix. Dominance Weight PlotNote: The 95% confidence interval (CI) cannot be calculated from the correlation matrix. Just for Reference: Relative Weight AnalysisNote: Relative weight analysis is criticized and not recommended due to mathematical concerns (see this paper). R session info
Note
This web application is developed with Shiny. List of Packages Used library(shiny)
library(shinyAce)
library(psych)
library(car)
library(rpsychi)
library(boot)
library(plyr)
library(ggplot2)
library(Boruta)
library(relaimpo)
library(MASS)
library(yhat)
Code Source code for this application is based on "RWA Web" (by Dr. Scott Tonidandel) and "The handbook of Research in Foreign Language Learning and Teaching" (Takeuchi & Mizumoto, 2012). The code for this web application is available at GitHub. Citation in Publications Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. https://langtest.jp Article Mizumoto, A., & Plonsky, L. (2016). R as a lingua franca: Advantages of using R for quantitative research in applied linguistics. Applied Linguistics, 37(2), 284–291. https://doi.org/10.1093/applin/amv025 Mizumoto, A. (2023). Calculating the relative importance of multiple regression predictor variables using dominance analysis and random forests. Language Learning, 73(1), 161–196. https://doi.org/10.1111/lang.12518 Recommended To learn more about R, I suggest this excellent and free e-book (pdf), A Guide to Doing Statistics in Second Language Research Using R, written by Dr. Jenifer Larson-Hall. Developer
Atsushi MIZUMOTO,
Ph.D.
![]()
|
library(shiny)
library(shinyAce)
library(psych)
library(car)
library(rpsychi)
library(boot)
library(plyr)
library(ggplot2)
library(Boruta)
library(relaimpo)
library(MASS)
library(yhat)
shinyServer(function(input, output, session) {
observe({
if (input$do > 0) {
#-----------------------------------------------------------------
# Dominance Analysis (Relative Importance Analysis) Using Raw Data
#-----------------------------------------------------------------
# Basic statistics
bs <- reactive({
x <- read.csv(text = input$text, sep = "\t")
describe(x)[2:13]
})
output$textarea.out <- renderPrint({
bs()
})
# Correlation
makecorPlot <- function() {
x <- read.csv(text = input$text, sep = "\t")
pairs.panels(x)
}
output$corPlot <- renderPlot({
print(makecorPlot())
})
# Regression
reg <- reactive({
dat <- read.csv(text = input$text, sep = "\t")
colnames(dat) <- c("Criterion", c(colnames(dat)[2:ncol(dat)]))
result <- lm(Criterion ~ ., dat)
reslt <- summary(result)
print(reslt)
z <- scale(dat) # standardize the data
z <- data.frame(z)
z.res <- summary(lm(Criterion ~ ., z))
stdb <- data.frame(round((z.res$coefficients[, 1][-1]), 3))
colnames(stdb)[1] <- "Standardized beta"
cat("\n", "---", "\n", "Standardized beta estimates:", "\n")
print(stdb)
if (ncol(dat) >= 3) {
VIF <- vif(result)
Tolerance <- 1 / VIF
vif.res <- round(data.frame(VIF, Tolerance), 3)
cat("\n", "---", "\n", "VIF and tolerance statistic (1/VIF):", "\n")
print(vif.res)
cat(
"\n", "VIF should be smaller than 10 (clozer to 1 better);", "\n",
"tolerance statistic (1/VIF) should be greater than 0.2.", "\n"
)
}
})
output$reg.out <- renderPrint({
reg()
})
# Dominance Analysis
dominance <- reactive({
dat <- read.csv(text = input$text, sep = "\t")
colnames(dat) <- c("Criterion", c(colnames(dat)[2:ncol(dat)]))
lm.out <- lm(Criterion ~ ., dat)
regrOut <- yhat::calc.yhat(lm.out)
# Set seed for reproducibility
set.seed(8888)
boot.out <- NULL
tryCatch(
{
boot.out <- boot::boot(dat, yhat::boot.yhat, 100, # Reduced from 1000 to 100
lmOut = lm.out,
regrout0 = regrOut
)
},
error = function(e) {
# More detailed error message
message("Bootstrap failed with error: ", e$message)
}
)
# Generate bootstrap results
if (!is.null(boot.out)) {
result <- yhat::booteval.yhat(regrOut, bty = "perc", boot.out)
# safely extract GenDom column if it exists
if (is.null(result$combCIpm) || !("GenDom" %in% colnames(result$combCIpm))) {
# Fallback if bootstrap results don't have expected structure
da <- relaimpo::calc.relimp(lm.out)
da_res <- as.data.frame(round(da@lmg, 4))
colnames(da_res) <- "Weight"
da_res$CI_lower <- NA
da_res$CI_upper <- NA
return(list(
da_ci = da_res,
pv_comparisons = data.frame(
Comparison = "Bootstrap failed to produce expected structure",
Diff = NA,
CI_lower = NA,
CI_upper = NA,
Significant = ""
)
))
}
da_weights <- result$combCIpm[, "GenDom", drop = FALSE]
# Create a data frame with separate columns for weight, lower CI, and upper CI
# Parse the string format "0.176(0.097,0.260)"
weights_df <- data.frame(
Variable = rownames(da_weights),
GenDom_string = da_weights[, 1],
stringsAsFactors = FALSE
)
# Extract values with better parsing
parse_value_with_ci <- function(str) {
# Remove any * at the end
str <- sub("\\*$", "", str)
# Extract main value (before parenthesis)
main_val <- as.numeric(sub("^([0-9.-]+)\\(.*", "\\1", str))
# Extract lower CI (between parenthesis and comma)
lower_val <- as.numeric(sub(".*\\(([0-9.-]+),.*", "\\1", str))
# Extract upper CI (between comma and closing parenthesis)
upper_val <- as.numeric(sub(".*,([0-9.-]+)\\).*", "\\1", str))
# Check if original string had * at the end
significant <- grepl("\\*$", weights_df$GenDom_string[1])
return(list(
main = main_val,
lower = lower_val,
upper = upper_val,
significant = significant
))
}
# Apply parsing to each row
parsed_values <- lapply(weights_df$GenDom_string, parse_value_with_ci)
# Extract values
weights_df$Weight <- sapply(parsed_values, function(x) x$main)
weights_df$CI_lower <- sapply(parsed_values, function(x) x$lower)
weights_df$CI_upper <- sapply(parsed_values, function(x) x$upper)
# Create final data frame with weight and CI columns
da_ci <- weights_df[, c("Weight", "CI_lower", "CI_upper")]
rownames(da_ci) <- weights_df$Variable
# Comparisons of PVs - Format as separate columns
if (is.null(result$combCIpmDiff) || !("GenDom" %in% colnames(result$combCIpmDiff))) {
# If comparison data is not available in expected format
return(list(
da_ci = round(da_ci, 4),
pv_comparisons = data.frame(
Comparison = "Comparisons not available in expected format",
Diff = NA,
CI_lower = NA,
CI_upper = NA,
Significant = ""
)
))
}
pv_raw_comparisons <- result$combCIpmDiff[, "GenDom", drop = FALSE]
formatted_comparisons <- data.frame(
Comparison = rownames(pv_raw_comparisons),
Diff = NA_real_,
CI_lower = NA_real_,
CI_upper = NA_real_,
Significant = "",
stringsAsFactors = FALSE
)
for (i in 1:nrow(pv_raw_comparisons)) {
comp_str <- as.character(pv_raw_comparisons[i, 1])
is_significant <- grepl("\\*$", comp_str)
clean_str <- sub("\\*$", "", comp_str)
main_val <- as.numeric(sub("^([0-9.-]+)\\(.*", "\\1", clean_str))
lower_val <- as.numeric(sub(".*\\(([0-9.-]+),.*", "\\1", clean_str))
upper_val <- as.numeric(sub(".*,([0-9.-]+)\\).*", "\\1", clean_str))
formatted_comparisons$Diff[i] <- main_val
formatted_comparisons$CI_lower[i] <- lower_val
formatted_comparisons$CI_upper[i] <- upper_val
if (is_significant) {
formatted_comparisons$Significant[i] <- "*"
}
}
formatted_comparisons$Diff <- round(formatted_comparisons$Diff, 4)
formatted_comparisons$CI_lower <- round(formatted_comparisons$CI_lower, 4)
formatted_comparisons$CI_upper <- round(formatted_comparisons$CI_upper, 4)
# Return as a list
return(list(
da_ci = round(da_ci, 4),
pv_comparisons = formatted_comparisons
))
} else {
# Fallback to simple dominance analysis if bootstrap fails
da <- relaimpo::calc.relimp(lm.out)
da_res <- as.data.frame(round(da@lmg, 4))
colnames(da_res) <- "Weight"
# Add placeholder CI columns
da_res$CI_lower <- NA
da_res$CI_upper <- NA
return(list(
da_ci = da_res,
pv_comparisons = data.frame(
Comparison = "Bootstrap failed",
Diff = NA_real_,
CI_lower = NA_real_,
CI_upper = NA_real_,
Significant = ""
)
))
}
})
output$dominance.out <- renderPrint({
dominance()
dom_results <- dominance()
# Print weights with CI
cat("Dominance Analysis Results with 95% CI:\n\n")
print(dom_results$da_ci)
cat("\n\n")
cat("Predictor Variable Comparisons:\n")
print(dom_results$pv_comparisons)
cat("\n")
cat("Positive values indicate greater predictive importance of the first variable;\n")
cat("negative values indicate greater importance of the second.\n")
cat("* indicates a statistically significant difference\n")
cat("(when the confidence interval does not include zero).\n\n")
})
# 95% CI Plot
confPlot <- function() {
dat <- read.csv(text = input$text, sep = "\t")
dom_results <- dominance()
da_ci <- dom_results$da_ci
Variables <- rownames(da_ci)
plotdat <- data.frame(
Variables = Variables,
Weight = da_ci$Weight,
CI_lower = da_ci$CI_lower,
CI_upper = da_ci$CI_upper
)
dat_model <- read.csv(text = input$text, sep = "\t")
colnames(dat_model) <- c("Criterion", c(colnames(dat_model)[2:ncol(dat_model)]))
result <- lm(Criterion ~ ., dat_model)
model_summary <- summary(result)
RSQ.Results <- model_summary$r.squared
ggplot(plotdat, aes(reorder(Variables, Weight), Weight)) +
geom_bar(stat = "identity", fill = "lightblue", colour = "blue") +
coord_flip() +
geom_text(aes(label = round(Weight, 3)), hjust = -0.1, vjust = -0.3) +
geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper), width = .2, position = position_dodge(.9)) +
labs(
title = "Variable Importance (Dominance Analysis)",
x = "Predictor Variables",
y = "Dominance Weights",
caption = paste0(
"Note: R-squared = ",
round(RSQ.Results, 3),
". ",
"Dominance weights sum to R-squared. n = ",
nrow(dat), ". "
)
) +
theme_classic()
}
output$confPlot <- renderPlot({
print(confPlot())
})
# RWA
rwacalc <- reactive({
dat <- read.csv(text = input$text, sep = "\t")
thedata <- dat
Labels <- names(thedata)[2:length(thedata)]
multRegress <- function(mydata) {
numVar <<- NCOL(mydata)
Variables <<- names(mydata)[2:numVar]
mydata <- cor(mydata, use = "complete.obs")
RXX <- mydata[2:numVar, 2:numVar]
RXY <- mydata[2:numVar, 1]
RXX.eigen <- eigen(RXX)
D <- diag(RXX.eigen$val)
delta <- sqrt(D)
lambda <- RXX.eigen$vec %*% delta %*% t(RXX.eigen$vec)
lambdasq <- lambda^2
beta <- solve(lambda) %*% RXY
rsquare <<- sum(beta^2)
RawWgt <- lambdasq %*% beta^2
import <- (RawWgt / rsquare) * 100
result <<- data.frame(Variables, Raw.RelWeight = RawWgt, Rescaled.RelWeight = import)
}
multBootstrap <- function(mydata, indices) {
mydata <- mydata[indices, ]
multWeights <- multRegress(mydata)
return(multWeights$Raw.RelWeight)
}
multBootrand <- function(mydata, indices) {
mydata <- mydata[indices, ]
multRWeights <- multRegress(mydata)
multReps <- multRWeights$Raw.RelWeight
randWeight <- multReps[length(multReps)]
randStat <- multReps[-(length(multReps))] - randWeight
return(randStat)
}
mybootci <- function(x) {
boot.ci(multBoot, conf = 0.95, type = "bca", index = x)
}
runBoot <- function(num) {
INDEX <- 1:num
test <- lapply(INDEX, FUN = mybootci)
test2 <- t(sapply(test, "[[", i = 4)) # extracts confidence interval
CIresult <<- data.frame(Variables, CI.Lower.Bound = test2[, 4], CI.Upper.Bound = test2[, 5])
}
myRbootci <- function(x) {
boot.ci(multRBoot, conf = 0.95, type = "bca", index = x)
}
runRBoot <- function(num) {
INDEX <- 1:num
test <- lapply(INDEX, FUN = myRbootci)
test2 <- t(sapply(test, "[[", i = 4))
CIresult <<- data.frame(Labels, CI.Lower.Bound = test2[, 4], CI.Upper.Bound = test2[, 5])
}
myCbootci <- function(x) {
boot.ci(multC2Boot, conf = 0.95, type = "bca", index = x)
}
runCBoot <- function(num) {
INDEX <- 1:num
test <- lapply(INDEX, FUN = myCbootci)
test2 <- t(sapply(test, "[[", i = 4))
CIresult <<- data.frame(Labels2, CI.Lower.Bound = test2[, 4], CI.Upper.Bound = test2[, 5])
}
myGbootci <- function(x) {
boot.ci(groupBoot, conf = 0.95, type = "bca", index = x)
}
runGBoot <- function(num) {
INDEX <- 1:num
test <- lapply(INDEX, FUN = myGbootci)
test2 <- t(sapply(test, "[[", i = 4))
CIresult <<- data.frame(Labels, CI.Lower.Bound = test2[, 4], CI.Upper.Bound = test2[, 5])
}
multRegress(thedata)
RW.Results <- result
RSQ.Results <- rsquare
# Reduced number of bootstrap iterations for better performance
multBoot <- boot(thedata, multBootstrap, 1000) # Reduced from 10000
multci <- boot.ci(multBoot, conf = 0.95, type = "bca")
runBoot(length(thedata[, 2:numVar]))
CI.Results <- CIresult
# Reduced number of bootstrap iterations for better performance
randVar <- rnorm(length(thedata[, 1]), 0, 1)
randData <- cbind(thedata, randVar)
multRBoot <- boot(randData, multBootrand, 1000) # Reduced from 10000
multRci <- boot.ci(multRBoot, conf = 0.95, type = "bca")
runRBoot(length(randData[, 2:(numVar - 1)]))
CI.Significance <- CIresult
list(RSQ.Results = RSQ.Results, RW.Results = RW.Results, CI.Results = CI.Results, CI.Significance = CI.Significance)
})
rwa <- reactive({
cat("R-squared For the Model:", "\n")
RSQ.Results <- rwacalc()$RSQ.Results
print(RSQ.Results)
cat("\n", "The Raw and Rescaled Weights:", "\n")
RW.Results <- rwacalc()$RW.Results
print(RW.Results)
cat("\n", "BCa Confidence Intervals around the raw weights:", "\n")
CI.Results <- rwacalc()$CI.Results
print(CI.Results)
cat("\n", "BCa Confidence Interval Tests of significance:", "\n")
cat(" (If 0 is not included, weight is significant at p < .05)", "\n")
CI.Significance <- rwacalc()$CI.Significance
print(CI.Significance)
})
output$rwa.out <- renderPrint({
rwa()
})
# Info
info1 <- reactive({
info1 <- paste("This analysis was conducted with ", strsplit(R.version$version.string, " \\(")[[1]][1], ".", sep = "")
info2 <- paste("It was executed on ", date(), ".", sep = "")
cat(sprintf(info1), "\n")
cat(sprintf(info2), "\n")
})
output$info1.out <- renderPrint({
info1()
})
}
})
# 2. Random Forest Analysis observe block
observe({
if (input$run_forest > 0) {
# Feature Selection: Variable Importance (Random Forest)
borutaCalc <- function() {
dat <- read.csv(text = input$text, sep = "\t")
dat <- na.omit(dat) # listwise deletion
colnames(dat) <- c("Criterion", c(colnames(dat)[2:ncol(dat)]))
set.seed(456)
# Reduced maxRuns for better performance
boruta <- Boruta(Criterion ~ ., maxRuns = 50, data = dat, doTrace = 1)
list(boruta = boruta)
}
randomForestPlot <- function() {
boruta <- borutaCalc()$boruta
plot(boruta)
}
output$randomForestPlot <- renderPlot({
print(randomForestPlot())
})
borutaInfo <- function() {
boruta <- borutaCalc()$boruta
print(list(boruta, attStats(boruta)))
}
output$boruta.out <- renderPrint({
borutaInfo()
})
}
})
#-----------------------------------------------------------------
# Dominance Analysis (Relative Importance Analysis) Using Correlation Matrix
#-----------------------------------------------------------------
# Check the Correlation Matrix
inputData <- reactive({
dat <- read.csv(text = input$text2, sep = "", na.strings = c("", "NA", "."))
print(dat)
})
output$inputData.out <- renderPrint({
inputData()
})
# Multiple Regression analysis
mra2.result <- reactive({
n <- input$n
dat <- read.csv(text = input$text2, sep = "", na.strings = c("", "NA", "."))
dat <- as.data.frame(dat)
# source("https://mizumot.com/code/multreg.second2.R", encoding="UTF-8")
multreg.second2 <-
function(dat, n, m = NULL, sd = NULL, sig.level = 0.05,
digits = 3) {
depname <- colnames(dat[1])
indname <- colnames(dat[-1])
x <- c(depname, indname)
corr <- dat
sd <- sd[x]
m <- m[x]
corr.lower <- tanh(atanh(corr) + qnorm(sig.level / 2, lower.tail = TRUE) / sqrt(n -
3))
corr.upper <- tanh(atanh(corr) + qnorm(sig.level / 2, lower.tail = FALSE) / sqrt(n -
3))
corr.conf <- corr.lower
corr.conf[upper.tri(corr.conf)] <- corr.upper[upper.tri(corr.upper)]
p <- ncol(corr)
K <- solve(corr)
a <- 1 / sqrt(diag(K))
K <- K * outer(a, a)
partial.corr <- 2 * diag(p) - K
dimnames(partial.corr) <- dimnames(partial.corr)
cor.mat <- corr
cor.mat[upper.tri(cor.mat)] <- partial.corr[upper.tri(partial.corr)]
num <- which(depname == colnames(corr))
rxy <- corr[, num][-num]
Rind <- corr[-num, -num]
bs <- solve(Rind) %*% rxy
R.sq <- as.vector(t(as.matrix(rxy)) %*% solve(Rind) %*% as.matrix(rxy))
k <- nrow(bs)
bs.sem <- numeric(k)
for (i in 1:k) {
xname <- rownames(bs)[i]
xdel <- setdiff(rownames(bs), xname)
num <- which(xname == colnames(Rind))
rxy <- Rind[, num][-num]
Rind2 <- Rind[-num, -num]
Ri <- as.vector(t(as.matrix(rxy)) %*% solve(Rind2) %*%
as.matrix(rxy))
bs.sem[i] <- sqrt((1 - R.sq) / (n - k - 1)) * sqrt(1 / (1 -
Ri))
}
standardized.estimates <- data.frame(matrix(NA,
nrow = k,
ncol = 4
))
rownames(standardized.estimates) <- rownames(bs)
colnames(standardized.estimates) <- c(
"estimates", "lower",
"upper", "std"
)
standardized.estimates[, 1] <- as.numeric(bs)
standardized.estimates[, 4] <- bs.sem
standardized.estimates[, 2] <- standardized.estimates$estimates +
qnorm(sig.level / 2) * standardized.estimates$std
standardized.estimates[, 3] <- standardized.estimates$estimates +
qnorm(sig.level / 2, lower.tail = FALSE) * standardized.estimates$std
if (!is.null(sd)) {
b <- sd[depname] / sd[rownames(bs)] * standardized.estimates$estimates
b.sem <- sd[depname] / sd[rownames(bs)] * standardized.estimates$std
b.lower <- b + qt(sig.level / 2, n - k - 1) * b.sem
b.upper <- b + qt(sig.level / 2, n - k - 1, lower.tail = FALSE) *
b.sem
Intercept <- m[depname] - sum(b * m[names(b)])
raw.estimates <- data.frame(matrix(NA,
nrow = k + 1,
ncol = 4
))
rownames(raw.estimates) <- c("Intercept", names(b))
colnames(raw.estimates) <- c(
"estimates", "lower", "upper",
"std"
)
raw.estimates[, 1] <- c(Intercept, b)
raw.estimates[, 4] <- c(NA, bs.sem)
raw.estimates[, 2] <- c(NA, b.lower)
raw.estimates[, 3] <- c(NA, b.upper)
}
u <- length(indname)
nu <- n - u - 1
f.sq <- R.sq / (1 - R.sq)
f.value <- f.sq * (nu / u)
delta.lower <- try(FNONCT(f.value, u, nu, prob = 1 - sig.level / 2))
delta.upper <- FNONCT(f.value, u, nu, prob = sig.level / 2)
if (is.character(delta.lower)) {
delta.lower <- 0
}
R.sq.lower <- delta.lower / (delta.lower + u + nu + 1)
R.sq.upper <- delta.upper / (delta.upper + u + nu + 1)
omnibus.es <- c(Rsq = R.sq, lower = R.sq.lower, upper = R.sq.upper)
criterion.power <- c(small = power.multi(
n = n, n.ind = u,
delta = 0.02, sig.level = sig.level
), medium = power.multi(
n = n,
n.ind = u, delta = 0.15, sig.level = sig.level
), large = power.multi(
n = n,
n.ind = u, delta = 0.35, sig.level = sig.level
))
output <- list(
corr.partial.corr = cor.mat, corr.confidence = corr.conf,
omnibus.es = omnibus.es, standardized.estimates = standardized.estimates,
power = criterion.power
)
if (!is.null(sd)) {
output <- list(
corr.partial.corr = cor.mat, corr.confidence = corr.conf,
omnibus.es = omnibus.es, raw.estimates = raw.estimates,
standardized.estimates = standardized.estimates,
power = criterion.power
)
}
output <- sapply(output, round, digits)
return(output)
}
multreg.second2(dat, n = n)
})
output$mra2.result.out <- renderPrint({
mra2.result()
})
# Dominance Analysis
dominance.cor <- reactive({
dat <- read.csv(text = input$text2, sep = "", na.strings = c("", "NA", "."))
n <- input$n
# dat2 <- dat
# dat <- as.matrix(dat)
# as.formula(paste(colnames(dat2[1]), " ~ ", paste(colnames(dat2[-1]), collapse="+")))
# lm.cov <- lmWithCov(as.formula(paste(colnames(dat2[1]), " ~ ", paste(colnames(dat2[-1]), collapse="+"))), dat)
# da <- dominanceAnalysis(lm.cov)
# res <- as.data.frame(round(da$contribution.average[[1]], 4))
# colnames(res) <- "Weight"
# print(res)
# Generating simulation dataset because dominanceanalysis package cannot be used with older version of R
mu <- rep(c(0), times = length(colnames(dat)))
simdat <- mvrnorm(n = n, mu = mu, Sigma = dat, empirical = TRUE)
simdat <- as.data.frame(simdat)
colnames(simdat) <- c("Criterion", c(colnames(simdat)[2:ncol(simdat)]))
res <- lm(Criterion ~ ., simdat)
da <- calc.relimp(res)
da_res <- as.data.frame(round(da@lmg, 4))
colnames(da_res) <- "Weight"
da_res
})
output$dominance.cor.out <- renderPrint({
dominance.cor()
})
# Relative Weight Analysis
rwacalc.cor <- reactive({
dat <- read.csv(text = input$text2, sep = "", na.strings = c("", "NA", "."))
thedata <- as.data.frame(dat)
Labels <- names(thedata)[2:length(thedata)]
multRegress <- function(mydata) {
numVar <<- ncol(mydata)
Variables <<- names(mydata)[2:numVar]
RXX <- mydata[2:numVar, 2:numVar]
RXY <- mydata[2:numVar, 1]
RXX.eigen <- eigen(RXX)
D <- diag(RXX.eigen$val)
delta <- sqrt(D)
lambda <- RXX.eigen$vec %*% delta %*% t(RXX.eigen$vec)
lambdasq <- lambda^2
beta <- solve(lambda) %*% RXY
rsquare <<- sum(beta^2)
RawWgt <- lambdasq %*% beta^2
import <- (RawWgt / rsquare) * 100
result <<- data.frame(Variables, Raw.RelWeight = RawWgt, Rescaled.RelWeight = import)
}
multRegress(thedata)
RW.Results <- result
RSQ.Results <- rsquare
list(RSQ.Results2 = RSQ.Results, RW.Results2 = RW.Results) # 他で使うために定義
})
wra.cor <- reactive({
cat("R-squared For the Model:", "\n")
RSQ.Results2 <- rwacalc.cor()$RSQ.Results2
print(RSQ.Results2)
cat("\n", "The Raw and Rescaled Weights:", "\n")
RW.Results2 <- rwacalc.cor()$RW.Results2
print(RW.Results2)
})
output$wra.cor.out <- renderPrint({
wra.cor()
})
# Importance Plot
imPlot <- function() {
da_res <- dominance.cor()
Variables <- rownames(da_res)
plotdat <- data.frame(
Variables = Variables,
Weight = da_res$Weight
)
dat <- read.csv(text = input$text2, sep = "", na.strings = c("", "NA", "."))
RSQ.Results <- dat[1, 1]
ggplot(plotdat, aes(reorder(Variables, Weight), Weight)) +
geom_bar(stat = "identity", fill = "lightblue", colour = "blue") +
coord_flip() +
scale_y_continuous(limits = c(0, max(plotdat$Weight) * 1.1)) +
geom_text(aes(label = Weight), hjust = -0.1, vjust = -0.3) +
labs(
title = "Variable Importance (Dominance Analysis)",
x = "Predictor Variables",
y = "Dominance Weights",
caption = paste0(
"Note: R-squared = ",
round(RSQ.Results, 3),
". ",
"Dominance weights sum to R-squared. n = ",
input$n, ". "
)
) +
theme_classic()
}
output$imPlot <- renderPlot({
print(imPlot())
})
# Info
info2 <- reactive({
info1 <- paste("This analysis was conducted with ", strsplit(R.version$version.string, " \\(")[[1]][1], ".", sep = "")
info2 <- paste("It was executed on ", date(), ".", sep = "")
cat(sprintf(info1), "\n")
cat(sprintf(info2), "\n")
})
output$info2.out <- renderPrint({
info2()
})
})
library(shiny)
library(shinyAce)
shinyUI(bootstrapPage(
headerPanel("Dominance Analysis"),
########## loading message #######################################
div(
id = "loadingMessage",
"Analysis in progress. Please wait...",
style = "position: fixed; top: 0; left: 0; width: 100%; background-color: #CCFF66;
color: black; padding: 10px; text-align: center; font-weight: bold;
z-index: 9999; display: none;"
),
tags$script(HTML("
document.addEventListener('DOMContentLoaded', function() {
var loadingMsg = document.getElementById('loadingMessage');
var checkIntervalId = null;
// Initially hide the message
loadingMsg.style.display = 'none';
// Dominance Analysis button
document.getElementById('do').addEventListener('click', function() {
// Show loading message
loadingMsg.style.display = 'block';
// Clear any existing interval
if (checkIntervalId) clearInterval(checkIntervalId);
// Start checking for results
checkIntervalId = setInterval(function() {
// Check if dominance output exists and has content
var outputElement = document.getElementById('dominance.out');
if (outputElement && outputElement.textContent && outputElement.textContent.length > 20) {
// Results found, hide message
loadingMsg.style.display = 'none';
clearInterval(checkIntervalId);
}
}, 1000); // Check every second
// Backup timeout - hide after 60 seconds regardless
setTimeout(function() {
loadingMsg.style.display = 'none';
if (checkIntervalId) clearInterval(checkIntervalId);
}, 60000);
});
// Random Forest button
document.getElementById('run_forest').addEventListener('click', function() {
// Show loading message
loadingMsg.style.display = 'block';
// Clear any existing interval
if (checkIntervalId) clearInterval(checkIntervalId);
// Start checking for results
checkIntervalId = setInterval(function() {
// Check if boruta output exists and has content
var outputElement = document.getElementById('boruta.out');
if (outputElement && outputElement.textContent && outputElement.textContent.length > 20) {
// Results found, hide message
loadingMsg.style.display = 'none';
clearInterval(checkIntervalId);
}
}, 1000); // Check every second
// Backup timeout - hide after 90 seconds regardless
setTimeout(function() {
loadingMsg.style.display = 'none';
if (checkIntervalId) clearInterval(checkIntervalId);
}, 90000);
});
});
")),
###################################################################
mainPanel(
tabsetPanel(
# Raw Data
tabPanel(
"Raw Data",
h2("Using Raw Data"),
p("Note: Input values must be separated by tabs. Copy and paste from Excel/Numbers."),
p(HTML("<b><div style='background-color:#FADDF2;border:1px solid black;'>
Wait patiently. It will take a minute or so until you get to see the result.<br>
Please make sure that your data includes the header (variable names) in the first row.<br>
The criterion (dependent) variable should be placed in the first column.</b></div>")),
aceEditor("text", value = "Proficiency\tVocabulary\tGrammar\tPronunciation\n275\t21\t35\t53\n175\t21\t29\t0\n535\t37\t56\t60\n10\t3\t4\t55\n385\t14\t36\t62\n655\t44\t47\t62\n335\t9\t2\t77\n630\t3\t46\t81\n605\t34\t44\t67\n530\t18\t35\t82\n645\t42\t45\t53\n440\t25\t35\t59\n750\t24\t57\t79\n610\t28\t52\t73\n830\t55\t44\t66\n600\t22\t50\t44\n760\t63\t30\t73\n300\t14\t33\t77\n340\t22\t0\t59\n585\t25\t73\t52\n475\t34\t32\t74\n215\t10\t32\t73\n150\t1\t31\t71\n655\t39\t17\t56\n710\t10\t73\t68\n360\t34\t12\t62\n350\t9\t37\t77\n540\t27\t55\t82\n710\t36\t52\t51\n310\t10\t19\t87\n655\t41\t53\t74\n500\t32\t59\t55\n115\t19\t29\t63\n615\t32\t58\t88\n400\t11\t16\t57\n440\t27\t28\t46\n555\t24\t30\t71\n625\t32\t51\t80\n345\t22\t31\t45\n670\t44\t7\t63\n565\t47\t32\t62\n595\t35\t43\t63\n585\t67\t61\t75\n415\t5\t40\t58\n620\t27\t80\t66\n175\t6\t64\t23\n790\t53\t64\t66\n450\t38\t18\t61\n750\t39\t41\t70\n665\t48\t62\t80\n700\t11\t82\t91\n585\t34\t53\t81\n380\t19\t48\t76\n680\t61\t50\t86\n315\t41\t45\t68\n330\t15\t32\t67\n645\t42\t56\t66\n180\t11\t4\t34\n835\t79\t64\t45\n550\t40\t45\t70\n340\t23\t40\t62\n805\t45\t85\t87\n615\t42\t92\t56\n815\t62\t44\t71\n610\t10\t59\t53\n720\t58\t55\t50\n525\t45\t43\t55\n575\t48\t49\t74\n570\t6\t43\t62\n420\t22\t22\t73\n395\t24\t44\t73\n280\t35\t33\t86\n945\t66\t56\t68\n880\t60\t73\t72\n290\t8\t13\t48\n420\t12\t2\t66\n950\t50\t59\t67\n435\t27\t46\t79\n605\t18\t60\t78\n170\t1\t47\t12\n940\t60\t70\t63\n260\t11\t35\t73\n335\t7\t34\t78\n270\t9\t60\t45\n670\t45\t39\t70\n860\t59\t100\t72\n835\t68\t36\t73\n850\t61\t58\t68\n405\t9\t55\t53\n500\t25\t34\t65\n490\t20\t32\t49\n515\t28\t72\t64\n950\t89\t47\t63\n425\t4\t37\t73\n430\t12\t28\t73\n695\t36\t55\t64\n150\t11\t28\t80\n355\t12\t41\t81\n785\t39\t62\t74\n250\t31\t26\t61\n560\t12\t45\t75\n425\t6\t14\t60\n455\t39\t20\t65\n815\t59\t62\t92\n715\t51\t85\t74\n270\t17\t44\t65\n395\t8\t30\t78\n210\t8\t33\t20\n705\t30\t50\t89\n250\t23\t1\t69\n665\t45\t54\t69\n440\t26\t29\t70\n805\t55\t74\t97\n180\t9\t21\t82\n795\t77\t53\t66\n605\t29\t47\t76\n280\t10\t19\t69\n605\t8\t57\t75\n250\t5\t11\t33\n525\t33\t63\t58\n570\t43\t29\t59\n535\t2\t64\t66\n425\t39\t12\t71\n330\t18\t82\t70\n555\t10\t43\t72\n550\t34\t29\t71\n870\t51\t59\t62\n550\t18\t17\t45\n840\t49\t66\t84\n665\t67\t41\t54\n210\t18\t31\t84\n405\t13\t41\t91\n585\t22\t31\t92\n520\t17\t67\t75\n585\t35\t55\t57\n390\t21\t31\t59\n830\t69\t37\t88\n400\t10\t23\t72\n240\t11\t31\t38\n275\t12\t29\t64\n340\t18\t51\t61\n105\t21\t15\t34\n615\t8\t39\t65\n175\t14\t33\t38\n425\t25\t56\t72\n275\t10\t35\t60\n475\t27\t28\t48\n765\t32\t53\t80\n810\t51\t84\t50\n825\t51\t64\t73\n795\t35\t54\t68\n585\t37\t56\t64\n610\t26\t47\t81\n595\t49\t28\t72\n490\t5\t75\t65\n880\t78\t49\t97\n605\t4\t59\t57\n765\t35\t31\t72\n650\t52\t35\t57\n430\t23\t50\t78\n625\t26\t52\t74\n700\t13\t61\t77\n585\t25\t59\t70\n895\t100\t70\t100\n190\t1\t57\t68\n460\t25\t34\t70\n680\t68\t17\t75\n915\t69\t69\t66\n120\t1\t20\t84\n150\t29\t30\t57\n265\t12\t37\t55\n275\t0\t3\t62\n935\t67\t58\t80\n700\t25\t51\t69\n750\t48\t52\t70\n430\t45\t19\t61\n240\t8\t6\t74\n660\t35\t66\t69\n405\t10\t40\t79\n950\t57\t65\t82\n660\t18\t37\t74\n425\t36\t15\t64\n615\t32\t81\t69\n355\t23\t24\t69\n550\t48\t21\t77\n635\t32\t73\t78\n190\t28\t29\t16\n170\t32\t16\t54\n125\t6\t41\t78\n410\t6\t31\t62\n715\t68\t44\t85\n990\t88\t55\t49\n785\t58\t40\t75\n320\t12\t51\t65\n890\t78\t53\t47\n565\t38\t38\t66\n525\t11\t73\t55\n730\t11\t67\t73\n750\t53\t49\t74\n565\t69\t51\t79", mode = "r", theme = "cobalt"),
br(),
tags$head(
tags$style(HTML("
.btn-modern {
margin-bottom: 10px;
text-transform: uppercase;
font-weight: bold;
border-radius: 4px;
box-shadow: 0 2px 5px rgba(0,0,0,0.16), 0 2px 5px rgba(0,0,0,0.23);
transition: all 0.3s;
}
.btn-modern:hover {
box-shadow: 0 4px 10px rgba(0,0,0,0.25), 0 4px 10px rgba(0,0,0,0.22);
transform: translateY(-2px);
}
.btn-primary {
background-color: #2196F3;
color: white;
border-color: #1E88E5;
}
.btn-success {
background-color: #4CAF50;
color: white;
border-color: #43A047;
}
"))
),
actionButton("do", "Run Dominance Analysis",
class = "btn-primary btn-modern",
icon = icon("bar-chart")
),
h3("Basic Statistics"),
verbatimTextOutput("textarea.out"),
br(),
h3("Correlation"),
plotOutput("corPlot"),
br(),
h3("Regression Analysis"),
verbatimTextOutput("reg.out"),
br(),
h3("Dominance Analysis"),
verbatimTextOutput("dominance.out"),
br(),
h3("Dominance Weight Plot with 95% CI"),
plotOutput("confPlot"),
br(),
h3("Just for Reference: Relative Weight Analysis"),
p(HTML("Note: Relative weight analysis is criticized and not recommended due to mathematical concerns (see <a href='https://doi.org/10.1080/00273171.2014.905766' target='_blank'>this paper</a>).")),
verbatimTextOutput("rwa.out"),
br(),
h3("Feature Selection: Variable Importance (Random Forest)"),
actionButton("run_forest", "Run Random Forest",
class = "btn-success btn-modern",
icon = icon("tree")
),
plotOutput("randomForestPlot"),
verbatimTextOutput("boruta.out"),
br(),
br(),
strong("R session info"),
verbatimTextOutput("info1.out")
),
# Correlation Matrix
tabPanel(
"Correlation Matrix",
h2("Using Correlation Matrix"),
p("Note: Input values must be separated by tabs. Copy and paste from Excel/Numbers."),
p(HTML("<b><div style='background-color:#FADDF2;border:1px solid black;'>Please make sure that your data includes the header (variable names) in the first row.<br>
The criterion (dependent) variable should be placed in the first column.</b></div>")),
aceEditor("text2",
value = "\tScore\tWordcount\tCLI\tCommas\tStopwords\tLinking\tWordsSentence\nScore\t1\t0.67\t0.41\t0.02\t-0.35\t0.35\t0.45\nWordcount\t0.67\t1\t0.18\t0.05\t-0.28\t0.24\t0.31\nCLI\t0.41\t0.18\t1\t0.21\t-0.32\t0.34\t0.43\nCommas\t0.02\t0.05\t0.21\t1\t-0.03\t0.22\t0.47\nStopwords\t-0.35\t-0.28\t-0.32\t-0.03\t1\t-0.13\t-0.22\nLinking\t0.35\t0.24\t0.34\t0.22\t-0.13\t1\t0.33\nWordsSentence\t0.45\t0.31\t0.43\t0.47\t-0.22\t0.33\t1", mode = "r", theme = "cobalt"
),
br(),
numericInput("n", " Sample Size", 200),
h3("Check the Input Correlation Matrix"),
verbatimTextOutput("inputData.out"),
br(),
h3("Regression Analysis"),
verbatimTextOutput("mra2.result.out"),
br(),
h3("Dominance Analysis"),
p("Note: The 95% confidence interval (CI) cannot be calculated from the correlation matrix."),
verbatimTextOutput("dominance.cor.out"),
br(),
h3("Dominance Weight Plot"),
p("Note: The 95% confidence interval (CI) cannot be calculated from the correlation matrix."),
plotOutput("imPlot"),
br(),
h3("Just for Reference: Relative Weight Analysis"),
p(HTML("Note: Relative weight analysis is criticized and not recommended due to mathematical concerns (see <a href='https://doi.org/10.1080/00273171.2014.905766' target='_blank'>this paper</a>).")),
verbatimTextOutput("wra.cor.out"),
br(),
br(),
strong("R session info"),
verbatimTextOutput("info2.out")
),
# About
tabPanel(
"About",
strong("Note"),
p(
"This web application is developed with",
a("Shiny.", href = "http://www.rstudio.com/shiny/", target = "_blank"),
""
),
br(),
strong("List of Packages Used"), br(),
code("library(shiny)"), br(),
code("library(shinyAce)"), br(),
code("library(psych)"), br(),
code("library(car)"), br(),
code("library(rpsychi)"), br(),
code("library(boot)"), br(),
code("library(plyr)"), br(),
code("library(ggplot2)"), br(),
code("library(Boruta)"), br(),
code("library(relaimpo)"), br(),
code("library(MASS)"), br(),
code("library(yhat)"), br(),
br(),
strong("Code"),
p(
"Source code for this application is based on",
a('"RWA Web" (by Dr. Scott Tonidandel)', href = "https://relativeimportance.davidson.edu/", target = "_blank")
),
p("and", a('"The handbook of Research in Foreign Language Learning and Teaching" (Takeuchi & Mizumoto, 2012).', href = "https://mizumot.com/handbook/", target = "_blank")),
p(
"The code for this web application is available at",
a("GitHub.", href = "https://github.com/mizumot/relimp", target = "_blank")
),
br(),
strong("Citation in Publications"),
p("Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. https://langtest.jp"),
br(),
strong("Article"),
p("Mizumoto, A., & Plonsky, L. (2016).", a("R as a lingua franca: Advantages of using R for quantitative research in applied linguistics.", href = "https://doi.org/10.1093/applin/amv025", target = "_blank"), em("Applied Linguistics,"), "37(2), 284–291. https://doi.org/10.1093/applin/amv025"),
p("Mizumoto, A. (2023).", a("Calculating the relative importance of multiple regression predictor variables using dominance analysis and random forests.", href = "https://doi.org/10.1111/lang.12518", target = "_blank"), em("Language Learning,"), "73(1), 161–196. https://doi.org/10.1111/lang.12518"),
br(),
strong("Recommended"),
p(
"To learn more about R, I suggest this excellent and free e-book (pdf),",
a("A Guide to Doing Statistics in Second Language Research Using R,", href = "https://www.tufs.ac.jp/ts/personal/corpuskun/pdf/2015/__A_Guide_to_Doing_Statistics_in_SLA_Using_R.pdf", target = "_blank"),
"written by Dr. Jenifer Larson-Hall."
),
br(),
strong("Developer"),
p(
a("Atsushi MIZUMOTO,", href = "https://mizumot.com", target = "_blank"), " Ph.D.", br(),
"Professor of Applied Linguistics", br(),
"Faculty of Foreign Language Studies /", br(),
"Graduate School of Foreign Language Education and Research,", br(),
"Kansai University, Osaka, Japan"
),
br(),
a(img(src = "https://creativecommons.jp/wp-content/uploads/2015/04/by-nc-sa.png?w=75"), target = "_blank", href = "https://creativecommons.org/licenses/by-nc-sa/4.0/"),
p(br())
)
)
)
))