Text Analysis | Word length (letters) | Word length (syllables) | Summary | Details
Summary | Details
Note
This web application is developed with Shiny. List of Packages Used library(shiny)
library(koRpus)
Code Source code for this application is mostly from koRpus: An R package for text analysis. The code for this web application is available at GitHub.
If you want to run this code on your computer (in a local R session), run the code below:
library(shiny)
runGitHub("corpus","mizumot")
Citation in Publications Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. Retrieved from http://langtest.jp Article Mizumoto, A., & Plonsky, L. (2015). R as a lingua franca: Advantages of using R for quantitative research in applied linguistics. Applied Linguistics, Advance online publication. doi:10.1093/applin/amv025 Recommended To learn more about R, I suggest this excellent and free e-book (pdf), A Guide to Doing Statistics in Second Language Research Using R, written by Dr. Jenifer Larson-Hall. Also, if you are a cool Mac user and want to use R with GUI, MacR is definitely the way to go! Author
Atsushi MIZUMOTO,
Ph.D.
|
library(shiny)
library(koRpus)
# Server logic for the text analysis application.
#
# Reads `input$text` plus the lexical-diversity (`input$LD.*`) and
# readability (`input$RD.indices`) options, and fills the outputs
# `word.list`, `letter.plot`, `desc`, `desc.lttr.disrib`, `syll.disrib`,
# `lexdiv.sum`, `lexdiv.res`, `readability.sum` and `readability.res`.
shinyServer(function(input, output) {

  # ---- Shared reactive inputs ----

  # Tokenized version of the submitted text (English, given as an object).
  tagged_text <- reactive({
    tokenize(input$text, format = "obj", lang = "en")
  })

  # Hyphenated version of the tokenized text, used for syllable figures.
  hyphenated_text <- reactive({
    # set the next line to activate caching, if this application is run on a shiny server
    # NOTE(review): the hint below reads input$lang; confirm such an input exists before enabling it.
    #set.kRp.env(hyph.cache.file=file.path("/var","shiny-server","cache","koRpus",paste("hyph.cache.",input$lang,".rdata", sep="")))
    hyphen(tagged_text(), quiet = TRUE)
  })

  # ---- Word list ----

  # Frequency table of lower-cased words, most frequent first. The text is
  # split on runs of whitespace or punctuation and empty pieces are dropped.
  output$word.list <- renderTable({
    pieces <- unlist(
      strsplit(tolower(input$text), split = "[[:space:]]+|[[:punct:]]+")
    )
    tokens <- pieces[pieces != ""]
    # Naming the argument keeps the first column labelled "Word".
    counts <- as.data.frame(table(Word = tokens))
    counts[order(counts$Freq, decreasing = TRUE), ]
  })

  # ---- Descriptive statistics ----

  output$letter.plot <- renderPlot({
    plot(tagged_text(), what = "letters")
  })

  # One-column table ("Value") of basic and syllable-based statistics.
  output$desc <- renderTable({
    basic_keys <- c(
      "all.chars", "normalized.space", "chars.no.space", "letters.only",
      "lines", "punct", "digits", "words", "sentences",
      "avg.sentc.length", "avg.word.length"
    )
    basic_labels <- c(
      "All characters", "Normalized space", "Characters (no space)",
      "Characters (letters only)", "Lines", "Punctuation", "Digits",
      "Words", "Sentences", "Avg. sentence length", "Avg. word length"
    )
    syll_keys <- c("num.syll", "avg.syll.word")
    syll_labels <- c("Syllables", "Avg. syllable per word")

    basic_stats <- as.data.frame(describe(tagged_text())[basic_keys])
    syll_stats <- as.data.frame(describe(hyphenated_text())[syll_keys])
    colnames(basic_stats) <- basic_labels
    colnames(syll_stats) <- syll_labels

    combined <- cbind(basic_stats, syll_stats)
    rownames(combined) <- "Value"
    # Transpose so that each statistic becomes a row.
    t(combined)
  })

  # Distribution of word lengths in letters, transposed for display.
  output$desc.lttr.disrib <- renderTable({
    letter_distribution <- describe(tagged_text())[["lttr.distrib"]]
    t(letter_distribution)
  })

  # Distribution of word lengths in syllables, transposed for display.
  output$syll.disrib <- renderTable({
    syllable_distribution <- describe(hyphenated_text())[["syll.distrib"]]
    t(syllable_distribution)
  })

  # ---- Lexical diversity ----

  # Lexical diversity measures, configured from the sidebar options.
  lexdiv_results <- reactive({
    lex.div(
      tagged_text(),
      segment = input$LD.segment,
      factor.size = input$LD.factor,
      min.tokens = input$LD.minTokens,
      rand.sample = input$LD.random,
      window = input$LD.window,
      case.sens = input$LD.caseSens,
      detailed = FALSE,
      char = c(),
      quiet = TRUE
    )
  })

  output$lexdiv.sum <- renderTable({
    summary(lexdiv_results())
  })

  output$lexdiv.res <- renderPrint({
    lexdiv_results()
  })

  # ---- Readability ----

  # Readability indices selected in the sidebar; reuses the hyphenation.
  readability_results <- reactive({
    readability(
      tagged_text(),
      hyphen = hyphenated_text(),
      index = input$RD.indices,
      quiet = TRUE
    )
  })

  output$readability.sum <- renderTable({
    summary(readability_results())
  })

  output$readability.res <- renderPrint({
    readability_results()
  })
})
library(shiny)
# User interface for the "Text Analysis" Shiny application.
#
# Layout: a sidebar with the text area and per-tab option panels, and a main
# panel with one tab per analysis. The output ids referenced here ("desc",
# "desc.lttr.disrib", "syll.disrib", "letter.plot", "word.list",
# "lexdiv.sum", "lexdiv.res", "readability.sum", "readability.res") and the
# input ids ("text", "LD.*", "RD.indices") are the ones the server reads and
# writes, so they must not be renamed on one side only.
shinyUI(
  pageWithSidebar(
    headerPanel("Text Analysis"),
    sidebarPanel(
      # limit the maximum amount of text to be analyzed
      includeHTML("./maxlength.html"),
      h4("Text to analyze:"),
      # The hint text is the textarea's initial value; the onfocus/onblur
      # handlers clear it on focus and restore it when the box is left empty.
      tags$textarea(id="text", rows=30, cols=35, maxlength=10000,
        onblur="if(this.value==\"\") this.value=\"(Paste your text here. Text limit is 10000 characters, but should at least have 100 words.)\";",
        onfocus="if(this.value==\"(Paste your text here. Text limit is 10000 characters, but should at least have 100 words.)\") this.value=\"\";",
        "(Paste your text here. Text limit is 10000 characters, but should at least have 100 words.)"),
      # Option panels are shown only while the matching tab is selected. The
      # conditions read `input.tab`, which is provided by the id of the
      # tabsetPanel in the main panel.
      conditionalPanel("input.tab == 'chkLexdiv'",
        h4("Lexical diversity options:"),
        numericInput("LD.segment", "MSTTR segment size:", 100),
        sliderInput("LD.factor", "MTLD/MTLD-MA factor size:", min=0, max=1, value=0.72),
        numericInput("LD.minTokens", "MTLD-MA min. tokens/factor:", 9),
        numericInput("LD.random", "HD-D sample size:", 42),
        numericInput("LD.window", "MATTR moving window:", 100),
        checkboxInput("LD.caseSens", "Case sensitive", FALSE)
      ),
      conditionalPanel("input.tab == 'chkReadability'",
        h4("Readability options:"),
        checkboxGroupInput("RD.indices", label="Measures to calculate",
          # Display label = index name passed to the server.
          choices=c("ARI"="ARI",
            "ARI (NRI)"="ARI.NRI",
            "ARI (simplified)"="ARI.simple",
            "Coleman-Liau"="Coleman.Liau",
            "Danielson-Bryan"="Danielson.Bryan",
            "Dickes-Steiwer"="Dickes.Steiwer",
            "ELF"="ELF",
            "Farr-Jenkins-Paterson"="Farr.Jenkins.Paterson",
            "Farr-Jenkins-Paterson (Powers-Sumner-Kearl)"="Farr.Jenkins.Paterson.PSK",
            "Flesch"="Flesch",
            "Flesch (Powers-Sumner-Kearl)"="Flesch.PSK",
            "Flesch (DE, Amstad)"="Flesch.de",
            "Flesch (ES, Fernandez-Huerta)"="Flesch.es",
            "Flesch (FR, Kandel-Moles)"="Flesch.fr",
            "Flesch (NL, Douma)"="Flesch.nl",
            "Flesch-Kincaid"="Flesch.Kincaid",
            "FOG"="FOG",
            "FOG (Powers-Sumner-Kearl)"="FOG.PSK",
            "FOG (NRI)"="FOG.NRI",
            "FORCAST"="FORCAST",
            "FORCAST (reading grade level)"="FORCAST.RGL",
            "Fucks Stilcharakteristik"="Fucks",
            "Linsear-Write"="Linsear.Write",
            "LIX"="LIX",
            "Neue Wiener Sachtextformeln"="nWS",
            "RIX"="RIX",
            "SMOG"="SMOG",
            "SMOG (DE, Bamberger-Vanecek)"="SMOG.de",
            "SMOG (formula C)"="SMOG.C",
            "SMOG (simplified)"="SMOG.simple",
            "Strain"="Strain",
            "TRI"="TRI",
            "Wheeler-Smith"="Wheeler.Smith",
            "Wheeler-Smith (DE, Bamberger-Vanecek)"="Wheeler.Smith.de"),
          # Indices ticked by default.
          selected=c("ARI",
            "Coleman.Liau",
            "Danielson.Bryan",
            "Dickes.Steiwer",
            "ELF",
            "Farr.Jenkins.Paterson",
            "Flesch",
            "Flesch.Kincaid",
            "FOG",
            "FORCAST",
            "Fucks",
            "Linsear.Write",
            "LIX",
            "RIX",
            "SMOG",
            "Strain",
            "Wheeler.Smith"))
      )
      # submitButton("Update View")
    ),
    mainPanel(
      tabsetPanel(
        # The id publishes the selected tab's `value` as `input$tab`. Without
        # it the conditionalPanel conditions in the sidebar never hold and
        # the option panels stay hidden.
        id="tab",
        tabPanel("Descriptive statistics",
          tableOutput("desc"),
          h5("Word length (letters)"),
          tableOutput("desc.lttr.disrib"),
          h5("Word length (syllables)"),
          tableOutput("syll.disrib"),
          plotOutput("letter.plot")
        ),
        tabPanel("Word list",
          tableOutput("word.list")
        ),
        tabPanel("Lexical diversity",
          h5("Summary"),
          tableOutput("lexdiv.sum"),
          h5("Details"),
          pre(textOutput("lexdiv.res")),
          value="chkLexdiv"
        ),
        tabPanel("Readability",
          h5("Summary"),
          tableOutput("readability.sum"),
          h5("Details"),
          pre(textOutput("readability.res")),
          value="chkReadability"
        ),
        tabPanel("About",
          strong('Note'),
          p('This web application is developed with',
            a("Shiny.", href="http://www.rstudio.com/shiny/", target="_blank"),
            ''),
          br(),
          strong('List of Packages Used'), br(),
          code('library(shiny)'),br(),
          code('library(koRpus)'),br(),
          br(),
          strong('Code'),
          p('Source code for this application is mostly from',
            a('koRpus: An R package for text analysis.', href='http://reaktanz.de/?c=hacking&s=koRpus', target="_blank")),
          p('The code for this web application is available at',
            a('GitHub.', href='https://github.com/mizumot/corpus', target="_blank")),
          p('If you want to run this code on your computer (in a local R session), run the code below:',
            br(),
            code('library(shiny)'),br(),
            code('runGitHub("corpus","mizumot")')
          ),
          br(),
          strong('Citation in Publications'),
          p('Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. Retrieved from http://langtest.jp'),
          br(),
          strong('Article'),
          p('Mizumoto, A., & Plonsky, L. (2015).', a("R as a lingua franca: Advantages of using R for quantitative research in applied linguistics.", href='http://applij.oxfordjournals.org/content/early/2015/06/24/applin.amv025.abstract', target="_blank"), em('Applied Linguistics,'), 'Advance online publication. doi:10.1093/applin/amv025'),
          br(),
          strong('Recommended'),
          p('To learn more about R, I suggest this excellent and free e-book (pdf),',
            a("A Guide to Doing Statistics in Second Language Research Using R,", href="http://cw.routledge.com/textbooks/9780805861853/guide-to-R.asp", target="_blank"),
            'written by Dr. Jenifer Larson-Hall.'),
          p('Also, if you are a cool Mac user and want to use R with GUI,',
            a("MacR", href="https://sites.google.com/site/casualmacr/", target="_blank"),
            'is definitely the way to go!'),
          br(),
          strong('Author'),
          p(a("Atsushi MIZUMOTO,", href="http://mizumot.com", target="_blank"),' Ph.D.',br(),
            'Professor of Applied Linguistics',br(),
            'Faculty of Foreign Language Studies /',br(),
            'Graduate School of Foreign Language Education and Research,',br(),
            'Kansai University, Osaka, Japan'),
          br(),
          a(img(src="http://i.creativecommons.org/p/mark/1.0/80x15.png"), target="_blank", href="http://creativecommons.org/publicdomain/mark/1.0/"),
          p(br())
        )
      )
    )
  )
)