comparison text_to_wordmatrix.xml @ 0:0692d11af909 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
author iuc
date Wed, 24 Mar 2021 08:33:25 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0692d11af909
<!--
    Galaxy wrapper around text_to_wordmatrix.R.
    Takes a tabular file, extracts the most frequent words of each row and
    writes them as one binary word matrix (tabular output).
-->
<tool id="text_to_wordmatrix" name="Text to wordmatrix" version="@VERSION@" license="MIT">
    <description>by extracting most frequent words</description>
    <macros>
        <!-- Supplies the @VERSION@ token and the "citations" macro expanded at the end of this file. -->
        <import>macros.xml</import>
    </macros>
    <!-- R packages the wrapped script is run with; every version is pinned. -->
    <requirements>
        <requirement type="package" version="2.0.3">r-argparse</requirement>
        <requirement type="package" version="0.7.0">r-snowballc</requirement>
        <requirement type="package" version="0.3.6">r-pubmedwordcloud</requirement>
        <requirement type="package" version="1.2.0">r-semnetcleaner</requirement>
        <requirement type="package" version="0.9.3">r-textclean</requirement>
        <requirement type="package" version="1.5.3">r-stringi</requirement>
        <requirement type="package" version="1.4.0">r-stringr</requirement>
    </requirements>
    <!--
        Runs the R script shipped in the tool directory. Each boolean parameter
        expands either to its command-line switch or to an empty string,
        as defined by its truevalue/falsevalue pair in the inputs section.
    -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript
        '${__tool_directory__}/text_to_wordmatrix.R'
        --input '$input'
        --output '$output'
        --number '$number'
        $remove_num
        $lower_case
        $remove_stopwords
        $stemDoc
        $plurals
    ]]>
    </command>
    <inputs>
        <param argument="--input" type="data" format="tabular" label="Input file" />
        <param argument="--number" type="integer" value="50" min="1" max="500" label="Number of most frequent words that should be extracted per row."/>
        <param argument="--remove_num" type="boolean" truevalue="--remove_num" falsevalue="" checked="false" label="Remove any numbers in text." />
        <!--
            lower_case, remove_stopwords and plurals are checked by default and
            put their switch on the command line only when UNCHECKED.
            NOTE(review): presumably the R script treats these three switches as
            opt-outs of default-on behaviour; confirm against text_to_wordmatrix.R
            before "fixing" the truevalue/falsevalue pairs.
        -->
        <param argument="--lower_case" type="boolean" truevalue="" falsevalue="--lower_case" checked="true" label="Translate all characters to lower case." />
        <param argument="--remove_stopwords" type="boolean" truevalue="" falsevalue="--remove_stopwords" checked="true" label="Remove english stopwords" help="e.g. 'the' or 'not'" />
        <param argument="--stemDoc" type="boolean" truevalue="--stemDoc" falsevalue="" checked="false" label="Apply Porter's stemming algorithm: collapsing words to a common root to aid comparison of vocabulary." />
        <param argument="--plurals" type="boolean" truevalue="" falsevalue="--plurals" checked="true" label="Transform words in plural to their singular form." />
    </inputs>
    <outputs>
        <data format="tabular" name="output" />
    </outputs>
    <tests>
        <!-- Default settings. -->
        <test>
            <param name="input" value="pubmed_by_queries_output_abstracts" ftype="tabular"/>
            <output name="output">
                <assert_contents>
                    <has_n_lines n="7"/>
                </assert_contents>
            </output>
        </test>
        <!-- Number removal switched on; stopword removal and plural handling switched off. -->
        <test>
            <param name="input" value="pubmed_by_queries_output_abstracts" ftype="tabular"/>
            <param name="remove_num" value="True"/>
            <param name="remove_stopwords" value="False"/>
            <param name="plurals" value="False"/>
            <output name="output">
                <assert_contents>
                    <has_n_lines n="7"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

**What it does**

The tool extracts for each row the most frequent words from the text in columns starting with "ABSTRACT" or "TEXT". The extracted words from each row are united in one large binary matrix, with 0 = word not frequently occurring in text of that row and 1 = word frequently present in text of that row.

- Input table:

The output of "pubmed_by_queries" or "abstracts_by_pmids" tools, or a table with text in columns starting with "ABSTRACT" or "TEXT".

- Output table:

A binary matrix in which each column represents one of the extracted words.

-----

**Example**

- Input table:

| ABSTRACT_1 | ABSTRACT_2 | TEXT_1
| abcd def... | abcd def... | abcd def...
| abcd def... | abcd def... | abcd def...

- Extract of output table:

| chronic | seizure | child | channel | signaling | grin2a
| 1 | 1 | 1 | 1 | 1 | 1
| 0 | 1 | 0 | 1 | 0 | 1

    ]]></help>
    <expand macro="citations"/>
</tool>