comparison pmids_to_pubtator_matrix.xml @ 0:69714f06f18b draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
author iuc
date Wed, 24 Mar 2021 08:33:56 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:69714f06f18b
1 <tool id="pmids_to_pubtator_matrix" name="PMIDs to PubTator" version="@VERSION@" license="MIT">
2 <description>binary matrix</description>
3 <macros>
4 <import>macros.xml</import> <!-- macros.xml is expected to define the @VERSION@ token used in the tool version and the "citations" macro expanded at the end of this tool (TODO confirm against macros.xml) -->
5 </macros>
6 <requirements> <!-- R packages, pinned by version, made available to the Rscript call in the command section; listed alphabetically -->
7 <requirement version="2.0.3" type="package">r-argparse</requirement>
8 <requirement version="1.98_1.2" type="package">r-rcurl</requirement>
9 <requirement version="1.5.3" type="package">r-stringi</requirement>
10 <requirement version="1.4.0" type="package">r-stringr</requirement>
11 </requirements>
12 <command detect_errors="exit_code"><![CDATA[
13 Rscript
14 '${__tool_directory__}/pmids_to_pubtator_matrix.R'
15 --input '${input}'
16 --output '${output}'
17 --number '${number}'
18 ${byid}
19 --categories
20 #for $selected in $categories
21 '${selected}'
22 #end for
23 ]]>
24 </command>
25 <inputs>
26 <param argument="--input" type="data" format="tabular" label="Input file with PMID IDs" />
27 <param argument="--categories" type="select" label="categories" multiple="true" optional="false" display="checkboxes"> <!-- optional="false": the command always emits "--categories", so at least one category has to be selected -->
28 <option value="Gene">Genes</option>
29 <option value="Disease">Diseases</option>
30 <option value="Mutation">Mutations</option>
31 <option value="Chemical">Chemicals</option>
32 <option value="Species">Species</option>
33 </param>
34 <param argument="--byid" type="boolean" truevalue="--byid" falsevalue="" checked="false" label="If you want to find common gene IDs / mesh IDs instead of specific scientific terms." help="When enabled, extracted terms are grouped by their gene IDs / MeSH IDs (several terms are often grouped into one ID); otherwise the scientific terms are used as they are."/>
35 <param argument="--number" type="integer" value="50" label="Number of most frequent terms/IDs to extract." help="The most frequent terms/IDs are extracted per row of the input table."/> <!-- not optional: the command always passes --number '$number', so an empty field would reach the script as '' -->
36 </inputs>
37 <outputs> <!-- one tabular dataset; its path is handed to the script as --output -->
38 <data name="output" format="tabular"/>
39 </outputs>
40 <tests> <!-- all three tests reuse the same input table and only assert the number of lines in the output -->
41 <test> <!-- Gene + Mutation categories, byid and number left at their defaults -->
42 <param name="input" value="pubmed_by_queries_output" ftype="tabular"/>
43 <param name="categories" value="Gene,Mutation"/>
44 <output name="output">
45 <assert_contents>
46 <has_n_lines n="7"/>
47 </assert_contents>
48 </output>
49 </test>
50 <test> <!-- Gene + Disease categories with the --byid flag switched on -->
51 <param name="input" value="pubmed_by_queries_output" ftype="tabular"/>
52 <param name="categories" value="Gene,Disease"/>
53 <param name="byid" value="True"/>
54 <output name="output">
55 <assert_contents>
56 <has_n_lines n="7"/>
57 </assert_contents>
58 </output>
59 </test>
60 <test> <!-- Gene + Disease categories with --number lowered from the default 50 to 5 -->
61 <param name="input" value="pubmed_by_queries_output" ftype="tabular"/>
62 <param name="categories" value="Gene,Disease"/>
63 <param name="number" value="5"/>
64 <output name="output">
65 <assert_contents>
66 <has_n_lines n="7"/>
67 </assert_contents>
68 </output>
69 </test>
70 </tests>
71 <help><![CDATA[
72
73 **What it does**
74
75 The tool uses all PMIDs per row and extracts "Gene", "Disease", "Mutation", "Chemical" and "Species" terms of the corresponding abstracts,
76 using PubTator annotations. The user can choose from which categories terms should be extracted. The extracted terms are united in one
77 large binary matrix, where 0 = term not present in the abstracts of that row and 1 = term present in the abstracts of that row.
78 The user can decide if the scientific terms should be extracted and used as they are or if they should be grouped by their
79 gene IDs / MeSH IDs (several terms are often grouped into one ID). The user can also specify the number of most frequent words to extract per row.
80
81 - Input file:
82
83 Output of 'abstracts_by_pmids' tool, or tab-delimited table with columns containing PMIDs.
84 The names of the PMID columns should start with "PMID", e.g. "PMID_1", "PMID_2" etc.
85
86 - Output file:
87
88 Binary matrix in which each column represents one of the extracted terms.
89
90 -----
91
92 **Example**
93
94 - Input table:
95
96 | PMID_1 | PMID_2 | PMID_3
97 | 33565071 | 33531663 | 33528079
98 | 33377604 | 33334860 | 33277917
99
100 - Extract of output table:
101
102 | egfr | hormone | tp53 | scn8a | cacna1a | grin2a
103 | 1 | 0 | 1 | 0 | 1 | 0
104 | 1 | 1 | 1 | 1 | 0 | 1
105
106
107 ]]></help>
108 <expand macro="citations"/>
109 </tool>