diff sampcomp.xml @ 0:557cf45ff2c8 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nanocompore commit 8fa5ff35b45c2b046c7f4800410cf39cb89a299a"
author iuc
date Tue, 05 May 2020 06:57:10 -0400
parents
children c43f4b80f5a9
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sampcomp.xml	Tue May 05 06:57:10 2020 -0400
@@ -0,0 +1,220 @@
+<?xml version="1.0"?>
+<tool id="nanocompore_sampcomp" name="SampComp" version="@TOOL_VERSION@+@WRAPPER_VERSION@">
+    <description>to compare Nanopolished datasets</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <version_command><![CDATA[nanocompore --version]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        ## initialize
+        ## requires a minimum of 3 threads
+        threads=\$((3 > \${GALAXY_SLOTS:-3} ? 3 : \${GALAXY_SLOTS:-3})) &&
+        ## same name pattern required
+        #for $i, $current in enumerate($file1_rep)
+            ln -s '$current.file' 'sample_1_${i}.tsv' &&
+            ln -s '$current.index' 'sample_1_${i}.tsv.idx' &&
+        #end for
+        #for $i, $current in enumerate($file2_rep)
+            ln -s '$current.file' 'sample_2_${i}.tsv' &&
+            ln -s '$current.index' 'sample_2_${i}.tsv.idx' &&
+        #end for
+
+        ## run
+        nanocompore sampcomp
+        ## required
+        --label1 '$label1'
+        #set files1 = ','.join(['sample_1_' + str(item) + '.tsv' for item in range(len($file1_rep))])
+        --file_list1 '$files1'
+        --label2 '$label2'
+        #set files2 = ','.join(['sample_2_' + str(item) + '.tsv' for item in range(len($file2_rep))])
+        --file_list2 '$files2'
+        --fasta '$fasta'
+        ## optional
+        #if $ap.bed
+            --bed '$ap.bed'
+        #end if
+        --max_invalid_kmers_freq $ap.max_invalid_kmers_freq
+        --min_coverage $ap.min_coverage
+        --min_ref_length $ap.min_ref_length
+        --comparison_methods '$ap.comparison_methods'
+        --sequence_context $ap.sequence_context
+        --sequence_context_weights '$ap.sequence_context_weights'
+        --pvalue_thr $ap.pvalue_thr
+        $ap.logit
+        $ap.allow_warnings
+        --outpath 'results'
+        --nthreads \$threads
+        --log_level debug
+
+        && tar -cf 'results/db.tar' 'results/out_SampComp.db.bak' 'results/out_SampComp.db.dir' 'results/out_SampComp.db.dat'
+        ]]></command>
+    <inputs>
+        <param argument="--label1" type="text" value="Condition 1" label="Set label of first condition"/>
+        <repeat name="file1_rep" min="1" title="First condition files">
+            <param name="file" type="data" format="tabular" label="Select NanopolishComp file" help="(--file_list1)"/>
+            <param name="index" type="data" format="tabular" label="Select index file"/>
+        </repeat>
+        <param argument="--label2" type="text" value="Condition 2" label="Set label of second condition"/>
+        <repeat name="file2_rep" min="1" title="Second condition files">
+            <param name="file" type="data" format="tabular" label="Select NanopolishComp file" help="(--file_list2)"/>
+            <param name="index" type="data" format="tabular" label="Select index file"/>
+        </repeat>
+        <param argument="--fasta" type="data" format="fasta" label="Select mapping file"/>
+
+        <section name="ap" title="Advanced parameters">
+            <param argument="--bed" type="data" format="bed" optional="true" label="Select mapping file with annotation of transcriptome"/>
+            <param argument="--max_invalid_kmers_freq" type="float" value="0.1" min="0.0" max="1.0" label="Set max fequency of invalid kmers"/>
+            <param argument="--min_coverage" type="integer" value="30" min="0" label="Set minimum coverage required in each condition to do the comparison"/>
+            <param argument="--min_ref_length" type="integer" value="100" min="0" label="Set minimum length of a reference transcript to include it in the analysis"/>
+            <param argument="--comparison_methods" type="select" multiple="true" label="Select comparison methods">
+                <option value="GMM" selected="true">GMM</option>
+                <option value="KS" selected="true">KS</option>
+                <option value="TT">TT</option>
+                <option value="MW">MW</option>
+            </param>
+            <param argument="--sequence_context" type="integer" value="0" min="0" max="4" label="Set sequence context for combining p-values"/>
+            <param argument="--sequence_context_weights" type="select" label="Select type of weights to use for combining p-values">
+                <option value="uniform" selected="true">Uniform</option>
+                <option value="harmonic">Harmonic</option>
+            </param>
+            <param argument="--pvalue_thr" type="float" value="0.05" min="0.0" max="1.0" label="Set adjusted p-value threshold for reporting significant sites"/>
+            <param argument="--logit" type="boolean" truevalue="--logit" falsevalue="" label="Use logistic regression testing also when all conditions have replicates?"/>
+            <param argument="--allow_warnings" type="boolean" truevalue="--allow_warnings" falsevalue="" label="Should runtime warnings during the ANOVA tests raise an error?"/>
+            <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)">
+                <option value="results" selected="true">Results</option>
+                <option value="shift" selected="true">Shift stats</option>
+                <option value="db" selected="true">Database (*.db.dir, *.db.bak, *.db.dat)</option>
+                <option value="log">Log</option>
+            </param>
+        </section>
+    </inputs>
+    <outputs>
+         <data name="out_results" format="tabular" from_work_dir="results/out_nanocompore_results.tsv" label="${tool.name} on ${on_string}: Results">
+            <filter>'results' in ap['out']</filter>
+        </data>
+         <data name="out_shift" format="tabular" from_work_dir="results/out_nanocompore_shift_stats.tsv" label="${tool.name} on ${on_string}: Shift stats">
+            <filter>'shift' in ap['out']</filter>
+        </data>
+        <data name="out_db" format="tar" from_work_dir="results/db.tar" label="${tool.name} on ${on_string}: Database">
+            <filter>'db' in ap['out']</filter>
+        </data>
+        <data name="out_log" format="txt" from_work_dir="results/out_SampComp.log" label="${tool.name} on ${on_string}: log">
+            <filter>'log' in ap['out']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- #1 -->
+        <test expect_num_outputs="3">
+            <repeat name="file1_rep">
+                <param name="file" value="sample1.tsv"/>
+                <param name="index" value="sample1.tsv.idx"/>
+            </repeat>
+            <repeat name="file2_rep">
+                <param name="file" value="sample2.tsv"/>
+                <param name="index" value="sample2.tsv.idx"/>
+            </repeat>
+            <param name="fasta" value="reference.fa"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_n_lines n="3"/>
+                    <has_text_matching expression="pos&#09;chr.+"/>
+                    <has_text_matching expression="22102&#09;NA.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_shift">
+                <assert_contents>
+                    <has_n_lines n="3"/>
+                    <has_text_matching expression="ref\_id&#09;pos.+"/>
+                    <has_text_matching expression="chr&#09;22102.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_db">
+                <assert_contents>
+                    <has_size value="5408256"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #2 -->
+        <test expect_num_outputs="4">
+            <param name="label1" value="C1"/>
+            <repeat name="file1_rep">
+                <param name="file" value="sample1.tsv"/>
+                <param name="index" value="sample1.tsv.idx"/>
+            </repeat>
+            <param name="label2" value="C2"/>
+            <repeat name="file2_rep">
+                <param name="file" value="sample2.tsv"/>
+                <param name="index" value="sample2.tsv.idx"/>
+            </repeat>
+            <param name="fasta" value="reference.fa"/>
+            <section name="ap">
+                <param name="max_invalid_kmers_freq" value="0.2"/>
+                <param name="min_coverage" value="31"/>
+                <param name="min_ref_length" value="101"/>
+                <param name="comparison_methods" value="GMM,KS,TT,MW"/>
+                <param name="sequence_context" value="1"/>
+                <param name="sequence_context_weights" value="harmonic"/>
+                <param name="pvalue_thr" value="0.06"/>
+                <param name="logit" value="true"/>
+                <param name="allow_warnings" value="true"/>
+                <param name="out" value="results,shift,db,log"/>
+            </section>
+            <output name="out_results">
+                <assert_contents>
+                    <has_n_lines n="3"/>
+                    <has_text_matching expression="pos&#09;chr.+"/>
+                    <has_text_matching expression="22102&#09;NA.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_shift">
+                <assert_contents>
+                    <has_n_lines n="3"/>
+                    <has_text_matching expression="ref\_id&#09;pos.+"/>
+                    <has_text_matching expression="chr&#09;22102.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_db">
+                <assert_contents>
+                    <has_size value="5410304"/>
+                </assert_contents>
+            </output>
+            <output name="out_log">
+                <assert_contents>
+                    <has_n_lines n="31"/>
+                    <has_text_matching expression=".+package\_name.+"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+SampComp provides a very flexible analysis framework with a few mandatory options and many optional parameters.
+
+First, SampComp parses the sample eventalign collapse files and then the observed results are piled-up per reference at position level. In a second time, positions are compared using various statistical methods and the statistics are stored in a shelve DBM database containing the results for all positions with sufficient coverage.
+
+**Input**
+
+SampComp requires sample files obtained with NanopolishComp EventalignCollapse as explained before (see data preparation) for both the control and the experimental conditions. 2 conditions are expected and at least 2 replicates per conditions are highly recommended.
+
+A transcriptome FASTA reference file is required to extract kmer sequences during the analyses. The reference has to be the same as the one used at the mapping step.
+
+Optionally, a BED file containing the genome annotations corresponding to the transcriptome fasta file can be provided. In that case Nanocompore will also convert the transcript coordinates into the genome space.
+
+**Output**
+
+The database object returned by Sampcomp is a Python GDBM object database indexed by reference id and can be be used with SampCompDB.
+
+.. class:: infomark
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file