Mercurial > repos > iuc > nanocompore_sampcomp
diff sampcomp.xml @ 0:557cf45ff2c8 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nanocompore commit 8fa5ff35b45c2b046c7f4800410cf39cb89a299a"
author | iuc |
---|---|
date | Tue, 05 May 2020 06:57:10 -0400 |
parents | |
children | c43f4b80f5a9 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sampcomp.xml Tue May 05 06:57:10 2020 -0400
@@ -0,0 +1,226 @@
+<?xml version="1.0"?>
+<tool id="nanocompore_sampcomp" name="SampComp" version="@TOOL_VERSION@+@WRAPPER_VERSION@">
+    <description>to compare Nanopolished datasets</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <version_command><![CDATA[nanocompore --version]]></version_command>
+    <!-- Symlinks every replicate and its index to a uniform name, runs "nanocompore sampcomp", then archives the three out_SampComp.db.* files. -->
+    <command detect_errors="exit_code"><![CDATA[
+        ## initialize
+        ## requires a minimum of 3 threads
+        threads=\$((3 > \${GALAXY_SLOTS:-3} ? 3 : \${GALAXY_SLOTS:-3})) &&
+        ## same name pattern required
+        #for $i, $current in enumerate($file1_rep)
+            ln -s '$current.file' 'sample_1_${i}.tsv' &&
+            ln -s '$current.index' 'sample_1_${i}.tsv.idx' &&
+        #end for
+        #for $i, $current in enumerate($file2_rep)
+            ln -s '$current.file' 'sample_2_${i}.tsv' &&
+            ln -s '$current.index' 'sample_2_${i}.tsv.idx' &&
+        #end for
+
+        ## run
+        nanocompore sampcomp
+        ## required
+        --label1 '$label1'
+        #set files1 = ','.join(['sample_1_' + str(item) + '.tsv' for item in range(len($file1_rep))])
+        --file_list1 '$files1'
+        --label2 '$label2'
+        #set files2 = ','.join(['sample_2_' + str(item) + '.tsv' for item in range(len($file2_rep))])
+        --file_list2 '$files2'
+        --fasta '$fasta'
+        ## optional
+        #if $ap.bed
+            --bed '$ap.bed'
+        #end if
+        --max_invalid_kmers_freq $ap.max_invalid_kmers_freq
+        --min_coverage $ap.min_coverage
+        --min_ref_length $ap.min_ref_length
+        --comparison_methods '$ap.comparison_methods'
+        --sequence_context $ap.sequence_context
+        --sequence_context_weights '$ap.sequence_context_weights'
+        --pvalue_thr $ap.pvalue_thr
+        $ap.logit
+        $ap.allow_warnings
+        --outpath 'results'
+        --nthreads \$threads
+        --log_level debug
+
+        ## bundle the database files into the single archive collected as out_db
+        && tar -cf 'results/db.tar' 'results/out_SampComp.db.bak' 'results/out_SampComp.db.dir' 'results/out_SampComp.db.dat'
+    ]]></command>
+    <inputs>
+        <param argument="--label1" type="text" value="Condition 1" label="Set label of first condition"/>
+        <repeat name="file1_rep" min="1" title="First condition files">
+            <param name="file" type="data" format="tabular" label="Select NanopolishComp file" help="(--file_list1)"/>
+            <param name="index" type="data" format="tabular" label="Select index file"/>
+        </repeat>
+        <param argument="--label2" type="text" value="Condition 2" label="Set label of second condition"/>
+        <repeat name="file2_rep" min="1" title="Second condition files">
+            <param name="file" type="data" format="tabular" label="Select NanopolishComp file" help="(--file_list2)"/>
+            <param name="index" type="data" format="tabular" label="Select index file"/>
+        </repeat>
+        <param argument="--fasta" type="data" format="fasta" label="Select mapping file"/>
+
+        <section name="ap" title="Advanced parameters">
+            <param argument="--bed" type="data" format="bed" optional="true" label="Select mapping file with annotation of transcriptome"/>
+            <param argument="--max_invalid_kmers_freq" type="float" value="0.1" min="0.0" max="1.0" label="Set max frequency of invalid kmers"/>
+            <param argument="--min_coverage" type="integer" value="30" min="0" label="Set minimum coverage required in each condition to do the comparison"/>
+            <param argument="--min_ref_length" type="integer" value="100" min="0" label="Set minimum length of a reference transcript to include it in the analysis"/>
+            <!-- optional="false": an empty multi-select would otherwise reach the command line as 'None' -->
+            <param argument="--comparison_methods" type="select" multiple="true" optional="false" label="Select comparison methods">
+                <option value="GMM" selected="true">GMM</option>
+                <option value="KS" selected="true">KS</option>
+                <option value="TT">TT</option>
+                <option value="MW">MW</option>
+            </param>
+            <param argument="--sequence_context" type="integer" value="0" min="0" max="4" label="Set sequence context for combining p-values"/>
+            <param argument="--sequence_context_weights" type="select" label="Select type of weights to use for combining p-values">
+                <option value="uniform" selected="true">Uniform</option>
+                <option value="harmonic">Harmonic</option>
+            </param>
+            <param argument="--pvalue_thr" type="float" value="0.05" min="0.0" max="1.0" label="Set adjusted p-value threshold for reporting significant sites"/>
+            <param argument="--logit" type="boolean" truevalue="--logit" falsevalue="" label="Use logistic regression testing also when all conditions have replicates?"/>
+            <!-- checking this box passes the allow_warnings flag, i.e. runtime warnings are tolerated instead of raising an error -->
+            <param argument="--allow_warnings" type="boolean" truevalue="--allow_warnings" falsevalue="" label="Allow runtime warnings during the ANOVA tests without raising an error?"/>
+            <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)">
+                <option value="results" selected="true">Results</option>
+                <option value="shift" selected="true">Shift stats</option>
+                <option value="db" selected="true">Database (*.db.dir, *.db.bak, *.db.dat)</option>
+                <option value="log">Log</option>
+            </param>
+        </section>
+    </inputs>
+    <!-- every output is collected from the results directory only when its key is selected in the "out" parameter of section "ap" -->
+    <outputs>
+        <data name="out_results" format="tabular" from_work_dir="results/out_nanocompore_results.tsv" label="${tool.name} on ${on_string}: Results">
+            <filter>'results' in ap['out']</filter>
+        </data>
+        <data name="out_shift" format="tabular" from_work_dir="results/out_nanocompore_shift_stats.tsv" label="${tool.name} on ${on_string}: Shift stats">
+            <filter>'shift' in ap['out']</filter>
+        </data>
+        <data name="out_db" format="tar" from_work_dir="results/db.tar" label="${tool.name} on ${on_string}: Database">
+            <filter>'db' in ap['out']</filter>
+        </data>
+        <data name="out_log" format="txt" from_work_dir="results/out_SampComp.log" label="${tool.name} on ${on_string}: log">
+            <filter>'log' in ap['out']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- tabs inside the regular expressions are written as &#009; because literal tabs in attribute values are normalized to spaces by XML parsers -->
+        <!-- #1 -->
+        <test expect_num_outputs="3">
+            <repeat name="file1_rep">
+                <param name="file" value="sample1.tsv"/>
+                <param name="index" value="sample1.tsv.idx"/>
+            </repeat>
+            <repeat name="file2_rep">
+                <param name="file" value="sample2.tsv"/>
+                <param name="index" value="sample2.tsv.idx"/>
+            </repeat>
+            <param name="fasta" value="reference.fa"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_n_lines n="3"/>
+                    <has_text_matching expression="pos&#009;chr.+"/>
+                    <has_text_matching expression="22102&#009;NA.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_shift">
+                <assert_contents>
+                    <has_n_lines n="3"/>
+                    <has_text_matching expression="ref\_id&#009;pos.+"/>
+                    <has_text_matching expression="chr&#009;22102.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_db">
+                <assert_contents>
+                    <has_size value="5408256"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #2 -->
+        <test expect_num_outputs="4">
+            <param name="label1" value="C1"/>
+            <repeat name="file1_rep">
+                <param name="file" value="sample1.tsv"/>
+                <param name="index" value="sample1.tsv.idx"/>
+            </repeat>
+            <param name="label2" value="C2"/>
+            <repeat name="file2_rep">
+                <param name="file" value="sample2.tsv"/>
+                <param name="index" value="sample2.tsv.idx"/>
+            </repeat>
+            <param name="fasta" value="reference.fa"/>
+            <section name="ap">
+                <param name="max_invalid_kmers_freq" value="0.2"/>
+                <param name="min_coverage" value="31"/>
+                <param name="min_ref_length" value="101"/>
+                <param name="comparison_methods" value="GMM,KS,TT,MW"/>
+                <param name="sequence_context" value="1"/>
+                <param name="sequence_context_weights" value="harmonic"/>
+                <param name="pvalue_thr" value="0.06"/>
+                <param name="logit" value="true"/>
+                <param name="allow_warnings" value="true"/>
+                <param name="out" value="results,shift,db,log"/>
+            </section>
+            <output name="out_results">
+                <assert_contents>
+                    <has_n_lines n="3"/>
+                    <has_text_matching expression="pos&#009;chr.+"/>
+                    <has_text_matching expression="22102&#009;NA.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_shift">
+                <assert_contents>
+                    <has_n_lines n="3"/>
+                    <has_text_matching expression="ref\_id&#009;pos.+"/>
+                    <has_text_matching expression="chr&#009;22102.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_db">
+                <assert_contents>
+                    <has_size value="5410304"/>
+                </assert_contents>
+            </output>
+            <output name="out_log">
+                <assert_contents>
+                    <has_n_lines n="31"/>
+                    <has_text_matching expression=".+package\_name.+"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+SampComp provides a very flexible analysis framework with a few mandatory options and many optional parameters.
+
+First, SampComp parses the sample eventalign collapse files and then the observed results are piled-up per reference at position level. In a second step, positions are compared using various statistical methods and the statistics are stored in a shelve DBM database containing the results for all positions with sufficient coverage.
+
+**Input**
+
+SampComp requires sample files obtained with NanopolishComp EventalignCollapse as explained before (see data preparation) for both the control and the experimental conditions. 2 conditions are expected and at least 2 replicates per condition are highly recommended.
+
+A transcriptome FASTA reference file is required to extract kmer sequences during the analyses. The reference has to be the same as the one used at the mapping step.
+
+Optionally, a BED file containing the genome annotations corresponding to the transcriptome fasta file can be provided. In that case Nanocompore will also convert the transcript coordinates into the genome space.
+
+**Output**
+
+The database object returned by Sampcomp is a Python GDBM object database indexed by reference id and can be used with SampCompDB.
+
+.. class:: infomark
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool> \ No newline at end of file