Mercurial > repos > namhsuya > te_finder
diff TEfinder.xml @ 0:b81a83c743d3 draft default tip
Uploaded
author | namhsuya |
---|---|
date | Tue, 09 Aug 2022 06:58:49 +0000 |
parents | |
children |
line wrap: on
line diff
<tool id="te_finder_1" name="TEfinder" version="1.0.1" profile="21.05">
    <description>A bioinformatics tool for detecting novel transposable element insertions</description>

    <!-- Packages that must be resolvable in the job environment for this tool. -->
    <requirements>
        <requirement type="package" version="1.15.1">samtools</requirement>
        <requirement type="package" version="2.30.0">bedtools</requirement>
        <requirement type="package" version="2.27.4">picard</requirement>
        <requirement type="package" version="3.4">grep</requirement>
        <requirement type="package" version="1.07.1">bc</requirement>
    </requirements>

    <!--
        Runs the TEfinder script that ships next to this wrapper.
        * GALAXY_SLOTS is wrapped in double quotes so that the shell expands it;
          inside single quotes the literal text would be passed to -threads.
        * Integer parameters are validated by Galaxy and are emitted unquoted.
        * The GTF output path and "-out gtf" are only passed when the user selects
          GTF; otherwise the flag is omitted and only the BED path is supplied.
    -->
    <command>
        <![CDATA[
        '$__tool_directory__/TEfinder'
            -fa '$required_inputs.FastaFile'
            -alignment '$required_inputs.alignmentFile'
            -gtf '$required_inputs.TransposonsInGenome'
            -te '$required_inputs.TransposonsToSearch'
            -bamo '$discordantreads'
            -bedo '$bteinsertion'
            -threads "\${GALAXY_SLOTS:-1}"
            -fis ${advanced_options.FragmentInsertSize}
            -md ${advanced_options.MaxDistanceForMerge}
            -k ${advanced_options.MaxTSDLength}
            #if str($advanced_options.OutFormat) == "gtf"
                -gtfo '$gteinsertion'
                -out ${advanced_options.OutFormat}
            #end if
        ]]>
    </command>

    <inputs>
        <!-- Datasets that every run needs. -->
        <section name="required_inputs" title="Required Inputs" expanded="true">
            <param name="FastaFile" type="data" format="fasta" label="Select reference genome FASTA index (FA/FASTA file)" />
            <param name="alignmentFile" type="data" format="bam" label="Select sample reads aligned to reference genome (BAM/SAM file)" />
            <param name="TransposonsInGenome" type="data" format="gtf" label="Select reference genome TE annotation (GFF/GTF file)" />
            <!-- "txt" is the Galaxy datatype extension for plain text; the tests upload this file as txt. -->
            <param name="TransposonsToSearch" type="data" format="txt" label="Select TE names (single column text file)" />
        </section>

        <!-- Advanced Options: tuning parameters with defaults, collapsed in the form. -->
        <section name="advanced_options" title="Advanced Options" expanded="false">
            <param name="FragmentInsertSize" argument="-fis" type="integer" min="0" value="400" label="Short-read sequencing fragment insert size [400]" />
            <param name="MaxDistanceForMerge" argument="-md" type="integer" min="0" value="150" label="Maximum distance between reads for bedtools merge" />
            <param name="MaxTSDLength" argument="-k" type="integer" min="0" value="20" label="Maximum TE target site duplication (TSD) length" />
            <param name="OutFormat" argument="-out" type="select" display="radio" label="Select output format as GTF [BED]" help="See help below for more details">
                <option value="gtf">Other available format is GTF (-out gtf)</option>
                <option value="bed" selected="true">Default format is BED</option>
            </param>
        </section>
    </inputs>

    <outputs>
        <data format="bed" name="bteinsertion" label="${tool.name} on ${on_string}: BED" />
        <!-- Only created when the user asked for GTF output. -->
        <data format="gtf" name="gteinsertion" label="${tool.name} on ${on_string}: GTF">
            <filter>advanced_options['OutFormat'] == 'gtf'</filter>
        </data>
        <data format="bam" name="discordantreads" label="${tool.name} on ${on_string}: discordant reads (BAM)" />
    </outputs>

    <tests>
        <!-- Test for the most simple case for BED output : Running TEfinder with a .bam file and a .fasta file -->
        <test expect_num_outputs="2">
            <!-- TEfinder commands: TEfinder -alignment sample.bam -fa reference.fa -gtf TEs.gtf -te List_of_TEs.txt -->
            <section name="required_inputs">
                <param name="FastaFile" ftype="fasta" value="reference.fa" />
                <param name="alignmentFile" ftype="bam" value="sample.bam" />
                <param name="TransposonsInGenome" ftype="gtf" value="TEs.gtf" />
                <param name="TransposonsToSearch" ftype="txt" value="List_of_TEs.txt" />
            </section>
            <section name="advanced_options">
                <param name="OutFormat" value="bed" />
            </section>
            <output name="bteinsertion" file="TEinsertions.bed" ftype="bed" />
            <output name="discordantreads" file="DiscordantReads.bam" ftype="bam" />
        </test>

        <!-- Test for the GTF output -->
        <test expect_num_outputs="3">
            <!-- TEfinder commands: TEfinder -alignment sample.bam -fa reference.fa -gtf TEs.gtf -te List_of_TEs.txt -out gtf -->
            <section name="required_inputs">
                <param name="FastaFile" ftype="fasta" value="reference.fa" />
                <param name="alignmentFile" ftype="bam" value="sample.bam" />
                <param name="TransposonsInGenome" ftype="gtf" value="TEs.gtf" />
                <param name="TransposonsToSearch" ftype="txt" value="List_of_TEs.txt" />
            </section>
            <section name="advanced_options">
                <param name="OutFormat" value="gtf" />
            </section>
            <output name="bteinsertion" file="TEinsertions.bed" ftype="bed" />
            <output name="gteinsertion" file="TEinsertions.gtf" ftype="gtf" />
            <output name="discordantreads" file="DiscordantReads.bam" ftype="bam" />
        </test>
    </tests>

    <help>
        <![CDATA[
A bioinformatics tool for detecting novel transposable element insertions

Authors: Vista Sohrab & Dilay Hazal Ayhan

TEfinder uses discordant reads to detect novel transposable element insertion events in paired-end sample sequencing data.

**Output files**::

    TE_insertions.bed contains identified TE insertion events in sample (in the final column, FILTER attribute with "PASS" refers to high confidence insertion events while instances labeled as "in_repeat", "weak_evidence", "strand bias" or a combination of these three labels indicate less confident insertion events)

    TE_insertions.gtf is provided with the same information as the BED file if using -out GTF

    DiscordantReads.bam contains all discordant reads that have been identified based on the TEs of interest that have been submitted to TEfinder

**Note**::

    Modifying the maximum TSD length (-k) could be useful if there is an unexpected number of insertion events identified with the default parameter. The optimal maximum TSD length can vary across datasets.
    Modifying the fragment insert size (-fis) based on the sequencing library preparation can be useful.

        ]]>
    </help>

    <citations>
        <citation type="doi">10.5281/zenodo.4479946</citation>
    </citations>

</tool>