diff genebed_maf_to_fasta.xml @ 0:f24a9ff28d3c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genebed_maf_to_fasta/ commit 17e2194066ca843f6b2391a9632ea9de67d39351"
author iuc
date Fri, 21 Aug 2020 15:10:13 -0400
parents
children 020c78e91cc5
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genebed_maf_to_fasta.xml	Fri Aug 21 15:10:13 2020 -0400
@@ -0,0 +1,73 @@
+<tool id="GeneBed_Maf_Fasta2" name="Stitch Gene blocks" version="1.0.1">
+    <description>given a set of coding exon intervals</description>
+    <macros>
+          <import>macros.xml</import>
+    </macros>
+    <command>
+        <![CDATA[
+        #if $maf_source_type.maf_source == "cached"
+            #set $tab = '\t'
+            echo "$maf_source_type.mafType.fields.name${tab}$maf_source_type.mafType.fields.value${tab}$maf_source_type.mafType.fields.indexed_for${tab}$maf_source_type.mafType.fields.exists_in_maf${tab}$maf_source_type.mafType.fields.path" >> ./maf_indexes.loc &&
+        #end if
+
+        python '$__tool_directory__/interval_maf_to_merged_fasta.py' 
+            #if $maf_source_type.maf_source == "user"
+                --mafSource='$maf_source_type.maf_file' --mafIndex='$maf_source_type.maf_file.metadata.maf_index'
+            #else
+                --mafSource=$maf_source_type.mafType.fields.value
+            #end if
+            --geneBED
+            --dbkey=$dbkey 
+            --species=$maf_source_type.species 
+            --interval_file='$input1' 
+            --output_file='$out_file1' 
+            --mafSourceType=$maf_source_type.maf_source 
+            --mafIndexFileDir=.
+            --overwrite_with_gaps=$overwrite_with_gaps
+        ]]>
+    </command>
+    <inputs>
+        <param name="input1" type="data" format="bed" label="Gene BED File">
+            <validator type="unspecified_build" />
+            <!-- allow 12+ columns, not as strict as possible. TODO: only list bed files with 12+ columns -->
+            <validator type="expression" message="Input must be in BED12 format.">value.metadata.columns &gt;= 12</validator> 
+        </param>
+        <expand macro="maf_source" />
+        <param name="overwrite_with_gaps" type="select" label="Split into Gapless MAF blocks" help="When set to Yes, blocks are divided around gaps appearing in any species. This will prevent gaps occurring in the interior of the sequence for an aligning species from overwriting a nucleotide found for the same position in a lower-scoring block.">
+            <option value="True" selected="true">No</option>
+            <option value="False">Yes</option>
+        </param>
+    </inputs>
+    <outputs>
+      <data format="fasta" name="out_file1" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input1" dbkey="hg38" value="maf_to_fasta-in1.bed"/>
+            <param name="maf_source" value="cached"/>
+            <param name="maf_identifier" value="test"/>
+            <param name="species" value="hg38,eulFla1,mm10"/>
+            <param name="overwrite_with_gaps" value="True"/>
+            <output name="out_file1" file="maf_to_fasta-out1.fasta" />
+        </test>
+        <test>
+            <param name="input1" dbkey="hg38" value="maf_to_fasta-in2.bed"/>
+            <param name="maf_source" value="user"/>
+            <param name="maf_file" dbkey="hg38" value="maf_to_fasta-in2.maf"/>
+            <param name="species" value="hg38,eulFla1,mm10"/>
+            <param name="overwrite_with_gaps" value="True"/>
+            <output name="out_file1" file="maf_to_fasta-out2.fasta" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+The coding sequence of genes are usually composed of several coding exons. Each of these coding exons is an individual genomic region, which when concatenated with each other constitutes the coding sequence. A single genomic region can be covered by multiple alignment blocks. In many cases it is desirable to stitch these alignment blocks together. This tool accepts a list of gene-based intervals, in the Gene BED format. For every interval it performs the following:
+
+  * finds all MAF blocks that overlap the coding regions;
+  * sorts MAF blocks by alignment score;
+  * stitches blocks together and resolves overlaps based on alignment score;
+  * outputs alignments in FASTA format.
+    </help>
+    <expand macro="citations" />
+</tool>