changeset 0:a530fa18e9e8 draft default tip

Upload ctat tools.
author trinity_ctat
date Tue, 17 Jul 2018 11:51:50 -0400
parents
children
files ctat_star_fusion.xml test-data/StarFusion/reads_1.fq.gz test-data/StarFusion/reads_2.fq.gz tool-data/ctat_genome_resource_libs.loc.sample tool_data_table_conf.xml.sample
diffstat 5 files changed, 128 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ctat_star_fusion.xml	Tue Jul 17 11:51:50 2018 -0400
@@ -0,0 +1,99 @@
+<tool id="ctat_star_fusion" name="ctat_star_fusion" version="1.0.1" profile="17.05">
+
+    <description>Fusion-finding Pipeline Using the STAR Aligner</description>
+    <requirements> <!-- Package dependency that supplies the STAR-Fusion executable invoked by the command section. -->
+      <requirement version="1.4.0" type="package">star-fusion</requirement>
+    </requirements>
+    <!-- Error handling is done by the detect_errors attribute of the command tag instead of the stdio block below.
+    <stdio>
+      <exit_code range="1:"  level="fatal"   description="Error returned from pipeline" />
+      <regex match="Must investigate error above."
+             source="stderr"
+             level="fatal"
+             description="Unknown error encountered" />
+    </stdio>
+    -->
+    <command detect_errors="default">
+      <![CDATA[
+      STAR-Fusion
+        --genome_lib_dir '${genome_resource_lib.fields.path}'
+        --left_fq  '${left_input}'
+        --right_fq '${right_input}'
+        --output_dir subdir
+        --STAR_SortedByCoordinate
+      ]]>
+    </command> <!-- Paths are single-quoted so the shell passes them verbatim (no $, backtick or backslash expansion); results are written to ./subdir, which the outputs' from_work_dir attributes read from. -->
+
+    <inputs>
+      <param format="fastq" name="left_input" type="data" label="Left/Forward strand reads" help=""/>
+      <param format="fastq" name="right_input" type="data" label="Right/Reverse strand reads" help=""/>
+      <param name="genome_resource_lib" type="select" label="Select a reference genome">
+        <options from_data_table="ctat_genome_resource_libs">
+          <filter type="sort_by" column="1" />
+          <validator type="no_options" message="No indexes are available" />
+        </options>
+      </param>
+    </inputs> <!-- Two FASTQ datasets plus a genome library chosen from the ctat_genome_resource_libs data table; sort_by uses zero-based column 1 (name) of the value/name/path table so entries are listed by display name. -->
+
+    <outputs>
+      <data name="starfusion_log" format="txt" from_work_dir="subdir/Log.out" label="${tool.name} on ${on_string}: log"/>
+      <data name="aligned_bam" format="bam" from_work_dir="subdir/std.STAR.bam" label="${tool.name} on ${on_string}: Aligned Bam"/>
+      <data name="candidates" format="tabular" from_work_dir="subdir/star-fusion.fusion_predictions.abridged.tsv" label="${tool.name} on ${on_string}: Fusion candidates"/>
+      <data name="chimeric_junc" format="tabular" from_work_dir="subdir/std.Chimeric.out.junction" label="${tool.name} on ${on_string}: Chimeric.out.junction"/>
+    </outputs> <!-- Each dataset is collected via from_work_dir from the directory given to output_dir in the command (subdir). -->
+    <tests>
+      <test>
+        <param name="left_input" value="StarFusion/reads_1.fq.gz" />
+        <param name="right_input" value="StarFusion/reads_2.fq.gz" />
+        <!-- Disabled alternative input files:
+        <param name="left_input" value="reads.left.simPE.fq" />
+        <param name="right_input" value="reads.right.simPE.fq" />
+        -->
+        <!-- FIXME: the CTAT reference library path is now a tool parameter, but this test does not set it; it is still unclear how to locate the library for testing.
+        <param name="genome_resource_lib.fields.path" value="?????" />
+        -->
+        <!-- Disabled whole-file comparisons; the assert_contents checks below are used instead:
+        <output name="aligned_bam" file="SF_out_aligned.bam" />
+        <output name="candidates" file="SF_out_fusion_candidates.dat" />
+        <output name="chimeric_junc" file="SF_out_chimeric.junction" />
+        <output name="starfusion_log" file="SF_out.log" />
+        -->
+        <output name="starfusion_log">
+            <assert_contents>
+               <has_line_matching expression=".+" />
+               <has_line line="ALL DONE!" />
+            </assert_contents>
+        </output>
+        <output name="aligned_bam">
+            <assert_contents>
+               <has_line_matching expression=".+" />
+               <!-- Checks for the magic number bytes 1F 8B expected at the start of the BAM file. NOTE(review): the expression is not anchored, so it presumably matches these bytes anywhere in the file; confirm this is sufficient. -->
+               <!-- An earlier attempt used "\x8B\x1F" as the magic number, but that sequence merely
+                    occurred somewhere else in the file produced by the StarFusion test.
+               -->
+               <has_text_matching expression="\x1F\x8B" />
+            </assert_contents>
+        </output>
+        <output name="candidates">
+            <assert_contents>
+               <has_line_matching expression=".+" />
+               <has_line line="#fusion_name&#009;JunctionReads&#009;SpanningFrags&#009;Splice_type&#009;LeftGene&#009;LeftBreakpoint&#009;RightGene&#009;RightBreakpoint&#009;Long_double_anchor_support" />
+            </assert_contents>
+        </output>
+        <output name="chimeric_junc">
+            <assert_contents>
+               <has_line_matching expression=".+" />
+               <has_line_matching expression="^chr20.*" />
+            </assert_contents>
+        </output>
+      </test>
+    </tests>
+    <help>
+.. class:: infomark
+
+STAR-Fusion is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads. STAR-Fusion further processes the output generated by the STAR aligner to map junction reads and spanning reads to a reference annotation set. Please read more here_.
+
+.. _here: https://github.com/STAR-Fusion/STAR-Fusion/wiki
+
+    </help>
+</tool>
Binary file test-data/StarFusion/reads_1.fq.gz has changed
Binary file test-data/StarFusion/reads_2.fq.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/ctat_genome_resource_libs.loc.sample	Tue Jul 17 11:51:50 2018 -0400
@@ -0,0 +1,15 @@
+# This file lists the locations of CTAT Genome Resource Libraries
+# Usually there will only be one library, but it is conceivable
+# that there could be multiple libraries.
+# This file format is as follows
+# (white space characters are TAB characters):
+#
+#<value>    <name>  <path>
+# value is a unique id
+# name is the display name
+# path is the directory where the genome resource lib files are stored
+#
+#ctat_genome_resource_libs.loc could look like:
+#
+#GRCh38_v27_CTAT_lib_Feb092018	CTAT_GenomeResourceLib_GRCh38_v27_CTAT_lib_Feb092018	/path/to/ctat/genome/resource/lib/directory
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Jul 17 11:51:50 2018 -0400
@@ -0,0 +1,14 @@
+<tables> <!-- One data table per .loc file; every table exposes the columns value, name and path. -->
+    <table allow_duplicate_entries="False" comment_char="#" name="ctat_genome_resource_libs">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_genome_resource_libs.loc"/>
+    </table>
+    <table allow_duplicate_entries="False" comment_char="#" name="ctat_centrifuge_indexes">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_centrifuge_indexes.loc"/>
+    </table>
+    <table allow_duplicate_entries="False" comment_char="#" name="ctat_lncrna_annotations">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_lncrna_annotations.loc"/>
+    </table>
+</tables>