Mercurial > repos > trinity_ctat > ctat_star_fusion
changeset 0:a530fa18e9e8 draft default tip
Upload ctat tools.
author | trinity_ctat |
---|---|
date | Tue, 17 Jul 2018 11:51:50 -0400 |
parents | |
children | |
files | ctat_star_fusion.xml test-data/StarFusion/reads_1.fq.gz test-data/StarFusion/reads_2.fq.gz tool-data/ctat_genome_resource_libs.loc.sample tool_data_table_conf.xml.sample |
diffstat | 5 files changed, 128 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ctat_star_fusion.xml Tue Jul 17 11:51:50 2018 -0400 @@ -0,0 +1,99 @@ +<tool id="ctat_star_fusion" name="ctat_star_fusion" version="1.0.1" profile="17.05"> + + <description>Fusion-finding Pipeline Using the STAR Aligner</description> + <requirements> + <requirement type="package" version="1.4.0">star-fusion</requirement> + </requirements> + <!-- Using command's detect_errors instead of below. + <stdio> + <exit_code range="1:" level="fatal" description="Error returned from pipeline" /> + </stdio> + <regex match="Must investigate error above." + source="stderr" + level="fatal" + description="Unknown error encountered" /> + --> + <command detect_errors="default"> + <![CDATA[ + STAR-Fusion + --genome_lib_dir "${genome_resource_lib.fields.path}" + --left_fq "${left_input}" + --right_fq "${right_input}" + --output_dir subdir + --STAR_SortedByCoordinate + ]]> + </command> + + <inputs> + <param format="fastq" name="left_input" type="data" label="Left/Forward strand reads" help=""/> + <param format="fastq" name="right_input" type="data" label="Right/Reverse strand reads" help=""/> + <param name="genome_resource_lib" type="select" label="Select a reference genome"> + <options from_data_table="ctat_genome_resource_libs"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available" /> + </options> + </param> + </inputs> + + <outputs> + <data format="txt" name="starfusion_log" label="${tool.name} on ${on_string}: log" from_work_dir="subdir/Log.out"/> + <data format="bam" name="aligned_bam" label="${tool.name} on ${on_string}: Aligned Bam" from_work_dir="subdir/std.STAR.bam"/> + <data format="tabular" name="candidates" label="${tool.name} on ${on_string}: Fusion candidates" from_work_dir="subdir/star-fusion.fusion_predictions.abridged.tsv"/> + <data format="tabular" name="chimeric_junc" label="${tool.name} on ${on_string}: Chimeric.out.junction" 
from_work_dir="subdir/std.Chimeric.out.junction"/> + </outputs> + <tests> + <test> + <param name="left_input" value="StarFusion/reads_1.fq.gz" /> + <param name="right_input" value="StarFusion/reads_2.fq.gz" /> + <!-- + <param name="left_input" value="reads.left.simPE.fq" /> + <param name="right_input" value="reads.right.simPE.fq" /> + --> + <!-- FIX - now that we added the CTAT ref lib path as a parameter, how do we find it for testing? + <param name="genome_resource_lib.fields.path" value="?????" /> + --> + <!-- + <output name="aligned_bam" file="SF_out_aligned.bam" /> + <output name="candidates" file="SF_out_fusion_candidates.dat" /> + <output name="chimeric_junc" file="SF_out_chimeric.junction" /> + <output name="starfusion_log" file="SF_out.log" /> + --> + <output name="starfusion_log"> + <assert_contents> + <has_line_matching expression=".+" /> + <has_line line="ALL DONE!" /> + </assert_contents> + </output> + <output name="aligned_bam"> + <assert_contents> + <has_line_matching expression=".+" /> + <!-- The following checks for the magic number at the start of the bam file --> + <!-- At first I thought "\x8B\x1F" was the number, but it turns out the file + produced by the StarFusion test had that sequence in the file somewhere else. + --> + <has_text_matching expression="\x1F\x8B" /> + </assert_contents> + </output> + <output name="candidates"> + <assert_contents> + <has_line_matching expression=".+" /> + <has_line line="#fusion_name	JunctionReads	SpanningFrags	Splice_type	LeftGene	LeftBreakpoint	RightGene	RightBreakpoint	Long_double_anchor_support" /> + </assert_contents> + </output> + <output name="chimeric_junc"> + <assert_contents> + <has_line_matching expression=".+" /> + <has_line_matching expression="^chr20.*" /> + </assert_contents> + </output> + </test> + </tests> + <help> +.. class:: infomark + +STAR-Fusion is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). 
STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads. STAR-Fusion further processes the output generated by the STAR aligner to map junction reads and spanning reads to a reference annotation set. Please read more here_. + +.. _here: https://github.com/STAR-Fusion/STAR-Fusion/wiki + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/ctat_genome_resource_libs.loc.sample Tue Jul 17 11:51:50 2018 -0400 @@ -0,0 +1,15 @@ +# This file lists the locations of CTAT Genome Resource Libraries +# Usually there will only be one library, but it is conceivable +# that there could be multiple libraries. +# This file format is as follows +# (white space characters are TAB characters): +# +#<value> <name> <path> +# value is a unique id +# name is the display name +# path is the directory where the genome resource lib files are stored +# +#ctat_genome_resource_libs.loc could look like: +# +#GRCh38_v27_CTAT_lib_Feb092018 CTAT_GenomeResourceLib_GRCh38_v27_CTAT_lib_Feb092018 /path/to/ctat/genome/resource/lib/directory +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Jul 17 11:51:50 2018 -0400 @@ -0,0 +1,14 @@ +<tables> + <table name="ctat_genome_resource_libs" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, path</columns> + <file path="tool-data/ctat_genome_resource_libs.loc" /> + </table> + <table name="ctat_centrifuge_indexes" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, path</columns> + <file path="tool-data/ctat_centrifuge_indexes.loc" /> + </table> + <table name="ctat_lncrna_annotations" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, path</columns> + <file path="tool-data/ctat_lncrna_annotations.loc" /> + </table> +</tables>