Mercurial > repos > iuc > star_fusion
view star_fusion.xml @ 1:0b44456754e2 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/star_fusion commit d6a7537117d814677113ba9a8e4783a80dd228a2
author | iuc |
---|---|
date | Thu, 20 Apr 2017 08:12:14 -0400 |
parents | 93704f98f56e |
children | 137942fac417 |
line wrap: on
line source
<tool id="star_fusion" name="STAR-Fusion" version="0.5.4-3" profile="17.01">
    <!--
        Galaxy wrapper for STAR-Fusion.

        The command first builds a temporary STAR-Fusion genome library from
        the supplied FASTA, gene model and BLAST pairs files
        (prep_genome_lib.pl) and then runs STAR-Fusion, either on an existing
        STAR chimeric junction file or directly on FASTQ reads.
    -->
    <description>detect fusion genes in RNA-Seq data</description>
    <requirements>
        <!-- Bio-conda -->
        <requirement type="package" version="0.5.4">star-fusion</requirement>
    </requirements>
    <stdio>
        <!-- Messages on stderr that indicate a failed run. -->
        <regex match="command not found" source="stderr" level="fatal"/>
        <regex match="EXITING because of INPUT ERROR" source="stderr" level="fatal"/>
        <regex match="FATAL ERROR" source="stderr" level="fatal"/>
        <!-- Messages on stderr that are reported at warning level only. -->
        <regex match="Warning:" source="stderr" level="warning"/>
        <regex match="CMD:" source="stderr" level="warning"/>
        <regex match="-done creating index file:" source="stderr" level="warning"/>
        <regex match="-parsing GTF file:" source="stderr" level="warning"/>
        <regex match="-building interval tree" source="stderr" level="warning"/>
        <regex match="-parsing fusion evidence:" source="stderr" level="warning"/>
        <regex match="-mapping reads to genes" source="stderr" level="warning"/>
        <regex match="-outputting fusion candidates to file:" source="stderr" level="warning"/>
        <regex match="Process complete" source="stderr" level="warning"/>
    </stdio>
    <!-- The ampersand of the shell redirect has to be escaped to keep this file well-formed. -->
    <version_command>STAR-Fusion --version 2>&amp;1 | grep version | grep -o -E "software version.*?"</version_command>
    <command><![CDATA[
        ## 1. Ensure the blastn file is provided gzip-compressed (*.gz).
        ##    The compressed file is created in (or an already compressed
        ##    dataset is symlinked into) the job working directory, so nothing
        ##    is ever written next to the input dataset itself.
        ##    'gzip -c' with a redirect is used instead of 'gzip -k' because
        ##    older versions of gzip do not support the -k option - this
        ##    should be a universal solution.
        if file --mime-type '${blast_pairs}' | grep -q /gzip\$; then
            ln -f -s '${blast_pairs}' blast_pairs.gz ;
        else
            gzip -1 -c -- '${blast_pairs}' > blast_pairs.gz ;
        fi
        &&

        ## 2. Create reference index - using \$(pwd) is necessary, probably
        ##    because the perl script changes work directory
        ##    - @todo once write a decent STAR and STAR Fusion data manager
        prep_genome_lib.pl
            --genome_fa '${fasta_type.ownFile}'
            --gtf '${geneModel}'
            --blast_pairs "\$(pwd)/blast_pairs.gz"
            --CPU \${GALAXY_SLOTS:-1}
            --output_dir "\$(pwd)/tmp_star_fusion_genome_dir"
        &&

        ## Link in fastq files so they have appropriate extensions
        #if str($input_params.input_source) != "use_chimeric":
            #if $input_params.left_fq.is_of_type("fastq.gz"):
                #set read1 = 'input_1.fastq.gz'
            #else:
                #set read1 = 'input_1.fastq'
            #end if
            ln -f -s '${input_params.left_fq}' ${read1} &&

            #if $input_params.right_fq:
                #if $input_params.right_fq.is_of_type("fastq.gz"):
                    #set read2 = 'input_2.fastq.gz'
                #else:
                    #set read2 = 'input_2.fastq'
                #end if
                ln -f -s '${input_params.right_fq}' ${read2} &&
            #end if
        #end if

        ## 3. Run STAR-Fusion
        STAR-Fusion
            #if str($input_params.input_source) == "use_chimeric":
                --chimeric_junction '${input_params.chimeric_junction}'
            #else:
                --left_fq ${read1}
                #if $input_params.right_fq:
                    --right_fq ${read2}
                #end if
            #end if

            --genome_lib_dir "\$(pwd)/tmp_star_fusion_genome_dir"

            #if str($params.settingsType) == "full":
                --min_junction_reads $params.min_junction_reads
                --min_sum_frags $params.min_sum_frags
                --max_promiscuity $params.max_promiscuity
                --min_novel_junction_support $params.min_novel_junction_support
                --min_alt_pct_junction $params.min_alt_pct_junction
                --aggregate_novel_junction_dist $params.aggregate_novel_junction_dist
                --E $params.E
            #end if
    ]]></command>
    <inputs>
        <!-- Read source: an existing STAR chimeric junction file or raw FASTQ reads. -->
        <conditional name="input_params">
            <param name="input_source" type="select"
                   label="Use output from earlier STAR run or let STAR Fusion control running STAR">
                <option value="use_chimeric">Use output from earlier STAR</option>
                <option value="use_fastq">Let STAR Fusion control running STAR</option>
            </param>
            <when value="use_chimeric">
                <param name="chimeric_junction" type="data" format="interval" argument="--chimeric_junction"
                       label="Chimeric junction file from STAR (with STAR-Fusion settings)"/>
            </when>
            <when value="use_fastq">
                <param name="left_fq" type="data" format="fastqsanger,fastqsanger.gz" argument="--left_fq"
                       label="left.fq file"/>
                <param name="right_fq" type="data" format="fastqsanger,fastqsanger.gz" optional="true"
                       argument="--right_fq"
                       label="right.fq file (actually optional, but highly recommended)"/>
            </when>
        </conditional>

        <!-- Genome source. -->
        <conditional name="fasta_type">
            <param name="fasta_type_selector" type="select" label="Source for sequence to search">
                <option value="cached">Locally Cached sequences</option>
                <option value="history" selected="true">Sequences from your history</option>
            </param>
            <when value="cached">
                <param name="ownFile" type="select" label="Genome to search">
                    <options from_data_table="all_fasta">
                        <column name="dbkey" index="1"/>
                        <column name="name" index="2"/>
                        <column name="value" index="3"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="ownFile" type="data" format="fasta"
                       label="Select the reference genome (FASTA file)"/>
            </when>
        </conditional>

        <param name="geneModel" type="data" format="gff3,gtf"
               label="Gene model (gff3,gtf) file for splice junctions and fusion gene detection"/>
        <param name="blast_pairs" type="data" format="tabular"
               label="Result of BLAST+-blastn of the reference fasta sequence with itself"
               help="Run blastn with '-outfmt 6' or choose 'Tabular (standard 12 columns)' in the Galaxy wrapper. For human data it is recommended to obtain it from here: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/"/>

        <conditional name="params">
            <param name="settingsType" type="select" label="Settings to use"
                   help="You can use the default settings or set custom values for any STAR Fusion parameter.">
                <option value="default" selected="true">Use Defaults</option>
                <option value="full">Full parameter list</option>
            </param>
            <when value="default"/>
            <when value="full">
                <!-- Full/advanced params. -->
                <param name="min_junction_reads" type="integer" value="1"
                       label="minimum number of junction-spanning reads required."
                       argument="--min_junction_reads"/>
                <param name="min_sum_frags" type="integer" value="2"
                       label="minimum fusion support = (#junction_reads + #spanning_frags)"
                       argument="--min_sum_frags"/>
                <param name="max_promiscuity" type="integer" value="3"
                       label="maximum number of partners allowed for a given fusion"
                       argument="--max_promiscuity"/>
                <param name="min_novel_junction_support" type="integer" value="3"
                       label="minimum of 3 junction reads required if breakpoint lacks involvement of only reference junctions"
                       argument="--min_novel_junction_support"/>
                <param name="min_alt_pct_junction" type="float" value="10"
                       label="10% of the dominant isoform junction support"
                       argument="--min_alt_pct_junction"/>
                <param name="aggregate_novel_junction_dist" type="integer" value="5"
                       label="non-ref junctions within 5 are merged into single calls"
                       argument="--aggregate_novel_junction_dist"/>
                <param name="E" type="float" value="0.001"
                       label="E-value threshold for blast searches"
                       argument="-E"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data format="tabular" name="output_final"
              label="${tool.name} on ${on_string}: fusion_candidates.final"
              from_work_dir="star-fusion.fusion_candidates.final"/>
    </outputs>
    <tests>
        <!--
            TAB characters inside the assertion attributes are written as
            character references (&#9;); a literal TAB in an attribute value
            would be normalized to a space by the XML parser.
        -->
        <test>
            <param name="input_source" value="use_chimeric"/>
            <param name="chimeric_junction" ftype="interval" value="test1.tabular"/>
            <param name="fasta_type_selector" value="history"/>
            <param name="ownFile" ftype="fasta" value="test1.fa"/>
            <param name="geneModel" ftype="gtf" value="test1.gtf"/>
            <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular"/>
            <param name="settingsType" value="default"/>
            <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
            <output name="output_final">
                <assert_contents>
                    <has_line line="#fusion_name&#9;JunctionReads&#9;SpanningFrags&#9;Splice_type&#9;LeftGene&#9;LeftBreakpoint&#9;RightGene&#9;RightBreakpoint&#9;JunctionReads&#9;SpanningFrags"/>
                    <has_text text="GENE1--GENE2&#9;24&#9;0&#9;INCL_NON_REF_SPLICE&#9;GENE1^GENE1&#9;chr1:240:+&#9;GENE2^GENE2&#9;chr2:241:+"/>
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input_source" value="use_fastq"/>
            <param name="left_fq" ftype="fastqsanger" value="test1.fastqsanger"/>
            <param name="fasta_type_selector" value="history"/>
            <param name="ownFile" ftype="fasta" value="test1.fa"/>
            <param name="geneModel" ftype="gtf" value="test1.gtf"/>
            <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular"/>
            <param name="settingsType" value="default"/>
            <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
            <output name="output_final">
                <assert_contents>
                    <has_line line="#fusion_name&#9;JunctionReads&#9;SpanningFrags&#9;Splice_type&#9;LeftGene&#9;LeftBreakpoint&#9;RightGene&#9;RightBreakpoint&#9;JunctionReads&#9;SpanningFrags"/>
                    <has_text text="GENE1--GENE2&#9;24&#9;0&#9;INCL_NON_REF_SPLICE&#9;GENE1^GENE1&#9;chr1:240:+&#9;GENE2^GENE2&#9;chr2:241:+"/>
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input_source" value="use_fastq"/>
            <param name="left_fq" ftype="fastqsanger.gz" value="test1.fastqsanger.gz"/>
            <param name="fasta_type_selector" value="history"/>
            <param name="ownFile" ftype="fasta" value="test1.fa"/>
            <param name="geneModel" ftype="gtf" value="test1.gtf"/>
            <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular"/>
            <param name="settingsType" value="default"/>
            <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
            <output name="output_final">
                <assert_contents>
                    <has_line line="#fusion_name&#9;JunctionReads&#9;SpanningFrags&#9;Splice_type&#9;LeftGene&#9;LeftBreakpoint&#9;RightGene&#9;RightBreakpoint&#9;JunctionReads&#9;SpanningFrags"/>
                    <has_text text="GENE1--GENE2&#9;24&#9;0&#9;INCL_NON_REF_SPLICE&#9;GENE1^GENE1&#9;chr1:240:+&#9;GENE2^GENE2&#9;chr2:241:+"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
**What it does**

STAR-Fusion is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads. STAR-Fusion further processes the output generated by the STAR aligner to map junction reads and spanning reads to a reference annotation set.

**Input: files required to run STAR-Fusion**

 - A genome reference sequence (FASTA-format)
 - A corresponding protein-coding gene annotation set (GTF/GFF Format)
 - A blast-matching gene pairs file - in Galaxy you can create such files with the *ncbi_blast_plus* tool suite containing *blastn*: https://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
 - A STAR chimeric/junction output file - this is optional as STAR Fusion can control running STAR as well.

The authors of STAR Fusion have made some of these files available at: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/.
The gene annotations in each case are restricted to the protein-coding and lincRNA transcripts.

More info: https://github.com/STAR-Fusion/STAR-Fusion/wiki
    </help>
    <citations>
        <citation type="bibtex">
@unpublished{star_fusion,
    author = {Brian Haas and Nicolas Stransky and Daniel Nicorici},
    title = {STAR-Fusion},
    url = {https://github.com/STAR-Fusion/STAR-Fusion}
}
        </citation>
    </citations>
</tool>