Galaxy |

Changeset 0:efddb7a0b3db (2011-09-16)

Next changeset 1:36306d8086fa (2011-09-16)

Commit message:
Uploaded

added:
defuse/README
defuse/defuse-0.4.2.tar.gz
defuse/defuse.xml
defuse/modified_scripts.tgz
defuse/tool-data/defuse.loc.sample

diff -r 000000000000 -r efddb7a0b3db defuse/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/defuse/README Fri Sep 16 13:07:35 2011 -0400

@@ -0,0 +1,33 @@
+The DeFuse galaxy tool is based on DeFuse_Version_0.4.2
+ http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page
+
+DeFuse is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion.
+
+
+Manual:
+ http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2
+
+
+The included defuse source code is from: http://sourceforge.net/projects/defuse/files/defuse/0.4/defuse-0.4.2.tar.gz/download
+(without the defuse-0.4.2 dir level)
+tar zxf defuse-0.4.2.tar.gz
+cd tool
+make
+cd ..
+
+To use with non-human genome references:
+tar zxf modified_scripts.tgz
+DeFuse source was modified to include 2 extra parameters for non-human references: gene_id_pattern and transcript_id_pattern. The modified scripts are:
+ scripts/alignjob.pl
+ scripts/annotate_fusions.pl
+ scripts/calculate_expression_simple.pl
+ scripts/filter_bulk_fusion_reads.pl
+ scripts/filter_sam_genes.pl
+ scripts/find_concordant_ensembl.pl
+ scripts/find_gene_clusters.pl
+
+
+The defuse.xml galaxy tool wrapper will generate a defuse config.txt using values from tool-data/defuse.loc
+and call scripts/defuse.pl
+
+

diff -r 000000000000 -r efddb7a0b3db defuse/defuse-0.4.2.tar.gz

Binary file defuse/defuse-0.4.2.tar.gz has changed

diff -r 000000000000 -r efddb7a0b3db defuse/defuse.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/defuse/defuse.xml Fri Sep 16 13:07:35 2011 -0400

b'@@ -0,0 +1,637 @@\n+<tool id="defuse" name="DeFuse" version="1.0">\n+ <description>identify fusion transcripts</description>\n+ <requirements>\n+ <requirement type="binary"></requirement>\n+ </requirements>\n+ <command interpreter="perl">\n+ scripts/defuse.pl\n+ -c `cp $defuse_config $config_txt; echo $defuse_config`\n+ -d `mkdir -p data_dir; ln -s $left_pairendreads data_dir/reads_1.fastq; ln -s $right_pairendreads data_dir/reads_2.fastq; echo data_dir`\n+ -o output_dir -p 8\n+ </command>\n+ <inputs>\n+ <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. FASTQ de-interlacer will separate the result into left and right reads.)"/>\n+ <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/>\n+ <conditional name="refGenomeSource">\n+ <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help="">\n+ <option value="indexed">Use a built-in DeFuse Reference Dataset</option>\n+ <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option>\n+ </param>\n+ <when value="indexed">\n+ <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team">\n+ <options from_file="defuse.loc">\n+ <column name="name" index="1"/>\n+ <column name="value" index="2"/>\n+ <filter type="sort_by" column="0" />\n+ <validator type="no_options" message="No indexes are available" />\n+ </options>\n+ </param>\n+ <conditional name="defuse_param">\n+ <param name="settings" type="select" label="Defuse parameter settings" help="">\n+ <option value="preSet">Default settings</option>\n+ <option value="full">Full 
parameter list</option>\n+ </param>\n+ <when value="preSet" />\n+ <when value="full">\n+ <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" />\n+ <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" />\n+ <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" />\n+ <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision">\n+ <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>\n+ </param>\n+ <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" />\n+ <param name="split_count_threshold" type="integer" value="3" optional="true" label="Filter split_count_threshold" />\n+ <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold">\n+ <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>\n+ </param>\n+ <param name="max_dist_pos" type="integer" value="600" optional="true" label="Filter max_dist_pos" />\n+ <param name="num_dist_genes" type="integer" value="500" optional="true" label="Filter num_dist_genes" />\n+ <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" />\n+ <param name="max_concordant_ratio" type="float" value="0.1" optional="true" label="Filter max_concordant_ratio">\n+ <validator type="in_range" message="Choose a val'..b'um_splice_variants : number of potential splice variants for this gene pair\n+ - splicing_index1 : number of concordant pairs in gene 1 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 2\n+ - splicing_index2 : number of concordant pairs in 
gene 2 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 1\n+\n+\n+**Example**\n+\n+results.tsv::\n+\n+ cluster_id\tsplitr_sequence\tsplitr_count\tsplitr_span_pvalue\tsplitr_pos_pvalue\tsplitr_min_pvalue\tadjacent\taltsplice\tbreak_adj_entropy1\tbreak_adj_entropy2\tbreak_adj_entropy_min\tbreak_predict\tbreakpoint_homology\tbreakseqs_estislands_percident\tcdna_breakseqs_percident\tconcordant_ratio\tdeletion\test_breakseqs_percident\teversion\texonboundaries\texpression1\texpression2\tgene1\tgene2\tgene_align_strand1\tgene_align_strand2\tgene_chromosome1\tgene_chromosome2\tgene_end1\tgene_end2\tgene_location1\tgene_location2\tgene_name1\tgene_name2\tgene_start1\tgene_start2\tgene_strand1\tgene_strand2\tgenome_breakseqs_percident\tgenomic_break_pos1\tgenomic_break_pos2\tgenomic_strand1\tgenomic_strand2\tinterchromosomal\tinterrupted_index1\tinterrupted_index2\tinversion\tlibrary_name\tmax_map_count\tmax_repeat_proportion\tmean_map_count\tmin_map_count\tnum_multi_map\tnum_splice_variants\torf\tread_through\trepeat_proportion1\trepeat_proportion2\tspan_count\tspan_coverage1\tspan_coverage2\tspan_coverage_max\tspan_coverage_min\tsplice_score\tsplicing_index1\tsplicing_index2\t\n+ 
1169\tGCTTACTGTATGCCAGGCCCCAGAGGGGCAACCACCCTCTAAAGAGAGCGGCTCCTGCCTCCCAGAAAGCTCACAGACTGTGGGAGGGAAACAGGCAGCAGGTGAAGATGCCAAATGCCAGGATATCTGCCCTGTCCTTGCTTGATGCAGCTGCTGGCTCCCACGTTCTCCCCAGAATCCCCTCACACTCCTGCTGTTTTCTCTGCAGGTTGGCAGAGCCCCATGAGGGCAGGGCAGCCACTTTGTTCTTGGGCGGCAAACCTCCCTGGGCGGCACGGAAACCACGGTGAGAAGGGGGCAGGTCGGGCACGTGCAGGGACCACGCTGCAGG|TGTACCCAACAGCTCCGAAGAGACAGCGACCATCGAGAACGGGCCATGATGACGATGGCGGTTTTGTCGAAAAGAAAAGGGGGAAATGTGGGGAAAAGCAAGAGAGATCAGATTGTTACTGTGTCTGTGTAGAAAGAAGTAGACATGGGAGACTCCATTTTGTTCTGTACTAAGAAAAATTCTTCTGCCTTGAGATTCGGTGACCCCACCCCCAACCCCGTGCTCTCTGAAACATGTGCTGTGTCCACTCAGGGTTGAATGGATTAAGGGCGGTGCGAGACGTGCTTT\t2\t0.000436307890680442\t0.110748295953850\t0.0880671602973091\tN\tY\t3.19872427442695\t3.48337348351473\t3.19872427442695\tsplitr\t0\t0\t0\t0\tY\t0\tN\tN\t0\t0\tENSG00000105549\tENSG00000213753\t+\t-\t19\t19\t376013\t59111168\tintron\tupstream\tTHEG\tAC016629.2\t361750\t59084870\t-\t+\t0\t375099\t386594\t+\t-\tN\t8.34107429512245\t-\tN\toutput_dir\t82\t0.677852348993289\t40.6666666666667\t1\t11\t1\tN\tN\t0.361271676300578\t0.677852348993289\t12\t0.758602776578432\t0.569678713445872\t0.758602776578432\t0.569678713445872\t2\t0.416666666666667\t-\t\n+ 
3596\tTGGGGGTTGAGGCTTCTGTTCCCAGGTTCCATGACCTCAGAGGTGGCTGGTGAGGTTATGACCTTTGCCCTCCAGCCCTGGCTTAAAACCTCAGCCCTAGGACCTGGTTAAAGGAAGGGGAGATGGAGCTTTGCCCCGACCCCCCCCCGTTCCCCTCACCTGTCAGCCCGAGCTGGGCCAGGGCCCCTAGGTGGGGAACTGGGCCGGGGGGCGGGCACAAGCGGAGGTGGTGCCCCCAAAAGGGCTCCCGGTGGGGTCTTGCTGAGAAGGTGAGGGGTTCCCGGGGCCGCAGCAGGTGGTGGTGGAGGAGCCAAGCGGCTGTAGAGCAAGGGGTGAGCAGGTTCCAGACCGTAGAGGCGGGCAGCGGCCACGGCCCCGGGTCCAGTTAGCTCCTCACCCGCCTCATAGAAGCGGGGTGGCCTTGCCAGGCGTGGGGGTGCTGCC|TTCCTTGGATGTGGTAGCCGTTTCTCAGGCTCCCTCTCCGGAATCGAACCCTGATTCCCCGTCACCCGTGGTCACCATGGTAGGCACGGCGACTACCATCGAAAGTTGATAGGGCAGACGTTCGAATGGGTCGTCGCCGCCACGGGGGGCGTGCGATCAGCCCGAGGTTATCTAGAGTCACCAAAGCCGCCGGCGCCCGCCCCCCGGCCGGGGCCGGAGAGGGGCTGACCGGGTTGGTTTTGATCTGATAAATGCACGCATCCCCCCCGCGAAGGGGGTCAGCGCCCGTCGGCATGTATTAGCTCTAGAATTACCACAGTTATCCAAGTAGGAGAGGAGCGAGCGACCAAAGGAACCATAACTGATTTAATGAGCCATTCGCAGTTTCACTGTACCGGCCGTGCGTACTTAGACATGCATGGCTTAATCTTTGAGACAAGCATATGCTACTGGCAGG\t250\t7.00711162298275e-72\t0.00912124762512338\t0.00684237452309549\tN\tN\t3.31745197152461\t3.47233119514066\t3.31745197152461\tsplitr\t7\t0.0157657657657656\t0\t0\tN\t0.0135135135135136\tN\tN\t0\t0\tENSG00000156860\tENSG00000212932\t-\t+\t16\t21\t30682131\t48111157\tcoding\tupstream\tFBRS\tRPL23AP4\t30670289\t48110676\t+\t+\t0.0157657657657656\t30680678\t9827473\t-\t+\tY\t-\t-\tN\toutput_dir\t2\t1\t1.11111111111111\t1\t1\t1\tN\tN\t0\t1\t9\t0.325530693397641\t0.296465452915709\t0.325530693397641\t0.296465452915709\t2\t-\t-\t\n+\n+ </help>\n+</tool>\n'

diff -r 000000000000 -r efddb7a0b3db defuse/modified_scripts.tgz

Binary file defuse/modified_scripts.tgz has changed

diff -r 000000000000 -r efddb7a0b3db defuse/tool-data/defuse.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/defuse/tool-data/defuse.loc.sample Fri Sep 16 13:07:35 2011 -0400

@@ -0,0 +1,10 @@
+## Configuration info for prepared data references for DeFuse
+## http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2
+## 3 columns separated by the TAB character
+## The 3rd column has dictionary values that will be substituted in the config file for defuse
+## It should likely contain keys: dataset_directory gene_models genome_fasta repeats_filename est_fasta est_alignments unigene_fasta
+## If this is not a Homo_sapiens reference, it also needs the keys: gene_id_pattern transcript_id_pattern chromosomes
+
+#db_key name {'config_key':'config_value'}
+hg19 GRCh37(hg19) {'samtools_bin':'/soft/samtools/0.1.12a/bin/samtools', 'gene_id_pattern':'ENSG\d+', 'transcript_id_pattern':'ENST\d+', 'dataset_directory':'/project/db/genomes/Hsapiens/hg19/defuse', 'gene_models':'$(dataset_directory)/Homo_sapiens.GRCh37.62.gtf', 'genome_fasta':'$(dataset_directory)/Homo_sapiens.GRCh37.62.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Hs.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'}
+mm9 NCBIM37(mm9) {'samtools_bin':'/soft/samtools/0.1.12a/bin/samtools', 'gene_id_pattern':'ENSMUSG\d+', 'transcript_id_pattern':'ENSMUST\d+', 'dataset_directory':'/project/db/genomes/Mmusculus/mm9/defuse', 'gene_models':'$(dataset_directory)/Mus_musculus.NCBIM37.63.gtf', 'genome_fasta':'$(dataset_directory)/Mus_musculus.NCBIM37.63.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Mm.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'}