Next changeset 1:1fe9d667447c (2022-09-23) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/arriba commit b12158e6cc9b1b2bd6e7522dfc183e9055575823 |
added:
arriba_get_filters.xml macros.xml static/images/draw-fusions-example.png test-data/Aligned.out.bam test-data/Aligned.out.bam.bai test-data/Aligned.out.sam test-data/cytobands.tsv test-data/fusions.tsv test-data/genome.fasta.gz test-data/genome.gtf.gz test-data/protein_domains.gff3 tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample |
b |
diff -r 000000000000 -r 125d20cb23d7 arriba_get_filters.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arriba_get_filters.xml Wed Jul 27 11:25:14 2022 +0000 |
[ |
@@ -0,0 +1,73 @@ +<tool id="arriba_get_filters" name="Arriba Get Filters" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT"> + <description>to history</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="version_command" /> + <command detect_errors="exit_code"><![CDATA[ + BASE_DIR=\$(dirname \$(dirname `which arriba`)) && + REF_SCRIPT=`find \$BASE_DIR -name 'download_references.sh'` && + REF_DIR=\$(dirname \$REF_SCRIPT) && + REF_NAME=${arriba_reference_name.split('+')[0].replace('viral','')} && + echo \$REF_NAME && + cp `find \$REF_DIR -name 'blacklist_*' | grep -i \$REF_NAME` '$blacklist' && + cp `find \$REF_DIR -name 'known_fusions_*' | grep -i \$REF_NAME` '$known_fusions' && + cp `find \$REF_DIR -name 'protein_domains_*' | grep -i \$REF_NAME` '$protein_domains' && + cp `find \$REF_DIR -name 'cytobands_*' | grep -i \$REF_NAME` '$cytobands' + #* + cp "\$REF_DIR/blacklist_*${arriba_reference_name}*" '$blacklist' && + cp "\$REF_DIR/known_fusions_*${arriba_reference_name}*" '$known_fusions' && + cp "\$REF_DIR/protein_domains_*${arriba_reference_name}*" '$protein_domains' && + cp "\$REF_DIR/cytobands_*${arriba_reference_name}*" '$cytobands' + *# + ]]></command> + <inputs> + <param name="arriba_reference_name" type="text" label="Select reference"> + <help>GRCh38 GRCh37 hg38 hg19 GRCm38 mm10</help> + <option value="GRCh38">GRCh38</option> + <option value="GRCh37">GRCh37</option> + <option value="hg38">hg38</option> + <option value="hg19">hg19</option> + <option value="GRCm39">GRCm39</option> + <option value="GRCm38">GRCm38</option> + <option value="mm39">mm39</option> + <option value="mm10">mm10</option> + </param> + </inputs> + <outputs> + <data name="blacklist" format="tabular.gz" label="${tool.name} ${arriba_reference_name} blacklist.tsv.gz"/> + <data name="known_fusions" format="tabular.gz" label="${tool.name} ${arriba_reference_name} known_fusions.tsv.gz"/> + <data name="protein_domains" format="gff3" label="${tool.name} ${arriba_reference_name} protein_domains.gff3"/> + <data name="cytobands" format="tabular" label="${tool.name} ${arriba_reference_name} cytobands.tsv"/> + </outputs> + <tests> + <test> + <param name="arriba_reference_name" value="GRCh38"/> + <output name="cytobands"> + <assert_contents> + <has_text_matching expression="1\t1\t\d+\tp36.33\tgneg"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +**Arriba Get Filters** + +Arriba_ is a fast tool to search for aberrant transcripts such as gene fusions. +It is based on chimeric alignments found by the STAR RNA-Seq aligner. + +The **Arriba Get Filters** tool adds the following Arriba distribution input_files_ to your galaxy history: + + - blacklist + - known_fusions + - protein_domains + - cytobands + + +.. _Arriba: https://arriba.readthedocs.io/en/latest/ +.. _input_files: https://arriba.readthedocs.io/en/latest/input-files/ + +]]></help> + <expand macro="citations" /> +</tool> |
b |
diff -r 000000000000 -r 125d20cb23d7 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Wed Jul 27 11:25:14 2022 +0000 |
[ |
b'@@ -0,0 +1,311 @@\n+<macros>\n+ <token name="@TOOL_VERSION@">2.3.0</token>\n+ <token name="@VERSION_SUFFIX@">0</token>\n+ <xml name="requirements">\n+ <requirements>\n+ <requirement type="package" version="@TOOL_VERSION@">arriba</requirement>\n+ <yield/>\n+ </requirements>\n+ </xml>\n+ <xml name="citations">\n+ <citations>\n+ <citation type="doi">10.1101/gr.257246.119</citation>\n+ <yield />\n+ </citations>\n+ </xml>\n+ <xml name="version_command">\n+ <version_command>arriba -h | grep Version | sed \'s/^.* //\'</version_command>\n+ </xml>\n+ <xml name="genome_source" token_assembly_optional="false" >\n+ <conditional name="genome">\n+ <param name="genome_source" type="select" label="Genome assembly fasta (that was used for STAR alignment)">\n+ <option value="history">From your history</option>\n+ <option value="cached">Use built-in Genome reference</option>\n+ </param>\n+ <when value="history">\n+ <param name="assembly" argument="-a" type="data" format="fasta" optional="@ASSEMBLY_OPTIONAL@" label="Genome assembly fasta"/>\n+ </when>\n+ <when value="cached">\n+ <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">\n+ <options from_data_table="all_fasta">\n+ <validator type="no_options" message="No reference genomes are available" />\n+ </options>\n+ </param>\n+ </when>\n+ </conditional>\n+ </xml>\n+ <xml name="gtf_source" token_assembly_optional="false" >\n+ <conditional name="genome_gtf">\n+ <param name="gtf_source" type="select" label="Genome GTF annotation source">\n+ <option value="history">From your history</option>\n+ <!-- <option value="cached">Use built-in Gtf annotation</option> -->\n+ </param>\n+ <when value="history">\n+ <param name="annotation" argument="-g" type="data" format="gtf" label="Gene annotation in GTF format"/>\n+ </when>\n+ </conditional>\n+ </xml>\n+\n+ <token name="@GENOME_SOURCE@"><![CDATA[\n+#if str($genome.genome_source) == "history"\n+ #if $genome.assembly\n+ #set $genome_assembly = \'genome.fa\'\n+ ln -sf \'$genome.assembly\' $genome_assembly &&\n+ #end if\n+#elif str($genome.genome_source) == "cached"\n+ #set $genome_assembly = $genome.ref_file.fields.fasta\n+#end if\n+ ]]></token>\n+ <token name="@GTF_SOURCE@"><![CDATA[\n+#if str($genome_gtf.gtf_source) == "history"\n+ #if $genome_gtf.annotation.is_of_type(\'gtf.gz\')\n+ #set $genome_annotation = \'genome.gtf.gz\'\n+ #else\n+ #set $genome_annotation = \'genome.gtf\'\n+ #end if\n+ ln -sf \'$genome_gtf.annotation\' $genome_annotation &&\n+#end if\n+ ]]></token>\n+\n+ <xml name="visualization_options">\n+ <param name="cytobands" argument="--cytobands" type="data" format="tabular" optional="true" label="Cytobands"/>\n+ <section name="options" expanded="false" title="Draw Fusion Options">\n+ <param argument="--sampleName" type="text" value="" optional="true" label="Sample Name printed as the title on every page"/>\n+ <param argument="--transcriptSelection" type="select" optional="true" label="Transcript selection">\n+ <help>By default the transcript isoform with the highest coverage is drawn.\n+ Alternatively, the transcript isoform that is provided in the columns\n+ transcript_id1 and transcript_id2 in the given fusions file can be drawn.\n+ Selecting the isoform with the highest coverage usually produces nicer plots,\n+ in the sense that the coverage track is smooth and shows a visible increase in coverage '..b' </param>\n+ <param argument="--coverageRange" type="text" value="" optional="true" label="Maximum coverage for plot">\n+ <help>When the parameter --alignments is used, coverage plots are drawn above the transcripts of the fused genes. \n+ The plots can be cropped at a fixed level by passing a non-zero value to this parameter. \n+ When only a single value is given, both coverage plots (for gene1 and gene2) are cropped at the same level. \n+ When two comma-separated values are given, the cutoffs can be specified independently for the two plots. \n+ A value of 0 indicates that no cropping should be applied (i.e., the cutoff is set to the peak coverage) \n+ and that the coverage plots of both genes should be on the same scale. This is the default behavior. \n+ A value of 0,0 also indicates that no cropping should be applied, \n+ but the coverage plots of the two genes have different scales: \n+ each one is scaled individually to the peak coverage of the respective gene. \n+ Default: 0\n+ </help>\n+ <validator type="regex" message="">^\\d+(,\\d+)?$</validator>\n+ </param>\n+ </section>\n+ </xml>\n+ <token name="@DRAW_FUSIONS@">\n+draw_fusions.R\n+ --fusions=\'$fusions\'\n+ --alignments=\'Aligned.sortedByCoord.out.bam\'\n+ --annotation=\'$genome_gtf.annotation\'\n+ --output=fusions.pdf\n+ #if $visualization.cytobands\n+ --cytobands=\'$visualization.cytobands\'\n+ #end if\n+ #if $protein_domains\n+ --proteinDomains=\'$protein_domains\'\n+ #end if\n+ ## Visualization Options\n+ #if $visualization.options.transcriptSelection\n+ --transcriptSelection=$visualization.options.transcriptSelection\n+ #end if\n+ #if $visualization.options.minConfidenceForCircosPlot\n+ --minConfidenceForCircosPlot=$visualization.options.minConfidenceForCircosPlot\n+ #end if\n+ #if $visualization.options.squishIntrons\n+ --squishIntrons=$visualization.options.squishIntrons\n+ #if $visualization.options.squishIntrons == \'FALSE\' and $visualization.options.showIntergenicVicinity\n+ --showIntergenicVicinity=\'$visualization.options.showIntergenicVicinity\'\n+ #end if\n+ #end if\n+ #if $visualization.options.mergeDomainsOverlappingBy\n+ --mergeDomainsOverlappingBy=$visualization.options.mergeDomainsOverlappingBy\n+ #end if\n+ #if $visualization.options.sampleName\n+ --sampleName=\'$visualization.options.sampleName\'\n+ #end if\n+ #if $visualization.options.printExonLabels\n+ --printExonLabels=$visualization.options.printExonLabels\n+ #end if\n+ #if $visualization.options.coverageRange\n+ --coverageRange=\'$visualization.options.coverageRange\'\n+ #end if\n+ #if $visualization.options.render3dEffect\n+ --render3dEffect=$visualization.options.render3dEffect\n+ #end if\n+ #if $visualization.options.optimizeDomainColors\n+ --optimizeDomainColors=$visualization.options.optimizeDomainColors\n+ #end if\n+ #if $visualization.options.color1\n+ --color1=\'$visualization.options.color1\'\n+ #end if\n+ #if $visualization.options.color2\n+ --color2=\'$visualization.options.color2\'\n+ #end if\n+ #if str($visualization.options.pdfWidth)\n+ --pdfWidth=$visualization.options.pdfWidth\n+ #end if\n+ #if str($visualization.options.pdfHeight)\n+ --pdfHeight=$visualization.options.pdfHeight\n+ #end if\n+ # fontFamily\n+ #if $visualization.options.fontFamily\n+ --fontFamily=$visualization.options.fontFamily\n+ #end if\n+ #if str($visualization.options.fontSize)\n+ --fontSize=$visualization.options.fontSize\n+ #end if\n+</token>\n+</macros>\n' |
b |
diff -r 000000000000 -r 125d20cb23d7 static/images/draw-fusions-example.png |
b |
Binary file static/images/draw-fusions-example.png has changed |
b |
diff -r 000000000000 -r 125d20cb23d7 test-data/Aligned.out.bam |
b |
Binary file test-data/Aligned.out.bam has changed |
b |
diff -r 000000000000 -r 125d20cb23d7 test-data/Aligned.out.bam.bai |
b |
Binary file test-data/Aligned.out.bam.bai has changed |
b |
diff -r 000000000000 -r 125d20cb23d7 test-data/Aligned.out.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Aligned.out.sam Wed Jul 27 11:25:14 2022 +0000 |
b |
b'@@ -0,0 +1,89 @@\n+@HD\tVN:1.4\tSO:coordinate\n+@SQ\tSN:22\tLN:269079\n+@SQ\tSN:9\tLN:515509\n+@PG\tID:STAR\tPN:STAR\tVN:2.7.8a\tCL:STAR --runThreadN 12 --genomeDir tempstargenomedir --genomeLoad NoSharedMemory --readFilesIn /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368710.dat /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368711.dat --readFilesCommand zcat --limitBAMsortRAM 122880000000 --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --outSAMattributes NH HI AS nM ch --outSAMunmapped Within --outSAMprimaryFlag OneBestScore --outSAMmapqUnique 60 --outBAMsortingThreadN 12 --outBAMsortingBinsN 50 --outSAMattrIHstart 1 --winAnchorMultimapNmax 50 --chimSegmentMin 12 --chimOutType WithinBAM Junctions --chimOutJunctionFormat 1 --quantMode TranscriptomeSAM GeneCounts --quantTranscriptomeBan Singleend --twopassMode Basic\n+@CO\tuser command line: STAR --runThreadN 12 --genomeLoad NoSharedMemory --genomeDir tempstargenomedir --readFilesIn /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368710.dat /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368711.dat --readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --twopassMode Basic --quantMode TranscriptomeSAM GeneCounts --quantTranscriptomeBan Singleend --outSAMstrandField intronMotif --outSAMattrIHstart 1 --outSAMattributes NH HI AS nM ch --outSAMprimaryFlag OneBestScore --outSAMmapqUnique 60 --outSAMunmapped Within --chimSegmentMin 12 --outBAMsortingThreadN 12 --outBAMsortingBinsN 50 --winAnchorMultimapNmax 50 --limitBAMsortRAM 122880000000 --chimOutType WithinBAM Junctions --chimOutJunctionFormat 1\n+BCR-ABL1-46\t163\t22\t225687\t60\t71M2994N7M1344N72M\t=\t225737\t5255\tAACTGGAGGCAGTGCCCAACATCCCCCTGGTGCCCGATGAGGAGCTGGACGCTTTGAACATCAAGATCTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTG\tCCCGGGGGG=GGGJJJGGJJJGGJJJJCJJGGJJGCJGCGGGC8J8JGGJJJJJGJJC(JGCCG=GGJJGCCCGC8GCCGGGGGG=GGCGGG1GG=GC1G=CJCJJCCCGGCGG1CGG1GGGGGGGG=GGGGGCCGCGGG8GGGCGG=GG\tNH:i:1\tHI:i:1\tAS:i:285\tnM:i:1\tXS:A:+\tNM:i:1\n+BCR-ABL1-72\t163\t22\t225696\t60\t62M2994N7M1344N81M\t=\t228752\t5264\tCAGTGCCCAACATCCCCCTGGTGCCCGATGAGGAGCTGCACGCTTTGAAGATCAAGATCTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCA\tCCCCGGGGGGGGGGJGJCCCJ1GJJJJGCGGGCJJJ=C1JJGGJGG8JGC=CCGJ1JGG8GGGGGJCGJCCGGGCG=CGGGGGGCGG=GGCGGG=8CCGCGGJJJ=JGGGCGGGGGCCGCCGGGGGGGGC=CCGCG8GGGGGC1GGGGCC\tNH:i:1\tHI:i:1\tAS:i:290\tnM:i:1\tXS:A:+\tNM:i:1\n+BCR-ABL1-46\t83\t22\t225737\t60\t21M2994N7M1344N105M717N17M\t=\t225687\t-5255\tGCTTTGAAGATCAAGATCTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGG\t=GGCGGGGGGG=GGGCCCGCCCGGGGGGGGGGCCGGGGCGG8CGCGGG1JGGCCGG(C=GCCCGGGGGGCGGGGGCGCGGCGGJCGGGJJGJGGGJJCGGGJJJGJJJJJJJGJJJJGGGJJJJJGGJJJJJGCJJJCGGGGGGGGGCCC\tNH:i:1\tHI:i:1\tAS:i:285\tnM:i:1\tXS:A:+\tNM:i:0\n+BCR-ABL1-72\t83\t22\t228752\t60\t3S7M1344N105M717N35M\t=\t225696\t-5264\tTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAAT\t=GGGGGG==GGGGCCCC=GGGGG=GGGGCGGGCGGGGGGG=CGGCCGCCJGGCGGGGG=GGG8GGGCGGC=G=CCJGGGGGGCGJJGJJCGGGGGGJJJGCJCCGJG=JJJGJGJJCJJJJGJJJJJJJ=GCJGJGCGGG=GGGGGGCC=\tNH:i:1\tHI:i:1\tAS:i:290\tnM:i:1\tXS:A:+\tNM:i:0\n+BCR-ABL1-4\t99\t22\t230111\t60\t97M717N53M\t=\t230176\t889\tAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCC\tC==GGGGGGGGGGJJJJ1JJJGGJJGGJGGJJGJJCJGJGJJCGGCJGCJJJJCGJGGGGJGGGGGGCCGG8JGGCGCGG=GGGGGGGGGGGGGG=GCCGJGGGCCGGGGGG1GGGGGGCGCGGCGGGGGG=GGGGGGGGGCCGCGGGCC\tNH:i:1\tHI:i:1\tAS:i:259\tnM:i:0\tch:A:1\tXS:A:+\tNM:i:0\n+BCR-ABL1-18\t99\t22\t230118\t60\t90M717N60M\t=\t230165\t882\tCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGTAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCAATCAGCCACTGGAT\tCCCGGGCGGGCGGJGJJJJJJJJJ='..b'GGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGAGTGAGAGCAGTCCTGGCCAGAGGTCCATCTCGCT\tCCGGGCGCGCGGGCG=CCCGGCGCGGGGC=CGGCGGCCGCGGGJJJJCCGCCG(GCCCCCGGCCGGG=G8GGGGGGCC=C=CGGJGJJJGC=JGGJJJGJGJ1JJJGC=JJJG=JCJJJJJJJ=JJGGGJJJCGJJJGGGGGCGG=GCCC\tNH:i:1\tHI:i:1\tAS:i:298\tnM:i:0\tNM:i:0\n+BCR-ABL1-74\t77\t*\t0\t0\t*\t*\t0\t0\tTCATTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATGACATTC\tCC11GGGGGGGGGGCCJJJGCGJJGJJJJJGGGGGGJJJGGJG==GCJCJ=GGJJGGJJGGCJGG=GGGGGJGGJGC=GC=GGGCGGGCGGGGCCGCGGGJCGC=GGC8CGCGCGGGGGGCGCC1GGCGCC=GCCGCGGC8GCGGGCCCG\tNH:i:0\tHI:i:0\tAS:i:155\tnM:i:2\tuT:A:1\n+BCR-ABL1-74\t141\t*\t0\t0\t*\t*\t0\t0\tCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAGGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAG\tCCCGGGGGGCGCGJGGJJGGJGJJJGJGGJJGGJGJJ1=JCJJGGGJJJJGGGJGCCJGGJGG=J1JG8JGCGGGJG=GC1CGCCGGCG(GGCGGCGGGGGCJC1CCGC==CCGGGGCGGCGGGCCGGCGCGC8CCCCGGG=GGGC=GGG\tNH:i:0\tHI:i:0\tAS:i:155\tnM:i:2\tuT:A:1\n+BCR-ABL1-66\t77\t*\t0\t0\t*\t*\t0\t0\tTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATGACATTCAGAAACCCATAGAG\tCCC=GGGGCGGGGJJJJJGJJJJ=JJJGJJ1GJJGJJJJJGJJJJJGGGGCGJJGGGJJJGGCGGGGJGCGG1JCGGG=GCCGCG=GC=G=GCCGGGGG8JGGGGGGGGGGGG=GGCGGC8GGCCGGGC=GGGGGGGGG=CGG=8GGCCG\tNH:i:0\tHI:i:0\tAS:i:159\tnM:i:0\tuT:A:1\n+BCR-ABL1-66\t141\t*\t0\t0\t*\t*\t0\t0\tCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAG\tCCCGGGGGGGGGGGGJ=JGJJJJJJJGGJJCCCJGJJ1JJJGCJGGGGJJJJ=GGGJGJGC(GGGGJGGGJG1=GGGGGGGG=G=C=GG8CC8GGGGGCCCCJCCCJGCG=GGCCGGCGGCGGCG==1GCCGGC1GGGGGCGGGGGGCGG\tNH:i:0\tHI:i:0\tAS:i:159\tnM:i:0\tuT:A:1\n+BCR-ABL1-58\t77\t*\t0\t0\t*\t*\t0\t0\tATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGG\tCCCGGCGGGGGGGGJJJJJGJJGJGJGJGJJJJJJJJJCJGJJJJGCG=8GGGJGJGGCGGJGCGJJJCJGGG=CGCCGGCCGGGCGCGGGCGCG1GGGCCCGGGGCG8GCCC=C8CGCGG=CCCGCCCCGGG=CCGGCGGGCGGGGGCG\tNH:i:0\tHI:i:0\tAS:i:185\tnM:i:3\tuT:A:1\n+BCR-ABL1-58\t141\t*\t0\t0\t*\t*\t0\t0\tTTGGGGTCATTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATTCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATG\tCCCGGGGGGGGGGJJJJJJGJGJJJGGJ=JJJJJJJJGC=GJJGGJJGJJGG1GCJGGGG=JGGG8C=GCCGC==GGGCGGGGGG=GGG=(G=CCGCCGGGGCJJJJGGGC8GCGCGCG8CGGCCGGGCGCGCGG8CCGG8CGGGGGGGG\tNH:i:0\tHI:i:0\tAS:i:185\tnM:i:3\tuT:A:1\n+BCR-ABL1-24\t77\t*\t0\t0\t*\t*\t0\t0\tCGCAGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGGCTGAGTGAAG\tCC11GCGGGGGGGJCGJGJJCCJJJJGJJJJGJJGGJJJCJJJG8JJJ1GJ=JGGGGJJJCG=8GGCGCCGGGCCGGGCGGGGCGGGGCCGCGGCCGGG=J1GCCC1(CCGGCGGGCCGCGGGCGGGGC=GGCGCCGCC1GCGGGGGCGG\tNH:i:0\tHI:i:0\tAS:i:154\tnM:i:3\tuT:A:1\n+BCR-ABL1-24\t141\t*\t0\t0\t*\t*\t0\t0\tTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATGACATTCAGAA\tC=CCGGGGGGGGCJ1GGJJJJ1JJJJJGJJ=GJJG8GGJ=GJGJJGJJGGGCGJGCGGGCGGG8GG=GJJGCG1GCGGJGCCGGCGGGCCGGGCG8GGGGG8C1==CGGCCCGCGGGGC8GCGGG8GGGCGCCGCCGCGGGCGGGGGGCG\tNH:i:0\tHI:i:0\tAS:i:154\tnM:i:3\tuT:A:1\n+BCR-ABL1-10\t77\t*\t0\t0\t*\t*\t0\t0\tAGGTTGGGGTCATTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACG\tCC=GGGGGGGGGG1GJJJJJCJJJJJJJJJJJGJ=GJJJGCJJJJCJGJGCJGJJJGGJJJGGCCGGJGC=GGJ1C8GGGGGGCGCCGGGGGGCGGGCGCCCG1GGCGCGCGGGCC8GCGCGCGC8CCCGCGCGGGGGCGGGGGCGGCGG\tNH:i:0\tHI:i:0\tAS:i:181\tnM:i:2\tuT:A:1\n+BCR-ABL1-10\t141\t*\t0\t0\t*\t*\t0\t0\tATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGA\t1CCGGCGGGGGG1GGJJJGCC1JJJJCCG=JGGJJGJJJ=GGGGGJJGGGGGGC1J=CJGCGGGGCGC(CGGGGG=GGGGG(G=CGGCGGGGCCCGC=CCCCJJCC8G1GGGGCGGGGGGCGCGGGGGGGCG=GGCCGCCGCC1G=GGGG\tNH:i:0\tHI:i:0\tAS:i:181\tnM:i:2\tuT:A:1\n' |
b |
diff -r 000000000000 -r 125d20cb23d7 test-data/cytobands.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cytobands.tsv Wed Jul 27 11:25:14 2022 +0000 |
b |
@@ -0,0 +1,5 @@ +contig start end name giemsa +22 1 40586 q11.22 gpos25 +22 40586 269079 q11.23 gneg +9 1 21036 q34.11 gneg +9 21036 515509 q34.12 gpos25 |
b |
diff -r 000000000000 -r 125d20cb23d7 test-data/fusions.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fusions.tsv Wed Jul 27 11:25:14 2022 +0000 |
b |
@@ -0,0 +1,2 @@ +#gene1 gene2 strand1(gene/fusion) strand2(gene/fusion) breakpoint1 breakpoint2 site1 site2 type split_reads1 split_reads2 discordant_mates coverage1 coverage2 confidence reading_frame tags retained_protein_domains closest_genomic_breakpoint1 closest_genomic_breakpoint2 gene_id1 gene_id2 transcript_id1 transcript_id2 direction1 direction2 filters fusion_transcript peptide_sequence read_identifiers +BCR ABL1 +/+ +/+ 22:230999 9:275100 CDS/splice-site CDS/splice-site translocation 1 3 0 3 8 low in-frame . Bcr-Abl_oncoprotein_oligomerisation_domain(100%),C2_domain(100%),RhoGEF_domain(100%)|F-actin_binding(100%),Protein_kinase_domain(100%),SH2_domain(100%),SH3_domain(100%) . . ENSG00000186716 ENSG00000097007 ENST00000305877 ENST00000372348 downstream upstream . AGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAG___ATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAA|AAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAG___GTGAAAAGCTCCGGG SFSLTSVELQMLTNSCVKLQTVHSIPLTINKEDDESPGLYGFLNVIVHSATGFKQSS|kALQRPVASDFEPQGLSEAARWNSKENLLAGPSENDPNLFVALYDFVASGDNTLSITKGEKLR BCR-ABL1-4,BCR-ABL1-28,BCR-ABL1-60,BCR-ABL1-76 |
b |
diff -r 000000000000 -r 125d20cb23d7 test-data/genome.fasta.gz |
b |
Binary file test-data/genome.fasta.gz has changed |
b |
diff -r 000000000000 -r 125d20cb23d7 test-data/genome.gtf.gz |
b |
Binary file test-data/genome.gtf.gz has changed |
b |
diff -r 000000000000 -r 125d20cb23d7 test-data/protein_domains.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/protein_domains.gff3 Wed Jul 27 11:25:14 2022 +0000 |
b |
b'@@ -0,0 +1,83 @@\n+9\tpfam\tprotein_domain\t33502\t33541\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t33992\t34063\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t35324\t35381\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t37391\t37409\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t37479\t37553\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t38833\t38931\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t41390\t41413\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t41489\t41494\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t43744\t43846\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t44647\t44729\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t47496\t47541\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t51664\t51812\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t102331\t102396\t0\t+\t.\tName=Zinc finger%2C C2H2 type;color=#80FF00;gene_id=ENSG00000130711;gene_name=PRDM12;protein_domain_id=PF00096\n+9\tpfam\tprotein_domain\t102412\t102480\t0\t+\t.\tName=C2H2-type zinc finger;color=#80FF80;gene_id=ENSG00000130711;gene_name=PRDM12;protein_domain_id=PF13894\n+9\tpfam\tprotein_domain\t114903\t114949\t0\t+\t.\tName=Exosome complex exonuclease RRP4 N-terminal region;color=#FF0000;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF14382\n+9\tpfam\tprotein_domain\t116528\t116596\t0\t+\t.\tName=Exosome complex exonuclease RRP4 N-terminal region;color=#FF0000;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF14382\n+9\tpfam\tprotein_domain\t121951\t121971\t0\t+\t.\tName=KH domain;color=#000080;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF15985\n+9\tpfam\tprotein_domain\t123179\t123300\t0\t+\t.\tName=KH domain;color=#000080;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF15985\n+9\tpfam\tprotein_domain\t275219\t275273\t0\t+\t.\tName=SH3 domain;color=#FF0000;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00018\n+9\tpfam\tprotein_domain\t275837\t275922\t0\t+\t.\tName=SH3 domain;color=#FF0000;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00018\n+9\tpfam\tprotein_domain\t275962\t276132\t0\t+\t.\tName=SH2 domain;color=#80FFFF;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00017\n+9\tpfam\tprotein_domain\t283799\t283855\t0\t+\t.\tName=SH2 domain;color=#80FFFF;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00017\n+9\tpfam\tprotein_domain\t283973\t284071\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n+9\tpfam\tprotein_domain\t293165\t293249\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n+9\tpfam\tprotein_domain\t293896\t294073\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n+9\tpfam\tprotein_domain\t295904\t296088\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n+9\tpfam\tprotein_domain\t299451\t299603\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n+9\tpfam\tprotein_domain\t301104\t301156\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;ge'..b'd=PF00053\n+9\tpfam\tprotein_domain\t489945\t490067\t0\t+\t.\tName=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053\n+9\tpfam\tprotein_domain\t490710\t490856\t0\t+\t.\tName=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053\n+22\tpfam\tprotein_domain\t2420\t2524\t0\t-\t.\tName=Armadillo/beta-catenin-like repeat;color=#000080;gene_id=ENSG00000100218;gene_name=RSPH14;protein_domain_id=PF00514\n+22\tpfam\tprotein_domain\t36321\t37004\t0\t+\t.\tName=G-protein alpha subunit;color=#80FFFF;gene_id=ENSG00000128266;gene_name=GNAZ;protein_domain_id=PF00503\n+22\tpfam\tprotein_domain\t63673\t63981\t0\t+\t.\tName=G-protein alpha subunit;color=#80FFFF;gene_id=ENSG00000128266;gene_name=GNAZ;protein_domain_id=PF00503\n+22\tpfam\tprotein_domain\t90736\t90740\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t93060\t93112\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t93619\t93720\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t96554\t96622\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t98578\t98629\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t99484\t99565\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t99749\t99839\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t101465\t101502\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t121553\t121771\t0\t+\t.\tName=Bcr-Abl oncoprotein oligomerisation domain;color=#FF0000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF09036\n+22\tpfam\tprotein_domain\t201581\t201640\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n+22\tpfam\tprotein_domain\t201941\t202126\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n+22\tpfam\tprotein_domain\t208994\t209101\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n+22\tpfam\tprotein_domain\t212118\t212178\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n+22\tpfam\tprotein_domain\t213667\t213719\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n+22\tpfam\tprotein_domain\t214220\t214312\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n+22\tpfam\tprotein_domain\t230954\t230999\t0\t+\t.\tName=C2 domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168\n+22\tpfam\tprotein_domain\t233127\t233224\t0\t+\t.\tName=C2 domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168\n+22\tpfam\tprotein_domain\t235610\t235741\t0\t+\t.\tName=C2 domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168\n+22\tpfam\tprotein_domain\t250010\t250018\t0\t+\t.\tName=C2 domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168\n+22\tpfam\tprotein_domain\t252302\t252422\t0\t+\t.\tName=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620\n+22\tpfam\tprotein_domain\t253473\t253607\t0\t+\t.\tName=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620\n+22\tpfam\tprotein_domain\t254554\t254659\t0\t+\t.\tName=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620\n+22\tpfam\tprotein_domain\t255138\t255228\t0\t+\t.\tName=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620\n' |
b |
diff -r 000000000000 -r 125d20cb23d7 tool-data/all_fasta.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Wed Jul 27 11:25:14 2022 +0000 |
b |
@@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# \ No newline at end of file |
b |
diff -r 000000000000 -r 125d20cb23d7 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Jul 27 11:25:14 2022 +0000 |
b |
@@ -0,0 +1,7 @@ +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> +</tables> |