Next changeset 1:4f1efcc055d5 (2022-09-23) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/arriba commit b12158e6cc9b1b2bd6e7522dfc183e9055575823 |
added:
arriba.xml macros.xml static/images/draw-fusions-example.png test-data/Aligned.out.bam test-data/Aligned.out.bam.bai test-data/Aligned.out.sam test-data/cytobands.tsv test-data/fusions.tsv test-data/genome.fasta.gz test-data/genome.gtf.gz test-data/protein_domains.gff3 tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample |
b |
diff -r 000000000000 -r a24ca22b906e arriba.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arriba.xml Wed Jul 27 11:24:44 2022 +0000 |
[ |
b'@@ -0,0 +1,757 @@\n+<tool id="arriba" name="Arriba" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT">\n+ <description>detect gene fusions from STAR aligned RNA-Seq data</description>\n+ <macros>\n+ <import>macros.xml</import>\n+ <xml name="fusion_actions">\n+ <actions>\n+ <action name="comment_lines" type="metadata" default="1" />\n+ <action name="column_names" type="metadata" default="gene1,gene2,strand1(gene/fusion),strand2(gene/fusion),breakpoint1,breakpoint2,site1,site2,type,split_reads1,split_reads2,discordant_mates,coverage1,coverage2,confidence,reading_frame,tags,retained_protein_domains,closest_genomic_breakpoint1,closest_genomic_breakpoint2,gene_id1,gene_id2,transcript_id1,transcript_id2,direction1,direction2,filters,fusion_transcript,peptide_sequence,read_identifiers" />\n+ </actions>\n+ </xml>\n+ <token name="@VIRAL_CONTIG@">([AN]C_([*]|\\d+))</token>\n+ <token name="@GENOME_CONTIG@">((chr)?(\\d|\\d\\d|X|Y|M(t)?))</token>\n+ </macros>\n+ <expand macro="requirements" />\n+ <expand macro="version_command" />\n+ <command detect_errors="exit_code"><![CDATA[\n+@GENOME_SOURCE@\n+@GTF_SOURCE@\n+#set $filter_list = []\n+#if $options.filters\n+ #set $filter_list = $options.filters.split(\',\')\n+#end if\n+#if $blacklist\n+ #if $blacklist.is_of_type(\'tabular.gz\')\n+ #set $blacklist_file = \'blacklist.tsv.gz\'\n+ ln -sf \'$blacklist\' $blacklist_file &&\n+ #else\n+ #set $blacklist_file = $blacklist\n+ #end if\n+#else\n+ #if \'blacklist\' not in $filter_list\n+ #silent $filter_list.append(\'blacklist\')\n+ #end if\n+#end if\n+#set $filters = \',\'.join($filter_list) \n+#if $known_fusions\n+ #if $known_fusions.is_of_type(\'tabular.gz\')\n+ #set $known_fusions_file = \'known_fusions.tsv.gz\'\n+ ln -sf \'$known_fusions\' $known_fusions_file &&\n+ #else\n+ #set $known_fusions_file = $known_fusions\n+ #end if\n+#end if\n+#if $tags\n+ #if $tags.is_of_type(\'tabular.gz\')\n+ #set $tags_file = \'tags.tsv.gz\'\n+ ln -sf \'$tags\' $tags_file 
&&\n+ #else\n+ #set $tags_file = $tags\n+ #end if\n+#end if\n+ arriba \n+ -x \'$input\'\n+ #if $chimeric\n+ -c \'$chimeric\'\n+ #end if\n+ -a \'$genome_assembly\'\n+ -g \'$genome_annotation\'\n+ #if $blacklist\n+ -b \'$blacklist_file\'\n+ #end if\n+ #if $filters\n+ -f \'$filters\'\n+ #end if\n+ #if $protein_domains\n+ -p \'$protein_domains\'\n+ #end if\n+ #if $known_fusions\n+ -k \'$known_fusions_file\'\n+ #end if\n+ #if $tags\n+ -t \'$tags_file\'\n+ #end if\n+ #if str($wgs.use_wgs) == "yes"\n+ -d \'$wgs.wgs\'\n+ #if str($wgs.max_genomic_breakpoint_distance)\n+ -D $wgs.max_genomic_breakpoint_distance\n+ #end if\n+ #end if\n+ -o fusions.tsv\n+#if $output_fusions_discarded\n+ -O fusions.discarded.tsv \n+#end if\n+## Arriba options\n+ #if $options.gtf_features\n+ -G \'$options.gtf_features\'\n+ #end if\n+ #if $options.strandedness\n+ -s $options.strandedness\n+ #end if\n+ #if $options.genome_contigs\n+ -i \'$options.genome_contigs\'\n+ #end if\n+ #if $options.viral_contigs\n+ -v \'$options.viral_contigs\'\n+ #end if\n+ #if str($options.max_evalue)\n+ -E $options.max_evalue\n+ #end if\n+ #if str($options.min_supporting_reads)\n+ -S $options.min_supporting_reads\n+ #end if\n+ #if str($options.max_mismappers)\n+ -m $options.max_mismappers\n+ #end if\n+ #if str($options.max_homolog_identity)\n+ -L $options.max_homolog_identity\n+ #end if\n+ #if str($options.homopolymer_length)\n+ -H $options.homopolymer_length\n+ #end if\n+ #if str($options.read_through_distance)\n+ -R $options.read_through_distance\n+ #end if\n+ #if str($options.min_anchor_length)\n+ -A $options.min_anchor_length\n+ #end if\n+ #if str($options.many_spliced_events)\n+ -M $options.many_spl'..b'mbiguous positions, such as positions with diverse reference mismatches, are represented as ?. Missing information due to insufficient coverage is denoted as an ellipsis (...). If the switch -I is used, then an attempt is made to fill missing information with the assembly sequence. 
A sequence stretch that was taken from the assembly sequence rather than the supporting reads is wrapped in parentheses (( and )). In addition, when -I is used, the sequence is trimmed to the boundaries of the fused transcripts. The coordinate of the fusion breakpoint relative to the start of the transcript can thus easily be inferred by counting the bases from the beginning of the fusion transcript to the breakpoint character (|). In case the full sequence could be constructed from the combined information of supporting reads and assembly sequence, the start of the fusion transcript is marked by a caret sign (^) and the end by a dollar sign ($). If the full sequence could not be constructed, these signs are missing.\n+\n+ * peptide_sequence : This column contains the fusion peptide sequence. The sequence is translated from the fusion transcript given in the column fusion_transcript and determines the reading frame of the fused genes according to the transcript isoforms given in the columns transcript_id1 and transcript_id2. Translation starts at the start of the assembled fusion transcript or when the start codon is encountered in the 5\' gene. Translation ends when either the end of the assembled fusion transcript is reached or when a stop codon is encountered. If the fusion transcript contains an ellipsis (...), the sequence beyond the ellipsis is trimmed before translation, because the reading frame cannot be determined reliably. The column contains a dot (.), when the transcript sequence could not be predicted or when the precise breakpoints are unknown due to lack of split reads or when the fusion transcript does not overlap any coding exons in the 5\' gene or when no start codon could be found in the 5\' gene or when there is a stop codon prior to the fusion junction (in which case the column reading_frame contains the value stop-codon). The breakpoint is represented as a pipe symbol (|). 
If a codon spans the breakpoint, the amino acid is placed on the side of the breakpoint where two of the three bases reside. Codons resulting from non-template bases are flanked by two pipes. Amino acids are written as lowercase characters in the following situations: non-silent SNVs/SNPs, insertions, frameshifts, codons spanning the breakpoint, non-coding regions (introns/intergenic regions/UTRs), and non-template bases. Codons which cannot be translated to amino acids, such as those having invalid characters, are represented as ?.\n+\n+ * read_identifiers : This column contains the names of the supporting reads separated by commas.\n+\n+ - fusions.discarded.tsv\n+\n+ The file fusions.discarded.tsv (as specified by the parameter -O) contains all events that Arriba classified as an artifact or that are also observed in healthy tissue. It has the same format as the file fusions.tsv. \n+\n+\n+**VISUALIZATION**\n+\n+See: https://arriba.readthedocs.io/en/latest/visualization/\n+\n+ - fusions.pdf\n+\n+ A PDF file with one page for each predicted fusion. Each page depicts the fusion partners, their orientation, the retained exons in the fusion transcript, statistics about the number of supporting reads, and if the column fusion_transcript has a value an excerpt of the sequence around the breakpoint.\n+\n+.. image:: draw-fusions-example.png\n+ :width: 800\n+ :height: 467\n+\n+\n+.. _Arriba: https://arriba.readthedocs.io/en/latest/\n+.. _INPUTS: https://arriba.readthedocs.io/en/latest/input-files/\n+.. _OUTPUTS: https://arriba.readthedocs.io/en/latest/output-files/\n+.. _VISUALIZATION: https://arriba.readthedocs.io/en/latest/visualization/\n+.. _OPTIONS: https://arriba.readthedocs.io/en/latest/command-line-options/\n+\n+ ]]></help>\n+ <expand macro="citations" />\n+</tool>\n' |
b |
diff -r 000000000000 -r a24ca22b906e macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Wed Jul 27 11:24:44 2022 +0000 |
[ |
b'@@ -0,0 +1,311 @@\n+<macros>\n+ <token name="@TOOL_VERSION@">2.3.0</token>\n+ <token name="@VERSION_SUFFIX@">0</token>\n+ <xml name="requirements">\n+ <requirements>\n+ <requirement type="package" version="@TOOL_VERSION@">arriba</requirement>\n+ <yield/>\n+ </requirements>\n+ </xml>\n+ <xml name="citations">\n+ <citations>\n+ <citation type="doi">10.1101/gr.257246.119</citation>\n+ <yield />\n+ </citations>\n+ </xml>\n+ <xml name="version_command">\n+ <version_command>arriba -h | grep Version | sed \'s/^.* //\'</version_command>\n+ </xml>\n+ <xml name="genome_source" token_assembly_optional="false" >\n+ <conditional name="genome">\n+ <param name="genome_source" type="select" label="Genome assembly fasta (that was used for STAR alignment)">\n+ <option value="history">From your history</option>\n+ <option value="cached">Use built-in Genome reference</option>\n+ </param>\n+ <when value="history">\n+ <param name="assembly" argument="-a" type="data" format="fasta" optional="@ASSEMBLY_OPTIONAL@" label="Genome assembly fasta"/>\n+ </when>\n+ <when value="cached">\n+ <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">\n+ <options from_data_table="all_fasta">\n+ <validator type="no_options" message="No reference genomes are available" />\n+ </options>\n+ </param>\n+ </when>\n+ </conditional>\n+ </xml>\n+ <xml name="gtf_source" token_assembly_optional="false" >\n+ <conditional name="genome_gtf">\n+ <param name="gtf_source" type="select" label="Genome GTF annotation source">\n+ <option value="history">From your history</option>\n+ <!-- <option value="cached">Use built-in Gtf annotation</option> -->\n+ </param>\n+ <when value="history">\n+ <param name="annotation" argument="-g" type="data" format="gtf" label="Gene annotation in GTF format"/>\n+ </when>\n+ </conditional>\n+ </xml>\n+\n+ <token name="@GENOME_SOURCE@"><![CDATA[\n+#if str($genome.genome_source) == "history"\n+ #if $genome.assembly\n+ #set $genome_assembly = 
\'genome.fa\'\n+ ln -sf \'$genome.assembly\' $genome_assembly &&\n+ #end if\n+#elif str($genome.genome_source) == "cached"\n+ #set $genome_assembly = $genome.ref_file.fields.fasta\n+#end if\n+ ]]></token>\n+ <token name="@GTF_SOURCE@"><![CDATA[\n+#if str($genome_gtf.gtf_source) == "history"\n+ #if $genome_gtf.annotation.is_of_type(\'gtf.gz\')\n+ #set $genome_annotation = \'genome.gtf.gz\'\n+ #else\n+ #set $genome_annotation = \'genome.gtf\'\n+ #end if\n+ ln -sf \'$genome_gtf.annotation\' $genome_annotation &&\n+#end if\n+ ]]></token>\n+\n+ <xml name="visualization_options">\n+ <param name="cytobands" argument="--cytobands" type="data" format="tabular" optional="true" label="Cytobands"/>\n+ <section name="options" expanded="false" title="Draw Fusion Options">\n+ <param argument="--sampleName" type="text" value="" optional="true" label="Sample Name printed as the title on every page"/>\n+ <param argument="--transcriptSelection" type="select" optional="true" label="Transcript selection">\n+ <help>By default the transcript isoform with the highest coverage is drawn.\n+ Alternatively, the transcript isoform that is provided in the columns\n+ transcript_id1 and transcript_id2 in the given fusions file can be drawn.\n+ Selecting the isoform with the highest coverage usually produces nicer plots,\n+ in the sense that the coverage track is smooth and shows a visible increase in coverage '..b' </param>\n+ <param argument="--coverageRange" type="text" value="" optional="true" label="Maximum coverage for plot">\n+ <help>When the parameter --alignments is used, coverage plots are drawn above the transcripts of the fused genes. \n+ The plots can be cropped at a fixed level by passing a non-zero value to this parameter. \n+ When only a single value is given, both coverage plots (for gene1 and gene2) are cropped at the same level. \n+ When two comma-separated values are given, the cutoffs can be specified independently for the two plots. 
\n+ A value of 0 indicates that no cropping should be applied (i.e., the cutoff is set to the peak coverage) \n+ and that the coverage plots of both genes should be on the same scale. This is the default behavior. \n+ A value of 0,0 also indicates that no cropping should be applied, \n+ but the coverage plots of the two genes have different scales: \n+ each one is scaled individually to the peak coverage of the respective gene. \n+ Default: 0\n+ </help>\n+ <validator type="regex" message="Enter one non-negative integer or two comma-separated non-negative integers, e.g. 0 or 50,100">^\\d+(,\\d+)?$</validator>\n+ </param>\n+ </section>\n+ </xml>\n+ <token name="@DRAW_FUSIONS@">\n+draw_fusions.R\n+ --fusions=\'$fusions\'\n+ --alignments=\'Aligned.sortedByCoord.out.bam\'\n+ --annotation=\'$genome_gtf.annotation\'\n+ --output=fusions.pdf\n+ #if $visualization.cytobands\n+ --cytobands=\'$visualization.cytobands\'\n+ #end if\n+ #if $protein_domains\n+ --proteinDomains=\'$protein_domains\'\n+ #end if\n+ ## Visualization Options\n+ #if $visualization.options.transcriptSelection\n+ --transcriptSelection=$visualization.options.transcriptSelection\n+ #end if\n+ #if $visualization.options.minConfidenceForCircosPlot\n+ --minConfidenceForCircosPlot=$visualization.options.minConfidenceForCircosPlot\n+ #end if\n+ #if $visualization.options.squishIntrons\n+ --squishIntrons=$visualization.options.squishIntrons\n+ #if $visualization.options.squishIntrons == \'FALSE\' and $visualization.options.showIntergenicVicinity\n+ --showIntergenicVicinity=\'$visualization.options.showIntergenicVicinity\'\n+ #end if\n+ #end if\n+ #if $visualization.options.mergeDomainsOverlappingBy\n+ --mergeDomainsOverlappingBy=$visualization.options.mergeDomainsOverlappingBy\n+ #end if\n+ #if $visualization.options.sampleName\n+ --sampleName=\'$visualization.options.sampleName\'\n+ #end if\n+ #if $visualization.options.printExonLabels\n+ --printExonLabels=$visualization.options.printExonLabels\n+ #end if\n+ #if $visualization.options.coverageRange\n+ 
--coverageRange=\'$visualization.options.coverageRange\'\n+ #end if\n+ #if $visualization.options.render3dEffect\n+ --render3dEffect=$visualization.options.render3dEffect\n+ #end if\n+ #if $visualization.options.optimizeDomainColors\n+ --optimizeDomainColors=$visualization.options.optimizeDomainColors\n+ #end if\n+ #if $visualization.options.color1\n+ --color1=\'$visualization.options.color1\'\n+ #end if\n+ #if $visualization.options.color2\n+ --color2=\'$visualization.options.color2\'\n+ #end if\n+ #if str($visualization.options.pdfWidth)\n+ --pdfWidth=$visualization.options.pdfWidth\n+ #end if\n+ #if str($visualization.options.pdfHeight)\n+ --pdfHeight=$visualization.options.pdfHeight\n+ #end if\n+ ## fontFamily\n+ #if $visualization.options.fontFamily\n+ --fontFamily=$visualization.options.fontFamily\n+ #end if\n+ #if str($visualization.options.fontSize)\n+ --fontSize=$visualization.options.fontSize\n+ #end if\n+</token>\n+</macros>\n' |
b |
diff -r 000000000000 -r a24ca22b906e static/images/draw-fusions-example.png |
b |
Binary file static/images/draw-fusions-example.png has changed |
b |
diff -r 000000000000 -r a24ca22b906e test-data/Aligned.out.bam |
b |
Binary file test-data/Aligned.out.bam has changed |
b |
diff -r 000000000000 -r a24ca22b906e test-data/Aligned.out.bam.bai |
b |
Binary file test-data/Aligned.out.bam.bai has changed |
b |
diff -r 000000000000 -r a24ca22b906e test-data/Aligned.out.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Aligned.out.sam Wed Jul 27 11:24:44 2022 +0000 |
b |
b'@@ -0,0 +1,89 @@\n+@HD\tVN:1.4\tSO:coordinate\n+@SQ\tSN:22\tLN:269079\n+@SQ\tSN:9\tLN:515509\n+@PG\tID:STAR\tPN:STAR\tVN:2.7.8a\tCL:STAR --runThreadN 12 --genomeDir tempstargenomedir --genomeLoad NoSharedMemory --readFilesIn /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368710.dat /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368711.dat --readFilesCommand zcat --limitBAMsortRAM 122880000000 --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --outSAMattributes NH HI AS nM ch --outSAMunmapped Within --outSAMprimaryFlag OneBestScore --outSAMmapqUnique 60 --outBAMsortingThreadN 12 --outBAMsortingBinsN 50 --outSAMattrIHstart 1 --winAnchorMultimapNmax 50 --chimSegmentMin 12 --chimOutType WithinBAM Junctions --chimOutJunctionFormat 1 --quantMode TranscriptomeSAM GeneCounts --quantTranscriptomeBan Singleend --twopassMode Basic\n+@CO\tuser command line: STAR --runThreadN 12 --genomeLoad NoSharedMemory --genomeDir tempstargenomedir --readFilesIn /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368710.dat /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368711.dat --readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --twopassMode Basic --quantMode TranscriptomeSAM GeneCounts --quantTranscriptomeBan Singleend --outSAMstrandField intronMotif --outSAMattrIHstart 1 --outSAMattributes NH HI AS nM ch --outSAMprimaryFlag OneBestScore --outSAMmapqUnique 60 --outSAMunmapped Within --chimSegmentMin 12 --outBAMsortingThreadN 12 --outBAMsortingBinsN 50 --winAnchorMultimapNmax 50 --limitBAMsortRAM 122880000000 --chimOutType WithinBAM Junctions --chimOutJunctionFormat 
1\n+BCR-ABL1-46\t163\t22\t225687\t60\t71M2994N7M1344N72M\t=\t225737\t5255\tAACTGGAGGCAGTGCCCAACATCCCCCTGGTGCCCGATGAGGAGCTGGACGCTTTGAACATCAAGATCTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTG\tCCCGGGGGG=GGGJJJGGJJJGGJJJJCJJGGJJGCJGCGGGC8J8JGGJJJJJGJJC(JGCCG=GGJJGCCCGC8GCCGGGGGG=GGCGGG1GG=GC1G=CJCJJCCCGGCGG1CGG1GGGGGGGG=GGGGGCCGCGGG8GGGCGG=GG\tNH:i:1\tHI:i:1\tAS:i:285\tnM:i:1\tXS:A:+\tNM:i:1\n+BCR-ABL1-72\t163\t22\t225696\t60\t62M2994N7M1344N81M\t=\t228752\t5264\tCAGTGCCCAACATCCCCCTGGTGCCCGATGAGGAGCTGCACGCTTTGAAGATCAAGATCTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCA\tCCCCGGGGGGGGGGJGJCCCJ1GJJJJGCGGGCJJJ=C1JJGGJGG8JGC=CCGJ1JGG8GGGGGJCGJCCGGGCG=CGGGGGGCGG=GGCGGG=8CCGCGGJJJ=JGGGCGGGGGCCGCCGGGGGGGGC=CCGCG8GGGGGC1GGGGCC\tNH:i:1\tHI:i:1\tAS:i:290\tnM:i:1\tXS:A:+\tNM:i:1\n+BCR-ABL1-46\t83\t22\t225737\t60\t21M2994N7M1344N105M717N17M\t=\t225687\t-5255\tGCTTTGAAGATCAAGATCTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGG\t=GGCGGGGGGG=GGGCCCGCCCGGGGGGGGGGCCGGGGCGG8CGCGGG1JGGCCGG(C=GCCCGGGGGGCGGGGGCGCGGCGGJCGGGJJGJGGGJJCGGGJJJGJJJJJJJGJJJJGGGJJJJJGGJJJJJGCJJJCGGGGGGGGGCCC\tNH:i:1\tHI:i:1\tAS:i:285\tnM:i:1\tXS:A:+\tNM:i:0\n+BCR-ABL1-72\t83\t22\t228752\t60\t3S7M1344N105M717N35M\t=\t225696\t-5264\tTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAAT\t=GGGGGG==GGGGCCCC=GGGGG=GGGGCGGGCGGGGGGG=CGGCCGCCJGGCGGGGG=GGG8GGGCGGC=G=CCJGGGGGGCGJJGJJCGGGGGGJJJGCJCCGJG=JJJGJGJJCJJJJGJJJJJJJ=GCJGJGCGGG=GGGGGGCC=\tNH:i:1\tHI:i:1\tAS:i:290\tnM:i:1\tXS:A:+\tNM:i:0\n+BCR-ABL1-4\t99\t22\t230111\t60\t97M717N53M\t=\t230176\t889\tAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCC\tC==GGGGGGGGGGJJJJ1JJJGGJJGGJGGJJGJJCJGJGJJCGGCJGCJJJJCGJGGGGJGGGGGGCCG
G8JGGCGCGG=GGGGGGGGGGGGGG=GCCGJGGGCCGGGGGG1GGGGGGCGCGGCGGGGGG=GGGGGGGGGCCGCGGGCC\tNH:i:1\tHI:i:1\tAS:i:259\tnM:i:0\tch:A:1\tXS:A:+\tNM:i:0\n+BCR-ABL1-18\t99\t22\t230118\t60\t90M717N60M\t=\t230165\t882\tCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGTAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCAATCAGCCACTGGAT\tCCCGGGCGGGCGGJGJJJJJJJJJ='..b'GGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGAGTGAGAGCAGTCCTGGCCAGAGGTCCATCTCGCT\tCCGGGCGCGCGGGCG=CCCGGCGCGGGGC=CGGCGGCCGCGGGJJJJCCGCCG(GCCCCCGGCCGGG=G8GGGGGGCC=C=CGGJGJJJGC=JGGJJJGJGJ1JJJGC=JJJG=JCJJJJJJJ=JJGGGJJJCGJJJGGGGGCGG=GCCC\tNH:i:1\tHI:i:1\tAS:i:298\tnM:i:0\tNM:i:0\n+BCR-ABL1-74\t77\t*\t0\t0\t*\t*\t0\t0\tTCATTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATGACATTC\tCC11GGGGGGGGGGCCJJJGCGJJGJJJJJGGGGGGJJJGGJG==GCJCJ=GGJJGGJJGGCJGG=GGGGGJGGJGC=GC=GGGCGGGCGGGGCCGCGGGJCGC=GGC8CGCGCGGGGGGCGCC1GGCGCC=GCCGCGGC8GCGGGCCCG\tNH:i:0\tHI:i:0\tAS:i:155\tnM:i:2\tuT:A:1\n+BCR-ABL1-74\t141\t*\t0\t0\t*\t*\t0\t0\tCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAGGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAG\tCCCGGGGGGCGCGJGGJJGGJGJJJGJGGJJGGJGJJ1=JCJJGGGJJJJGGGJGCCJGGJGG=J1JG8JGCGGGJG=GC1CGCCGGCG(GGCGGCGGGGGCJC1CCGC==CCGGGGCGGCGGGCCGGCGCGC8CCCCGGG=GGGC=GGG\tNH:i:0\tHI:i:0\tAS:i:155\tnM:i:2\tuT:A:1\n+BCR-ABL1-66\t77\t*\t0\t0\t*\t*\t0\t0\tTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATGACATTCAGAAACCCATAGAG\tCCC=GGGGCGGGGJJJJJGJJJJ=JJJGJJ1GJJGJJJJJGJJJJJGGGGCGJJGGGJJJGGCGGGGJGCGG1JCGGG=GCCGCG=GC=G=GCCGGGGG8JGGGGGGGGGGGG=GGCGGC8GGCCGGGC=GGGGGGGGG=CGG=8GGCCG\tNH:i:0\tHI:i:0\tAS:i:159\tnM:i:0\tuT:A:1\n+BCR-ABL1-66\t141\t*\t0\t0\t*\t*\t0\t0\tCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTT
CAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAG\tCCCGGGGGGGGGGGGJ=JGJJJJJJJGGJJCCCJGJJ1JJJGCJGGGGJJJJ=GGGJGJGC(GGGGJGGGJG1=GGGGGGGG=G=C=GG8CC8GGGGGCCCCJCCCJGCG=GGCCGGCGGCGGCG==1GCCGGC1GGGGGCGGGGGGCGG\tNH:i:0\tHI:i:0\tAS:i:159\tnM:i:0\tuT:A:1\n+BCR-ABL1-58\t77\t*\t0\t0\t*\t*\t0\t0\tATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGG\tCCCGGCGGGGGGGGJJJJJGJJGJGJGJGJJJJJJJJJCJGJJJJGCG=8GGGJGJGGCGGJGCGJJJCJGGG=CGCCGGCCGGGCGCGGGCGCG1GGGCCCGGGGCG8GCCC=C8CGCGG=CCCGCCCCGGG=CCGGCGGGCGGGGGCG\tNH:i:0\tHI:i:0\tAS:i:185\tnM:i:3\tuT:A:1\n+BCR-ABL1-58\t141\t*\t0\t0\t*\t*\t0\t0\tTTGGGGTCATTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATTCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATG\tCCCGGGGGGGGGGJJJJJJGJGJJJGGJ=JJJJJJJJGC=GJJGGJJGJJGG1GCJGGGG=JGGG8C=GCCGC==GGGCGGGGGG=GGG=(G=CCGCCGGGGCJJJJGGGC8GCGCGCG8CGGCCGGGCGCGCGG8CCGG8CGGGGGGGG\tNH:i:0\tHI:i:0\tAS:i:185\tnM:i:3\tuT:A:1\n+BCR-ABL1-24\t77\t*\t0\t0\t*\t*\t0\t0\tCGCAGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGGCTGAGTGAAG\tCC11GCGGGGGGGJCGJGJJCCJJJJGJJJJGJJGGJJJCJJJG8JJJ1GJ=JGGGGJJJCG=8GGCGCCGGGCCGGGCGGGGCGGGGCCGCGGCCGGG=J1GCCC1(CCGGCGGGCCGCGGGCGGGGC=GGCGCCGCC1GCGGGGGCGG\tNH:i:0\tHI:i:0\tAS:i:154\tnM:i:3\tuT:A:1\n+BCR-ABL1-24\t141\t*\t0\t0\t*\t*\t0\t0\tTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATGACATTCAGAA\tC=CCGGGGGGGGCJ1GGJJJJ1JJJJJGJJ=GJJG8GGJ=GJGJJGJJGGGCGJGCGGGCGGG8GG=GJJGCG1GCGGJGCCGGCGGGCCGGGCG8GGGGG8C1==CGGCCCGCGGGGC8GCGGG8GGGCGCCGCCGCGGGCGGGGGGCG\tNH:i:0\tHI:i:0\tAS:i:154\tnM:i:3\tuT:A:1\n+BCR-ABL1-10\t77\t*\t0\t0\t*\t*\t0\t0\tAGGTTGGGGTCATTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACG\tCC=GGGGGGGGGG1GJJJJJCJJJJJJJJJ
JJGJ=GJJJGCJJJJCJGJGCJGJJJGGJJJGGCCGGJGC=GGJ1C8GGGGGGCGCCGGGGGGCGGGCGCCCG1GGCGCGCGGGCC8GCGCGCGC8CCCGCGCGGGGGCGGGGGCGGCGG\tNH:i:0\tHI:i:0\tAS:i:181\tnM:i:2\tuT:A:1\n+BCR-ABL1-10\t141\t*\t0\t0\t*\t*\t0\t0\tATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGA\t1CCGGCGGGGGG1GGJJJGCC1JJJJCCG=JGGJJGJJJ=GGGGGJJGGGGGGC1J=CJGCGGGGCGC(CGGGGG=GGGGG(G=CGGCGGGGCCCGC=CCCCJJCC8G1GGGGCGGGGGGCGCGGGGGGGCG=GGCCGCCGCC1G=GGGG\tNH:i:0\tHI:i:0\tAS:i:181\tnM:i:2\tuT:A:1\n' |
b |
diff -r 000000000000 -r a24ca22b906e test-data/cytobands.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cytobands.tsv Wed Jul 27 11:24:44 2022 +0000 |
b |
@@ -0,0 +1,5 @@ +contig start end name giemsa +22 1 40586 q11.22 gpos25 +22 40586 269079 q11.23 gneg +9 1 21036 q34.11 gneg +9 21036 515509 q34.12 gpos25 |
b |
diff -r 000000000000 -r a24ca22b906e test-data/fusions.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fusions.tsv Wed Jul 27 11:24:44 2022 +0000 |
b |
@@ -0,0 +1,2 @@ +#gene1 gene2 strand1(gene/fusion) strand2(gene/fusion) breakpoint1 breakpoint2 site1 site2 type split_reads1 split_reads2 discordant_mates coverage1 coverage2 confidence reading_frame tags retained_protein_domains closest_genomic_breakpoint1 closest_genomic_breakpoint2 gene_id1 gene_id2 transcript_id1 transcript_id2 direction1 direction2 filters fusion_transcript peptide_sequence read_identifiers +BCR ABL1 +/+ +/+ 22:230999 9:275100 CDS/splice-site CDS/splice-site translocation 1 3 0 3 8 low in-frame . Bcr-Abl_oncoprotein_oligomerisation_domain(100%),C2_domain(100%),RhoGEF_domain(100%)|F-actin_binding(100%),Protein_kinase_domain(100%),SH2_domain(100%),SH3_domain(100%) . . ENSG00000186716 ENSG00000097007 ENST00000305877 ENST00000372348 downstream upstream . AGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAG___ATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAA|AAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAG___GTGAAAAGCTCCGGG SFSLTSVELQMLTNSCVKLQTVHSIPLTINKEDDESPGLYGFLNVIVHSATGFKQSS|kALQRPVASDFEPQGLSEAARWNSKENLLAGPSENDPNLFVALYDFVASGDNTLSITKGEKLR BCR-ABL1-4,BCR-ABL1-28,BCR-ABL1-60,BCR-ABL1-76 |
b |
diff -r 000000000000 -r a24ca22b906e test-data/genome.fasta.gz |
b |
Binary file test-data/genome.fasta.gz has changed |
b |
diff -r 000000000000 -r a24ca22b906e test-data/genome.gtf.gz |
b |
Binary file test-data/genome.gtf.gz has changed |
b |
diff -r 000000000000 -r a24ca22b906e test-data/protein_domains.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/protein_domains.gff3 Wed Jul 27 11:24:44 2022 +0000 |
b |
b'@@ -0,0 +1,83 @@\n+9\tpfam\tprotein_domain\t33502\t33541\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t33992\t34063\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t35324\t35381\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t37391\t37409\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t37479\t37553\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t38833\t38931\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t41390\t41413\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t41489\t41494\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t43744\t43846\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t44647\t44729\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t47496\t47541\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t51664\t51812\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n+9\tpfam\tprotein_domain\t102331\t102396\t0\t+\t.\tName=Zinc finger%2C C2H2 type;color=#80FF00;gene_id=ENSG00000130711;gene_name=PRDM12;protein_domain_id=PF00096\n+9\tpfam\tprotein_domain\t102412\t102480\t0\t+\t.\tName=C2H2-type zinc 
finger;color=#80FF80;gene_id=ENSG00000130711;gene_name=PRDM12;protein_domain_id=PF13894\n+9\tpfam\tprotein_domain\t114903\t114949\t0\t+\t.\tName=Exosome complex exonuclease RRP4 N-terminal region;color=#FF0000;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF14382\n+9\tpfam\tprotein_domain\t116528\t116596\t0\t+\t.\tName=Exosome complex exonuclease RRP4 N-terminal region;color=#FF0000;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF14382\n+9\tpfam\tprotein_domain\t121951\t121971\t0\t+\t.\tName=KH domain;color=#000080;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF15985\n+9\tpfam\tprotein_domain\t123179\t123300\t0\t+\t.\tName=KH domain;color=#000080;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF15985\n+9\tpfam\tprotein_domain\t275219\t275273\t0\t+\t.\tName=SH3 domain;color=#FF0000;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00018\n+9\tpfam\tprotein_domain\t275837\t275922\t0\t+\t.\tName=SH3 domain;color=#FF0000;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00018\n+9\tpfam\tprotein_domain\t275962\t276132\t0\t+\t.\tName=SH2 domain;color=#80FFFF;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00017\n+9\tpfam\tprotein_domain\t283799\t283855\t0\t+\t.\tName=SH2 domain;color=#80FFFF;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00017\n+9\tpfam\tprotein_domain\t283973\t284071\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n+9\tpfam\tprotein_domain\t293165\t293249\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n+9\tpfam\tprotein_domain\t293896\t294073\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n+9\tpfam\tprotein_domain\t295904\t296088\t0\t+\t.\tName=Protein kinase 
domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n+9\tpfam\tprotein_domain\t299451\t299603\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n+9\tpfam\tprotein_domain\t301104\t301156\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;ge'..b'd=PF00053\n+9\tpfam\tprotein_domain\t489945\t490067\t0\t+\t.\tName=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053\n+9\tpfam\tprotein_domain\t490710\t490856\t0\t+\t.\tName=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053\n+22\tpfam\tprotein_domain\t2420\t2524\t0\t-\t.\tName=Armadillo/beta-catenin-like repeat;color=#000080;gene_id=ENSG00000100218;gene_name=RSPH14;protein_domain_id=PF00514\n+22\tpfam\tprotein_domain\t36321\t37004\t0\t+\t.\tName=G-protein alpha subunit;color=#80FFFF;gene_id=ENSG00000128266;gene_name=GNAZ;protein_domain_id=PF00503\n+22\tpfam\tprotein_domain\t63673\t63981\t0\t+\t.\tName=G-protein alpha subunit;color=#80FFFF;gene_id=ENSG00000128266;gene_name=GNAZ;protein_domain_id=PF00503\n+22\tpfam\tprotein_domain\t90736\t90740\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t93060\t93112\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t93619\t93720\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t96554\t96622\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t98578\t98629\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t99484\t99565\t0\t+\t.\tName=Ras 
family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t99749\t99839\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t101465\t101502\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n+22\tpfam\tprotein_domain\t121553\t121771\t0\t+\t.\tName=Bcr-Abl oncoprotein oligomerisation domain;color=#FF0000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF09036\n+22\tpfam\tprotein_domain\t201581\t201640\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n+22\tpfam\tprotein_domain\t201941\t202126\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n+22\tpfam\tprotein_domain\t208994\t209101\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n+22\tpfam\tprotein_domain\t212118\t212178\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n+22\tpfam\tprotein_domain\t213667\t213719\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n+22\tpfam\tprotein_domain\t214220\t214312\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n+22\tpfam\tprotein_domain\t230954\t230999\t0\t+\t.\tName=C2 domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168\n+22\tpfam\tprotein_domain\t233127\t233224\t0\t+\t.\tName=C2 domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168\n+22\tpfam\tprotein_domain\t235610\t235741\t0\t+\t.\tName=C2 domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168\n+22\tpfam\tprotein_domain\t250010\t250018\t0\t+\t.\tName=C2 
domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168\n+22\tpfam\tprotein_domain\t252302\t252422\t0\t+\t.\tName=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620\n+22\tpfam\tprotein_domain\t253473\t253607\t0\t+\t.\tName=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620\n+22\tpfam\tprotein_domain\t254554\t254659\t0\t+\t.\tName=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620\n+22\tpfam\tprotein_domain\t255138\t255228\t0\t+\t.\tName=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620\n' |
b |
diff -r 000000000000 -r a24ca22b906e tool-data/all_fasta.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Wed Jul 27 11:24:44 2022 +0000 |
b |
@@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# \ No newline at end of file |
b |
diff -r 000000000000 -r a24ca22b906e tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Jul 27 11:24:44 2022 +0000 |
b |
@@ -0,0 +1,7 @@ +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> +</tables> |