Repository 'arriba'
hg clone https://toolshed.g2.bx.psu.edu/repos/jjohnson/arriba

Changeset 10:c58d1774c762 (2022-02-11)
Previous changeset 9:8c4c97fd0555 (2021-10-13) Next changeset 11:8ed8af5836d1 (2022-04-26)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/arriba commit c1d05da7c2c76feae94cbc640be7b010f31397d2-dirty"
modified:
arriba.xml
macros.xml
added:
test-data/genome.fasta.gz
test-data/genome.gtf.gz
removed:
arriba_download_reference.xml
arriba_draw_fusions.xml
arriba_get_filters.xml
test-data/Aligned.out.sam
test-data/arriba_indexes.loc
test-data/cytobands.tsv
test-data/fusions.tsv
test-data/genome.fasta
test-data/genome.gtf
test-data/protein_domains.gff3
test-data/read1.fastq.gz
test-data/read2.fastq.gz
test-data/test-cache/genome.fasta
test-data/test-cache/genome.gtf
b
diff -r 8c4c97fd0555 -r c58d1774c762 arriba.xml
--- a/arriba.xml Wed Oct 13 18:45:16 2021 +0000
+++ b/arriba.xml Fri Feb 11 19:04:06 2022 +0000
b
@@ -161,6 +161,12 @@
     #if $options.max_itd_length
         -l $options.max_itd_length
     #end if
+    #if $options.min_itd_allele_fraction
+        -z $options.min_itd_allele_fraction
+    #end if
+    #if $options.min_itd_supporting_reads
+        -Z $options.min_itd_supporting_reads
+    #end if
     $options.duplicate_marking
     $options.fill_discarded_columns
     $options.fill_the_gaps
@@ -381,7 +387,7 @@
             <param name="covered_fraction" argument="-C" type="float" value="" min="0." max="1.0" optional="true" label="Covered fraction">
                 <help>Ignore virally associated events if the virus is not fully expressed, 
                       i.e., less than the given fraction of the viral contig is transcribed.
-                      Default: 0.150000
+                      Default: 0.050000
                 </help>
             </param>
             <param name="max_itd_length" argument="-l" type="integer" value="" min="1" optional="true" label="Maximum length of internal tandem duplications">
@@ -389,6 +395,14 @@
                       Default: 100
                 </help>
             </param>
+            <param name="min_itd_allele_fraction" argument="-z" type="float" value="" min="0." max="1.0" optional="true" label="Required fraction of supporting reads to report an internal tandem duplication">
+                <help> Default: 0.070000 
+                </help>
+            </param>
+            <param name="min_itd_supporting_reads" argument="-Z" type="integer" value="" min="1" optional="true" label="Required number of supporting reads to report an internal tandem duplication">
+                <help> Default: 10
+                </help>
+            </param>
             <param name="duplicate_marking" argument="-u" type="boolean" truevalue="-u" falsevalue="" checked="false" label="Use aligners duplicate marking">
                 <help>Instead of performing duplicate marking itself, Arriba relies on duplicate marking by a
                       preceding program using the BAM_FDUP flag. This makes sense when unique molecular
@@ -442,8 +456,8 @@
             </conditional>
             <conditional name="genome">
                 <param name="genome_source" value="history"/>
-                <param name="assembly" ftype="fasta" value="genome.fasta"/>
-                <param name="annotation" ftype="gtf" value="genome.gtf"/>
+                <param name="assembly" ftype="fasta" value="genome.fasta.gz"/>
+                <param name="annotation" ftype="gtf" value="genome.gtf.gz"/>
             </conditional>
             <param name="protein_domains" ftype="gff3" value="protein_domains.gff3"/>
             <conditional name="visualization">
@@ -465,8 +479,8 @@
             </conditional>
             <conditional name="genome">
                 <param name="genome_source" value="history"/>
-                <param name="assembly" ftype="fasta" value="genome.fasta"/>
-                <param name="annotation" ftype="gtf" value="genome.gtf"/>
+                <param name="assembly" ftype="fasta" value="genome.fasta.gz"/>
+                <param name="annotation" ftype="gtf" value="genome.gtf.gz"/>
             </conditional>
             <param name="protein_domains" ftype="gff3" value="protein_domains.gff3"/>
             <conditional name="visualization">
b
diff -r 8c4c97fd0555 -r c58d1774c762 arriba_download_reference.xml
--- a/arriba_download_reference.xml Wed Oct 13 18:45:16 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,108 +0,0 @@
-<tool id="arriba_download_reference" name="Arriba Reference" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
-    <description>Download to history</description>
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-    <expand macro="requirements" />
-    <expand macro="version_command" />
-    <command detect_errors="exit_code"><![CDATA[
-    echo $arriba_reference_name > '$star_index' &&
-    BASE_DIR=\$(dirname \$(dirname `which arriba`)) &&
-    REF_SCRIPT=`find \$BASE_DIR -name 'download_references.sh'` &&
-    #if $is_test != 'yes'
-    \$REF_SCRIPT '$arriba_reference_name' &&
-    cp *.fa*  '$genome_fasta' &&
-    cp *.gtf*  '$genome_gtf' &&
-    mv STAR_index_* '$star_index.extra_files_path'
-    #else
-    [[ -x \$REF_SCRIPT ]]
-    #end if
-    ]]></command>
-    <inputs>
-        <param name="is_test" type="hidden" value="no"/>
-        <param name="arriba_reference_name" type="select" label="Select reference">
-            <option value="GRCh38+ENSEMBL93">GRCh38+ENSEMBL93</option>
-            <option value="GRCh38+GENCODE28">GRCh38+GENCODE28</option>
-            <option value="GRCh38+RefSeq">GRCh38+RefSeq</option>
-            <option value="GRCh38viral+ENSEMBL93">GRCh38viral+ENSEMBL93</option>
-            <option value="GRCh38viral+GENCODE28">GRCh38viral+GENCODE28</option>
-            <option value="GRCh38viral+RefSeq">GRCh38viral+RefSeq</option>
-            <option value="hg38+ENSEMBL93">hg38+ENSEMBL93</option>
-            <option value="hg38+GENCODE28">hg38+GENCODE28</option>
-            <option value="hg38+RefSeq">hg38+RefSeq</option>
-            <option value="hg38viral+ENSEMBL93">hg38viral+ENSEMBL93</option>
-            <option value="hg38viral+GENCODE28">hg38viral+GENCODE28</option>
-            <option value="hg38viral+RefSeq">hg38viral+RefSeq</option>
-            <option value="GRCh37+ENSEMBL87">GRCh37+ENSEMBL87</option>
-            <option value="GRCh37+GENCODE19">GRCh37+GENCODE19</option>
-            <option value="GRCh37+RefSeq">GRCh37+RefSeq</option>
-            <option value="GRCh37viral+ENSEMBL87">GRCh37viral+ENSEMBL87</option>
-            <option value="GRCh37viral+GENCODE19">GRCh37viral+GENCODE19</option>
-            <option value="GRCh37viral+RefSeq">GRCh37viral+RefSeq</option>
-            <option value="hg19+ENSEMBL87">hg19+ENSEMBL87</option>
-            <option value="hg19+GENCODE19">hg19+GENCODE19</option>
-            <option value="hg19+RefSeq">hg19+RefSeq</option>
-            <option value="hg19viral+ENSEMBL87">hg19viral+ENSEMBL87</option>
-            <option value="hg19viral+GENCODE19">hg19viral+GENCODE19</option>
-            <option value="hg19viral+RefSeq">hg19viral+RefSeq</option>
-            <option value="hs37d5+ENSEMBL87">hs37d5+ENSEMBL87</option>
-            <option value="hs37d5+GENCODE19">hs37d5+GENCODE19</option>
-            <option value="hs37d5+RefSeq">hs37d5+RefSeq</option>
-            <option value="hs37d5viral+ENSEMBL87">hs37d5viral+ENSEMBL87</option>
-            <option value="hs37d5viral+GENCODE19">hs37d5viral+GENCODE19</option>
-            <option value="hs37d5viral+RefSeq">hs37d5viral+RefSeq</option>
-            <option value="GRCm38+GENCODEM25">GRCm38+GENCODEM25</option>
-            <option value="GRCm38+RefSeq">GRCm38+RefSeq</option>
-            <option value="GRCm38viral+GENCODEM25">GRCm38viral+GENCODEM25</option>
-            <option value="GRCm38viral+RefSeq">GRCm38viral+RefSeq</option>
-            <option value="mm10+GENCODEM25">mm10+GENCODEM25</option>
-            <option value="mm10+RefSeq">mm10+RefSeq</option>
-            <option value="mm10viral+GENCODEM25">mm10viral+GENCODEM25</option>
-            <option value="mm10viral+RefSeq">mm10viral+RefSeq</option>
-        </param>
-    </inputs>
-    <outputs>
-        <data name="genome_fasta" format="fasta" label="${tool.name} ${arriba_reference_name} fasta"/>
-        <data name="genome_gtf" format="gtf" label="${tool.name} ${arriba_reference_name} GTF"/>
-        <data name="star_index" format="txt" label="${tool.name} ${arriba_reference_name} STAR index"/>
-    </outputs>
-    <tests>
-        <!-- Downloading a genome and annotation and building a STAR index requires too many resources for testing.
-              Just test that we can locate the script. -->
-        <test>
-            <param name="is_test" value="yes"/>
-            <param name="arriba_reference_name" value="GRCh38+ENSEMBL93"/>
-            <output name="star_index">
-                <assert_contents>
-                    <has_text text="GRCh38+ENSEMBL93"/>
-                </assert_contents>
-            </output>
-        </test>
-    </tests>
-    <help><![CDATA[
-**Arriba Reference**
-
-Arriba_ is a fast tool to search for aberrant transcripts such as gene fusions.
-It is based on chimeric alignments found by the STAR RNA-Seq aligner.
-
-**Arriba Reference** downloads a genome sequence fasta and its related annotation GTF, and then builds a STAR index for the RNA STAR aligner.  
-
-These datasets will be added to your Galaxy history:
-
-    - genome assembly fasta 
-    - genome annotation GTF 
-    - STAR index
-
-See Arriba manual pages:
-
-  - https://arriba.readthedocs.io/en/latest/workflow/
-  - https://arriba.readthedocs.io/en/latest/input-files/
-
-
-**NOTE:** This is a resource intensive process, so the results should be copied to new histories as needed rather than running this in each workflow.
-
-.. _Arriba: https://arriba.readthedocs.io/en/latest/
-
-]]></help>
-    <expand macro="citations" />
-</tool>
b
diff -r 8c4c97fd0555 -r c58d1774c762 arriba_draw_fusions.xml
--- a/arriba_draw_fusions.xml Wed Oct 13 18:45:16 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,120 +0,0 @@
-<tool id="arriba_draw_fusions" name="Arriba Draw Fusions" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
-    <description></description>
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-    <expand macro="requirements" />
-    <expand macro="version_command" />
-    <command detect_errors="exit_code"><![CDATA[
-    #if $alignments.extension == 'sam'
-        ln -sf '$genome.assembly' input.fa &&
-        samtools faidx input.fa &&
-        samtools view -b -@ \${GALAXY_SLOTS:-1} -t input.fa.fai '$alignments' | 
-        samtools sort -O bam -@ \${GALAXY_SLOTS:-1} -T "\${TMPDIR:-.}" -o Aligned.sortedByCoord.out.bam &&
-        samtools index Aligned.sortedByCoord.out.bam &&
-    #else
-        ln -sf '${alignments}' 'Aligned.sortedByCoord.out.bam' &&
-        ln -sf '$alignments.metadata.bam_index' 'Aligned.sortedByCoord.out.bam.bai' &&
-    #end if
-    @DRAW_FUSIONS@
-    ]]></command>
-    <inputs>
-        <param argument="--fusions" type="data" format="tabular" label="Arriba fusions.tsv"/>
-        <param argument="--alignments" type="data" format="sam,bam" label="STAR Aligned.out.bam"/>
-        <expand macro="genome_source" assembly_optional="true"/>
-        <param name="protein_domains" argument="-p" type="data" format="gff3" optional="true" label="File containing protein domains"/>
-        <section name="visualization" expanded="true" title="Visualization Options">
-             <expand macro="visualization_options" />
-        </section>
-    </inputs>
-    <outputs>
-        <data name="fusions_pdf" format="pdf" label="${tool.name} on ${on_string}: fusions.pdf" from_work_dir="fusions.pdf">
-            <filter>visualization['do_viz'] == "yes"</filter>
-        </data> 
-    </outputs>
-    <tests>
-        <!-- Test 1 - From existing SAM alignments -->
-        <test> 
-            <param name="fusions" ftype="tabular" value="fusions.tsv"/>
-            <param name="alignments" ftype="sam" value="Aligned.out.sam"/>
-            <conditional name="genome">
-                <param name="genome_source" value="history"/>
-                <param name="assembly" ftype="fasta" value="genome.fasta"/>
-                <param name="annotation" ftype="gtf" value="genome.gtf"/>
-            </conditional>
-            <param name="protein_domains" ftype="gff3" value="protein_domains.gff3"/>
-            <section name="visualization">
-                <param name="cytobands" ftype="tabular" value="cytobands.tsv"/>
-            </section>
-            <output name="fusions_pdf">
-                <assert_contents>
-                    <has_size value="64000" delta="5000" />
-                </assert_contents>
-            </output>
-        </test>
-    </tests>
-    <help><![CDATA[
-**Arriba Draw Fusions**
-
-Arriba_Draw_Fusions_ (draw_fusions.R) renders publication-quality visualizations of the transcripts involved in predicted fusions. It generates a PDF file with one page for each predicted fusion. Each page depicts the fusion partners, their orientation, the retained exons in the fusion transcript, statistics about the number of supporting reads, and - if the column fusion_transcript has a value - an excerpt of the sequence around the breakpoint.
-
-
-**INPUTS**
-
-See: https://arriba.readthedocs.io/en/latest/command-line-options/#draw_fusionsr
-
-  - Fusions  
-
-    File containing fusion predictions from Arriba_ (fusions.tsv) or STAR-Fusion (star-fusion.fusion_predictions.tsv or star-fusion.fusion_predictions.abridged.coding_effect.tsv).
-
-  - Annotation
-
-    Gene annotation in GTF format that was used by the STAR aligner. 
-
-  - Alignments
-
-    BAM file containing normal alignments from STAR. 
-
-  - Annotation
-
-    The gene annotation (parameter -g) is used for multiple purposes:
-
-  - Assembly (Optional)
-
-    Only required when the alignments are not in sorted BAM format. The genome assembly will be used by samtools to produce a sorted BAM file.
-
-  - Protein domains (Optional)
-
-    GFF3 file containing the genomic coordinates of protein domains. Distributions of Arriba offer protein domain annotations for all supported assemblies in the database directory. When this file is given, a plot is generated, which shows the protein domains retained in the fusion transcript. 
-
-  - Cytobands (Optional)
-
-    Coordinates of the Giemsa staining bands. This information is used to draw ideograms. If the argument is omitted, then no ideograms are rendered. The file must have the following columns: contig, start, end, name, giemsa. Recognized values for the Giemsa staining intensity are: gneg, gpos followed by a percentage, acen, stalk. Cytobands for the human and mouse references can be retrieved from the Arriba distribution with the **Arriba Get Filters** tool. 
-
-
-**OPTIONS**
-
-  See: https://arriba.readthedocs.io/en/latest/command-line-options/#draw_fusionsr
-
-
-**OUTPUTS**
-
-See: https://arriba.readthedocs.io/en/latest/visualization/
-
-  - fusions.pdf
-
-    A PDF file with one page for each predicted fusion. Each page depicts the fusion partners, their orientation, the retained exons in the fusion transcript, statistics about the number of supporting reads, and - if the column fusion_transcript has a value - an excerpt of the sequence around the breakpoint.
-
-.. image:: draw-fusions-example.png
-  :width: 800
-  :height: 467
-
-
-
-
-.. _Arriba_Draw_Fusions: https://arriba.readthedocs.io/en/latest/visualization/
-.. _Arriba: https://arriba.readthedocs.io/en/latest/
-
-    ]]></help>
-    <expand macro="citations" />
-</tool>
b
diff -r 8c4c97fd0555 -r c58d1774c762 arriba_get_filters.xml
--- a/arriba_get_filters.xml Wed Oct 13 18:45:16 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,71 +0,0 @@
-<tool id="arriba_get_filters" name="Arriba Get Filters" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
-    <description>to history</description>
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-    <expand macro="requirements" />
-    <expand macro="version_command" />
-    <command detect_errors="exit_code"><![CDATA[
-    BASE_DIR=\$(dirname \$(dirname `which arriba`)) &&
-    REF_SCRIPT=`find \$BASE_DIR -name 'download_references.sh'` &&
-    REF_DIR=\$(dirname \$REF_SCRIPT) &&
-    REF_NAME=${arriba_reference_name.split('+')[0].replace('viral','')} &&
-    echo \$REF_NAME &&
-    cp `find \$REF_DIR -name 'blacklist_*' | grep -i \$REF_NAME` '$blacklist' && 
-    cp `find \$REF_DIR -name 'known_fusions_*' | grep -i \$REF_NAME` '$known_fusions' &&
-    cp `find \$REF_DIR -name 'protein_domains_*' | grep -i \$REF_NAME` '$protein_domains' &&
-    cp `find \$REF_DIR -name 'cytobands_*' | grep -i \$REF_NAME` '$cytobands'
-    #*
-    cp "\$REF_DIR/blacklist_*${arriba_reference_name}*" '$blacklist' &&
-    cp "\$REF_DIR/known_fusions_*${arriba_reference_name}*" '$known_fusions' &&
-    cp "\$REF_DIR/protein_domains_*${arriba_reference_name}*" '$protein_domains' &&
-    cp "\$REF_DIR/cytobands_*${arriba_reference_name}*" '$cytobands'
-    *#
-    ]]></command>
-    <inputs>
-        <param name="arriba_reference_name" type="text" label="Select reference">
-            <help>GRCh38 GRCh37 hg38 hg19 GRCm38 mm10</help>
-            <option value="GRCh38">GRCh38</option>
-            <option value="GRCh37">GRCh37</option>
-            <option value="hg38">hg38</option>
-            <option value="hg19">hg19</option>
-            <option value="GRCm38">GRCm38</option>
-            <option value="mm10">mm10</option>
-        </param>
-    </inputs>
-    <outputs>
-        <data name="blacklist" format="tabular.gz" label="${tool.name} ${arriba_reference_name} blacklist.tsv.gz"/>
-        <data name="known_fusions" format="tabular.gz" label="${tool.name} ${arriba_reference_name} known_fusions.tsv.gz"/>
-        <data name="protein_domains" format="gff3" label="${tool.name} ${arriba_reference_name} protein_domains.gff3"/>
-        <data name="cytobands" format="tabular" label="${tool.name} ${arriba_reference_name} cytobands.tsv"/>
-    </outputs>
-    <tests>
-        <test>
-            <param name="arriba_reference_name" value="GRCh38"/>
-            <output name="cytobands">
-                <assert_contents>
-                    <has_text_matching expression="1\t1\t\d+\tp36.33\tgneg"/>
-                </assert_contents>
-            </output>
-        </test>
-    </tests>
-    <help><![CDATA[
-**Arriba Get Filters**
-
-Arriba_ is a fast tool to search for aberrant transcripts such as gene fusions.
-It is based on chimeric alignments found by the STAR RNA-Seq aligner.
-
-The **Arriba Get Filters** tool adds the following Arriba distribution input_files_ to your galaxy history:
-
-  - blacklist
-  - known_fusions
-  - protein_domains
-  - cytobands
-
-
-.. _Arriba: https://arriba.readthedocs.io/en/latest/
-.. _input_files: https://arriba.readthedocs.io/en/latest/input-files/
-
-]]></help>
-    <expand macro="citations" />
-</tool>
b
diff -r 8c4c97fd0555 -r c58d1774c762 macros.xml
--- a/macros.xml Wed Oct 13 18:45:16 2021 +0000
+++ b/macros.xml Fri Feb 11 19:04:06 2022 +0000
b
@@ -1,6 +1,6 @@
 <macros>
-    <token name="@TOOL_VERSION@">2.1.0</token>
-    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@TOOL_VERSION@">2.2.1</token>
+    <token name="@VERSION_SUFFIX@">0</token>
     <xml name="requirements">
         <requirements>
         <requirement type="package" version="@TOOL_VERSION@">arriba</requirement>
b
diff -r 8c4c97fd0555 -r c58d1774c762 test-data/Aligned.out.sam
--- a/test-data/Aligned.out.sam Wed Oct 13 18:45:16 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,89 +0,0 @@\n-@HD\tVN:1.4\tSO:coordinate\n-@SQ\tSN:22\tLN:269079\n-@SQ\tSN:9\tLN:515509\n-@PG\tID:STAR\tPN:STAR\tVN:2.7.8a\tCL:STAR   --runThreadN 12   --genomeDir tempstargenomedir   --genomeLoad NoSharedMemory   --readFilesIn /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368710.dat   /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368711.dat      --readFilesCommand zcat      --limitBAMsortRAM 122880000000   --outSAMtype BAM   SortedByCoordinate      --outSAMstrandField intronMotif   --outSAMattributes NH   HI   AS   nM   ch      --outSAMunmapped Within      --outSAMprimaryFlag OneBestScore   --outSAMmapqUnique 60   --outBAMsortingThreadN 12   --outBAMsortingBinsN 50   --outSAMattrIHstart 1   --winAnchorMultimapNmax 50   --chimSegmentMin 12   --chimOutType WithinBAM   Junctions      --chimOutJunctionFormat 1      --quantMode TranscriptomeSAM   GeneCounts      --quantTranscriptomeBan Singleend   --twopassMode Basic\n-@CO\tuser command line: STAR --runThreadN 12 --genomeLoad NoSharedMemory --genomeDir tempstargenomedir --readFilesIn /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368710.dat /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368711.dat --readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --twopassMode Basic  --quantMode TranscriptomeSAM GeneCounts --quantTranscriptomeBan Singleend --outSAMstrandField intronMotif --outSAMattrIHstart 1 --outSAMattributes NH HI AS nM ch --outSAMprimaryFlag OneBestScore --outSAMmapqUnique 60 --outSAMunmapped Within --chimSegmentMin 12 --outBAMsortingThreadN 12 --outBAMsortingBinsN 50 --winAnchorMultimapNmax 50 --limitBAMsortRAM 122880000000 --chimOutType WithinBAM Junctions --chimOutJunctionFormat 
1\n-BCR-ABL1-46\t163\t22\t225687\t60\t71M2994N7M1344N72M\t=\t225737\t5255\tAACTGGAGGCAGTGCCCAACATCCCCCTGGTGCCCGATGAGGAGCTGGACGCTTTGAACATCAAGATCTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTG\tCCCGGGGGG=GGGJJJGGJJJGGJJJJCJJGGJJGCJGCGGGC8J8JGGJJJJJGJJC(JGCCG=GGJJGCCCGC8GCCGGGGGG=GGCGGG1GG=GC1G=CJCJJCCCGGCGG1CGG1GGGGGGGG=GGGGGCCGCGGG8GGGCGG=GG\tNH:i:1\tHI:i:1\tAS:i:285\tnM:i:1\tXS:A:+\tNM:i:1\n-BCR-ABL1-72\t163\t22\t225696\t60\t62M2994N7M1344N81M\t=\t228752\t5264\tCAGTGCCCAACATCCCCCTGGTGCCCGATGAGGAGCTGCACGCTTTGAAGATCAAGATCTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCA\tCCCCGGGGGGGGGGJGJCCCJ1GJJJJGCGGGCJJJ=C1JJGGJGG8JGC=CCGJ1JGG8GGGGGJCGJCCGGGCG=CGGGGGGCGG=GGCGGG=8CCGCGGJJJ=JGGGCGGGGGCCGCCGGGGGGGGC=CCGCG8GGGGGC1GGGGCC\tNH:i:1\tHI:i:1\tAS:i:290\tnM:i:1\tXS:A:+\tNM:i:1\n-BCR-ABL1-46\t83\t22\t225737\t60\t21M2994N7M1344N105M717N17M\t=\t225687\t-5255\tGCTTTGAAGATCAAGATCTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGG\t=GGCGGGGGGG=GGGCCCGCCCGGGGGGGGGGCCGGGGCGG8CGCGGG1JGGCCGG(C=GCCCGGGGGGCGGGGGCGCGGCGGJCGGGJJGJGGGJJCGGGJJJGJJJJJJJGJJJJGGGJJJJJGGJJJJJGCJJJCGGGGGGGGGCCC\tNH:i:1\tHI:i:1\tAS:i:285\tnM:i:1\tXS:A:+\tNM:i:0\n-BCR-ABL1-72\t83\t22\t228752\t60\t3S7M1344N105M717N35M\t=\t225696\t-5264\tTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAAT\t=GGGGGG==GGGGCCCC=GGGGG=GGGGCGGGCGGGGGGG=CGGCCGCCJGGCGGGGG=GGG8GGGCGGC=G=CCJGGGGGGCGJJGJJCGGGGGGJJJGCJCCGJG=JJJGJGJJCJJJJGJJJJJJJ=GCJGJGCGGG=GGGGGGCC=\tNH:i:1\tHI:i:1\tAS:i:290\tnM:i:1\tXS:A:+\tNM:i:0\n-BCR-ABL1-4\t99\t22\t230111\t60\t97M717N53M\t=\t230176\t889\tAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCC\tC==GGGGGGGGGGJJJJ1JJJGGJJGGJGGJJGJJCJGJGJJCGGCJGCJJJJCGJGGGGJGGGGGGCCG
G8JGGCGCGG=GGGGGGGGGGGGGG=GCCGJGGGCCGGGGGG1GGGGGGCGCGGCGGGGGG=GGGGGGGGGCCGCGGGCC\tNH:i:1\tHI:i:1\tAS:i:259\tnM:i:0\tch:A:1\tXS:A:+\tNM:i:0\n-BCR-ABL1-18\t99\t22\t230118\t60\t90M717N60M\t=\t230165\t882\tCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGTAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCAATCAGCCACTGGAT\tCCCGGGCGGGCGGJGJJJJJJJJJ='..b'GGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGAGTGAGAGCAGTCCTGGCCAGAGGTCCATCTCGCT\tCCGGGCGCGCGGGCG=CCCGGCGCGGGGC=CGGCGGCCGCGGGJJJJCCGCCG(GCCCCCGGCCGGG=G8GGGGGGCC=C=CGGJGJJJGC=JGGJJJGJGJ1JJJGC=JJJG=JCJJJJJJJ=JJGGGJJJCGJJJGGGGGCGG=GCCC\tNH:i:1\tHI:i:1\tAS:i:298\tnM:i:0\tNM:i:0\n-BCR-ABL1-74\t77\t*\t0\t0\t*\t*\t0\t0\tTCATTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATGACATTC\tCC11GGGGGGGGGGCCJJJGCGJJGJJJJJGGGGGGJJJGGJG==GCJCJ=GGJJGGJJGGCJGG=GGGGGJGGJGC=GC=GGGCGGGCGGGGCCGCGGGJCGC=GGC8CGCGCGGGGGGCGCC1GGCGCC=GCCGCGGC8GCGGGCCCG\tNH:i:0\tHI:i:0\tAS:i:155\tnM:i:2\tuT:A:1\n-BCR-ABL1-74\t141\t*\t0\t0\t*\t*\t0\t0\tCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAGGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAG\tCCCGGGGGGCGCGJGGJJGGJGJJJGJGGJJGGJGJJ1=JCJJGGGJJJJGGGJGCCJGGJGG=J1JG8JGCGGGJG=GC1CGCCGGCG(GGCGGCGGGGGCJC1CCGC==CCGGGGCGGCGGGCCGGCGCGC8CCCCGGG=GGGC=GGG\tNH:i:0\tHI:i:0\tAS:i:155\tnM:i:2\tuT:A:1\n-BCR-ABL1-66\t77\t*\t0\t0\t*\t*\t0\t0\tTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATGACATTCAGAAACCCATAGAG\tCCC=GGGGCGGGGJJJJJGJJJJ=JJJGJJ1GJJGJJJJJGJJJJJGGGGCGJJGGGJJJGGCGGGGJGCGG1JCGGG=GCCGCG=GC=G=GCCGGGGG8JGGGGGGGGGGGG=GGCGGC8GGCCGGGC=GGGGGGGGG=CGG=8GGCCG\tNH:i:0\tHI:i:0\tAS:i:159\tnM:i:0\tuT:A:1\n-BCR-ABL1-66\t141\t*\t0\t0\t*\t*\t0\t0\tCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTT
CAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAG\tCCCGGGGGGGGGGGGJ=JGJJJJJJJGGJJCCCJGJJ1JJJGCJGGGGJJJJ=GGGJGJGC(GGGGJGGGJG1=GGGGGGGG=G=C=GG8CC8GGGGGCCCCJCCCJGCG=GGCCGGCGGCGGCG==1GCCGGC1GGGGGCGGGGGGCGG\tNH:i:0\tHI:i:0\tAS:i:159\tnM:i:0\tuT:A:1\n-BCR-ABL1-58\t77\t*\t0\t0\t*\t*\t0\t0\tATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGG\tCCCGGCGGGGGGGGJJJJJGJJGJGJGJGJJJJJJJJJCJGJJJJGCG=8GGGJGJGGCGGJGCGJJJCJGGG=CGCCGGCCGGGCGCGGGCGCG1GGGCCCGGGGCG8GCCC=C8CGCGG=CCCGCCCCGGG=CCGGCGGGCGGGGGCG\tNH:i:0\tHI:i:0\tAS:i:185\tnM:i:3\tuT:A:1\n-BCR-ABL1-58\t141\t*\t0\t0\t*\t*\t0\t0\tTTGGGGTCATTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATTCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATG\tCCCGGGGGGGGGGJJJJJJGJGJJJGGJ=JJJJJJJJGC=GJJGGJJGJJGG1GCJGGGG=JGGG8C=GCCGC==GGGCGGGGGG=GGG=(G=CCGCCGGGGCJJJJGGGC8GCGCGCG8CGGCCGGGCGCGCGG8CCGG8CGGGGGGGG\tNH:i:0\tHI:i:0\tAS:i:185\tnM:i:3\tuT:A:1\n-BCR-ABL1-24\t77\t*\t0\t0\t*\t*\t0\t0\tCGCAGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGGCTGAGTGAAG\tCC11GCGGGGGGGJCGJGJJCCJJJJGJJJJGJJGGJJJCJJJG8JJJ1GJ=JGGGGJJJCG=8GGCGCCGGGCCGGGCGGGGCGGGGCCGCGGCCGGG=J1GCCC1(CCGGCGGGCCGCGGGCGGGGC=GGCGCCGCC1GCGGGGGCGG\tNH:i:0\tHI:i:0\tAS:i:154\tnM:i:3\tuT:A:1\n-BCR-ABL1-24\t141\t*\t0\t0\t*\t*\t0\t0\tTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATGACATTCAGAA\tC=CCGGGGGGGGCJ1GGJJJJ1JJJJJGJJ=GJJG8GGJ=GJGJJGJJGGGCGJGCGGGCGGG8GG=GJJGCG1GCGGJGCCGGCGGGCCGGGCG8GGGGG8C1==CGGCCCGCGGGGC8GCGGG8GGGCGCCGCCGCGGGCGGGGGGCG\tNH:i:0\tHI:i:0\tAS:i:154\tnM:i:3\tuT:A:1\n-BCR-ABL1-10\t77\t*\t0\t0\t*\t*\t0\t0\tAGGTTGGGGTCATTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACG\tCC=GGGGGGGGGG1GJJJJJCJJJJJJJJJ
JJGJ=GJJJGCJJJJCJGJGCJGJJJGGJJJGGCCGGJGC=GGJ1C8GGGGGGCGCCGGGGGGCGGGCGCCCG1GGCGCGCGGGCC8GCGCGCGC8CCCGCGCGGGGGCGGGGGCGGCGG\tNH:i:0\tHI:i:0\tAS:i:181\tnM:i:2\tuT:A:1\n-BCR-ABL1-10\t141\t*\t0\t0\t*\t*\t0\t0\tATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGA\t1CCGGCGGGGGG1GGJJJGCC1JJJJCCG=JGGJJGJJJ=GGGGGJJGGGGGGC1J=CJGCGGGGCGC(CGGGGG=GGGGG(G=CGGCGGGGCCCGC=CCCCJJCC8G1GGGGCGGGGGGCGCGGGGGGGCG=GGCCGCCGCC1G=GGGG\tNH:i:0\tHI:i:0\tAS:i:181\tnM:i:2\tuT:A:1\n'
b
diff -r 8c4c97fd0555 -r c58d1774c762 test-data/arriba_indexes.loc
--- a/test-data/arriba_indexes.loc Wed Oct 13 18:45:16 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,18 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of Arriba data files.
-#The Arriba script download_references.sh retrieves a genome assembly fasta
-#and a related GTF annotation file, then builds a STAR index.
-#You will need to create these data files and then create a
-#arriba_indexes.loc similar to this one (store it in this
-#directory) that points to the directories in which those files are stored.
-#The arriba_indexes.loc file has this format (longer white space
-#characters are TAB characters):
-#
-#<unique_build_id>   <display_name>   <genome_fasta_path> <genome_gtf_path> <STAR_index_path>
-#
-#Note that STAR indices can become quite large. 
-#
-#<unique_build_id> <display_name> <genome_fasta_path> <genome_gtf_path> <STAR_index_path>
-#GRCh38+ENSEMBL93 GRCh38+ENSEMBL93 /depot/GRCh38+ENSEMBL93/genome.fa /depot/GRCh38+ENSEMBL93/genome.gtf /depot/GRCh38+ENSEMBL93/STAR_index/
-GRCh38+ENSEMBL93 GRCh38+ENSEMBL93 ${__HERE__}/test-cache/genome.fasta ${__HERE__}/test-cache/genome.gtf ${__HERE__}/test-cache/STAR_index/
-
b
diff -r 8c4c97fd0555 -r c58d1774c762 test-data/cytobands.tsv
--- a/test-data/cytobands.tsv Wed Oct 13 18:45:16 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-contig start end name giemsa
-22 1 40586 q11.22 gpos25
-22 40586 269079 q11.23 gneg
-9 1 21036 q34.11 gneg
-9 21036 515509 q34.12 gpos25
b
diff -r 8c4c97fd0555 -r c58d1774c762 test-data/fusions.tsv
--- a/test-data/fusions.tsv Wed Oct 13 18:45:16 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-#gene1 gene2 strand1(gene/fusion) strand2(gene/fusion) breakpoint1 breakpoint2 site1 site2 type split_reads1 split_reads2 discordant_mates coverage1 coverage2 confidence reading_frame tags retained_protein_domains closest_genomic_breakpoint1 closest_genomic_breakpoint2 gene_id1 gene_id2 transcript_id1 transcript_id2 direction1 direction2 filters fusion_transcript peptide_sequence read_identifiers
-BCR ABL1 +/+ +/+ 22:230999 9:275100 CDS/splice-site CDS/splice-site translocation 1 3 0 3 8 low in-frame . Bcr-Abl_oncoprotein_oligomerisation_domain(100%),C2_domain(100%),RhoGEF_domain(100%)|F-actin_binding(100%),Protein_kinase_domain(100%),SH2_domain(100%),SH3_domain(100%) . . ENSG00000186716 ENSG00000097007 ENST00000305877 ENST00000372348 downstream upstream . AGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAG___ATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAA|AAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAG___GTGAAAAGCTCCGGG SFSLTSVELQMLTNSCVKLQTVHSIPLTINKEDDESPGLYGFLNVIVHSATGFKQSS|kALQRPVASDFEPQGLSEAARWNSKENLLAGPSENDPNLFVALYDFVASGDNTLSITKGEKLR BCR-ABL1-4,BCR-ABL1-28,BCR-ABL1-60,BCR-ABL1-76
b
diff -r 8c4c97fd0555 -r c58d1774c762 test-data/genome.fasta
--- a/test-data/genome.fasta Wed Oct 13 18:45:16 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,4 +0,0 @@\n->22\n-TAAAGAGACTTAGCACATTTATTCACTCACAGAGGTGAATGAAGGGCTCAGGGTTTGAACTCGATGACACTGATGGCGATCCGGGCTGCCCGCTGTAAGGCTTCGGCCACTTGAGGCTTTTCGTAAGTCTCCACCTCCATGGCACGGAAAGTGGGCACGTGCGTCTGCAGGGCCTTGCGGCCCTCGGGGGCCTCTGCCAGCATGGTAAGGGCCTTGGTGGCATTCAGGCGCGCTATGGTCATGGGGGAGTGCAGCAGCTCCAGGAGCAGGCCGATGGCTTGTGCCTCCAGGGCCGCATACTTCCCTGCAGGCCACCAACACAAGGCGTTCCAAACAGCCCAAGGGGCCCTCTTCTCACCCCCTCTCACCCTAGCCCTCTCCTGACCCTCCTCCTTGTGCAGTCTCAAGGGGTTTATGCCTGGTTTATGCCCCACCAACATGCCCTCACCCTCAGGGCTCTTGCACCTTCGCTGAGTCCGGCCTGGGCCTGGCCTCTTCATCTCCTGCCTTTCCGGCCTCAACTGGCTCCTCTCCATCCACCCTCGTATCCTGCCCAGGAGTTCAAGACCAGCCTGGGCAACATAATGAGATTCCATCTCTACAAAAATTTAAAAATTATCCAGATGTAGTGACAGGTGCCTACAGTCCCAGCTACTCAGGAGACTGAGCGGGGAGGATCACTAGAGCCTGGGAGTTTGAGGCTGCAGTGATCGTGACATCGTACTCTAGCCTGGGTGACACACTGAGACACTGGCTCTTTAAAAAAAGTAAGCCCAGCTGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCTGGTGGATCACGAGGTCAGGAGATTGTGACCATCCTGGCTAACACAGTGAAACCCCGTCTCTATTAAAAATACAAAAAATTAGCCGGGCATGGTGGCGGGCGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATGCGTGAACCTGGGAGGCGGAGCTTGCAGTGAGCCAAGATGGTGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCATCTCAAAAAAAAAAAAAAAGTAAACCCAATAGTTCATATATGTTGCCAATATTTCTTGAGCACCTACTCTGCACAAAACACACATGGTTCCTGAGAGATGCCCTCCTCAACCATTCGCTGCCCCTTTCTGCTGTCCTTGCCGGCCCTATGCTCCTGCAGTGCCCAGTGGACTCTCCCCTTTGCTGGTGGAACCTCTCAGCCTGGGCCCTGCCAACTCCCATTGTCCTTGGGATTGATCCCAAGCCCTTCCCAGAGGCGACTTGTCCCCTGTGTTTGCCCCACCCATGCTGGGCTTCTCCTCCACATCCCTCATCACTGCCTCCCAGGGCCTGGCACGTGGCAGGGGCTTCAGGACCCTCAGCTGGAGTGCCAGCTTGGGAATAAGTGGCCTCATCTCCCTGGGTCTCAGCCTCTTGTGTCTCGAGCCCCGTGGCCCTCCAAGCACTCCTACAGAAAGCCTGGACCGAGAAGGACACTGGGCTGAGGCTGCCCCAAGATAAACACGCAGGGGGAGCCCCTGCCACCGTGCAGTCAGCATCCTGACTACACCAGTAAAAGGAGGGGCTGTGGACTCCAAGAAAGCCCCAACCTCTCCCATCTGCATACCTCAAGCAGCATTAGGGTCAGGTGTGCAGGGCCTGGGAGGACCTGTCTTTGGGAGGCTGATTGCAAATCCAAGCCCAGATGGTCCTCCATGGAACTCAAAGGAAGCACAGGGCCCCAGGCTGGTGGGTGTGTAGGGGCCAGAGGGAAAGTCAAATGCTTTCAGGGGTCCTGGGAGTGCCACGCCCCAGTCTGCCAGGTGTGGAAACAGCCCAGGGTTCTTCCATGAGCCCAGGGTTGGGTGACTTAGAGGGGAAGCCTGACTGGCTGGGAGTCCCAAGGTAGGGGGCTCGGGGGAGATAATTCTGAATTTGCCCCCT
CCTCCAGTTTAAGAAAAATAATTAGGATGAATAAACACCATAGAGTCAAGGGCTCCAGGGTTTGCTTAGACCTCAGGGTAACAGAATCATGGATGGCAAACTAATCAACCCTTGAGTCACAGCTCACAAGGGGCCTGGGTGGGAGGCGGGAATTCCAGGCCCAGGGAACAGACTGGTGCGAAGGCACAGAGGTCCTAATGTGAGTGACTGGTGAGGGGCCACTGTGCTGGGGAGAAGAGGGAGGGCTGGGCCTTGGACCCAACCTTGAAAGGACATCAGTGATAGGCCAAGGGGAGGTTTTTTTTTTTTTTGCAAAGTGTGGAGTTTGGGGGACGATACCCAGCCCCTGAGTACAGAAAGCTCATCACGGCTGCTAGCCTGCTAGGGTCTCCCTCCCTGCGGCACCCCCCACCGCCTCACCTTCAGTGATCACTGTGGCGAACATCAGGGCACCGGCAGCGTTAGACTTCACATGCTCCACTGGGTCTTTCAGCAGATGGACCAGGATGGGGATGACGTCAAAATGACACACCTGTTTCTTGCCCTCTCGAGATATGCTGGGGCAGAGAGGGAACAGAGAGGGGCTCTGCTTGGAAGGGAGAAGAGGCGCAAGGCCCAGGAGTGCCCCAGGGAGACACAAAAGAAATAATTGTGTGGGGGCTACCCCAGGTAGTCCCAGGACTGGTCCCCATGATCCAGGACTGGTCATGGCAGGGATGCTGGGCTGCAGAGGAGCATTGGTGTGAACGCGGCCCACTGCCAGTGAAGACCACGGGGTGACAGGGCCTCACTCTGAGCCAGCACACAGAAAAGAAATAGGGCATGTTCTCCGTGGCCCCAAAATGGGGAACAGCAGGACCCAGGGGTGTCAACCACAGGGAGACCTGTTTTGACTTTGCCTAAGAGCCAGAGCTGACAGTGATGGGCCAAGCTGCCCCCAACAGCAGCAGCTGCCGGGCACTGGGCAACTGCAGCTAAGGACTCAGACCAGGTAACTTTTAGGCCACTTACTCTGGCACTGAATGAAGTTGTGGAAATGAGGATGATGAACTCCCCAAAAGTTTCCTGAGAGAGGGTCAATTCTGTCCCCTAAAAAGAAAGGGCTCCCTGGGCCAGGAAACACTGTTCACAAGCCCCACAGGAATGCCACAGTGCAAAGACCCCTGTGTCACTTAGTTTAATCAACCTTTCCCAAATGTAGCCACCAAAGAACCCTCATTTTTGTAGAGCTCTTAATGACCTCAAAGACACAGGTGTTCCCTGGGAACACAGATTGGGAAATGAGGTTGGCCAGCCACTAGCTGTGACACTGGGAGCCTTTCAGGGCAGTGGGCCTCAACCCCGAAGCCCATGATCAAAGTTGCAGAGTCCTGGGCCCCACATGCAGGGATTCCCTGATTCGTTCACACAGCAAGACACTGAGACAAGAGCGATGCAGGGATTCCCCGATTTGTTCACAAGAGTGGGGACAAGAGCAGGGTTTCTCCAGCTCAGTGTTCTTTAGGGCCAGATAATTCTCAACTGGGAGCCAAAGCTCCCCCGCTGCAAACCCTGGGTTTAGGGCAGCAAACAGAGGAAAGCCCTGTCCTCAGGAGCTCCCCTCGGGGATGACACACAAAGTAACGGCAGGGATGGTGGTCACAGTGGACATGGGAGGACAGGGGACAGCAGGGAGGTTACAGTTTCAGAGAGAGAGAGCTCTCTCCTGGAAAGCCACTCTCATGGTCCCACAGAAAGGGGACCTCTGAGCAGACCTAAGAATGATGGGGCTGGGACAGGGCCCAGGAATCTGCATTTTCACAAACCTCCCGGTGGCTTGGGTCCGGGGCCCCACTTGCAGAAATACTGGCCTGAGCCTCTAGTGAGACTCTCAGAAGAGAAGCCAGGACTTAGGGAAGGCAAGAGGGTCTCCACTAAAGAGACATAGCCACAGCTAAGGCAGAGACTCTGGGGTTCGGTGACCAGCCTGTCCTCGTGAAAAGGGGCCCAGTGTGAGAACA
CAC'..b'GCAGGGCCTTGGCTCCCCTCTGCCTTCTCTTGCACACTGGCTCAGTGCTTGCAGCTGCTTGCCCAGGAAATCCAGGGCCTCGGGGATCCCAGGGGCCCAGTGGAATCCTGTGGGGTTAGAAGCAGCCACTTGGGGCTGGGCATGGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCAAGGCGGGTGGATCACTGGAGGTCAGGAGTTTGAGACCAGCCTGACCAACATGGTGAAACCCTGTCTCTACAAAAAATACAAAAATTAGGCAAGGCGTGGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCTGAGGCGGGCAGATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACGTGGTGAAACCCTGTCTCTACTAAAAATACAAAAAAATTAGCGGAGCATGGTGGCAGGCACCTGTAATCCCAGATACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCTGAGGCCACAGTAAGCTGAGATCGTACCACTGCACTCCAGCCTGGACAACAGAGCGAGACTCCGTCTCAATAAATAAATAAATGCATCCATCCATCCATACATACATACAAAAATTAGCTGGGCATGGTGGTGTGGGCCTGTGGTTTACCCCAGTGATTGCCCACATCCAGGCCCACAGGCGGCATCTGTAAACGTGTGCTGTCAGTGAATTGAGCCAGCATGTTCAAGCTGGCAAAAATCACAGGGGCCTTTCTCCCACACTGTTTTGTAAATGAGTAAACTAAGACTCAGAGAGGACTCACCTGAGGCCTTCTTTTTTAACAATAATAGTAAAACCATCAACAAAAATGACATTTGTCAAGTGCCACCCAGGAGGATGTTCTAGAGTCTGCCCATGGGCCCGTTGGGATGCTATGGGTCCTCACCCACCTCCCAGCCCTTCTGCTCCCCATCTTTCCCTGGTGGATGGTGGGCCATTGGAATCCTGGGAGGCCTGAGCTGGGGAGAGGCTCAGGGGCTGGGAGGTGCCCTGGGGCCTGCAGGGCTGCTGGCTCACAGTGAGGCTGTTTGTGCCCCACCACAGGGTCGCTGGACACCCATCAAGCCCCAGCCCAGGCCCTGAACGAGACTCAGTGGGCACTAGAACGCCTGAGGCTGCAGCTGGGCTCCCCGGGGTCCTTGCAGAGGAAACTCAGTCTGCTGGAGCAGGAATCCCAGCAGCAGGAGCTGCAGATCCAGGGCTTCGAGAGTGACCTCGCCGAGATCCGCGCCGACAAACAGAACCTGGAGGCCATTCTGCACAGCCTGCCCGAGAACTGTGCCAGCTGGCAGTGAGGGCTGCCCAGATCCCCGGCACACACTCCCCCACCTGCTGTTTACATGACCCAGGGGGTGCACACTACCCCACAGGTGTGCCCATACAGACATTCCCCGGAGCCGGCTGCTGTGAACTCGCCCCCGTGTGGATAGTCACTCCCTGCCGATTCTGTCTGTGGCTTCTTCCCTGCCAGCAGGACTGAGTGTGCGTACCCAGTTCACCTGGACATGAGTGCACACTCTCACCCCTGCACATGCATAAACGGGCACACCCCAGTGTCAATAACATACACACGTGAGGGTGCATGTCTGTGTGTATGACCCACACGTGTTCAAGTCTAATCCATCCAGTCAGCAGCTTACGGTCCACACACATTACAGTCCACAGCTGTTGTGAGAGCCACCTGTGTGCTGGACACCCTCTGGATGTTGGGCAAGTTGTTACATGAGATGCCCTGGGGTGCTACATCCACTCACTCCAGATAGCAGGGAGGTCTCAGCAGATCTGCAGAGATCAAGGGGGTCAGCAACAGCCAAAGCCCCTAGTCCCAGAGCTGGCTGCCCTCTGTTTCACAGCAGCTCCCTGACCTGTGTTGCTGCGTGCACTCCCTACAGCTCGACACAGCCAGGGGACCAACAGGCCAAGAAATGCAAGATCCCGGGAGGGTTCTTAGCAGCAGAATCTGAGGCCCAGAGAC
CCTGAGGCCGTGGCCAGGCCTGCTAGTCTGGCTAGAGCAAGGCCCATTCCTGGCGGGGGTGTCATTGCCTTCACCGGACGCTTCCCTCTCAGGGTCCTGGGACTGCACCAGATGCCCTGAGGGAATGGCCCACCCTGGCCTGTACCCACTTCAGCCTGTGATCTATCCAAAGAGCCAGGCCCAAAAGCGCCTAGGTCAGGGTGCTCAGGCTACCAGGAGCACGCCTCTGTGCCCCCGGCAACCCAGTTGACCTTTAATTGACGCTTTCCAGACCAGCCCTGCGGCACCACTTGCCATGCGGGAGGCCACCAGGGTGTGCAAGCCTGGCTGCCATTCCAGTCTGTCCTGTCTGGGAATCGCCCTGTGGCCAGGCCCGCATGCTGGCCTCTGCCCAGGACTCCTCAGCATTTCCTCTTGGCATCCCTCCCCTCTCCCAGACCCTCTTCCAGCAGATGGCAAGGCCTCGGCATTGGGAAGTCAGGCACCTCTGCGGGCCCAGCCCCCTCCCGTGGCTCCCCTGACAGGGGCAGGGGTAGGGCAGCAGCACAGACCAATTCCGTTGAACGTGGAAATAAAGGACCCTTTCACTGGGCAGGGTGGTGTGCCTCACCCTCCCCGGCTGGTGGGCAGCCAGGGCCCTGGCTGTGGGTGTGCATATGACACACCTAGTAGGTGGCCAGCATGTGGACCGGACGTTGGTAGGAAGGTGGCAAAAGCCGAGCTCGTGGCTGGGCCAGTACCTCCCATTAGAGGGCTTTGCTGGGGTTGTGTGATCACAGGTACCTACCCTGTCCTCTCAGGCACTTACCACGTAAAGCCTAGGAGCTGGTGAGTTGGAGGGGTGGGGTGCGGAGAGGCCCTCAGCTGACCTCTGGTTCAGGCTCGAGACGAACTCACAGCCAAGTGTCCGAGGATGGTGAGGAGCAGGGAGGGGCGCCATCCAGGAGGGGGATGGTGTGGGTGGGGCCTTGAAGGGTGGGGAGGCAGAGAAGGAAGCATTCCAGGCAAGAGGGTGGACAACAGTCCGGGGCCCGCAGGGTTGGGGCTCGGCCAGCTTGCATCACTCCAGGACCCCAGGTTGAATGGGGTGGGATGTTGGAGCTGCTCAGTCAGGGCTCTTGGCCGCAGGCCTCAAACCCCTCCTGAGGTGGTTTCAGCAGAAAAGGGGTGTTGGGAGGGTCGCTTGGAACCCTGGAGTAAAAACGGCTGCCACGTGTTGGAGATAGCCTAGGGAGGGGAGCCTGAGGCTTCCGGGATAGGTTGGCTTCCCTCTTCCCCCCTCCCGCCTCTCTTCTTGGTCTGTGTCTCTGCTCTCCTCTCCTGTATCTGCTTCGTTCTTTTCTCTTTATTTATTGATTTTTTTTGAGATGGAGTCTCGCTCTGTTGCCCAGGCTGGAGTGCAGTGGCATGATCTCTGCCCACTGCATCCTCCACCTCCCAGGTTTAAGCAATTCTCCTGCCTCAGCCTCCCGAGTAGCTAGGACTACAGGTGCTTGCCACCATGCCCAGCTAATTTTGGTATTTTTAGTAGAGACAGGGTTTCACCACGTTGGTCAGGCTGGTCTCAAACTCCTGACCTCAGATGATATACCTGCATTGGCCTCTTAAAGTTCTGGGATTTCAGGCATGAGCCACTGCAACCGGCCCATTCTTTTCTCTTTGCAGAGTGGCTTTCTTTGTTTTTCTTGTGCCTGATAGGAGAGGACACCCACCCCTACCGCCATCCCCCATAATGGCCCCAGGTGTACATGTCATCAGGTCCAGTGCTTGCAAGAGACAAGCTGGTGACTCTGTCCTGATTCCAGCTTCTCAGCTTAGGTGAAGTCCCACCAACCCCCGTTCAGGATAATGAGGATCTCTGGATCTAAGGCCAATAATGGATGACCGGTGCCACCCCCCAACCTAATGGGAGATGGTGTTCAGAGAAGAGGTGTGCTCCTCCACAGAAAACTGTAAAATCAAGGCTACGGTGGGGGATTGACATGATTAAACTGAGCTAG
GAGTGA\n'
b
diff -r 8c4c97fd0555 -r c58d1774c762 test-data/genome.fasta.gz
b
Binary file test-data/genome.fasta.gz has changed
b
diff -r 8c4c97fd0555 -r c58d1774c762 test-data/genome.gtf
--- a/test-data/genome.gtf Wed Oct 13 18:45:16 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,860 +0,0 @@\n-#!genome-build GRCh38.p12\n-#!genome-version GRCh38\n-#!genome-date 2013-12\n-#!genome-build-accession NCBI:GCA_000001405.27\n-#!genebuild-last-updated 2018-01\n-9\tensembl_havana\tgene\t1\t59388\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding";\n-9\thavana\ttranscript\t1\t3122\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000467100"; transcript_version "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-203"; transcript_source "havana"; transcript_biotype "processed_transcript"; transcript_support_level "2";\n-9\thavana\texon\t1\t800\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000467100"; transcript_version "1"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-203"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00001817525"; exon_version "1"; transcript_support_level "2";\n-9\thavana\texon\t1695\t3122\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000467100"; transcript_version "1"; exon_number "2"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-203"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00001916817"; exon_version "1"; transcript_support_level "2";\n-9\tensembl_havana\ttranscript\t642\t59388\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; tag "basic"; transcript_support_level 
"1";\n-9\tensembl_havana\texon\t642\t800\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; exon_id "ENSE00001809698"; exon_version "1"; tag "basic"; transcript_support_level "1";\n-9\tensembl_havana\tCDS\t717\t800\t.\t+\t0\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; protein_id "ENSP00000318177"; protein_version "9"; tag "basic"; transcript_support_level "1";\n-9\tensembl_havana\tstart_codon\t717\t719\t.\t+\t0\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; tag "basic"; transcript_support_level "1";\n-9\tensembl_havana\texon\t16519\t16624\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "2"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; exon_id "ENSE00003666938"; exon_version "1"; tag "basic"; transcript_support_level "1";\n-9\tensembl_havana\tCDS\t16519\t16624\t.\t+\t0\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version 
"9"; exon_number "2"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "e'..b'NA";\n-22\tensembl\ttranscript\t202368\t202657\t.\t-\t.\tgene_id "ENSG00000240160"; gene_version "3"; transcript_id "ENST00000467969"; transcript_version "3"; gene_name "RN7SL263P"; gene_source "ensembl"; gene_biotype "misc_RNA"; transcript_name "RN7SL263P-201"; transcript_source "ensembl"; transcript_biotype "misc_RNA"; tag "basic"; transcript_support_level "NA";\n-22\tensembl\texon\t202368\t202657\t.\t-\t.\tgene_id "ENSG00000240160"; gene_version "3"; transcript_id "ENST00000467969"; transcript_version "3"; exon_number "1"; gene_name "RN7SL263P"; gene_source "ensembl"; gene_biotype "misc_RNA"; transcript_name "RN7SL263P-201"; transcript_source "ensembl"; transcript_biotype "misc_RNA"; exon_id "ENSE00001875334"; exon_version "3"; tag "basic"; transcript_support_level "NA";\n-22\thavana\tgene\t203353\t205591\t.\t+\t.\tgene_id "ENSG00000230701"; gene_version "2"; gene_name "FBXW4P1"; gene_source "havana"; gene_biotype "processed_pseudogene";\n-22\thavana\ttranscript\t203353\t205591\t.\t+\t.\tgene_id "ENSG00000230701"; gene_version "2"; transcript_id "ENST00000426721"; transcript_version "2"; gene_name "FBXW4P1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "FBXW4P1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n-22\thavana\texon\t203353\t205591\t.\t+\t.\tgene_id "ENSG00000230701"; gene_version "2"; transcript_id "ENST00000426721"; transcript_version "2"; exon_number "1"; gene_name "FBXW4P1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "FBXW4P1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001782951"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n-22\thavana\tgene\t263129\t263513\t.\t-\t.\tgene_id "ENSG00000214526"; 
gene_version "3"; gene_name "AP000343.1"; gene_source "havana"; gene_biotype "processed_pseudogene";\n-22\thavana\ttranscript\t263129\t263513\t.\t-\t.\tgene_id "ENSG00000214526"; gene_version "3"; transcript_id "ENST00000440602"; transcript_version "1"; gene_name "AP000343.1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "AP000343.1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n-22\thavana\texon\t263129\t263513\t.\t-\t.\tgene_id "ENSG00000214526"; gene_version "3"; transcript_id "ENST00000440602"; transcript_version "1"; exon_number "1"; gene_name "AP000343.1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "AP000343.1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001526946"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n-22\thavana\tgene\t267202\t269079\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA";\n-22\thavana\ttranscript\t267202\t269079\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; transcript_id "ENST00000450776"; transcript_version "1"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "LINC02556-201"; transcript_source "havana"; transcript_biotype "lincRNA"; tag "basic"; transcript_support_level "3";\n-22\thavana\texon\t267202\t267377\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; transcript_id "ENST00000450776"; transcript_version "1"; exon_number "1"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "LINC02556-201"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001785308"; exon_version "1"; tag "basic"; transcript_support_level "3";\n-22\thavana\texon\t268910\t269079\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; transcript_id "ENST00000450776"; transcript_version 
"1"; exon_number "2"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "LINC02556-201"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001710203"; exon_version "1"; tag "basic"; transcript_support_level "3";\n'
b
diff -r 8c4c97fd0555 -r c58d1774c762 test-data/genome.gtf.gz
b
Binary file test-data/genome.gtf.gz has changed
b
diff -r 8c4c97fd0555 -r c58d1774c762 test-data/protein_domains.gff3
--- a/test-data/protein_domains.gff3 Wed Oct 13 18:45:16 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,83 +0,0 @@\n-9\tpfam\tprotein_domain\t33502\t33541\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n-9\tpfam\tprotein_domain\t33992\t34063\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n-9\tpfam\tprotein_domain\t35324\t35381\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n-9\tpfam\tprotein_domain\t37391\t37409\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n-9\tpfam\tprotein_domain\t37479\t37553\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n-9\tpfam\tprotein_domain\t38833\t38931\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n-9\tpfam\tprotein_domain\t41390\t41413\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n-9\tpfam\tprotein_domain\t41489\t41494\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n-9\tpfam\tprotein_domain\t43744\t43846\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n-9\tpfam\tprotein_domain\t44647\t44729\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n-9\tpfam\tprotein_domain\t47496\t47541\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n-9\tpfam\tprotein_domain\t51664\t51812\t0\t+\t.\tName=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013\n-9\tpfam\tprotein_domain\t102331\t102396\t0\t+\t.\tName=Zinc finger%2C C2H2 type;color=#80FF00;gene_id=ENSG00000130711;gene_name=PRDM12;protein_domain_id=PF00096\n-9\tpfam\tprotein_domain\t102412\t102480\t0\t+\t.\tName=C2H2-type zinc 
finger;color=#80FF80;gene_id=ENSG00000130711;gene_name=PRDM12;protein_domain_id=PF13894\n-9\tpfam\tprotein_domain\t114903\t114949\t0\t+\t.\tName=Exosome complex exonuclease RRP4 N-terminal region;color=#FF0000;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF14382\n-9\tpfam\tprotein_domain\t116528\t116596\t0\t+\t.\tName=Exosome complex exonuclease RRP4 N-terminal region;color=#FF0000;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF14382\n-9\tpfam\tprotein_domain\t121951\t121971\t0\t+\t.\tName=KH domain;color=#000080;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF15985\n-9\tpfam\tprotein_domain\t123179\t123300\t0\t+\t.\tName=KH domain;color=#000080;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF15985\n-9\tpfam\tprotein_domain\t275219\t275273\t0\t+\t.\tName=SH3 domain;color=#FF0000;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00018\n-9\tpfam\tprotein_domain\t275837\t275922\t0\t+\t.\tName=SH3 domain;color=#FF0000;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00018\n-9\tpfam\tprotein_domain\t275962\t276132\t0\t+\t.\tName=SH2 domain;color=#80FFFF;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00017\n-9\tpfam\tprotein_domain\t283799\t283855\t0\t+\t.\tName=SH2 domain;color=#80FFFF;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00017\n-9\tpfam\tprotein_domain\t283973\t284071\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n-9\tpfam\tprotein_domain\t293165\t293249\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n-9\tpfam\tprotein_domain\t293896\t294073\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n-9\tpfam\tprotein_domain\t295904\t296088\t0\t+\t.\tName=Protein kinase 
domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n-9\tpfam\tprotein_domain\t299451\t299603\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069\n-9\tpfam\tprotein_domain\t301104\t301156\t0\t+\t.\tName=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;ge'..b'd=PF00053\n-9\tpfam\tprotein_domain\t489945\t490067\t0\t+\t.\tName=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053\n-9\tpfam\tprotein_domain\t490710\t490856\t0\t+\t.\tName=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053\n-22\tpfam\tprotein_domain\t2420\t2524\t0\t-\t.\tName=Armadillo/beta-catenin-like repeat;color=#000080;gene_id=ENSG00000100218;gene_name=RSPH14;protein_domain_id=PF00514\n-22\tpfam\tprotein_domain\t36321\t37004\t0\t+\t.\tName=G-protein alpha subunit;color=#80FFFF;gene_id=ENSG00000128266;gene_name=GNAZ;protein_domain_id=PF00503\n-22\tpfam\tprotein_domain\t63673\t63981\t0\t+\t.\tName=G-protein alpha subunit;color=#80FFFF;gene_id=ENSG00000128266;gene_name=GNAZ;protein_domain_id=PF00503\n-22\tpfam\tprotein_domain\t90736\t90740\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n-22\tpfam\tprotein_domain\t93060\t93112\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n-22\tpfam\tprotein_domain\t93619\t93720\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n-22\tpfam\tprotein_domain\t96554\t96622\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n-22\tpfam\tprotein_domain\t98578\t98629\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n-22\tpfam\tprotein_domain\t99484\t99565\t0\t+\t.\tName=Ras 
family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n-22\tpfam\tprotein_domain\t99749\t99839\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n-22\tpfam\tprotein_domain\t101465\t101502\t0\t+\t.\tName=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071\n-22\tpfam\tprotein_domain\t121553\t121771\t0\t+\t.\tName=Bcr-Abl oncoprotein oligomerisation domain;color=#FF0000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF09036\n-22\tpfam\tprotein_domain\t201581\t201640\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n-22\tpfam\tprotein_domain\t201941\t202126\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n-22\tpfam\tprotein_domain\t208994\t209101\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n-22\tpfam\tprotein_domain\t212118\t212178\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n-22\tpfam\tprotein_domain\t213667\t213719\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n-22\tpfam\tprotein_domain\t214220\t214312\t0\t+\t.\tName=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621\n-22\tpfam\tprotein_domain\t230954\t230999\t0\t+\t.\tName=C2 domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168\n-22\tpfam\tprotein_domain\t233127\t233224\t0\t+\t.\tName=C2 domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168\n-22\tpfam\tprotein_domain\t235610\t235741\t0\t+\t.\tName=C2 domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168\n-22\tpfam\tprotein_domain\t250010\t250018\t0\t+\t.\tName=C2 
domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168\n-22\tpfam\tprotein_domain\t252302\t252422\t0\t+\t.\tName=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620\n-22\tpfam\tprotein_domain\t253473\t253607\t0\t+\t.\tName=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620\n-22\tpfam\tprotein_domain\t254554\t254659\t0\t+\t.\tName=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620\n-22\tpfam\tprotein_domain\t255138\t255228\t0\t+\t.\tName=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620\n'
b
diff -r 8c4c97fd0555 -r c58d1774c762 test-data/read1.fastq.gz
b
Binary file test-data/read1.fastq.gz has changed
b
diff -r 8c4c97fd0555 -r c58d1774c762 test-data/read2.fastq.gz
b
Binary file test-data/read2.fastq.gz has changed
b
diff -r 8c4c97fd0555 -r c58d1774c762 test-data/test-cache/genome.fasta
--- a/test-data/test-cache/genome.fasta Wed Oct 13 18:45:16 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,4 +0,0 @@\n->22\n-TAAAGAGACTTAGCACATTTATTCACTCACAGAGGTGAATGAAGGGCTCAGGGTTTGAACTCGATGACACTGATGGCGATCCGGGCTGCCCGCTGTAAGGCTTCGGCCACTTGAGGCTTTTCGTAAGTCTCCACCTCCATGGCACGGAAAGTGGGCACGTGCGTCTGCAGGGCCTTGCGGCCCTCGGGGGCCTCTGCCAGCATGGTAAGGGCCTTGGTGGCATTCAGGCGCGCTATGGTCATGGGGGAGTGCAGCAGCTCCAGGAGCAGGCCGATGGCTTGTGCCTCCAGGGCCGCATACTTCCCTGCAGGCCACCAACACAAGGCGTTCCAAACAGCCCAAGGGGCCCTCTTCTCACCCCCTCTCACCCTAGCCCTCTCCTGACCCTCCTCCTTGTGCAGTCTCAAGGGGTTTATGCCTGGTTTATGCCCCACCAACATGCCCTCACCCTCAGGGCTCTTGCACCTTCGCTGAGTCCGGCCTGGGCCTGGCCTCTTCATCTCCTGCCTTTCCGGCCTCAACTGGCTCCTCTCCATCCACCCTCGTATCCTGCCCAGGAGTTCAAGACCAGCCTGGGCAACATAATGAGATTCCATCTCTACAAAAATTTAAAAATTATCCAGATGTAGTGACAGGTGCCTACAGTCCCAGCTACTCAGGAGACTGAGCGGGGAGGATCACTAGAGCCTGGGAGTTTGAGGCTGCAGTGATCGTGACATCGTACTCTAGCCTGGGTGACACACTGAGACACTGGCTCTTTAAAAAAAGTAAGCCCAGCTGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCTGGTGGATCACGAGGTCAGGAGATTGTGACCATCCTGGCTAACACAGTGAAACCCCGTCTCTATTAAAAATACAAAAAATTAGCCGGGCATGGTGGCGGGCGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATGCGTGAACCTGGGAGGCGGAGCTTGCAGTGAGCCAAGATGGTGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCATCTCAAAAAAAAAAAAAAAGTAAACCCAATAGTTCATATATGTTGCCAATATTTCTTGAGCACCTACTCTGCACAAAACACACATGGTTCCTGAGAGATGCCCTCCTCAACCATTCGCTGCCCCTTTCTGCTGTCCTTGCCGGCCCTATGCTCCTGCAGTGCCCAGTGGACTCTCCCCTTTGCTGGTGGAACCTCTCAGCCTGGGCCCTGCCAACTCCCATTGTCCTTGGGATTGATCCCAAGCCCTTCCCAGAGGCGACTTGTCCCCTGTGTTTGCCCCACCCATGCTGGGCTTCTCCTCCACATCCCTCATCACTGCCTCCCAGGGCCTGGCACGTGGCAGGGGCTTCAGGACCCTCAGCTGGAGTGCCAGCTTGGGAATAAGTGGCCTCATCTCCCTGGGTCTCAGCCTCTTGTGTCTCGAGCCCCGTGGCCCTCCAAGCACTCCTACAGAAAGCCTGGACCGAGAAGGACACTGGGCTGAGGCTGCCCCAAGATAAACACGCAGGGGGAGCCCCTGCCACCGTGCAGTCAGCATCCTGACTACACCAGTAAAAGGAGGGGCTGTGGACTCCAAGAAAGCCCCAACCTCTCCCATCTGCATACCTCAAGCAGCATTAGGGTCAGGTGTGCAGGGCCTGGGAGGACCTGTCTTTGGGAGGCTGATTGCAAATCCAAGCCCAGATGGTCCTCCATGGAACTCAAAGGAAGCACAGGGCCCCAGGCTGGTGGGTGTGTAGGGGCCAGAGGGAAAGTCAAATGCTTTCAGGGGTCCTGGGAGTGCCACGCCCCAGTCTGCCAGGTGTGGAAACAGCCCAGGGTTCTTCCATGAGCCCAGGGTTGGGTGACTTAGAGGGGAAGCCTGACTGGCTGGGAGTCCCAAGGTAGGGGGCTCGGGGGAGATAATTCTGAATTTGCCCCCT
CCTCCAGTTTAAGAAAAATAATTAGGATGAATAAACACCATAGAGTCAAGGGCTCCAGGGTTTGCTTAGACCTCAGGGTAACAGAATCATGGATGGCAAACTAATCAACCCTTGAGTCACAGCTCACAAGGGGCCTGGGTGGGAGGCGGGAATTCCAGGCCCAGGGAACAGACTGGTGCGAAGGCACAGAGGTCCTAATGTGAGTGACTGGTGAGGGGCCACTGTGCTGGGGAGAAGAGGGAGGGCTGGGCCTTGGACCCAACCTTGAAAGGACATCAGTGATAGGCCAAGGGGAGGTTTTTTTTTTTTTTGCAAAGTGTGGAGTTTGGGGGACGATACCCAGCCCCTGAGTACAGAAAGCTCATCACGGCTGCTAGCCTGCTAGGGTCTCCCTCCCTGCGGCACCCCCCACCGCCTCACCTTCAGTGATCACTGTGGCGAACATCAGGGCACCGGCAGCGTTAGACTTCACATGCTCCACTGGGTCTTTCAGCAGATGGACCAGGATGGGGATGACGTCAAAATGACACACCTGTTTCTTGCCCTCTCGAGATATGCTGGGGCAGAGAGGGAACAGAGAGGGGCTCTGCTTGGAAGGGAGAAGAGGCGCAAGGCCCAGGAGTGCCCCAGGGAGACACAAAAGAAATAATTGTGTGGGGGCTACCCCAGGTAGTCCCAGGACTGGTCCCCATGATCCAGGACTGGTCATGGCAGGGATGCTGGGCTGCAGAGGAGCATTGGTGTGAACGCGGCCCACTGCCAGTGAAGACCACGGGGTGACAGGGCCTCACTCTGAGCCAGCACACAGAAAAGAAATAGGGCATGTTCTCCGTGGCCCCAAAATGGGGAACAGCAGGACCCAGGGGTGTCAACCACAGGGAGACCTGTTTTGACTTTGCCTAAGAGCCAGAGCTGACAGTGATGGGCCAAGCTGCCCCCAACAGCAGCAGCTGCCGGGCACTGGGCAACTGCAGCTAAGGACTCAGACCAGGTAACTTTTAGGCCACTTACTCTGGCACTGAATGAAGTTGTGGAAATGAGGATGATGAACTCCCCAAAAGTTTCCTGAGAGAGGGTCAATTCTGTCCCCTAAAAAGAAAGGGCTCCCTGGGCCAGGAAACACTGTTCACAAGCCCCACAGGAATGCCACAGTGCAAAGACCCCTGTGTCACTTAGTTTAATCAACCTTTCCCAAATGTAGCCACCAAAGAACCCTCATTTTTGTAGAGCTCTTAATGACCTCAAAGACACAGGTGTTCCCTGGGAACACAGATTGGGAAATGAGGTTGGCCAGCCACTAGCTGTGACACTGGGAGCCTTTCAGGGCAGTGGGCCTCAACCCCGAAGCCCATGATCAAAGTTGCAGAGTCCTGGGCCCCACATGCAGGGATTCCCTGATTCGTTCACACAGCAAGACACTGAGACAAGAGCGATGCAGGGATTCCCCGATTTGTTCACAAGAGTGGGGACAAGAGCAGGGTTTCTCCAGCTCAGTGTTCTTTAGGGCCAGATAATTCTCAACTGGGAGCCAAAGCTCCCCCGCTGCAAACCCTGGGTTTAGGGCAGCAAACAGAGGAAAGCCCTGTCCTCAGGAGCTCCCCTCGGGGATGACACACAAAGTAACGGCAGGGATGGTGGTCACAGTGGACATGGGAGGACAGGGGACAGCAGGGAGGTTACAGTTTCAGAGAGAGAGAGCTCTCTCCTGGAAAGCCACTCTCATGGTCCCACAGAAAGGGGACCTCTGAGCAGACCTAAGAATGATGGGGCTGGGACAGGGCCCAGGAATCTGCATTTTCACAAACCTCCCGGTGGCTTGGGTCCGGGGCCCCACTTGCAGAAATACTGGCCTGAGCCTCTAGTGAGACTCTCAGAAGAGAAGCCAGGACTTAGGGAAGGCAAGAGGGTCTCCACTAAAGAGACATAGCCACAGCTAAGGCAGAGACTCTGGGGTTCGGTGACCAGCCTGTCCTCGTGAAAAGGGGCCCAGTGTGAGAACA
CAC'..b'GCAGGGCCTTGGCTCCCCTCTGCCTTCTCTTGCACACTGGCTCAGTGCTTGCAGCTGCTTGCCCAGGAAATCCAGGGCCTCGGGGATCCCAGGGGCCCAGTGGAATCCTGTGGGGTTAGAAGCAGCCACTTGGGGCTGGGCATGGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCAAGGCGGGTGGATCACTGGAGGTCAGGAGTTTGAGACCAGCCTGACCAACATGGTGAAACCCTGTCTCTACAAAAAATACAAAAATTAGGCAAGGCGTGGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCTGAGGCGGGCAGATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACGTGGTGAAACCCTGTCTCTACTAAAAATACAAAAAAATTAGCGGAGCATGGTGGCAGGCACCTGTAATCCCAGATACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCTGAGGCCACAGTAAGCTGAGATCGTACCACTGCACTCCAGCCTGGACAACAGAGCGAGACTCCGTCTCAATAAATAAATAAATGCATCCATCCATCCATACATACATACAAAAATTAGCTGGGCATGGTGGTGTGGGCCTGTGGTTTACCCCAGTGATTGCCCACATCCAGGCCCACAGGCGGCATCTGTAAACGTGTGCTGTCAGTGAATTGAGCCAGCATGTTCAAGCTGGCAAAAATCACAGGGGCCTTTCTCCCACACTGTTTTGTAAATGAGTAAACTAAGACTCAGAGAGGACTCACCTGAGGCCTTCTTTTTTAACAATAATAGTAAAACCATCAACAAAAATGACATTTGTCAAGTGCCACCCAGGAGGATGTTCTAGAGTCTGCCCATGGGCCCGTTGGGATGCTATGGGTCCTCACCCACCTCCCAGCCCTTCTGCTCCCCATCTTTCCCTGGTGGATGGTGGGCCATTGGAATCCTGGGAGGCCTGAGCTGGGGAGAGGCTCAGGGGCTGGGAGGTGCCCTGGGGCCTGCAGGGCTGCTGGCTCACAGTGAGGCTGTTTGTGCCCCACCACAGGGTCGCTGGACACCCATCAAGCCCCAGCCCAGGCCCTGAACGAGACTCAGTGGGCACTAGAACGCCTGAGGCTGCAGCTGGGCTCCCCGGGGTCCTTGCAGAGGAAACTCAGTCTGCTGGAGCAGGAATCCCAGCAGCAGGAGCTGCAGATCCAGGGCTTCGAGAGTGACCTCGCCGAGATCCGCGCCGACAAACAGAACCTGGAGGCCATTCTGCACAGCCTGCCCGAGAACTGTGCCAGCTGGCAGTGAGGGCTGCCCAGATCCCCGGCACACACTCCCCCACCTGCTGTTTACATGACCCAGGGGGTGCACACTACCCCACAGGTGTGCCCATACAGACATTCCCCGGAGCCGGCTGCTGTGAACTCGCCCCCGTGTGGATAGTCACTCCCTGCCGATTCTGTCTGTGGCTTCTTCCCTGCCAGCAGGACTGAGTGTGCGTACCCAGTTCACCTGGACATGAGTGCACACTCTCACCCCTGCACATGCATAAACGGGCACACCCCAGTGTCAATAACATACACACGTGAGGGTGCATGTCTGTGTGTATGACCCACACGTGTTCAAGTCTAATCCATCCAGTCAGCAGCTTACGGTCCACACACATTACAGTCCACAGCTGTTGTGAGAGCCACCTGTGTGCTGGACACCCTCTGGATGTTGGGCAAGTTGTTACATGAGATGCCCTGGGGTGCTACATCCACTCACTCCAGATAGCAGGGAGGTCTCAGCAGATCTGCAGAGATCAAGGGGGTCAGCAACAGCCAAAGCCCCTAGTCCCAGAGCTGGCTGCCCTCTGTTTCACAGCAGCTCCCTGACCTGTGTTGCTGCGTGCACTCCCTACAGCTCGACACAGCCAGGGGACCAACAGGCCAAGAAATGCAAGATCCCGGGAGGGTTCTTAGCAGCAGAATCTGAGGCCCAGAGAC
CCTGAGGCCGTGGCCAGGCCTGCTAGTCTGGCTAGAGCAAGGCCCATTCCTGGCGGGGGTGTCATTGCCTTCACCGGACGCTTCCCTCTCAGGGTCCTGGGACTGCACCAGATGCCCTGAGGGAATGGCCCACCCTGGCCTGTACCCACTTCAGCCTGTGATCTATCCAAAGAGCCAGGCCCAAAAGCGCCTAGGTCAGGGTGCTCAGGCTACCAGGAGCACGCCTCTGTGCCCCCGGCAACCCAGTTGACCTTTAATTGACGCTTTCCAGACCAGCCCTGCGGCACCACTTGCCATGCGGGAGGCCACCAGGGTGTGCAAGCCTGGCTGCCATTCCAGTCTGTCCTGTCTGGGAATCGCCCTGTGGCCAGGCCCGCATGCTGGCCTCTGCCCAGGACTCCTCAGCATTTCCTCTTGGCATCCCTCCCCTCTCCCAGACCCTCTTCCAGCAGATGGCAAGGCCTCGGCATTGGGAAGTCAGGCACCTCTGCGGGCCCAGCCCCCTCCCGTGGCTCCCCTGACAGGGGCAGGGGTAGGGCAGCAGCACAGACCAATTCCGTTGAACGTGGAAATAAAGGACCCTTTCACTGGGCAGGGTGGTGTGCCTCACCCTCCCCGGCTGGTGGGCAGCCAGGGCCCTGGCTGTGGGTGTGCATATGACACACCTAGTAGGTGGCCAGCATGTGGACCGGACGTTGGTAGGAAGGTGGCAAAAGCCGAGCTCGTGGCTGGGCCAGTACCTCCCATTAGAGGGCTTTGCTGGGGTTGTGTGATCACAGGTACCTACCCTGTCCTCTCAGGCACTTACCACGTAAAGCCTAGGAGCTGGTGAGTTGGAGGGGTGGGGTGCGGAGAGGCCCTCAGCTGACCTCTGGTTCAGGCTCGAGACGAACTCACAGCCAAGTGTCCGAGGATGGTGAGGAGCAGGGAGGGGCGCCATCCAGGAGGGGGATGGTGTGGGTGGGGCCTTGAAGGGTGGGGAGGCAGAGAAGGAAGCATTCCAGGCAAGAGGGTGGACAACAGTCCGGGGCCCGCAGGGTTGGGGCTCGGCCAGCTTGCATCACTCCAGGACCCCAGGTTGAATGGGGTGGGATGTTGGAGCTGCTCAGTCAGGGCTCTTGGCCGCAGGCCTCAAACCCCTCCTGAGGTGGTTTCAGCAGAAAAGGGGTGTTGGGAGGGTCGCTTGGAACCCTGGAGTAAAAACGGCTGCCACGTGTTGGAGATAGCCTAGGGAGGGGAGCCTGAGGCTTCCGGGATAGGTTGGCTTCCCTCTTCCCCCCTCCCGCCTCTCTTCTTGGTCTGTGTCTCTGCTCTCCTCTCCTGTATCTGCTTCGTTCTTTTCTCTTTATTTATTGATTTTTTTTGAGATGGAGTCTCGCTCTGTTGCCCAGGCTGGAGTGCAGTGGCATGATCTCTGCCCACTGCATCCTCCACCTCCCAGGTTTAAGCAATTCTCCTGCCTCAGCCTCCCGAGTAGCTAGGACTACAGGTGCTTGCCACCATGCCCAGCTAATTTTGGTATTTTTAGTAGAGACAGGGTTTCACCACGTTGGTCAGGCTGGTCTCAAACTCCTGACCTCAGATGATATACCTGCATTGGCCTCTTAAAGTTCTGGGATTTCAGGCATGAGCCACTGCAACCGGCCCATTCTTTTCTCTTTGCAGAGTGGCTTTCTTTGTTTTTCTTGTGCCTGATAGGAGAGGACACCCACCCCTACCGCCATCCCCCATAATGGCCCCAGGTGTACATGTCATCAGGTCCAGTGCTTGCAAGAGACAAGCTGGTGACTCTGTCCTGATTCCAGCTTCTCAGCTTAGGTGAAGTCCCACCAACCCCCGTTCAGGATAATGAGGATCTCTGGATCTAAGGCCAATAATGGATGACCGGTGCCACCCCCCAACCTAATGGGAGATGGTGTTCAGAGAAGAGGTGTGCTCCTCCACAGAAAACTGTAAAATCAAGGCTACGGTGGGGGATTGACATGATTAAACTGAGCTAG
GAGTGA\n'
b
diff -r 8c4c97fd0555 -r c58d1774c762 test-data/test-cache/genome.gtf
--- a/test-data/test-cache/genome.gtf Wed Oct 13 18:45:16 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,860 +0,0 @@\n-#!genome-build GRCh38.p12\n-#!genome-version GRCh38\n-#!genome-date 2013-12\n-#!genome-build-accession NCBI:GCA_000001405.27\n-#!genebuild-last-updated 2018-01\n-9\tensembl_havana\tgene\t1\t59388\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding";\n-9\thavana\ttranscript\t1\t3122\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000467100"; transcript_version "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-203"; transcript_source "havana"; transcript_biotype "processed_transcript"; transcript_support_level "2";\n-9\thavana\texon\t1\t800\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000467100"; transcript_version "1"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-203"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00001817525"; exon_version "1"; transcript_support_level "2";\n-9\thavana\texon\t1695\t3122\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000467100"; transcript_version "1"; exon_number "2"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-203"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00001916817"; exon_version "1"; transcript_support_level "2";\n-9\tensembl_havana\ttranscript\t642\t59388\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; tag "basic"; transcript_support_level 
"1";\n-9\tensembl_havana\texon\t642\t800\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; exon_id "ENSE00001809698"; exon_version "1"; tag "basic"; transcript_support_level "1";\n-9\tensembl_havana\tCDS\t717\t800\t.\t+\t0\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; protein_id "ENSP00000318177"; protein_version "9"; tag "basic"; transcript_support_level "1";\n-9\tensembl_havana\tstart_codon\t717\t719\t.\t+\t0\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; tag "basic"; transcript_support_level "1";\n-9\tensembl_havana\texon\t16519\t16624\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "2"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; exon_id "ENSE00003666938"; exon_version "1"; tag "basic"; transcript_support_level "1";\n-9\tensembl_havana\tCDS\t16519\t16624\t.\t+\t0\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version 
"9"; exon_number "2"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "e'..b'NA";\n-22\tensembl\ttranscript\t202368\t202657\t.\t-\t.\tgene_id "ENSG00000240160"; gene_version "3"; transcript_id "ENST00000467969"; transcript_version "3"; gene_name "RN7SL263P"; gene_source "ensembl"; gene_biotype "misc_RNA"; transcript_name "RN7SL263P-201"; transcript_source "ensembl"; transcript_biotype "misc_RNA"; tag "basic"; transcript_support_level "NA";\n-22\tensembl\texon\t202368\t202657\t.\t-\t.\tgene_id "ENSG00000240160"; gene_version "3"; transcript_id "ENST00000467969"; transcript_version "3"; exon_number "1"; gene_name "RN7SL263P"; gene_source "ensembl"; gene_biotype "misc_RNA"; transcript_name "RN7SL263P-201"; transcript_source "ensembl"; transcript_biotype "misc_RNA"; exon_id "ENSE00001875334"; exon_version "3"; tag "basic"; transcript_support_level "NA";\n-22\thavana\tgene\t203353\t205591\t.\t+\t.\tgene_id "ENSG00000230701"; gene_version "2"; gene_name "FBXW4P1"; gene_source "havana"; gene_biotype "processed_pseudogene";\n-22\thavana\ttranscript\t203353\t205591\t.\t+\t.\tgene_id "ENSG00000230701"; gene_version "2"; transcript_id "ENST00000426721"; transcript_version "2"; gene_name "FBXW4P1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "FBXW4P1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n-22\thavana\texon\t203353\t205591\t.\t+\t.\tgene_id "ENSG00000230701"; gene_version "2"; transcript_id "ENST00000426721"; transcript_version "2"; exon_number "1"; gene_name "FBXW4P1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "FBXW4P1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001782951"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n-22\thavana\tgene\t263129\t263513\t.\t-\t.\tgene_id "ENSG00000214526"; 
gene_version "3"; gene_name "AP000343.1"; gene_source "havana"; gene_biotype "processed_pseudogene";\n-22\thavana\ttranscript\t263129\t263513\t.\t-\t.\tgene_id "ENSG00000214526"; gene_version "3"; transcript_id "ENST00000440602"; transcript_version "1"; gene_name "AP000343.1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "AP000343.1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n-22\thavana\texon\t263129\t263513\t.\t-\t.\tgene_id "ENSG00000214526"; gene_version "3"; transcript_id "ENST00000440602"; transcript_version "1"; exon_number "1"; gene_name "AP000343.1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "AP000343.1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001526946"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n-22\thavana\tgene\t267202\t269079\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA";\n-22\thavana\ttranscript\t267202\t269079\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; transcript_id "ENST00000450776"; transcript_version "1"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "LINC02556-201"; transcript_source "havana"; transcript_biotype "lincRNA"; tag "basic"; transcript_support_level "3";\n-22\thavana\texon\t267202\t267377\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; transcript_id "ENST00000450776"; transcript_version "1"; exon_number "1"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "LINC02556-201"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001785308"; exon_version "1"; tag "basic"; transcript_support_level "3";\n-22\thavana\texon\t268910\t269079\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; transcript_id "ENST00000450776"; transcript_version 
"1"; exon_number "2"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "LINC02556-201"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001710203"; exon_version "1"; tag "basic"; transcript_support_level "3";\n'