Repository 'arriba'
hg clone https://toolshed.g2.bx.psu.edu/repos/jjohnson/arriba

Changeset 4:77021ad5037d (2021-10-09)
Previous changeset 3:2d32e6c86c48 (2021-10-08) Next changeset 5:005b200c8841 (2021-10-10)
Commit message:
"planemo upload for repository https://github.com/jj-umn/tools-iuc/tree/arriba/tools/arriba commit 5a62e8a2ec56a8ce00f89c8fbe61b3f1dffbbffd"
modified:
arriba.xml
arriba_download_reference.xml
arriba_get_filters.xml
test-data/Aligned.out.sam
added:
test-data/fusions.tsv
test-data/genome.fasta
test-data/genome.gtf
test-data/read1.fastq.gz
test-data/read2.fastq.gz
b
diff -r 2d32e6c86c48 -r 77021ad5037d arriba.xml
--- a/arriba.xml Fri Oct 08 20:44:25 2021 +0000
+++ b/arriba.xml Sat Oct 09 15:41:49 2021 +0000
[
@@ -20,7 +20,7 @@
     #end if
     ln -f -s '${input_params.right_fq}' ${read2} &&
     #if str($input_params.index.index_source) == "history"
-        #set $star_index_dir = $input_params.index.star_index.files_path
+        #set $star_index_dir = $input_params.index.star_index.extra_files_path
     #end if
     STAR 
     --runThreadN \${GALAXY_SLOTS:-1} 
@@ -56,29 +56,31 @@
 #end if
     -a '$genome_assembly'
     -g '$gtf'
-    #if '$blacklist'
+    #if $blacklist
         -b '$blacklist'
+    #else
+        -f 'blacklist'
     #end if
-    #if '$protein_domains'
+    #if $protein_domains
         -p '$protein_domains'
     #end if
-    #if '$known_fusions'
+    #if $known_fusions
         -k '$known_fusions'
     #end if
-    #if '$tags'
+    #if $tags
         -t '$tags'
     #end if
     -o fusions.tsv
     -O fusions.discarded.tsv 
 #if str($input_params.input_source) == "use_fastq"
-    && samtools sort -@ "$THREADS" -m 4G -T tmp -O bam Aligned.out.bam > Aligned.sortedByCoord.out.bam
+    && samtools sort -@ \${GALAXY_SLOTS:-1} -m 4G -T tmp -O bam Aligned.out.bam > Aligned.sortedByCoord.out.bam
     && samtools index Aligned.sortedByCoord.out.bam
 #elif str($visualization.do_viz) == "yes"
-    && samtools sort -@ "$THREADS" -m 4G -T tmp -O bam '$input_params.input' > Aligned.sortedByCoord.out.bam
+    && samtools sort -@ \${GALAXY_SLOTS:-1} -m 4G -T tmp -O bam '$input_params.input' > Aligned.sortedByCoord.out.bam
     && samtools index Aligned.sortedByCoord.out.bam
 #end if
 #if str($visualization.do_viz) == "yes"
-draw_fusions.R \
+&& draw_fusions.R 
     --fusions=fusions.tsv 
     --alignments=Aligned.sortedByCoord.out.bam 
     --output=fusions.pdf 
@@ -87,7 +89,7 @@
     --cytobands='$visualization.cytobands'
     #end if
     #if '$protein_domains'
-    --proteinDomains=database/protein_domains_hg19_hs37d5_GRCh37_v2.1.0.gff3
+    --proteinDomains='$protein_domains'
     #end if
 #end if
 
@@ -129,11 +131,12 @@
         <param name="genome_assembly" argument="-a" type="data" format="fasta" label="genome assembly fasta"/>
         <param name="gtf" argument="-g" type="data" format="gtf" label="GTF file with gene annotation"/>
         <param name="blacklist" argument="-b" type="data" format="tabular" optional="true" label="File containing blacklisted ranges."/>
-        <param name="protein_domains" argument="-p" type="data" format="gff3" optional="true" label="File containing blacklisted ranges."/>
+        <param name="protein_domains" argument="-p" type="data" format="gff3" optional="true" label="File containing protein domains"/>
         <param name="known_fusions" argument="-k" type="data" format="tabular"  optional="true" label="File containing known fusions">
             <help><![CDATA[ file two TAB separated columns: five-prime region three-prime region ]]></help>
         </param>
-        <param name="tags" argument="-t" type="data" format="tabular" optional="true" label="File containing tag names for a fusion."/>
+        <param name="tags" argument="-t" type="data" format="tabular" optional="true" label="File containing tag names for a fusion."
+               help="This can be the known fusions if that input has a third column with a name"/>
         <conditional name="visualization">
             <param name="do_viz" type="select" label="Generate visualization">
                 <option value="yes">Yes</option>
@@ -152,10 +155,29 @@
         <data name="aligned_bam" format="bam" label="${tool.name} on ${on_string}: Aligned.bam" from_work_dir="Aligned.sortedByCoord.out.bam">
             <filter>input_params['input_source'] == "use_fastq"</filter>
         </data> 
-        <data name="fusions_png" format="png" label="${tool.name} on ${on_string}: fusions.pdf" from_work_dir="fusions.pdf">
+        <data name="fusions_pdf" format="pdf" label="${tool.name} on ${on_string}: fusions.pdf" from_work_dir="fusions.pdf">
             <filter>visualization['do_viz'] == "yes"</filter>
         </data> 
     </outputs>
+    <tests>
+        <!-- Test 1 - From existing BAM -->
+        <test> 
+            <conditional name="input_params">
+                <param name="input_source" value="use_star"/>
+                <param name="input" ftype="sam" value="Aligned.out.sam"/>
+            </conditional>
+            <param name="genome_assembly" ftype="fasta" value="genome.fasta"/>
+            <param name="gtf" ftype="gtf" value="genome.gtf"/>
+            <conditional name="visualization">
+                <param name="do_viz" value="no"/>
+            </conditional>
+            <output name="fusions">
+                <assert_contents>
+                    <has_text_matching expression="BCR\tABL1"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
     <help><![CDATA[
 ** Arriba **
 
b
diff -r 2d32e6c86c48 -r 77021ad5037d arriba_download_reference.xml
--- a/arriba_download_reference.xml Fri Oct 08 20:44:25 2021 +0000
+++ b/arriba_download_reference.xml Sat Oct 09 15:41:49 2021 +0000
b
@@ -12,7 +12,7 @@
     \$REF_SCRIPT '$arriba_reference_name' &&
     cp *.fa*  '$genome_fasta' &&
     cp *.gtf*  '$genome_gtf' &&
-    mv STAR*/* '$star_index.extra_files_path'
+    mv STAR_index_* '$star_index.extra_files_path'
     ]]></command>
     <inputs>
         <param name="arriba_reference_name" type="select" label="Select reference">
b
diff -r 2d32e6c86c48 -r 77021ad5037d arriba_get_filters.xml
--- a/arriba_get_filters.xml Fri Oct 08 20:44:25 2021 +0000
+++ b/arriba_get_filters.xml Sat Oct 09 15:41:49 2021 +0000
[
@@ -34,11 +34,21 @@
         </param>
     </inputs>
     <outputs>
-        <data name="blacklist" format="tabular" label="${tool.name} ${arriba_reference_name} blacklist"/>
-        <data name="known_fusions" format="tabular" label="${tool.name} ${arriba_reference_name} known_fusions"/>
-        <data name="protein_domains" format="tabular" label="${tool.name} ${arriba_reference_name} protein_domains"/>
-        <data name="cytobands" format="tabular" label="${tool.name} ${arriba_reference_name} cytobands"/>
+        <data name="blacklist" format="tabular" label="${tool.name} ${arriba_reference_name} blacklist.tsv"/>
+        <data name="known_fusions" format="tabular" label="${tool.name} ${arriba_reference_name} known_fusions.tsv"/>
+        <data name="protein_domains" format="gff3" label="${tool.name} ${arriba_reference_name} protein_domains.gff3"/>
+        <data name="cytobands" format="tabular" label="${tool.name} ${arriba_reference_name} cytobands.tsv"/>
     </outputs>
+    <tests>
+        <test>
+            <param name="arriba_reference_name" value="GRCh38"/>
+            <output name="cytobands">
+                <assert_contents>
+                    <has_text_matching expression="1\t1\t\d+\tp36.33\tgneg"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
     <help><![CDATA[
 ** Arriba **
 
b
diff -r 2d32e6c86c48 -r 77021ad5037d test-data/Aligned.out.sam
--- a/test-data/Aligned.out.sam Fri Oct 08 20:44:25 2021 +0000
+++ b/test-data/Aligned.out.sam Sat Oct 09 15:41:49 2021 +0000
b
b'@@ -1,104 +1,82 @@\n @HD\tVN:1.4\tSO:coordinate\n-@SQ\tSN:1\tLN:248956422\n-@SQ\tSN:2\tLN:242193529\n-@SQ\tSN:3\tLN:198295559\n-@SQ\tSN:4\tLN:190214555\n-@SQ\tSN:5\tLN:181538259\n-@SQ\tSN:6\tLN:170805979\n-@SQ\tSN:7\tLN:159345973\n-@SQ\tSN:8\tLN:145138636\n-@SQ\tSN:9\tLN:138394717\n-@SQ\tSN:10\tLN:133797422\n-@SQ\tSN:11\tLN:135086622\n-@SQ\tSN:12\tLN:133275309\n-@SQ\tSN:13\tLN:114364328\n-@SQ\tSN:14\tLN:107043718\n-@SQ\tSN:15\tLN:101991189\n-@SQ\tSN:16\tLN:90338345\n-@SQ\tSN:17\tLN:83257441\n-@SQ\tSN:18\tLN:80373285\n-@SQ\tSN:19\tLN:58617616\n-@SQ\tSN:20\tLN:64444167\n-@SQ\tSN:21\tLN:46709983\n-@SQ\tSN:22\tLN:50818468\n-@SQ\tSN:X\tLN:156040895\n-@SQ\tSN:Y\tLN:57227415\n-@SQ\tSN:MT\tLN:16569\n-@PG\tID:STAR\tPN:STAR\tVN:2.7.8a\tCL:STAR   --runThreadN 12   --genomeDir /panfs/roc/website/galaxy.msi.umn.edu/galaxy/tool-data/rnastar/2.7.4a/GRCh38_canon/GRCh38_canon/dataset_1367616_files   --genomeLoad NoSharedMemory   --readFilesIn /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368710.dat   /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368711.dat      --readFilesCommand zcat      --limitBAMsortRAM 122880000000   --outSAMtype BAM   SortedByCoordinate      --outSAMstrandField intronMotif   --outSAMattributes NH   HI   AS   nM   ch      --outSAMunmapped Within      --outSAMprimaryFlag OneBestScore   --outSAMmapqUnique 60   --outBAMsortingThreadN 12   --outBAMsortingBinsN 50   --outSAMattrIHstart 1   --winAnchorMultimapNmax 50   --chimSegmentMin 12   --chimOutType WithinBAM   Junctions      --chimOutJunctionFormat 1      --quantMode GeneCounts      --twopass1readsN 50000   --twopassMode Basic\n-@CO\tuser command line: STAR --runThreadN 12 --genomeLoad NoSharedMemory --genomeDir /panfs/roc/website/galaxy.msi.umn.edu/galaxy/tool-data/rnastar/2.7.4a/GRCh38_canon/GRCh38_canon/dataset_1367616_files --readFilesIn /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368710.dat 
/panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368711.dat --readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --twopassMode Basic --twopass1readsN 50000  --quantMode GeneCounts --outSAMstrandField intronMotif --outSAMattrIHstart 1 --outSAMattributes NH HI AS nM ch --outSAMprimaryFlag OneBestScore --outSAMmapqUnique 60 --outSAMunmapped Within --chimSegmentMin 12 --outBAMsortingThreadN 12 --outBAMsortingBinsN 50 --winAnchorMultimapNmax 50 --limitBAMsortRAM 122880000000 --chimOutType WithinBAM Junctions --chimOutJunctionFormat 1\n-BCR-ABL1-76\t99\t9\t130854061\t60\t24S126M\t=\t130854103\t755\tCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTG\tCCCGGGGCGGGCGJJJJJGJJJGJJJJJJJJJGJ1JCJJGJGGJJJGJJGGJJJ8GGJJGGGJJ=GGCGGGGGG=GGCCGGG8GC=GGGG=GCGGCGGGGJGG=GGGG=GGGGGGGGCGGGGCCGGGCG=GG(G=GCGCCG1CCGGCGGG\tNH:i:1\tHI:i:1\tAS:i:274\tnM:i:1\tXS:A:+\tNM:i:1\n-BCR-ABL1-64\t99\t9\t130854061\t60\t6S144M\t=\t130854104\t756\tAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGG\tCCCGGGGGCGGGGGJJJGJJJGGJJJJJCJJJJGGJJJGJJGJGJG=GGJG=JJJJCGCCC==JGGCGGGCJG1CCCCGG8CGGGGGGGGCCGC=CGCGGJGGGGCGCGGGGGGGGCCGCGGGG=GCGGGGGGG=GGGGCGGGGGGCCGG\tNH:i:1\tHI:i:1\tAS:i:290\tnM:i:2\tXS:A:+\tNM:i:1\n-BCR-ABL1-54\t99\t9\t130854061\t60\t61S89M\t=\t130854061\t140\tCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTGAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTC\tCCCGGGGGGGGGGGJJJJJGJ=JJJJJJGJJJGGJJJJJJJCJJG8JJJGJJGJ=GG=JJJGGCGGCGGJGC(GGGGGCGC8CGGCGCCGGC=GGGCGGGJG1GGGGGG1CG=GGGGC=1G1CGGGGGCCGGGGCGG=CC=C=CGGGGG8\tNH:i:1\tHI:i:1\tAS:i:219\tnM:i:4\tNM:i:2\n-BCR-ABL1-54\t147\t9\t130854061\t60\t10S140M\t=\t130854061\t-140\tAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACGTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTT
GCACTGTATGATTTT\t=GGGGGGGCCCCGCCGGG=G(GGGG=CGCGGCGCCGG=GGGGGCJJJ=GC8C1GGGGGCG8GCCGC=GGG1GCCGGJC8GCGGCGCGJGJJJG1CGJGG=CJJJGGGGJG=CJGJJJJCJCJJGGJJJJJGGJGGJJCGGGGGGGGG=CC\tNH:i:1\tHI:i:1\tAS:i:219\tnM:i:4\tNM:i:2\n-BCR-ABL1-'..b'CGC=GGGGGG1JJJGJGJJJCJJCGJJJJJJJJJJCGJJJJJJGGCCJ1JGCGGCGJ8GG(CJGJGJCGGCGG1CC=CG=GCC=GGGGG=GGCGCCG1CGGGGGGG1GGGGGCGGGCGCGGCGG811G8CCGGGGCGGGGGCCG=CGC\tNH:i:1\tHI:i:1\tAS:i:298\tnM:i:0\tNM:i:0\n+BCR-ABL1-16\t83\t9\t275888\t60\t150M\t=\t275840\t-198\tCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGC\tCGGGGGG=GGCGGGCGGGGGGGCGGGGCGGGGCCCG=CGGGGGCGGGCCCGG8GCCCGG8GCGGC=GCCCGGGGGGCGGGCGGJGGGCCGGJCGGJJGJGGGGGJJJJGJJGGJJGG=JGGJJGGJ=JGJJJJJJGJC1GGGG=GGG1C1\tNH:i:1\tHI:i:1\tAS:i:296\tnM:i:1\tNM:i:0\n+BCR-ABL1-32\t147\t9\t275894\t60\t150M\t=\t275850\t-194\tCCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGA\t8GCCCCGGCGCGC1GGGGCGGCCGGGGCG1(GG=GG=GCGGCGJCJJCGG8=GGGCCGCGGGCGGGGGC=GC1=GGGGGJGGCCGJJCGGGJJJCGGG8CCGGGCJGJGJJGCCGCJJJJJJJJJJGCGJJJGJJGJGGGGGCGGGG=CC\tNH:i:1\tHI:i:1\tAS:i:298\tnM:i:0\tNM:i:0\n+BCR-ABL1-38\t83\t9\t275900\t60\t150M\t=\t275854\t-196\tATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGAGTGAGA\tGCGCCGCGGGCGGGGGGGCGGCGCG8CGCGGGG8GGCGGGCCCCG8CC=JGCGGGGGGCGGGGCGCCGC=GCCCGGJGGGCGGGCJGCCJJGJG=GGCJJJGGJCGJCGCGJJJC=JJGJCJGJGJJGJJJ=JJJ1GGGGGGGGGGGCCC\tNH:i:1\tHI:i:1\tAS:i:298\tnM:i:0\tNM:i:0\n+BCR-ABL1-6\t147\t9\t275903\t60\t150M\t=\t275846\t-207\tGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGAGTGAGAGCA\tCG8GCGGGCCCCGGCGCGGGGCCGGGCGG8CG=G=GCCGCGG=1CJ8JCCGCGGGGGCGGGGGGCGG=G=8GCGJG=GGGGGGJGCCGGJJGG=G=CJ8=JJJJGG=JJJJJGJJGJGJJGJJJJJCCCJJJGGJJJG1GGG1GGGGCCC\tNH:i:1\tHI:i:1\
tAS:i:298\tnM:i:0\tNM:i:0\n+BCR-ABL1-70\t83\t9\t275911\t60\t150M\t=\t275874\t-187\tTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGAGTGAGAGCAGTCCTGGC\tCGGGCGGGCGGGCCGGGG=GGG8GCGGG8GGCGGGCGGCGGGCGGGGG=JGGGGGCGCGGGGGCGGGGCGCG1GGGGCJC8GG=JGGJJCCCJJGGGGJGJGJGGJJGGJJJJJGCJJJJGGJJGJJGJJJGGGJJJGGGGCGGGGG=CC\tNH:i:1\tHI:i:1\tAS:i:298\tnM:i:0\tNM:i:0\n+BCR-ABL1-36\t147\t9\t275919\t60\t150M\t=\t275872\t-197\tAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGAGTGAGAGCAGTCCTGGCCAGAGGTC\tC=GGGGGGCCGGGGGGGGG=GGCCGCGGGC1GCGC1GCGGCCGJJ(CCG8GCCGGGCCGGGC=CC1CG=CGCCGGG=CGGGGGGGGCGGGGGJ==JJJJJ1CJJJGJGGJCGCJGJGJGJJJJ=GG1CJJCGJG1GC=GGGCGCGGGCCC\tNH:i:1\tHI:i:1\tAS:i:298\tnM:i:0\tNM:i:0\n+BCR-ABL1-56\t147\t9\t275928\t60\t150M\t=\t275885\t-193\tCATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGAGTGAGAGCAGTCCTGGCCAGAGGTCCATCTCGCT\tCCGGGCGCGCGGGCG=CCCGGCGCGGGGC=CGGCGGCCGCGGGJJJJCCGCCG(GCCCCCGGCCGGG=G8GGGGGGCC=C=CGGJGJJJGC=JGGJJJGJGJ1JJJGC=JJJG=JCJJJJJJJ=JJGGGJJJCGJJJGGGGGCGG=GCCC\tNH:i:1\tHI:i:1\tAS:i:298\tnM:i:0\tNM:i:0\n BCR-ABL1-74\t77\t*\t0\t0\t*\t*\t0\t0\tTCATTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATGACATTC\tCC11GGGGGGGGGGCCJJJGCGJJGJJJJJGGGGGGJJJGGJG==GCJCJ=GGJJGGJJGGCJGG=GGGGGJGGJGC=GC=GGGCGGGCGGGGCCGCGGGJCGC=GGC8CGCGCGGGGGGCGCC1GGCGCC=GCCGCGGC8GCGGGCCCG\tNH:i:0\tHI:i:0\tAS:i:155\tnM:i:2\tuT:A:1\n 
BCR-ABL1-74\t141\t*\t0\t0\t*\t*\t0\t0\tCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAGGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAG\tCCCGGGGGGCGCGJGGJJGGJGJJJGJGGJJGGJGJJ1=JCJJGGGJJJJGGGJGCCJGGJGG=J1JG8JGCGGGJG=GC1CGCCGGCG(GGCGGCGGGGGCJC1CCGC==CCGGGGCGGCGGGCCGGCGCGC8CCCCGGG=GGGC=GGG\tNH:i:0\tHI:i:0\tAS:i:155\tnM:i:2\tuT:A:1\n BCR-ABL1-66\t77\t*\t0\t0\t*\t*\t0\t0\tTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATGACATTCAGAAACCCATAGAG\tCCC=GGGGCGGGGJJJJJGJJJJ=JJJGJJ1GJJGJJJJJGJJJJJGGGGCGJJGGGJJJGGCGGGGJGCGG1JCGGG=GCCGCG=GC=G=GCCGGGGG8JGGGGGGGGGGGG=GGCGGC8GGCCGGGC=GGGGGGGGG=CGG=8GGCCG\tNH:i:0\tHI:i:0\tAS:i:159\tnM:i:0\tuT:A:1\n'
b
diff -r 2d32e6c86c48 -r 77021ad5037d test-data/fusions.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fusions.tsv Sat Oct 09 15:41:49 2021 +0000
b
@@ -0,0 +1,2 @@
+#gene1 gene2 strand1(gene/fusion) strand2(gene/fusion) breakpoint1 breakpoint2 site1 site2 type split_reads1 split_reads2 discordant_mates coverage1 coverage2 confidence reading_frame tags retained_protein_domains closest_genomic_breakpoint1 closest_genomic_breakpoint2 gene_id1 gene_id2 transcript_id1 transcript_id2 direction1 direction2 filters fusion_transcript peptide_sequence read_identifiers
+BCR ABL1 +/+ +/+ 22:23632600 9:133729451 CDS/splice-site CDS/splice-site translocation 4 7 0 4 12 high in-frame Mitelman Bcr-Abl_oncoprotein_oligomerisation_domain(100%),C2_domain(100%),PH_domain(100%),RhoGEF_domain(100%)|F-actin_binding(100%),Protein_kinase_domain(100%),SH2_domain(100%),SH3_domain(100%),Variant_SH3_domain(100%) . . ENSG00000186716.15 ENSG00000097007.13 ENST00000305877.8 ENST00000372348.2 downstream upstream . AGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAG___ATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAA|AAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAG___GTGAAAAGCTCCGGG SFSLTSVELQMLTNSCVKLQTVHSIPLTINKEDDESPGLYGFLNVIVHSATGFKQSS|kALQRPVASDFEPQGLSEAARWNSKENLLAGPSENDPNLFVALYDFVASGDNTLSITKGEKLR BCR-ABL1-10,BCR-ABL1-2,BCR-ABL1-24,BCR-ABL1-28,BCR-ABL1-58,BCR-ABL1-60,BCR-ABL1-76,BCR-ABL1-12,BCR-ABL1-18,BCR-ABL1-4,BCR-ABL1-66
b
diff -r 2d32e6c86c48 -r 77021ad5037d test-data/genome.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome.fasta Sat Oct 09 15:41:49 2021 +0000
b
b'@@ -0,0 +1,4 @@\n+>22\n+TAAAGAGACTTAGCACATTTATTCACTCACAGAGGTGAATGAAGGGCTCAGGGTTTGAACTCGATGACACTGATGGCGATCCGGGCTGCCCGCTGTAAGGCTTCGGCCACTTGAGGCTTTTCGTAAGTCTCCACCTCCATGGCACGGAAAGTGGGCACGTGCGTCTGCAGGGCCTTGCGGCCCTCGGGGGCCTCTGCCAGCATGGTAAGGGCCTTGGTGGCATTCAGGCGCGCTATGGTCATGGGGGAGTGCAGCAGCTCCAGGAGCAGGCCGATGGCTTGTGCCTCCAGGGCCGCATACTTCCCTGCAGGCCACCAACACAAGGCGTTCCAAACAGCCCAAGGGGCCCTCTTCTCACCCCCTCTCACCCTAGCCCTCTCCTGACCCTCCTCCTTGTGCAGTCTCAAGGGGTTTATGCCTGGTTTATGCCCCACCAACATGCCCTCACCCTCAGGGCTCTTGCACCTTCGCTGAGTCCGGCCTGGGCCTGGCCTCTTCATCTCCTGCCTTTCCGGCCTCAACTGGCTCCTCTCCATCCACCCTCGTATCCTGCCCAGGAGTTCAAGACCAGCCTGGGCAACATAATGAGATTCCATCTCTACAAAAATTTAAAAATTATCCAGATGTAGTGACAGGTGCCTACAGTCCCAGCTACTCAGGAGACTGAGCGGGGAGGATCACTAGAGCCTGGGAGTTTGAGGCTGCAGTGATCGTGACATCGTACTCTAGCCTGGGTGACACACTGAGACACTGGCTCTTTAAAAAAAGTAAGCCCAGCTGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCTGGTGGATCACGAGGTCAGGAGATTGTGACCATCCTGGCTAACACAGTGAAACCCCGTCTCTATTAAAAATACAAAAAATTAGCCGGGCATGGTGGCGGGCGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATGCGTGAACCTGGGAGGCGGAGCTTGCAGTGAGCCAAGATGGTGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCATCTCAAAAAAAAAAAAAAAGTAAACCCAATAGTTCATATATGTTGCCAATATTTCTTGAGCACCTACTCTGCACAAAACACACATGGTTCCTGAGAGATGCCCTCCTCAACCATTCGCTGCCCCTTTCTGCTGTCCTTGCCGGCCCTATGCTCCTGCAGTGCCCAGTGGACTCTCCCCTTTGCTGGTGGAACCTCTCAGCCTGGGCCCTGCCAACTCCCATTGTCCTTGGGATTGATCCCAAGCCCTTCCCAGAGGCGACTTGTCCCCTGTGTTTGCCCCACCCATGCTGGGCTTCTCCTCCACATCCCTCATCACTGCCTCCCAGGGCCTGGCACGTGGCAGGGGCTTCAGGACCCTCAGCTGGAGTGCCAGCTTGGGAATAAGTGGCCTCATCTCCCTGGGTCTCAGCCTCTTGTGTCTCGAGCCCCGTGGCCCTCCAAGCACTCCTACAGAAAGCCTGGACCGAGAAGGACACTGGGCTGAGGCTGCCCCAAGATAAACACGCAGGGGGAGCCCCTGCCACCGTGCAGTCAGCATCCTGACTACACCAGTAAAAGGAGGGGCTGTGGACTCCAAGAAAGCCCCAACCTCTCCCATCTGCATACCTCAAGCAGCATTAGGGTCAGGTGTGCAGGGCCTGGGAGGACCTGTCTTTGGGAGGCTGATTGCAAATCCAAGCCCAGATGGTCCTCCATGGAACTCAAAGGAAGCACAGGGCCCCAGGCTGGTGGGTGTGTAGGGGCCAGAGGGAAAGTCAAATGCTTTCAGGGGTCCTGGGAGTGCCACGCCCCAGTCTGCCAGGTGTGGAAACAGCCCAGGGTTCTTCCATGAGCCCAGGGTTGGGTGACTTAGAGGGGAAGCCTGACTGGCTGGGAGTCCCAAGGTAGGGGGCTCGGGGGAGATAATTCTGAATTTGCCCCCT
CCTCCAGTTTAAGAAAAATAATTAGGATGAATAAACACCATAGAGTCAAGGGCTCCAGGGTTTGCTTAGACCTCAGGGTAACAGAATCATGGATGGCAAACTAATCAACCCTTGAGTCACAGCTCACAAGGGGCCTGGGTGGGAGGCGGGAATTCCAGGCCCAGGGAACAGACTGGTGCGAAGGCACAGAGGTCCTAATGTGAGTGACTGGTGAGGGGCCACTGTGCTGGGGAGAAGAGGGAGGGCTGGGCCTTGGACCCAACCTTGAAAGGACATCAGTGATAGGCCAAGGGGAGGTTTTTTTTTTTTTTGCAAAGTGTGGAGTTTGGGGGACGATACCCAGCCCCTGAGTACAGAAAGCTCATCACGGCTGCTAGCCTGCTAGGGTCTCCCTCCCTGCGGCACCCCCCACCGCCTCACCTTCAGTGATCACTGTGGCGAACATCAGGGCACCGGCAGCGTTAGACTTCACATGCTCCACTGGGTCTTTCAGCAGATGGACCAGGATGGGGATGACGTCAAAATGACACACCTGTTTCTTGCCCTCTCGAGATATGCTGGGGCAGAGAGGGAACAGAGAGGGGCTCTGCTTGGAAGGGAGAAGAGGCGCAAGGCCCAGGAGTGCCCCAGGGAGACACAAAAGAAATAATTGTGTGGGGGCTACCCCAGGTAGTCCCAGGACTGGTCCCCATGATCCAGGACTGGTCATGGCAGGGATGCTGGGCTGCAGAGGAGCATTGGTGTGAACGCGGCCCACTGCCAGTGAAGACCACGGGGTGACAGGGCCTCACTCTGAGCCAGCACACAGAAAAGAAATAGGGCATGTTCTCCGTGGCCCCAAAATGGGGAACAGCAGGACCCAGGGGTGTCAACCACAGGGAGACCTGTTTTGACTTTGCCTAAGAGCCAGAGCTGACAGTGATGGGCCAAGCTGCCCCCAACAGCAGCAGCTGCCGGGCACTGGGCAACTGCAGCTAAGGACTCAGACCAGGTAACTTTTAGGCCACTTACTCTGGCACTGAATGAAGTTGTGGAAATGAGGATGATGAACTCCCCAAAAGTTTCCTGAGAGAGGGTCAATTCTGTCCCCTAAAAAGAAAGGGCTCCCTGGGCCAGGAAACACTGTTCACAAGCCCCACAGGAATGCCACAGTGCAAAGACCCCTGTGTCACTTAGTTTAATCAACCTTTCCCAAATGTAGCCACCAAAGAACCCTCATTTTTGTAGAGCTCTTAATGACCTCAAAGACACAGGTGTTCCCTGGGAACACAGATTGGGAAATGAGGTTGGCCAGCCACTAGCTGTGACACTGGGAGCCTTTCAGGGCAGTGGGCCTCAACCCCGAAGCCCATGATCAAAGTTGCAGAGTCCTGGGCCCCACATGCAGGGATTCCCTGATTCGTTCACACAGCAAGACACTGAGACAAGAGCGATGCAGGGATTCCCCGATTTGTTCACAAGAGTGGGGACAAGAGCAGGGTTTCTCCAGCTCAGTGTTCTTTAGGGCCAGATAATTCTCAACTGGGAGCCAAAGCTCCCCCGCTGCAAACCCTGGGTTTAGGGCAGCAAACAGAGGAAAGCCCTGTCCTCAGGAGCTCCCCTCGGGGATGACACACAAAGTAACGGCAGGGATGGTGGTCACAGTGGACATGGGAGGACAGGGGACAGCAGGGAGGTTACAGTTTCAGAGAGAGAGAGCTCTCTCCTGGAAAGCCACTCTCATGGTCCCACAGAAAGGGGACCTCTGAGCAGACCTAAGAATGATGGGGCTGGGACAGGGCCCAGGAATCTGCATTTTCACAAACCTCCCGGTGGCTTGGGTCCGGGGCCCCACTTGCAGAAATACTGGCCTGAGCCTCTAGTGAGACTCTCAGAAGAGAAGCCAGGACTTAGGGAAGGCAAGAGGGTCTCCACTAAAGAGACATAGCCACAGCTAAGGCAGAGACTCTGGGGTTCGGTGACCAGCCTGTCCTCGTGAAAAGGGGCCCAGTGTGAGAACA
CAC'..b'GCAGGGCCTTGGCTCCCCTCTGCCTTCTCTTGCACACTGGCTCAGTGCTTGCAGCTGCTTGCCCAGGAAATCCAGGGCCTCGGGGATCCCAGGGGCCCAGTGGAATCCTGTGGGGTTAGAAGCAGCCACTTGGGGCTGGGCATGGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCAAGGCGGGTGGATCACTGGAGGTCAGGAGTTTGAGACCAGCCTGACCAACATGGTGAAACCCTGTCTCTACAAAAAATACAAAAATTAGGCAAGGCGTGGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCTGAGGCGGGCAGATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACGTGGTGAAACCCTGTCTCTACTAAAAATACAAAAAAATTAGCGGAGCATGGTGGCAGGCACCTGTAATCCCAGATACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCTGAGGCCACAGTAAGCTGAGATCGTACCACTGCACTCCAGCCTGGACAACAGAGCGAGACTCCGTCTCAATAAATAAATAAATGCATCCATCCATCCATACATACATACAAAAATTAGCTGGGCATGGTGGTGTGGGCCTGTGGTTTACCCCAGTGATTGCCCACATCCAGGCCCACAGGCGGCATCTGTAAACGTGTGCTGTCAGTGAATTGAGCCAGCATGTTCAAGCTGGCAAAAATCACAGGGGCCTTTCTCCCACACTGTTTTGTAAATGAGTAAACTAAGACTCAGAGAGGACTCACCTGAGGCCTTCTTTTTTAACAATAATAGTAAAACCATCAACAAAAATGACATTTGTCAAGTGCCACCCAGGAGGATGTTCTAGAGTCTGCCCATGGGCCCGTTGGGATGCTATGGGTCCTCACCCACCTCCCAGCCCTTCTGCTCCCCATCTTTCCCTGGTGGATGGTGGGCCATTGGAATCCTGGGAGGCCTGAGCTGGGGAGAGGCTCAGGGGCTGGGAGGTGCCCTGGGGCCTGCAGGGCTGCTGGCTCACAGTGAGGCTGTTTGTGCCCCACCACAGGGTCGCTGGACACCCATCAAGCCCCAGCCCAGGCCCTGAACGAGACTCAGTGGGCACTAGAACGCCTGAGGCTGCAGCTGGGCTCCCCGGGGTCCTTGCAGAGGAAACTCAGTCTGCTGGAGCAGGAATCCCAGCAGCAGGAGCTGCAGATCCAGGGCTTCGAGAGTGACCTCGCCGAGATCCGCGCCGACAAACAGAACCTGGAGGCCATTCTGCACAGCCTGCCCGAGAACTGTGCCAGCTGGCAGTGAGGGCTGCCCAGATCCCCGGCACACACTCCCCCACCTGCTGTTTACATGACCCAGGGGGTGCACACTACCCCACAGGTGTGCCCATACAGACATTCCCCGGAGCCGGCTGCTGTGAACTCGCCCCCGTGTGGATAGTCACTCCCTGCCGATTCTGTCTGTGGCTTCTTCCCTGCCAGCAGGACTGAGTGTGCGTACCCAGTTCACCTGGACATGAGTGCACACTCTCACCCCTGCACATGCATAAACGGGCACACCCCAGTGTCAATAACATACACACGTGAGGGTGCATGTCTGTGTGTATGACCCACACGTGTTCAAGTCTAATCCATCCAGTCAGCAGCTTACGGTCCACACACATTACAGTCCACAGCTGTTGTGAGAGCCACCTGTGTGCTGGACACCCTCTGGATGTTGGGCAAGTTGTTACATGAGATGCCCTGGGGTGCTACATCCACTCACTCCAGATAGCAGGGAGGTCTCAGCAGATCTGCAGAGATCAAGGGGGTCAGCAACAGCCAAAGCCCCTAGTCCCAGAGCTGGCTGCCCTCTGTTTCACAGCAGCTCCCTGACCTGTGTTGCTGCGTGCACTCCCTACAGCTCGACACAGCCAGGGGACCAACAGGCCAAGAAATGCAAGATCCCGGGAGGGTTCTTAGCAGCAGAATCTGAGGCCCAGAGAC
CCTGAGGCCGTGGCCAGGCCTGCTAGTCTGGCTAGAGCAAGGCCCATTCCTGGCGGGGGTGTCATTGCCTTCACCGGACGCTTCCCTCTCAGGGTCCTGGGACTGCACCAGATGCCCTGAGGGAATGGCCCACCCTGGCCTGTACCCACTTCAGCCTGTGATCTATCCAAAGAGCCAGGCCCAAAAGCGCCTAGGTCAGGGTGCTCAGGCTACCAGGAGCACGCCTCTGTGCCCCCGGCAACCCAGTTGACCTTTAATTGACGCTTTCCAGACCAGCCCTGCGGCACCACTTGCCATGCGGGAGGCCACCAGGGTGTGCAAGCCTGGCTGCCATTCCAGTCTGTCCTGTCTGGGAATCGCCCTGTGGCCAGGCCCGCATGCTGGCCTCTGCCCAGGACTCCTCAGCATTTCCTCTTGGCATCCCTCCCCTCTCCCAGACCCTCTTCCAGCAGATGGCAAGGCCTCGGCATTGGGAAGTCAGGCACCTCTGCGGGCCCAGCCCCCTCCCGTGGCTCCCCTGACAGGGGCAGGGGTAGGGCAGCAGCACAGACCAATTCCGTTGAACGTGGAAATAAAGGACCCTTTCACTGGGCAGGGTGGTGTGCCTCACCCTCCCCGGCTGGTGGGCAGCCAGGGCCCTGGCTGTGGGTGTGCATATGACACACCTAGTAGGTGGCCAGCATGTGGACCGGACGTTGGTAGGAAGGTGGCAAAAGCCGAGCTCGTGGCTGGGCCAGTACCTCCCATTAGAGGGCTTTGCTGGGGTTGTGTGATCACAGGTACCTACCCTGTCCTCTCAGGCACTTACCACGTAAAGCCTAGGAGCTGGTGAGTTGGAGGGGTGGGGTGCGGAGAGGCCCTCAGCTGACCTCTGGTTCAGGCTCGAGACGAACTCACAGCCAAGTGTCCGAGGATGGTGAGGAGCAGGGAGGGGCGCCATCCAGGAGGGGGATGGTGTGGGTGGGGCCTTGAAGGGTGGGGAGGCAGAGAAGGAAGCATTCCAGGCAAGAGGGTGGACAACAGTCCGGGGCCCGCAGGGTTGGGGCTCGGCCAGCTTGCATCACTCCAGGACCCCAGGTTGAATGGGGTGGGATGTTGGAGCTGCTCAGTCAGGGCTCTTGGCCGCAGGCCTCAAACCCCTCCTGAGGTGGTTTCAGCAGAAAAGGGGTGTTGGGAGGGTCGCTTGGAACCCTGGAGTAAAAACGGCTGCCACGTGTTGGAGATAGCCTAGGGAGGGGAGCCTGAGGCTTCCGGGATAGGTTGGCTTCCCTCTTCCCCCCTCCCGCCTCTCTTCTTGGTCTGTGTCTCTGCTCTCCTCTCCTGTATCTGCTTCGTTCTTTTCTCTTTATTTATTGATTTTTTTTGAGATGGAGTCTCGCTCTGTTGCCCAGGCTGGAGTGCAGTGGCATGATCTCTGCCCACTGCATCCTCCACCTCCCAGGTTTAAGCAATTCTCCTGCCTCAGCCTCCCGAGTAGCTAGGACTACAGGTGCTTGCCACCATGCCCAGCTAATTTTGGTATTTTTAGTAGAGACAGGGTTTCACCACGTTGGTCAGGCTGGTCTCAAACTCCTGACCTCAGATGATATACCTGCATTGGCCTCTTAAAGTTCTGGGATTTCAGGCATGAGCCACTGCAACCGGCCCATTCTTTTCTCTTTGCAGAGTGGCTTTCTTTGTTTTTCTTGTGCCTGATAGGAGAGGACACCCACCCCTACCGCCATCCCCCATAATGGCCCCAGGTGTACATGTCATCAGGTCCAGTGCTTGCAAGAGACAAGCTGGTGACTCTGTCCTGATTCCAGCTTCTCAGCTTAGGTGAAGTCCCACCAACCCCCGTTCAGGATAATGAGGATCTCTGGATCTAAGGCCAATAATGGATGACCGGTGCCACCCCCCAACCTAATGGGAGATGGTGTTCAGAGAAGAGGTGTGCTCCTCCACAGAAAACTGTAAAATCAAGGCTACGGTGGGGGATTGACATGATTAAACTGAGCTAG
GAGTGA\n'
b
diff -r 2d32e6c86c48 -r 77021ad5037d test-data/genome.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome.gtf Sat Oct 09 15:41:49 2021 +0000
b
b'@@ -0,0 +1,860 @@\n+#!genome-build GRCh38.p12\n+#!genome-version GRCh38\n+#!genome-date 2013-12\n+#!genome-build-accession NCBI:GCA_000001405.27\n+#!genebuild-last-updated 2018-01\n+9\tensembl_havana\tgene\t1\t59388\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding";\n+9\thavana\ttranscript\t1\t3122\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000467100"; transcript_version "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-203"; transcript_source "havana"; transcript_biotype "processed_transcript"; transcript_support_level "2";\n+9\thavana\texon\t1\t800\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000467100"; transcript_version "1"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-203"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00001817525"; exon_version "1"; transcript_support_level "2";\n+9\thavana\texon\t1695\t3122\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000467100"; transcript_version "1"; exon_number "2"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-203"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00001916817"; exon_version "1"; transcript_support_level "2";\n+9\tensembl_havana\ttranscript\t642\t59388\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; tag "basic"; transcript_support_level 
"1";\n+9\tensembl_havana\texon\t642\t800\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; exon_id "ENSE00001809698"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+9\tensembl_havana\tCDS\t717\t800\t.\t+\t0\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; protein_id "ENSP00000318177"; protein_version "9"; tag "basic"; transcript_support_level "1";\n+9\tensembl_havana\tstart_codon\t717\t719\t.\t+\t0\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; tag "basic"; transcript_support_level "1";\n+9\tensembl_havana\texon\t16519\t16624\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "2"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; exon_id "ENSE00003666938"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+9\tensembl_havana\tCDS\t16519\t16624\t.\t+\t0\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version 
"9"; exon_number "2"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "e'..b'NA";\n+22\tensembl\ttranscript\t202368\t202657\t.\t-\t.\tgene_id "ENSG00000240160"; gene_version "3"; transcript_id "ENST00000467969"; transcript_version "3"; gene_name "RN7SL263P"; gene_source "ensembl"; gene_biotype "misc_RNA"; transcript_name "RN7SL263P-201"; transcript_source "ensembl"; transcript_biotype "misc_RNA"; tag "basic"; transcript_support_level "NA";\n+22\tensembl\texon\t202368\t202657\t.\t-\t.\tgene_id "ENSG00000240160"; gene_version "3"; transcript_id "ENST00000467969"; transcript_version "3"; exon_number "1"; gene_name "RN7SL263P"; gene_source "ensembl"; gene_biotype "misc_RNA"; transcript_name "RN7SL263P-201"; transcript_source "ensembl"; transcript_biotype "misc_RNA"; exon_id "ENSE00001875334"; exon_version "3"; tag "basic"; transcript_support_level "NA";\n+22\thavana\tgene\t203353\t205591\t.\t+\t.\tgene_id "ENSG00000230701"; gene_version "2"; gene_name "FBXW4P1"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+22\thavana\ttranscript\t203353\t205591\t.\t+\t.\tgene_id "ENSG00000230701"; gene_version "2"; transcript_id "ENST00000426721"; transcript_version "2"; gene_name "FBXW4P1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "FBXW4P1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+22\thavana\texon\t203353\t205591\t.\t+\t.\tgene_id "ENSG00000230701"; gene_version "2"; transcript_id "ENST00000426721"; transcript_version "2"; exon_number "1"; gene_name "FBXW4P1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "FBXW4P1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001782951"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+22\thavana\tgene\t263129\t263513\t.\t-\t.\tgene_id "ENSG00000214526"; 
gene_version "3"; gene_name "AP000343.1"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+22\thavana\ttranscript\t263129\t263513\t.\t-\t.\tgene_id "ENSG00000214526"; gene_version "3"; transcript_id "ENST00000440602"; transcript_version "1"; gene_name "AP000343.1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "AP000343.1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+22\thavana\texon\t263129\t263513\t.\t-\t.\tgene_id "ENSG00000214526"; gene_version "3"; transcript_id "ENST00000440602"; transcript_version "1"; exon_number "1"; gene_name "AP000343.1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "AP000343.1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001526946"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+22\thavana\tgene\t267202\t269079\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA";\n+22\thavana\ttranscript\t267202\t269079\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; transcript_id "ENST00000450776"; transcript_version "1"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "LINC02556-201"; transcript_source "havana"; transcript_biotype "lincRNA"; tag "basic"; transcript_support_level "3";\n+22\thavana\texon\t267202\t267377\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; transcript_id "ENST00000450776"; transcript_version "1"; exon_number "1"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "LINC02556-201"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001785308"; exon_version "1"; tag "basic"; transcript_support_level "3";\n+22\thavana\texon\t268910\t269079\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; transcript_id "ENST00000450776"; transcript_version 
"1"; exon_number "2"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "LINC02556-201"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001710203"; exon_version "1"; tag "basic"; transcript_support_level "3";\n'
b
diff -r 2d32e6c86c48 -r 77021ad5037d test-data/read1.fastq.gz
b
Binary file test-data/read1.fastq.gz has changed
b
diff -r 2d32e6c86c48 -r 77021ad5037d test-data/read2.fastq.gz
b
Binary file test-data/read2.fastq.gz has changed