Repository 'te_finder'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/te_finder

Changeset 0:838fb3a1678f (2022-08-08)
Next changeset 1:2edb80d68a1b (2022-09-23)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
added:
TEfinder
TEfinder.xml
test-data/DiscordantReads.bam
test-data/List_of_TEs.txt
test-data/TEinsertions.bed
test-data/TEinsertions.gtf
test-data/TEs.gtf
test-data/reference.fa
test-data/sample.bam
b
diff -r 000000000000 -r 838fb3a1678f TEfinder
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/TEfinder Mon Aug 08 19:41:18 2022 +0000
[
b'@@ -0,0 +1,333 @@\n+#!/usr/bin/env bash\n+\n+##\n+##\n+## Authors: Vista Sohrab & Dilay Hazal Ayhan\n+## Date: January 15, 2021\n+## Description: TEfinder uses discordant reads to detect novel transposable element insertion events in short read paired-end sample sequencing data. \n+##              Software dependencies include bedtools 2.28.0 or later, samtools 1.3 or later, picard 2.0.1 or later\n+##              Required inputs include sample alignment file (.bam|.sam), reference genome FASTA (.fa), reference TE annotation in GFF/GTF or GFF3 (.gff|.gtf), and TEs of interest (.txt)\n+##\n+## University of Massachusetts Amherst\n+##\n+##\n+##\n+##\n+\n+set -e\n+\n+margs=4\n+\n+# Functions\n+function example {\n+     echo -e "example: TEfinder -alignment sample.bam -fa reference.fa -gtf TEs.gtf -te List_of_TEs.txt"\n+}\n+\n+function help {\n+     echo -e "REQUIRED:"\n+     echo -e "  -alignment,  --alignmentFile       STR  sample reads aligned to reference genome (BAM/SAM file)"\n+     echo -e "  -fa,         --FastaFile           STR  reference genome FASTA index (FA file)"\n+     echo -e "  -gtf,        --TransposonsInGenome STR  reference genome TE annotation (GFF2/GTF file)"\n+     echo -e "  -te,         --TransposonsToSearch STR  TE names (single column text file)\\n"\n+     echo -e "OPTIONAL:"\n+     echo -e "  -bamo,       --DiscordantReads     STR  BAM output\\n"\n+     echo -e "  -bedo,       --bTEinsertions       STR  TEinsertions BED output\\n"\n+     echo -e "  -gtfo,       --gTEinsertions       STR  TEinsertions GTF output\\n"\n+     echo -e "  -fis,        --FragmentInsertSize  INT  short-read sequencing fragment insert size [400]"\n+     echo -e "  -picard,     --pathToPicardjar     STR  path to picard tools .jar file [picard.jar]"\n+     echo -e "  -md,         --MaxDistanceForMerge INT  maximum distance between reads for bedtools merge [150]"\n+     echo -e "  -k,          --MaxTSDLength        INT  maximum TE target site duplication (TSD) length 
[20]"\n+     echo -e "  -maxHeapMem, --MaxHeapMemory       INT  java maximum heap memory allocation for picard in Mb [2000]" \n+     echo -e "  -workingdir, --WorkingDirectory    STR  working directory name [TEfinder_<Date>]"\n+     echo -e "  -out,        --OutputFormat        STR  output format as GTF [BED]"\n+     echo -e "  -outname,    --OutputName          STR  output name prefix added to file names [null]"\n+     echo -e "  -threads,    --Threads             INT  number of threads for samtools multi-threading [1]"\n+     echo -e "  -intermed    --IntermediateFiles   STR  keep intermediate files created by pipeline [no]"  \n+     echo -e "  -h,          --help                     prints help\\n"\n+     example\n+}\n+\n+# check if mandatory args are empty\n+function margs_check {\n+     if [ $# -lt $margs ]; then\n+          echo -e "One or more required parameters are missing."\n+          example\n+          exit 1 # error\n+     fi\n+}\n+\n+# main workflow\n+ #### : comment out\n+function pipeline() {\n+     mkdir ${workingdir}/${line}\n+     currdir=${workingdir}/${line}\n+     echo -e $(date) " Transposon analysis for "${line}" has started\\n"\n+\n+     grep -P \'[^(\\w|\\d|\\-|\\_|\\#|\\.)]\'${line}\'[^(\\w|\\d|\\-|\\_|\\#|\\.)]\' $gtf > ${currdir}/${line}_TE.gff\n+     echo -e $(date) " Individual TE GFF has been created for "${line}"\\n" ####\n+     \n+     bedtools intersect -abam ${workingdir}/${outname}Alignments.bam -b ${currdir}/${line}_TE.gff -wa > ${currdir}/${line}_MappedReadsToTE.bam\n+     echo -e $(date) " Mapped reads to TE via bedtools intersect has been completed for "${line}"\\n" ####\n+     samtools view -@ $threads ${currdir}/${line}_MappedReadsToTE.bam | \\\n+          awk -v Ins=`expr $fis \\* 10` \'{if (($7 != "=") || ($9 > Ins) || ($9 < -Ins)) print $1}\' > ${currdir}/${line}_ReadID.txt\n+     echo -e $(date) " Identifying discordant read IDs has been completed for "${line}"\\n" ####\n+     \n+\t # if discordant readID file exists, 
then continue with remainder of TE analysis\n+\t if  [[ -s  ${currdir}/${line}_ReadID.txt ]]\n+\t then\n+\t\t  #j'..b'e been removed. \\n"\n+\n+# run pipeline for each TE\n+while IFS="" read -r line || [ -n "$line" ]\n+do\n+     pipeline &\n+done < ${workingdir}/userTE_noEmptyLines.txt\n+wait\n+echo -e $(date) " All transposons are processed. Finalizing...\\n"\n+\n+# combine discordant bam files\n+samtools merge -@ $threads -r ${workingdir}/${outname}DiscordantReads.bam ${workingdir}/*/*_DiscordantPairs.bam\n+echo -e $(date) " BAM Output: Discordant pair alignment file is now available.\\n"\n+# Sorting by position \n+samtools sort -@ $threads ${workingdir}/${outname}DiscordantReads.bam | samtools view -h -o ${workingdir}/${outname}DiscordantReads.sam\n+grep -v \'^@PG\' ${workingdir}/${outname}DiscordantReads.sam > ${workingdir}/${outname}DiscordantReadsNoPG.sam\n+rm ${workingdir}/${outname}DiscordantReads.sam\n+samtools view -hb -x "PG" --no-PG --remove-flags "PG" -O BAM ${workingdir}/${outname}DiscordantReadsNoPG.sam -o ${bamo}\n+rm ${workingdir}/${outname}DiscordantReadsNoPG.sam\n+\n+# update output BED file with TEfinder results: organize the starting file\n+awk \'{print $2"\\t"$3"\\t"$4"\\t"$1"\\t"$8+$12"\\t.\\tFR="$8";RR="$12";InsRegion="$6"-"$11";FILTER="}\' ${workingdir}/insertions.txt > ${workingdir}/TEinsertions_putative.bed\n+# find the entries in repeat regions for filtering\n+bedtools intersect -wa -u -a ${workingdir}/TEinsertions_putative.bed -b $gtf > ${workingdir}/TEinsertions_putative_inrepeat.bed\n+# filtering process\n+while IFS="" read -r line || [ -n "$line" ]\n+do\n+\t#located in repeat region\n+\tif (grep -Fxq "$line" "${workingdir}/TEinsertions_putative_inrepeat.bed")\n+\tthen\n+        line=$line"in_repeat,"\n+\tfi\n+\t\n+\t#weak evidence\n+\treadc=$(echo $line | awk \'{print $5}\')\n+\tif (( $readc < 10 ))\n+\tthen\n+\tline=$line"weak_evidence,"\n+\tfi\n+\t\n+\t#strand-biased\t\n+\tFR=$(echo $line | grep -o \'FR=[[:digit:]]*\' | cut -f2 
-d\'=\')\n+\tRR=$(echo $line | grep -o \'RR=[[:digit:]]*\' | cut -f2 -d\'=\')\n+\tvar1=$(echo \'e(l(\'$FR\')*1.25)\' | bc -l)\n+\tvar2=$(echo \'e(l(\'$FR\')*0.8)\' | bc -l)\n+\t\n+\tif [ $(echo "$RR > $var1" | bc) -eq 1 ] || [ $(echo "$RR < $var2" | bc) -eq 1 ]\n+\tthen\n+\tline=$line"strand_bias,"\n+\tfi\n+\t\n+\t#pass\n+\tlastchar=${line: -1}\n+\tif [ $lastchar == "," ]\n+\tthen\n+\tline=${line::${#line}-1}\n+\telse\n+\tline=$line"PASS"\n+\tfi\n+\t\n+\t#write to final output\n+\tprintf "%s\\n" "$line" >> ${workingdir}/${outname}TEinsertions.bed\n+     \n+done < ${workingdir}/TEinsertions_putative.bed\n+wait\n+echo -e $(date) " BED Output: TEfinder output BED file is now available.\\n"\n+# Sorting\n+# cp ${workingdir}/${outname}TEinsertions.bed ${outo}\n+bedtools sort -chrThenSizeA -i ${workingdir}/${outname}TEinsertions.bed > ${bedo}\n+# cat ${bedo}\n+\n+# gtf option - create output GTF files with TEfinder results\n+if [ ! -z "$out" ]\n+then\n+  awk \'FNR > 1 {print $1"\\tTEfinder\\tTIP\\t"$2 + 1"\\t"$3"\\t"$5"\\t.\\t.\\tte_name \\""$4"\\"; tags \\""$7"\\""}\' ${workingdir}/${outname}TEinsertions.bed > ${workingdir}/${outname}TEinsertions.gtf\n+  bedtools sort -chrThenSizeA -i ${workingdir}/${outname}TEinsertions.gtf > ${gtfo}\n+  # awk \'FNR > 1 {print $1"\\tTEfinder\\tTIP\\t"$2 + 1"\\t"$3"\\t"$5"\\t.\\t.\\tte_name \\""$4"\\"; tags \\""$7"\\""}\' ${bedo} > ${gtfo}\n+  # Sorting\n+  # cp ${workingdir}/${outname}TEinsertions.gtf ${gtfo}\n+  echo -e "\\n\\n"\n+  # cat ${gtfo}\n+  # bedtools sort -chrThenSizeA -i ${workingdir}/${outname}TEinsertions.gtf > ${gtfo}\n+  echo -e $(date) " GTF Output: TEfinder output GTF file is now available.\\n"\n+fi\n+\n+# clean working directory\n+if [ -z "$intermed" ]\n+then\n+ rm ${workingdir}/TEinsertions_putative.bed ${workingdir}/TEinsertions_putative_inrepeat.bed ${workingdir}/reference.fa ${workingdir}/reference.fa.fai \\\n+ ${workingdir}/alignmentInput.sorted.bam ${workingdir}/insertions.txt 
${workingdir}/${outname}Alignments.bam ${workingdir}/userTE_noEmptyLines.txt \n+ rm -r ${workingdir}/*/\n+fi\n+\n+if [ `wc -l <${workingdir}/${outname}TEinsertions.bed` -le "1" ]\n+then\n+ echo -e $(date) " Error: TEfinder run unsuccessful."\n+else\n+ echo -e $(date) " TE insertion output files have been created. TEfinder completed successfully."\n+fi\n'
b
diff -r 000000000000 -r 838fb3a1678f TEfinder.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/TEfinder.xml Mon Aug 08 19:41:18 2022 +0000
[
@@ -0,0 +1,114 @@
+<tool id="te_finder" name="TEfinder" version="1.0.1" profile="21.05">
+    <description>Transposable element insertions finder</description>
+
+    <!--
+        Runtime dependencies of the bundled TEfinder script:
+        samtools and bedtools do the alignment filtering, merging and interval work,
+        picard is the .jar the script's -picard option points at,
+        grep is needed for the Perl-regex (grep -P) TE name match,
+        bc evaluates the strand-bias thresholds.
+    -->
+    <requirements>
+        <requirement type="package" version="1.15.1">samtools</requirement>
+        <requirement type="package" version="2.30.0">bedtools</requirement>
+        <requirement type="package" version="2.27.4">picard</requirement>
+        <requirement type="package" version="3.4">grep</requirement>
+        <requirement type="package" version="1.07.1">bc</requirement>
+    </requirements>
+
+    <!--
+        Runs the TEfinder script shipped next to this wrapper.
+        Dataset paths are single-quoted so the shell treats them literally.
+        GALAXY_SLOTS is escaped from Cheetah and double-quoted so that the shell
+        (not Cheetah) expands it at job run time; inside single quotes the script
+        would receive the unexpanded text instead of a thread count.
+        The -gtfo and -out arguments are only passed when GTF output was requested,
+        matching the filter on the gteinsertion output.
+    -->
+    <command>
+        <![CDATA[
+            '$__tool_directory__/TEfinder' -fa '$required_inputs.FastaFile'
+            -alignment '$required_inputs.alignmentFile'
+            -gtf '$required_inputs.TransposonsInGenome'
+            -te '$required_inputs.TransposonsToSearch'
+            -bamo '$discordantreads'
+            -bedo '$bteinsertion'
+            -threads "\${GALAXY_SLOTS:-1}"
+            -fis $advanced_options.FragmentInsertSize
+            -md $advanced_options.MaxDistanceForMerge
+            -k $advanced_options.MaxTSDLength
+            #if str($advanced_options.OutFormat) == "gtf":
+            -gtfo '$gteinsertion'
+            -out $advanced_options.OutFormat
+            #end if
+        ]]>
+    </command>
+
+    <inputs>
+        <!-- <param format="fasta" name="input" type="data" label="Source file"/> -->
+        <!-- The four datasets TEfinder cannot run without (-fa, -alignment, -gtf, -te). -->
+        <section name="required_inputs" title="Required Inputs" expanded="True">
+            <param name="FastaFile" type="data" format="fasta" label="Select reference genome FASTA index (FA/FASTA file)" />
+            <param name="alignmentFile" type="data" format="bam" label="Select sample reads aligned to reference genome (BAM/SAM file)" />
+            <param name="TransposonsInGenome" type="data" format="gtf" label="Select reference genome TE annotation (GFF/GTF file)" />
+            <!-- "txt" is the Galaxy datatype extension for plain text; the tests upload this input as txt. -->
+            <param name="TransposonsToSearch" type="data" format="txt" label="Select TE names" help="Single column text file" />
+        </section>
+        <!-- Advanced Options: defaults mirror the script's documented defaults (400 / 150 / 20, BED output). -->
+        <section name="advanced_options" title="Advanced Options" expanded="False">
+            <param name="FragmentInsertSize" argument="-fis" type="integer" min="0" value="400" label="Short-read sequencing fragment insert size" />
+            <param name="MaxDistanceForMerge" argument="-md" type="integer" min="0" value="150" label="Maximum distance between reads for bedtools merge" />
+            <param name="MaxTSDLength" argument="-k" type="integer" min="0" value="20" label="Maximum TE target site duplication (TSD) length" />
+            <param name="OutFormat" argument="-out" type="select" display="radio" label="Select output format as BED [GTF]" help="See help below for more details">
+                <option value="bed" selected="True">Default format is BED</option>
+                <option value="gtf">Other available format is GTF (-out gtf)</option>
+            </param>
+        </section>
+    </inputs>
+
+    <outputs>
+        <!-- BED of TE insertion calls; always produced (written via -bedo). -->
+        <data format="bed" name="bteinsertion" label="${tool.name} on ${on_string}: BED" />
+        <!-- GTF rendering of the same calls; only created when the GTF output format is selected. -->
+        <data format="gtf" name="gteinsertion" label="${tool.name} on ${on_string}: GTF">
+            <filter>advanced_options['OutFormat'] and 'gtf' in advanced_options['OutFormat']</filter>
+        </data>
+        <!-- Merged discordant read pairs (written via -bamo); labelled like its siblings so it is identifiable in the history. -->
+        <data format="bam" name="discordantreads" label="${tool.name} on ${on_string}: Discordant reads BAM" />
+    </outputs>
+
+    <tests>
+        <!-- BED-only run: OutFormat=bed, so the GTF output is filtered out and only
+             the BED and the discordant-reads BAM are expected (2 outputs). -->
+        <test expect_num_outputs="2">
+            <!-- TEfinder commands: TEfinder -alignment sample.bam -fa reference.fa -gtf TEs.gtf -te List_of_TEs.txt -->
+            <param name="FastaFile" ftype="fasta" value="reference.fa"/>
+            <param name="alignmentFile" ftype="bam" value="sample.bam"/>
+            <param name="TransposonsInGenome" ftype="gtf" value="TEs.gtf"/>
+            <param name="TransposonsToSearch" ftype="txt" value="List_of_TEs.txt"/>
+            <param name="OutFormat" value="bed" />
+            <output name="bteinsertion" file="TEinsertions.bed" ftype="bed"/>
+            <output name="discordantreads" file="DiscordantReads.bam" ftype="bam"/>  
+        </test>
+
+        <!-- GTF run: same inputs with OutFormat=gtf, so the GTF output is produced
+             in addition to the BED and the discordant-reads BAM (3 outputs). -->
+        <test expect_num_outputs="3">
+            <!-- TEfinder commands: TEfinder -alignment sample.bam -fa reference.fa -gtf TEs.gtf -te List_of_TEs.txt -out gtf -->
+            <param name="FastaFile" ftype="fasta" value="reference.fa"/>
+            <param name="alignmentFile" ftype="bam" value="sample.bam"/>
+            <param name="TransposonsInGenome" ftype="gtf" value="TEs.gtf"/>
+            <param name="TransposonsToSearch" ftype="txt" value="List_of_TEs.txt"/>
+            <param name="OutFormat" value="gtf" />
+            <output name="bteinsertion" file="TEinsertions.bed" ftype="bed"/>
+            <output name="gteinsertion" file="TEinsertions.gtf" ftype="gtf"/>
+            <output name="discordantreads" file="DiscordantReads.bam" ftype="bam"/>
+        </test>
+    </tests>
+
+    <help>
+        <![CDATA[
+        A bioinformatics tool for detecting novel transposable element insertions
+
+        Authors: Vista Sohrab & Dilay Hazal Ayhan
+
+        TEfinder uses discordant reads to detect novel transposable element insertion events in paired-end sample sequencing data.
+
+        **Output files**::
+
+            TE_insertions.bed contains the TE insertion events identified in the sample. In the final column, a FILTER attribute of "PASS" marks a high-confidence insertion event, while "in_repeat", "weak_evidence", "strand_bias", or a comma-separated combination of these three labels marks a less confident insertion event.
+            
+            TE_insertions.gtf is provided with the same information as the BED file if using -out GTF
+            
+            DiscordantReads.bam contains all discordant reads that have been identified based on the TEs of interest that have been submitted to TEfinder
+
+        **Note**::
+
+            Modifying the maximum TSD length (-k) could be useful if there is an unexpected number of insertion events identified with the default parameter. The optimal maximum TSD length can vary across datasets.
+            Modifying the fragment insert size (-fis) based on the sequencing library preparation can be useful.
+
+        ]]>
+    </help>
+
+    <citations>
+        <citation type="doi">10.5281/zenodo.4479946</citation>
+    </citations>
+
+</tool>
b
diff -r 000000000000 -r 838fb3a1678f test-data/DiscordantReads.bam
b
Binary file test-data/DiscordantReads.bam has changed
b
diff -r 000000000000 -r 838fb3a1678f test-data/List_of_TEs.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/List_of_TEs.txt Mon Aug 08 19:41:18 2022 +0000
b
@@ -0,0 +1,15 @@
+Foret1X65452
+ForetFv
+Fot1Active
+Foxy2
+Frodo
+Gollum-Fv4
+hAT210-short-Active
+Hop2
+HopAY267761
+Hornet
+Hornet-small
+SkippyFv
+SkippyL34658
+Strider
+Tac
b
diff -r 000000000000 -r 838fb3a1678f test-data/TEinsertions.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEinsertions.bed Mon Aug 08 19:41:18 2022 +0000
b
@@ -0,0 +1,1 @@
+U_9 10325 10327 Hornet-small 16 . FR=7;RR=9;InsRegion=9918-10634;FILTER=PASS
b
diff -r 000000000000 -r 838fb3a1678f test-data/TEinsertions.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEinsertions.gtf Mon Aug 08 19:41:18 2022 +0000
b
@@ -0,0 +1,1 @@
+U_9 TEfinder TIP 10326 10327 16 . . te_name "Hornet-small"; tags "FR=7;RR=9;InsRegion=9918-10634;FILTER=PASS"
b
diff -r 000000000000 -r 838fb3a1678f test-data/TEs.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEs.gtf Mon Aug 08 19:41:18 2022 +0000
b
@@ -0,0 +1,65 @@
+S_3n6.54 RepeatMasker similarity 1734 2492  0.0 + . Target "Motif:Hornet-small" 1 759
+S_3n6.54 RepeatMasker similarity 7002 7777  0.0 + . Target "Motif:Hornet-small" 1 759
+S_3n6.54 RepeatMasker similarity 8181 8457 23.4 + . Target "Motif:Frodo" 80 345
+S_3n6.54 RepeatMasker similarity 8538 8693 16.0 + . Target "Motif:Frodo" 440 604
+U_32 RepeatMasker similarity 1792 1954 27.0 - . Target "Motif:Hop2" 2645 2808
+U_32 RepeatMasker similarity 3324 6310  1.9 + . Target "Motif:Hornet" 1 2995
+U_32 RepeatMasker similarity 9977 10735  0.0 + . Target "Motif:Hornet-small" 1 759
+U_32 RepeatMasker similarity 12284 12531 31.2 - . Target "Motif:HopAY267761" 2295 2542
+U_32 RepeatMasker similarity 14704 14880 28.8 - . Target "Motif:SkippyFv" 26 209
+U_39 RepeatMasker similarity 11436 13291 22.2 - . Target "Motif:Fot1Active" 45 1897
+U_39 RepeatMasker similarity 13285 13331 14.9 - . Target "Motif:Fot1Active" 1859 1907
+U_39 RepeatMasker similarity 13293 13473 23.7 - . Target "Motif:hAT210-short-Active" 248 428
+U_39 RepeatMasker similarity 13616 13701 26.7 - . Target "Motif:hAT210-short-Active" 8 93
+U_39 RepeatMasker similarity 13623 13732 25.7 - . Target "Motif:Fot1Active" 28 138
+U_39 RepeatMasker similarity 13854 13914  0.0 + . Target "Motif:Strider" 4 64
+U_9 RepeatMasker similarity 3434 3621  4.8 - . Target "Motif:Gollum-Fv4" 1 188
+U_9 RepeatMasker similarity 27414 28067  4.3 + . Target "Motif:Foxy2" 3 650
+U_9 RepeatMasker similarity 34938 35293 29.1 - . Target "Motif:Hop2" 2561 2912
+U_9 RepeatMasker similarity 35614 37453 33.2 - . Target "Motif:HopAY267761" 219 2046
+U_9 RepeatMasker similarity 38327 38719 25.2 - . Target "Motif:hAT210-short-Active" 37 428
+U_9 RepeatMasker similarity 38862 38992 20.3 + . Target "Motif:Tac" 1 131
+U_9 RepeatMasker similarity 39383 39676 30.8 - . Target "Motif:SkippyFv" 1359 1652
+U_9 RepeatMasker similarity 40797 40987 26.3 + . Target "Motif:ForetFv" 2916 3106
+U_9 RepeatMasker similarity 41020 41950 33.9 - . Target "Motif:SkippyFv" 1055 2018
+U_9 RepeatMasker similarity 42804 42939 25.4 - . Target "Motif:SkippyFv" 46 183
+U_9 RepeatMasker similarity 42928 43066 20.7 + . Target "Motif:SkippyFv" 69 209
+U_9 RepeatMasker similarity 42948 43062 20.4 - . Target "Motif:SkippyL34658" 225 340
+U_9 RepeatMasker similarity 43223 43268 23.9 + . Target "Motif:SkippyFv" 405 450
+U_9 RepeatMasker similarity 43642 44404 23.7 - . Target "Motif:ForetFv" 1065 1890
+U_9 RepeatMasker similarity 44390 44567 24.2 + . Target "Motif:Foret1X65452" 671 851
+U_9 RepeatMasker similarity 44568 45541 25.7 + . Target "Motif:Foret1X65452" 918 1884
+U_9 RepeatMasker similarity 45662 46224 33.0 + . Target "Motif:SkippyFv" 1097 1659
+U_9 RepeatMasker similarity 46220 47160 32.5 + . Target "Motif:SkippyFv" 1700 2667
+U_9 RepeatMasker similarity 47444 47836 18.3 - . Target "Motif:hAT210-short-Active" 35 428
+U_9 RepeatMasker similarity 49033 49119 32.2 - . Target "Motif:SkippyFv" 375 462
+U_9 RepeatMasker similarity 49264 49427 27.6 - . Target "Motif:SkippyFv" 40 209
+U_9 RepeatMasker similarity 49297 49455 34.2 + . Target "Motif:SkippyL34658" 254 419
+U_9 RepeatMasker similarity 49455 49542 17.1 - . Target "Motif:SkippyFv" 83 172
+U_9 RepeatMasker similarity 49543 49748 23.5 + . Target "Motif:SkippyFv" 1 211
+U_9 RepeatMasker similarity 50626 51099 26.6 + . Target "Motif:SkippyFv" 1072 1548
+U_9 RepeatMasker similarity 51139 51420 27.0 + . Target "Motif:SkippyFv" 2113 2393
+U_9 RepeatMasker similarity 51695 52487 25.7 + . Target "Motif:SkippyFv" 2359 3232
+U_9 RepeatMasker similarity 52486 52569 19.5 + . Target "Motif:SkippyFv" 3485 3568
+U_9 RepeatMasker similarity 52633 52977 35.9 + . Target "Motif:SkippyFv" 1643 1987
+U_9 RepeatMasker similarity 53105 53580 24.5 - . Target "Motif:ForetFv" 2593 3096
+U_9 RepeatMasker similarity 55184 55388 21.5 + . Target "Motif:ForetFv" 2602 2807
+U_9 RepeatMasker similarity 55385 55428 15.9 + . Target "Motif:Fot1Active" 37 80
+U_9 RepeatMasker similarity 55405 55504 19.6 + . Target "Motif:hAT210-short-Active" 4 106
+U_9 RepeatMasker similarity 55759 55825 12.9 + . Target "Motif:hAT210-short-Active" 363 428
+U_9 RepeatMasker similarity 55782 55827 19.6 - . Target "Motif:Fot1Active" 35 80
+U_9 RepeatMasker similarity 55870 56016 25.0 - . Target "Motif:ForetFv" 1789 1932
+U_9 RepeatMasker similarity 55872 55997 24.6 - . Target "Motif:Foret1X65452" 1046 1173
+U_9 RepeatMasker similarity 56922 58023 24.3 + . Target "Motif:ForetFv" 633 1706
+U_9 RepeatMasker similarity 57179 58049 21.8 + . Target "Motif:Foret1X65452" 125 976
+U_9 RepeatMasker similarity 58036 58425 37.8 + . Target "Motif:SkippyL34658" 2270 2664
+U_9 RepeatMasker similarity 58950 59186 34.9 - . Target "Motif:SkippyFv" 4401 4641
+U_9 RepeatMasker similarity 60402 60921 32.0 - . Target "Motif:SkippyFv" 3576 4101
+U_9 RepeatMasker similarity 60936 61628 27.4 - . Target "Motif:SkippyFv" 2785 3479
+U_9 RepeatMasker similarity 61657 62079 25.1 + . Target "Motif:hAT210-short-Active" 9 432
+U_9 RepeatMasker similarity 62079 62351 26.4 - . Target "Motif:SkippyFv" 2493 2774
+U_9 RepeatMasker similarity 63029 63201 38.4 - . Target "Motif:SkippyL34658" 318 503
+U_9 RepeatMasker similarity 63210 63598 18.8 - . Target "Motif:hAT210-short-Active" 11 401
+U_9 RepeatMasker similarity 63622 63720 21.2 + . Target "Motif:SkippyFv" 110 209
+U_9 RepeatMasker similarity 64067 64493 19.9 + . Target "Motif:hAT210-short-Active" 1 428
+U_9 RepeatMasker similarity 64560 72405  0.0 - . Target "Motif:SkippyL34658" 1 7846
b
diff -r 000000000000 -r 838fb3a1678f test-data/reference.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reference.fa Mon Aug 08 19:41:18 2022 +0000
b
b'@@ -0,0 +1,2116 @@\n+>S_3n6.54\n+TCAGCATCCGTGTCAATCCCATGGGTAAAAAGGTCGAGATCCAGGGACTAGCGGTTGGAG\n+ATGACAACATTTGTCGTTTTGATCGATCTATTGGCGAACTCGTGAAATCTGATAAACTCC\n+CCATTCGCATTACAATAAAGGACAATGAGGAAGATCGAAGTGACCTTGCTGAAAAGCTTC\n+GGGGCTTATTTACCTCGGAGCAAGCTATCGCTGGCATGTTCCCCCTTCTTAATGCTTTCC\n+TCTATTACCTTATACTGATCACCCTTCACAGATATACTTCATGACCTTAAACTCCACATC\n+ATCCAGAAATTAATTCCCAAGCTTCAAAGCAAAGGTTATGTCGAGCCCGCTAAAGCAGAA\n+GCCAACACCCACTCTGAGAGCAGGGCCCAGGTAGCGCAGGGCCCTAACCGACCTTGCCAT\n+GGTGGTCCTATAGTAAACTCCTATAGGCCCCCGCCTCCAGCGATTCCACACCCTGAGATG\n+GCCCgtcctcaacctcatccagTAGGCGACTTCCCTCCTCCAGGCTTTGAGGATGAGTAC\n+GAAATGCACCGGCCTTCACGAAGCGGCCTTCAAATGCCTGGTAGATCGCCTTACAATATT\n+GGCCACGATGATTTGAATCCGCCCGGTCTTGGCCCCCATGATCCTCTTCGTGGTTCTTTC\n+ACTGGTGGGTTACCTCAACCCGGTGGTGGCTCGGGTATGCATCCCACCTTTGATGACCCA\n+TTGTTTGGAGGTCAAGGAGACGATGGATCATCCGGCTTCGATCCTCAAGTACCGCCTGGA\n+GCGCGATGGGATCCCACTGGCCCTGGTGGAAACCCCAGATTCCCTGGTCCTAGTACCGGA\n+AGGGGAAACAATCGGTTTGGCGGAGACATCATTTAAGTATAGTGGTTTATCCATCAGCGG\n+GCTTCCTGgtatgaacaagaaggaggtgcaCATCGGGTTTGGTGGGCCGCAGACACTGGT\n+CATCCGTGAAAAGTCCGAGCGAACCTATACCTCAAGGAACCCTCCTGCAGGTTTTGTCGA\n+AGGCCCATCTATGCATGAAGCTATGAaaggcgatggcgatgaacACAACAAATCGCAGCC\n+TCAACAGCTAGAAAAGGGGGACAAGCATGACGAAAAGACGAAGTATTAGCTTGCTGAGCG\n+CAGCGTTGGCGGGTTCTCCTGTGCCTTCAGCTTCCCGTCTCATGTGGATCAGGATAGTGT\n+CAGTACCAGCTTCCAAGACGGCATCCTGAGTATTATTAACCTGATGGTCAAGAAACATGA\n+GTCCCCCCGCATCCATATTGAATCAACTTTGGTCAATGTTATCGGCATTTATGTTAGACG\n+GAAAATAGTTGAAGTAAAAGCCTCCTACATCTTCAGCACTACTTTCTAGGCAAGCATCCA\n+AGTCAAATCCACCAAAATCATCCGCAAACGTAGGGAAACTCGCCAAGTCCTACATCCATA\n+TTAGCAATCACCTCACCAGTCCCTAAAGAGAGAAAGGGATTTACAGTCATATTACAATTT\n+TCCATGTTTAATGTCGCGGTGTAAACCAAGTCAACAGGTGGGTTAGCCGCCATAGAACCA\n+GGCGTAAATGGCAGAGCTACTGCTAGCTGTCCCGTAGGCACGGCCTGCGAAAGATTAGAA\n+TTCTGGGCACCCTTCCTTATGCCGACGGGGTTGATATGAGGCACTGGGCTCGCAGCAGCA\n+TTCGCAGGGCGTGTCGCTGTAGAGTTGGCAAGGCAGCCTTGCTCGTCTGTgaccagggtt\n+caaatccataccacgaaatccacatatggatccataatgtggatccatatccattccatt\n+ccattccacgtgggcttcagcatttccatatccacgccacgaagccccttccacatgttc\n+cacatgtggaaa
tcgtggaatattttaggtggggttcgaaaataccttgatttccttgtg\n+aaatcccgcatatcctaagtactttctatcacccacaacaacacaacatcgatttgccac\n+cccattttcctccgtacacaatgagtccattctgggggcttcactgttactcgcgttact\n+accccaaatcgaaatggccactccccatcctaccaagtcaaccacatccgttccagaacg\n+aacagaagaggacaatcaacggctctttcagctctacaaaagctggatcttgacggagag\n+agacggcaaggcgcgtctataggtatatcggttcggctacgaatatccagcataacaaga\n+aacaggaacgtcggtgggtgtgttgcctttgcgtcaagcaacggcggattttaatggacc\n+aatgacaaaaacattgacgtcaccgagggcgggcgggacccattaattaccattgttgga\n+cgaagtaggtttccattccacgaattccacaatatggatccatttccataatggatccat\n+ttccacccattccacatcgaaattattagtcagcatccatatccacgccatattcacaaa\n+tgtgtcgtggagtgtcgtggatttgaaccctgtCTGTGACAGTAAGGAAAGGGTGCTCCT\n+TGTCTTGACACTGTCGTTAGCATCTGCCGGCTGGACTGGCGGGAGTCGAGACAGTTCTTG\n+CAATTTACCCTGGACACCACGCTGGGGTGTCTCGGGAATCTGGTTTTCACCTGAATCGTT\n+CTGCCACTGTACCGACCAATTGTCATCTATTTTACCTTCCTGCTGAAATGATGTTCCTTT\n+TGCCGCCACTGATGGCCGGGACGTTTGGCTACTGGGCTGTTGAAAAGTACGTGAATTAGA\n+GGCTCTTGTCCAGTCATTCATCCGCACTACACCTACGTTGCCGCCTAGGCCGCCTACCTT\n+TGCTTTAGGGAAATCAAGTACTATACTGTAGGTAAGCCATTGGGCAAAGATATCAAGGGA\n+CTCAAGATAATGGCGAGATCCGAAAAAAATAGATGGGAAAACACGCTAATGTCGTTCATT\n+GGTTAAGTGCCAAGTTTCACCTGCTTGGTAGTATCAGGCAATGCATCTGGTCTTGGTGCT\n+CGTAGAGGGggccgttgaagaagccttGAACTTTGTATAGGCTCTTGACTGCCCTGGCCA\n+CCAGGTATTCTATAATTTCCACACAGACTGCTTCTGCCTGGGCTGTAGCTCAACGCGGTT\n+ATTTCTTGCGGTGGCTCTGGCCCAGGCCGGGCCGGAAGTTCAGGCCTGCAGGTATCAGAC\n+ACACCTACCGGTGGCTGCCTTCACAACTACGTCGCCAACGGCAGCAACGGCGGCTTCGAG\n+AGCCACAGAGTACTGTTTACCTAAACCCGACCCAAGCTGTGTTTGTCGACCTAGTGAATC\n+TGCTGCCGGAAGCCAGATACCACGTGTTTCTCGACAAcctattctcttcttccaaccTAT\n+TCCGTCGGCTACGTCAGCTTGGGCACGGAGGCACAGGCACTGCCCGCCGGAACTGTGGTC\n+TTTACAGGCTGctcgtcaagctcaaagCTGGTGATAGTACTGCTGTTGGTAGTATTCCTT\n+TTAACTGCCTTAAAGCAATCCCAACAGCCGATAACCTAATTAGTCGAGGAGACAAATCTT\n+TACAGATGAGCCTTACTGATTCGACTTTGGATAGGTCAACCAGATCGCTTGGAAACACAA\n+TGCCCTTGTGTTGTTTTTATCCGCCGTATTTACAGGCAATGAACGGGTTGACCGTATAAG\n+GAAACGACCAACAACAGACCAACCCGCAGCACGGCGGCTTGAGGGCCCTAGCTTGGGACT\n+TCCTGCTAGAAATAGCCctgatcaacagcttcattctGCAGCAACGAGGAAACCCACGg
t\n+ggaagccagagaagtctCAAGCGGAGTGGCGGCAGCTTCTTTTTAATGGCCTTGTTGCAA\n+'..b'CATACGCGGTTTGGACAGAGTTTGAGTTCTGCTGTT\n+GCTTTCTGAAAGGCCTTCAGGAGCTCTTCGCTATCTGCACCCATGGTTGGTGAAGGTTAT\n+ACAGGCTGTTATTTCCAGTTGTAGTAGGCCTCGCGTTATATTTCGAATGTCCCTGATGCG\n+ATGGACGGCGCTTTTGTAAAGTAACGTCTTGCTATCGATAGTCTTAGCTTTCCCGAGATG\n+AGTCGCAGGTACCAAATATGGGAAAGTGGACGCCGCTACAGGATGGAGGGAGGTATTTGT\n+CAGACCGGCTCAGCCGCTGGGCGACGTGTCACATCGGGTTCAGCTTGGCTGTCTTGAGGA\n+CCGCTGCTGGGGTTGCGAATGTGACCAAGAAAGGCAGGCAAGAGACGACGAGTGGCAGAA\n+GGATTGAGAGCAGCGCGAAGCGAACCTATCATAATTCCGGTGCGGGGTTGTGCCAATACA\n+CCGGCTTATTTCAGAAGTCGTATGGGGTGCGGGTGTGTCTGCTTGGCGGTGCGCTATCTC\n+TCCTCTTCCTAAATTTCACTATACCATCTGGGACTAAACCTCTCGGTCAAGATCGCTCAT\n+TCAACTGCCTATTCAGGTACTGCATAGGACCTGTCTACTAAGCAACAGCATTACGATTAT\n+TCACCCATGTTGTGCGTACAAATCGCCTGTGCGATAGTGGCCGATTGACCGCGATTTATC\n+GGTCTCGTTGAAAATAATATGAACCAGCTCAGCGGTTTCTTTCTCACTTATGAACAACCC\n+AACATGCATTCAGCTGACGTGGCCTTCGGAGTCGGAGCATTCAATTGGTAATTGGCACAA\n+ATGAACAACAACCGTCGGCAAGGCCGCGGACCAACCATGAACCTGCATCTCGTTTGGGAA\n+TTCTTACTGGGTCGAAAAATACTTCCATAGTGATCAACTTTGCGGCTGAGCGATTTGTGT\n+CTGACCCCCTTTCGAAGCCCTTCACCGTCCAGTTCCGAAGAATCTAAGCTACTTTGGGCG\n+GGTAGTCTAACAAGTAGGTAAATTGTGCCACTACTATGCCCGTTTTCCAAATTGAGGAAC\n+CGACCATTACCCCAACCCCGCCAGCTCCGACGCGGCTGCTCGTTACCCAGCGGCGCCCAA\n+CGCTTCGAGTTTAGCCAAATTCTGGAGACGATGCAGGTGCAAGCGGGAAGAAGCCAGTTC\n+GGCGCTGGGCAAATAGCTGACCTGACAAAAGAGAATGGAAGCAAGGCTAAATTCAACGGT\n+GAATGACTGACGAAAAATAAATTTAAATTTTCTTTCGCCAGTGAGGATTCTGGTACAGAC\n+CAGATTACTTGTTTTAATCATGATAATTCAGCCTCGGATTTACCGTTGAAGTCTTTTTGA\n+CACTGTCTCTAAATAATCGAAAAACTAACTTAGactaacgtacatgataagcgaacCGGA\n+CAGATAAGCGAACCGgacaaaaatccctcataTTACATTATTactatctgatcaattgat\n+TCTCAACCAAATAACATGTCTCAATCTAATAATGAAGCTAGAAtgcttcttgcacttcag\n+gcctaTCAGGCagacccaaaattaagtctgCGACGAGCCGCAAAGATCTATGATATTAGC\n+CACCAGCGGCTCTTTGATCGAATGAATGGAGTACAGGCTCGCAGCGATTGTATCCCGAAC\n+TCACGGAAACTGAGTGATCTGGAGGAACAGGTTATAGTCCAGTatatccttgacctagat\n+tcgcgaggatttccttccCGGCTTCgtgatgttgaagaaatggcgaatcgactgcttgct\n+gaCCGCGATGCATCACtagttggcaagcgctgggctcacAATTTTATCAAGCGGCAACTA\n
+GAGCTTAAGACGCGTTTTTAGaggagatatgactatcagagggccaaatgcgaagatcca\n+atcgctattcgcaattggtttAGGCTCATACAgaatacaatcgcgaagtatggcatccga\n+tcagatgatatctggaactttgatgagaccggctttatgatgggcgtcATATCAAGCGCT\n+ATAGTTATTACTAGCTCAGAAAGGCGTGGACGGCCAAAATCAGTCCAGCCTGGAacccgg\n+gaatgggttacagcAATCcaagcgatcaatgcagaaggtcaggCGATTGATCCATTCATC\n+ATagttgcaggccaatatcaccttgctaattggtaccgagaaagcaacctcccggccacT\n+TGGGCTATTGCcacgacccaaaatggctggacagataatgagacaggccttgagtggcta\n+aagcactttaatCTATGTACAACCAaccgatcaactggtccctatcgtcttctgatcctt\n+gatggtcacgaaagccaccattcGGCCGACTTCCagatatattgtgaggagaacaatatc\n+atcacgctctgtatgccacctcattcttcccACCTGCTTCAGCCacttgatgtcgggtgc\n+tttgggctgctgaaaaaggcatatggtcgagaaatagagcatctgatcagaaggtctata\n+acccacatttccaagaccgagttcttcccagccttttatgccgcctttCAATTGACTATG\n+ACCGAGGCAAATATTaaagggggttttagaggagctggccTTGCTCCTTTTGACCCAGAA\n+GTtgtaatctcaaagcttgatgtgcagctacggactccaacgcctgttgaggaggaggcc\n+caacaagctcaatcttggacttcaaggaccccaagaaCAGTTCTTGAGGCTagatctcag\n+tctgaataccttcagagacgaatcagaagacaccatagtagctccccagagtcaattctT\n+GAAGTTTTGAGGTCTCTTGAGAAAGGAACTAAGGCAGTTATACATAAGGTCGCCTTACTT\n+GCAGCTGAGAACCGAAATCTTcgacaggcaaatgagatacttagcCGGCGGCGCAGGGCG\n+AGGAGGACACGCCTACAGAATAGAGGGAGTATGACTATACAGGAAGGTCAGgatctaatt\n+gatcagatggatgtagatataCAGGTCATAGCTGAATCatcaagaagtggtggtcaagga\n+agttcGGCGCGACTGAGAGTTCGGCGTTGCAGGACTTGCGGTAAGACTGGGCATAACGCA\n+agaacctgtcaggagggtattgaggcCTCTGAAGATGAGTGTAGTAGTTAATCTCAATTG\n+ATTAGATAGTCTGTTGCGTTTTTATTGTAATTTATCTtaagaaggttgagatttttgtcC\n+GGTTCGCTTATCTGTCCGgttcgcttatcatgtaacgtacatgataagcgagtggaaCAG\n+ACAAGCAAGTGGAACACTATACTGTACATATAAAAACAGTCTACCCTAGACAACTAAACA\n+CTAGATATAAAATTATTCAACTTATAAAAATATACTTTTTAAGCTTAGTACACTTAAATT\n+TTAAATATAGATTTTTTTAAATATTTTTAAATAAATTAATAAGCTAGATATGAAAGGGTA\n+CACAGTGCTAATAAGCTTTTTCTCAACAACTTGATAAGTAATAGTGTATATAACTAGAGA\n+AGTTTGTATCTTATATATTTTATCCTAATTAAGATATGTTTTTAATTTTATAAGATTAAA\n+GCTTGATTTAGATGTTTAAGAATTCTTTTAAAATCCCGCTTTAAGGTATAAAATTGAAGT\n+GTTCCGTACCCTTGTCTGTTCCACTCacttatcatacacgttaagcttattatataaata\n+aaagAGAGTTATATAATAGCCGCAAACTAAATTATAATCAGTCGAA
TTTTCCCTATATTT\n+TTACCCTAAGTTCTAGATAAGCTACTCTTATATTAGCTTAATTATTCTTGATGattcagc\n+cgaaggctgacTCTACATTAATTCATAAGTTTTCCCGAGTAATGAGGGTTTGGT\n'
b
diff -r 000000000000 -r 838fb3a1678f test-data/sample.bam
b
Binary file test-data/sample.bam has changed