Next changeset 1:2edb80d68a1b (2022-09-23) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948 |
added:
TEfinder TEfinder.xml test-data/DiscordantReads.bam test-data/List_of_TEs.txt test-data/TEinsertions.bed test-data/TEinsertions.gtf test-data/TEs.gtf test-data/reference.fa test-data/sample.bam |
b |
diff -r 000000000000 -r 838fb3a1678f TEfinder --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TEfinder Mon Aug 08 19:41:18 2022 +0000 |
[ |
b'@@ -0,0 +1,333 @@\n+#!/usr/bin/env bash\n+\n+##\n+##\n+## Authors: Vista Sohrab & Dilay Hazal Ayhan\n+## Date: January 15, 2021\n+## Description: TEfinder uses discordant reads to detect novel transposable element insertion events in short read paired-end sample sequencing data. \n+## Software dependencies include bedtools 2.28.0 or later, samtools 1.3 or later, picard 2.0.1 or later\n+## Required inputs include sample alignment file (.bam|.sam), reference genome FASTA (.fa), reference TE annotation in GFF/GTF or GFF3 (.gff|.gtf), and TEs of interest (.txt)\n+##\n+## University of Massachusetts Amherst\n+##\n+##\n+##\n+##\n+\n+set -e\n+\n+margs=4\n+\n+# Functions\n+function example {\n+ echo -e "example: TEfinder -alignment sample.bam -fa reference.fa -gtf TEs.gtf -te List_of_TEs.txt"\n+}\n+\n+function help {\n+ echo -e "REQUIRED:"\n+ echo -e " -alignment, --alignmentFile STR sample reads aligned to reference genome (BAM/SAM file)"\n+ echo -e " -fa, --FastaFile STR reference genome FASTA index (FA file)"\n+ echo -e " -gtf, --TransposonsInGenome STR reference genome TE annotation (GFF2/GTF file)"\n+ echo -e " -te, --TransposonsToSearch STR TE names (single column text file)\\n"\n+ echo -e "OPTIONAL:"\n+ echo -e " -bamo, --DiscordantReads STR BAM output\\n"\n+ echo -e " -bedo, --bTEinsertions STR TEinsertions BED output\\n"\n+ echo -e " -gtfo, --gTEinsertions STR TEinsertions GTF output\\n"\n+ echo -e " -fis, --FragmentInsertSize INT short-read sequencing fragment insert size [400]"\n+ echo -e " -picard, --pathToPicardjar STR path to picard tools .jar file [picard.jar]"\n+ echo -e " -md, --MaxDistanceForMerge INT maximum distance between reads for bedtools merge [150]"\n+ echo -e " -k, --MaxTSDLength INT maximum TE target site duplication (TSD) length [20]"\n+ echo -e " -maxHeapMem, --MaxHeapMemory INT java maximum heap memory allocation for picard in Mb [2000]" \n+ echo -e " -workingdir, --WorkingDirectory STR working directory name [TEfinder_<Date>]"\n+ echo -e " 
-out, --OutputFormat STR output format as GTF [BED]"\n+ echo -e " -outname, --OutputName STR output name prefix added to file names [null]"\n+ echo -e " -threads, --Threads INT number of threads for samtools multi-threading [1]"\n+ echo -e " -intermed --IntermediateFiles STR keep intermediate files created by pipeline [no]" \n+ echo -e " -h, --help prints help\\n"\n+ example\n+}\n+\n+# check if mandatory args are empty\n+function margs_check {\n+ if [ $# -lt $margs ]; then\n+ echo -e "One or more required parameters are missing."\n+ example\n+ exit 1 # error\n+ fi\n+}\n+\n+# main workflow\n+ #### : comment out\n+function pipeline() {\n+ mkdir ${workingdir}/${line}\n+ currdir=${workingdir}/${line}\n+ echo -e $(date) " Transposon analysis for "${line}" has started\\n"\n+\n+ grep -P \'[^(\\w|\\d|\\-|\\_|\\#|\\.)]\'${line}\'[^(\\w|\\d|\\-|\\_|\\#|\\.)]\' $gtf > ${currdir}/${line}_TE.gff\n+ echo -e $(date) " Individual TE GFF has been created for "${line}"\\n" ####\n+ \n+ bedtools intersect -abam ${workingdir}/${outname}Alignments.bam -b ${currdir}/${line}_TE.gff -wa > ${currdir}/${line}_MappedReadsToTE.bam\n+ echo -e $(date) " Mapped reads to TE via bedtools intersect has been completed for "${line}"\\n" ####\n+ samtools view -@ $threads ${currdir}/${line}_MappedReadsToTE.bam | \\\n+ awk -v Ins=`expr $fis \\* 10` \'{if (($7 != "=") || ($9 > Ins) || ($9 < -Ins)) print $1}\' > ${currdir}/${line}_ReadID.txt\n+ echo -e $(date) " Identifying discordant read IDs has been completed for "${line}"\\n" ####\n+ \n+\t # if discordant readID file exists, then continue with remainder of TE analysis\n+\t if [[ -s ${currdir}/${line}_ReadID.txt ]]\n+\t then\n+\t\t #j'..b'e been removed. \\n"\n+\n+# run pipeline for each TE\n+while IFS="" read -r line || [ -n "$line" ]\n+do\n+ pipeline &\n+done < ${workingdir}/userTE_noEmptyLines.txt\n+wait\n+echo -e $(date) " All transposons are processed. 
Finalizing...\\n"\n+\n+# combine discordant bam files\n+samtools merge -@ $threads -r ${workingdir}/${outname}DiscordantReads.bam ${workingdir}/*/*_DiscordantPairs.bam\n+echo -e $(date) " BAM Output: Discordant pair alignment file is now available.\\n"\n+# Sorting by position \n+samtools sort -@ $threads ${workingdir}/${outname}DiscordantReads.bam | samtools view -h -o ${workingdir}/${outname}DiscordantReads.sam\n+grep -v \'^@PG\' ${workingdir}/${outname}DiscordantReads.sam > ${workingdir}/${outname}DiscordantReadsNoPG.sam\n+rm ${workingdir}/${outname}DiscordantReads.sam\n+samtools view -hb -x "PG" --no-PG --remove-flags "PG" -O BAM ${workingdir}/${outname}DiscordantReadsNoPG.sam -o ${bamo}\n+rm ${workingdir}/${outname}DiscordantReadsNoPG.sam\n+\n+# update output BED file with TEfinder results: organize the starting file\n+awk \'{print $2"\\t"$3"\\t"$4"\\t"$1"\\t"$8+$12"\\t.\\tFR="$8";RR="$12";InsRegion="$6"-"$11";FILTER="}\' ${workingdir}/insertions.txt > ${workingdir}/TEinsertions_putative.bed\n+# find the entries in repeat regions for filtering\n+bedtools intersect -wa -u -a ${workingdir}/TEinsertions_putative.bed -b $gtf > ${workingdir}/TEinsertions_putative_inrepeat.bed\n+# filtering process\n+while IFS="" read -r line || [ -n "$line" ]\n+do\n+\t#located in repeat region\n+\tif (grep -Fxq "$line" "${workingdir}/TEinsertions_putative_inrepeat.bed")\n+\tthen\n+ line=$line"in_repeat,"\n+\tfi\n+\t\n+\t#weak evidence\n+\treadc=$(echo $line | awk \'{print $5}\')\n+\tif (( $readc < 10 ))\n+\tthen\n+\tline=$line"weak_evidence,"\n+\tfi\n+\t\n+\t#strand-biased\t\n+\tFR=$(echo $line | grep -o \'FR=[[:digit:]]*\' | cut -f2 -d\'=\')\n+\tRR=$(echo $line | grep -o \'RR=[[:digit:]]*\' | cut -f2 -d\'=\')\n+\tvar1=$(echo \'e(l(\'$FR\')*1.25)\' | bc -l)\n+\tvar2=$(echo \'e(l(\'$FR\')*0.8)\' | bc -l)\n+\t\n+\tif [ $(echo "$RR > $var1" | bc) -eq 1 ] || [ $(echo "$RR < $var2" | bc) -eq 1 ]\n+\tthen\n+\tline=$line"strand_bias,"\n+\tfi\n+\t\n+\t#pass\n+\tlastchar=${line: -1}\n+\tif [ 
$lastchar == "," ]\n+\tthen\n+\tline=${line::${#line}-1}\n+\telse\n+\tline=$line"PASS"\n+\tfi\n+\t\n+\t#write to final output\n+\tprintf "%s\\n" "$line" >> ${workingdir}/${outname}TEinsertions.bed\n+ \n+done < ${workingdir}/TEinsertions_putative.bed\n+wait\n+echo -e $(date) " BED Output: TEfinder output BED file is now available.\\n"\n+# Sorting\n+# cp ${workingdir}/${outname}TEinsertions.bed ${outo}\n+bedtools sort -chrThenSizeA -i ${workingdir}/${outname}TEinsertions.bed > ${bedo}\n+# cat ${bedo}\n+\n+# gtf option - create output GTF files with TEfinder results\n+if [ ! -z "$out" ]\n+then\n+ awk \'FNR > 1 {print $1"\\tTEfinder\\tTIP\\t"$2 + 1"\\t"$3"\\t"$5"\\t.\\t.\\tte_name \\""$4"\\"; tags \\""$7"\\""}\' ${workingdir}/${outname}TEinsertions.bed > ${workingdir}/${outname}TEinsertions.gtf\n+ bedtools sort -chrThenSizeA -i ${workingdir}/${outname}TEinsertions.gtf > ${gtfo}\n+ # awk \'FNR > 1 {print $1"\\tTEfinder\\tTIP\\t"$2 + 1"\\t"$3"\\t"$5"\\t.\\t.\\tte_name \\""$4"\\"; tags \\""$7"\\""}\' ${bedo} > ${gtfo}\n+ # Sorting\n+ # cp ${workingdir}/${outname}TEinsertions.gtf ${gtfo}\n+ echo -e "\\n\\n"\n+ # cat ${gtfo}\n+ # bedtools sort -chrThenSizeA -i ${workingdir}/${outname}TEinsertions.gtf > ${gtfo}\n+ echo -e $(date) " GTF Output: TEfinder output GTF file is now available.\\n"\n+fi\n+\n+# clean working directory\n+if [ -z "$intermed" ]\n+then\n+ rm ${workingdir}/TEinsertions_putative.bed ${workingdir}/TEinsertions_putative_inrepeat.bed ${workingdir}/reference.fa ${workingdir}/reference.fa.fai \\\n+ ${workingdir}/alignmentInput.sorted.bam ${workingdir}/insertions.txt ${workingdir}/${outname}Alignments.bam ${workingdir}/userTE_noEmptyLines.txt \n+ rm -r ${workingdir}/*/\n+fi\n+\n+if [ `wc -l <${workingdir}/${outname}TEinsertions.bed` -le "1" ]\n+then\n+ echo -e $(date) " Error: TEfinder run unsuccessful."\n+else\n+ echo -e $(date) " TE insertion output files have been created. TEfinder completed successfully."\n+fi\n' |
b |
diff -r 000000000000 -r 838fb3a1678f TEfinder.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TEfinder.xml Mon Aug 08 19:41:18 2022 +0000 |
[ |
@@ -0,0 +1,114 @@ +<tool id="te_finder" name="TEfinder" version="1.0.1" profile="21.05"> + <description>Transposable element insertions finder</description> + + <requirements> + <requirement type="package" version="1.15.1">samtools</requirement> + <requirement type="package" version="2.30.0">bedtools</requirement> + <requirement type="package" version="2.27.4">picard</requirement> + <requirement type="package" version="3.4">grep</requirement> + <requirement type="package" version="1.07.1">bc</requirement> + </requirements> + + <command> + <![CDATA[ + '$__tool_directory__/TEfinder' -fa '$required_inputs.FastaFile' + -alignment '$required_inputs.alignmentFile' + -gtf '$required_inputs.TransposonsInGenome' + -te '$required_inputs.TransposonsToSearch' + -bamo '$discordantreads' + -bedo '$bteinsertion' + -threads '\${GALAXY_SLOTS:-1}' + -fis $($advanced_options.FragmentInsertSize) + -md $($advanced_options.MaxDistanceForMerge) + -k $($advanced_options.MaxTSDLength) + #if str( $advanced_options.OutFormat) == "gtf": + -gtfo '$gteinsertion' + -out $($advanced_options.OutFormat) + #end if + ]]> + </command> + + <inputs> + <!-- <param format="fasta" name="input" type="data" label="Source file"/> --> + <section name="required_inputs" title="Required Inputs" expanded="True"> + <param name="FastaFile" type="data" format="fasta" label="Select reference genome FASTA index (FA/FASTA file)" /> + <param name="alignmentFile" type="data" format="bam" label="Select sample reads aligned to reference genome (BAM/SAM file)" /> + <param name="TransposonsInGenome" type="data" format="gtf" label="Select reference genome TE annotation (GFF/GTF file)" /> + <param name="TransposonsToSearch" type="data" format="text" label="Select TE names" help="Single column text file" /> + </section> + <!-- Advanced Options --> + <section name="advanced_options" title="Advanced Options" expanded="False"> + <param name="FragmentInsertSize" argument="-fis" type="integer" min="0" value="400" label="Short-read 
sequencing fragment insert size" /> + <param name="MaxDistanceForMerge" argument="-md" type="integer" min="0" value="150" label="Maximum distance between reads for bedtools merge" /> + <param name="MaxTSDLength" argument="-k" type="integer" min="0" value="20" label="Maximum TE target site duplication (TSD) length" /> + <param name="OutFormat" argument="-out" type="select" display="radio" label="Select output format as BED [GTF]" help="See help below for more details"> + <option value="bed" selected="True">Default format is BED</option> + <option value="gtf">Other available format is GTF (-out gtf)</option> + </param> + </section> + </inputs> + + <outputs> + <data format="bed" name="bteinsertion" label="${tool.name} on ${on_string}: BED" /> + <data format="gtf" name="gteinsertion" label="${tool.name} on ${on_string}: GTF"> + <filter>advanced_options['OutFormat'] and 'gtf' in advanced_options['OutFormat']</filter> + </data> + <data format="bam" name="discordantreads" /> + </outputs> + + <tests> + <!-- Test for the most simple case for BED output : Running TEfinder with a .bam file and a .fasta file --> + <test expect_num_outputs="2"> + <!-- TEfinder commands: TEfinder -alignment sample.bam -fa reference.fa -gtf TEs.gtf -te List_of_TEs.txt --> + <param name="FastaFile" ftype="fasta" value="reference.fa"/> + <param name="alignmentFile" ftype="bam" value="sample.bam"/> + <param name="TransposonsInGenome" ftype="gtf" value="TEs.gtf"/> + <param name="TransposonsToSearch" ftype="txt" value="List_of_TEs.txt"/> + <param name="OutFormat" value="bed" /> + <output name="bteinsertion" file="TEinsertions.bed" ftype="bed"/> + <output name="discordantreads" file="DiscordantReads.bam" ftype="bam"/> + </test> + + <!-- Test for the GTF output --> + <test expect_num_outputs="3"> + <!-- TEfinder commands: TEfinder -alignment sample.bam -fa reference.fa -gtf TEs.gtf -te List_of_TEs.txt -out gtf --> + <param name="FastaFile" ftype="fasta" value="reference.fa"/> + <param 
name="alignmentFile" ftype="bam" value="sample.bam"/> + <param name="TransposonsInGenome" ftype="gtf" value="TEs.gtf"/> + <param name="TransposonsToSearch" ftype="txt" value="List_of_TEs.txt"/> + <param name="OutFormat" value="gtf" /> + <output name="bteinsertion" file="TEinsertions.bed" ftype="bed"/> + <output name="gteinsertion" file="TEinsertions.gtf" ftype="gtf"/> + <output name="discordantreads" file="DiscordantReads.bam" ftype="bam"/> + </test> + </tests> + + <help> + <![CDATA[ + A bioinformatics tool for detecting novel transposable element insertions + + Authors: Vista Sohrab & Dilay Hazal Ayhan + + TEfinder uses discordant reads to detect novel transposable element insertion events in paired-end sample sequencing data. + + **Output files**:: + + TE_insertions.bed contains identified TE insertion events in sample (in the final column, FILTER attribute with "PASS" refers to high confidence insertion events while instances labeled as "in_repeat", "weak_evidence", "strand_bias" or a combination of these three labels indicate less confident insertion events) + + TE_insertions.gtf is provided with the same information as the BED file if using -out GTF + + DiscordantReads.bam contains all discordant reads that have been identified based on the TEs of interest that have been submitted to TEfinder + + **Note**:: + + Modifying the maximum TSD length (-k) could be useful if there is an unexpected number of insertion events identified with the default parameter. The optimal maximum TSD length can vary across datasets. + Modifying the fragment insert size (-fis) based on the sequencing library preparation can be useful. + + ]]> + </help> + + <citations> + <citation type="doi">10.5281/zenodo.4479946</citation> + </citations> + +</tool> |
b |
diff -r 000000000000 -r 838fb3a1678f test-data/DiscordantReads.bam |
b |
Binary file test-data/DiscordantReads.bam has changed |
b |
diff -r 000000000000 -r 838fb3a1678f test-data/List_of_TEs.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/List_of_TEs.txt Mon Aug 08 19:41:18 2022 +0000 |
b |
@@ -0,0 +1,15 @@ +Foret1X65452 +ForetFv +Fot1Active +Foxy2 +Frodo +Gollum-Fv4 +hAT210-short-Active +Hop2 +HopAY267761 +Hornet +Hornet-small +SkippyFv +SkippyL34658 +Strider +Tac |
b |
diff -r 000000000000 -r 838fb3a1678f test-data/TEinsertions.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/TEinsertions.bed Mon Aug 08 19:41:18 2022 +0000 |
b |
@@ -0,0 +1,1 @@ +U_9 10325 10327 Hornet-small 16 . FR=7;RR=9;InsRegion=9918-10634;FILTER=PASS |
b |
diff -r 000000000000 -r 838fb3a1678f test-data/TEinsertions.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/TEinsertions.gtf Mon Aug 08 19:41:18 2022 +0000 |
b |
@@ -0,0 +1,1 @@ +U_9 TEfinder TIP 10326 10327 16 . . te_name "Hornet-small"; tags "FR=7;RR=9;InsRegion=9918-10634;FILTER=PASS" |
b |
diff -r 000000000000 -r 838fb3a1678f test-data/TEs.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/TEs.gtf Mon Aug 08 19:41:18 2022 +0000 |
b |
@@ -0,0 +1,65 @@ +S_3n6.54 RepeatMasker similarity 1734 2492 0.0 + . Target "Motif:Hornet-small" 1 759 +S_3n6.54 RepeatMasker similarity 7002 7777 0.0 + . Target "Motif:Hornet-small" 1 759 +S_3n6.54 RepeatMasker similarity 8181 8457 23.4 + . Target "Motif:Frodo" 80 345 +S_3n6.54 RepeatMasker similarity 8538 8693 16.0 + . Target "Motif:Frodo" 440 604 +U_32 RepeatMasker similarity 1792 1954 27.0 - . Target "Motif:Hop2" 2645 2808 +U_32 RepeatMasker similarity 3324 6310 1.9 + . Target "Motif:Hornet" 1 2995 +U_32 RepeatMasker similarity 9977 10735 0.0 + . Target "Motif:Hornet-small" 1 759 +U_32 RepeatMasker similarity 12284 12531 31.2 - . Target "Motif:HopAY267761" 2295 2542 +U_32 RepeatMasker similarity 14704 14880 28.8 - . Target "Motif:SkippyFv" 26 209 +U_39 RepeatMasker similarity 11436 13291 22.2 - . Target "Motif:Fot1Active" 45 1897 +U_39 RepeatMasker similarity 13285 13331 14.9 - . Target "Motif:Fot1Active" 1859 1907 +U_39 RepeatMasker similarity 13293 13473 23.7 - . Target "Motif:hAT210-short-Active" 248 428 +U_39 RepeatMasker similarity 13616 13701 26.7 - . Target "Motif:hAT210-short-Active" 8 93 +U_39 RepeatMasker similarity 13623 13732 25.7 - . Target "Motif:Fot1Active" 28 138 +U_39 RepeatMasker similarity 13854 13914 0.0 + . Target "Motif:Strider" 4 64 +U_9 RepeatMasker similarity 3434 3621 4.8 - . Target "Motif:Gollum-Fv4" 1 188 +U_9 RepeatMasker similarity 27414 28067 4.3 + . Target "Motif:Foxy2" 3 650 +U_9 RepeatMasker similarity 34938 35293 29.1 - . Target "Motif:Hop2" 2561 2912 +U_9 RepeatMasker similarity 35614 37453 33.2 - . Target "Motif:HopAY267761" 219 2046 +U_9 RepeatMasker similarity 38327 38719 25.2 - . Target "Motif:hAT210-short-Active" 37 428 +U_9 RepeatMasker similarity 38862 38992 20.3 + . Target "Motif:Tac" 1 131 +U_9 RepeatMasker similarity 39383 39676 30.8 - . Target "Motif:SkippyFv" 1359 1652 +U_9 RepeatMasker similarity 40797 40987 26.3 + . Target "Motif:ForetFv" 2916 3106 +U_9 RepeatMasker similarity 41020 41950 33.9 - . 
Target "Motif:SkippyFv" 1055 2018 +U_9 RepeatMasker similarity 42804 42939 25.4 - . Target "Motif:SkippyFv" 46 183 +U_9 RepeatMasker similarity 42928 43066 20.7 + . Target "Motif:SkippyFv" 69 209 +U_9 RepeatMasker similarity 42948 43062 20.4 - . Target "Motif:SkippyL34658" 225 340 +U_9 RepeatMasker similarity 43223 43268 23.9 + . Target "Motif:SkippyFv" 405 450 +U_9 RepeatMasker similarity 43642 44404 23.7 - . Target "Motif:ForetFv" 1065 1890 +U_9 RepeatMasker similarity 44390 44567 24.2 + . Target "Motif:Foret1X65452" 671 851 +U_9 RepeatMasker similarity 44568 45541 25.7 + . Target "Motif:Foret1X65452" 918 1884 +U_9 RepeatMasker similarity 45662 46224 33.0 + . Target "Motif:SkippyFv" 1097 1659 +U_9 RepeatMasker similarity 46220 47160 32.5 + . Target "Motif:SkippyFv" 1700 2667 +U_9 RepeatMasker similarity 47444 47836 18.3 - . Target "Motif:hAT210-short-Active" 35 428 +U_9 RepeatMasker similarity 49033 49119 32.2 - . Target "Motif:SkippyFv" 375 462 +U_9 RepeatMasker similarity 49264 49427 27.6 - . Target "Motif:SkippyFv" 40 209 +U_9 RepeatMasker similarity 49297 49455 34.2 + . Target "Motif:SkippyL34658" 254 419 +U_9 RepeatMasker similarity 49455 49542 17.1 - . Target "Motif:SkippyFv" 83 172 +U_9 RepeatMasker similarity 49543 49748 23.5 + . Target "Motif:SkippyFv" 1 211 +U_9 RepeatMasker similarity 50626 51099 26.6 + . Target "Motif:SkippyFv" 1072 1548 +U_9 RepeatMasker similarity 51139 51420 27.0 + . Target "Motif:SkippyFv" 2113 2393 +U_9 RepeatMasker similarity 51695 52487 25.7 + . Target "Motif:SkippyFv" 2359 3232 +U_9 RepeatMasker similarity 52486 52569 19.5 + . Target "Motif:SkippyFv" 3485 3568 +U_9 RepeatMasker similarity 52633 52977 35.9 + . Target "Motif:SkippyFv" 1643 1987 +U_9 RepeatMasker similarity 53105 53580 24.5 - . Target "Motif:ForetFv" 2593 3096 +U_9 RepeatMasker similarity 55184 55388 21.5 + . Target "Motif:ForetFv" 2602 2807 +U_9 RepeatMasker similarity 55385 55428 15.9 + . 
Target "Motif:Fot1Active" 37 80 +U_9 RepeatMasker similarity 55405 55504 19.6 + . Target "Motif:hAT210-short-Active" 4 106 +U_9 RepeatMasker similarity 55759 55825 12.9 + . Target "Motif:hAT210-short-Active" 363 428 +U_9 RepeatMasker similarity 55782 55827 19.6 - . Target "Motif:Fot1Active" 35 80 +U_9 RepeatMasker similarity 55870 56016 25.0 - . Target "Motif:ForetFv" 1789 1932 +U_9 RepeatMasker similarity 55872 55997 24.6 - . Target "Motif:Foret1X65452" 1046 1173 +U_9 RepeatMasker similarity 56922 58023 24.3 + . Target "Motif:ForetFv" 633 1706 +U_9 RepeatMasker similarity 57179 58049 21.8 + . Target "Motif:Foret1X65452" 125 976 +U_9 RepeatMasker similarity 58036 58425 37.8 + . Target "Motif:SkippyL34658" 2270 2664 +U_9 RepeatMasker similarity 58950 59186 34.9 - . Target "Motif:SkippyFv" 4401 4641 +U_9 RepeatMasker similarity 60402 60921 32.0 - . Target "Motif:SkippyFv" 3576 4101 +U_9 RepeatMasker similarity 60936 61628 27.4 - . Target "Motif:SkippyFv" 2785 3479 +U_9 RepeatMasker similarity 61657 62079 25.1 + . Target "Motif:hAT210-short-Active" 9 432 +U_9 RepeatMasker similarity 62079 62351 26.4 - . Target "Motif:SkippyFv" 2493 2774 +U_9 RepeatMasker similarity 63029 63201 38.4 - . Target "Motif:SkippyL34658" 318 503 +U_9 RepeatMasker similarity 63210 63598 18.8 - . Target "Motif:hAT210-short-Active" 11 401 +U_9 RepeatMasker similarity 63622 63720 21.2 + . Target "Motif:SkippyFv" 110 209 +U_9 RepeatMasker similarity 64067 64493 19.9 + . Target "Motif:hAT210-short-Active" 1 428 +U_9 RepeatMasker similarity 64560 72405 0.0 - . Target "Motif:SkippyL34658" 1 7846 |
b |
diff -r 000000000000 -r 838fb3a1678f test-data/reference.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/reference.fa Mon Aug 08 19:41:18 2022 +0000 |
b |
b'@@ -0,0 +1,2116 @@\n+>S_3n6.54\n+TCAGCATCCGTGTCAATCCCATGGGTAAAAAGGTCGAGATCCAGGGACTAGCGGTTGGAG\n+ATGACAACATTTGTCGTTTTGATCGATCTATTGGCGAACTCGTGAAATCTGATAAACTCC\n+CCATTCGCATTACAATAAAGGACAATGAGGAAGATCGAAGTGACCTTGCTGAAAAGCTTC\n+GGGGCTTATTTACCTCGGAGCAAGCTATCGCTGGCATGTTCCCCCTTCTTAATGCTTTCC\n+TCTATTACCTTATACTGATCACCCTTCACAGATATACTTCATGACCTTAAACTCCACATC\n+ATCCAGAAATTAATTCCCAAGCTTCAAAGCAAAGGTTATGTCGAGCCCGCTAAAGCAGAA\n+GCCAACACCCACTCTGAGAGCAGGGCCCAGGTAGCGCAGGGCCCTAACCGACCTTGCCAT\n+GGTGGTCCTATAGTAAACTCCTATAGGCCCCCGCCTCCAGCGATTCCACACCCTGAGATG\n+GCCCgtcctcaacctcatccagTAGGCGACTTCCCTCCTCCAGGCTTTGAGGATGAGTAC\n+GAAATGCACCGGCCTTCACGAAGCGGCCTTCAAATGCCTGGTAGATCGCCTTACAATATT\n+GGCCACGATGATTTGAATCCGCCCGGTCTTGGCCCCCATGATCCTCTTCGTGGTTCTTTC\n+ACTGGTGGGTTACCTCAACCCGGTGGTGGCTCGGGTATGCATCCCACCTTTGATGACCCA\n+TTGTTTGGAGGTCAAGGAGACGATGGATCATCCGGCTTCGATCCTCAAGTACCGCCTGGA\n+GCGCGATGGGATCCCACTGGCCCTGGTGGAAACCCCAGATTCCCTGGTCCTAGTACCGGA\n+AGGGGAAACAATCGGTTTGGCGGAGACATCATTTAAGTATAGTGGTTTATCCATCAGCGG\n+GCTTCCTGgtatgaacaagaaggaggtgcaCATCGGGTTTGGTGGGCCGCAGACACTGGT\n+CATCCGTGAAAAGTCCGAGCGAACCTATACCTCAAGGAACCCTCCTGCAGGTTTTGTCGA\n+AGGCCCATCTATGCATGAAGCTATGAaaggcgatggcgatgaacACAACAAATCGCAGCC\n+TCAACAGCTAGAAAAGGGGGACAAGCATGACGAAAAGACGAAGTATTAGCTTGCTGAGCG\n+CAGCGTTGGCGGGTTCTCCTGTGCCTTCAGCTTCCCGTCTCATGTGGATCAGGATAGTGT\n+CAGTACCAGCTTCCAAGACGGCATCCTGAGTATTATTAACCTGATGGTCAAGAAACATGA\n+GTCCCCCCGCATCCATATTGAATCAACTTTGGTCAATGTTATCGGCATTTATGTTAGACG\n+GAAAATAGTTGAAGTAAAAGCCTCCTACATCTTCAGCACTACTTTCTAGGCAAGCATCCA\n+AGTCAAATCCACCAAAATCATCCGCAAACGTAGGGAAACTCGCCAAGTCCTACATCCATA\n+TTAGCAATCACCTCACCAGTCCCTAAAGAGAGAAAGGGATTTACAGTCATATTACAATTT\n+TCCATGTTTAATGTCGCGGTGTAAACCAAGTCAACAGGTGGGTTAGCCGCCATAGAACCA\n+GGCGTAAATGGCAGAGCTACTGCTAGCTGTCCCGTAGGCACGGCCTGCGAAAGATTAGAA\n+TTCTGGGCACCCTTCCTTATGCCGACGGGGTTGATATGAGGCACTGGGCTCGCAGCAGCA\n+TTCGCAGGGCGTGTCGCTGTAGAGTTGGCAAGGCAGCCTTGCTCGTCTGTgaccagggtt\n+caaatccataccacgaaatccacatatggatccataatgtggatccatatccattccatt\n+ccattccacgtgggcttcagcatttccatatccacgccacgaagccccttccacatgttc\n+cacatgtggaaa
tcgtggaatattttaggtggggttcgaaaataccttgatttccttgtg\n+aaatcccgcatatcctaagtactttctatcacccacaacaacacaacatcgatttgccac\n+cccattttcctccgtacacaatgagtccattctgggggcttcactgttactcgcgttact\n+accccaaatcgaaatggccactccccatcctaccaagtcaaccacatccgttccagaacg\n+aacagaagaggacaatcaacggctctttcagctctacaaaagctggatcttgacggagag\n+agacggcaaggcgcgtctataggtatatcggttcggctacgaatatccagcataacaaga\n+aacaggaacgtcggtgggtgtgttgcctttgcgtcaagcaacggcggattttaatggacc\n+aatgacaaaaacattgacgtcaccgagggcgggcgggacccattaattaccattgttgga\n+cgaagtaggtttccattccacgaattccacaatatggatccatttccataatggatccat\n+ttccacccattccacatcgaaattattagtcagcatccatatccacgccatattcacaaa\n+tgtgtcgtggagtgtcgtggatttgaaccctgtCTGTGACAGTAAGGAAAGGGTGCTCCT\n+TGTCTTGACACTGTCGTTAGCATCTGCCGGCTGGACTGGCGGGAGTCGAGACAGTTCTTG\n+CAATTTACCCTGGACACCACGCTGGGGTGTCTCGGGAATCTGGTTTTCACCTGAATCGTT\n+CTGCCACTGTACCGACCAATTGTCATCTATTTTACCTTCCTGCTGAAATGATGTTCCTTT\n+TGCCGCCACTGATGGCCGGGACGTTTGGCTACTGGGCTGTTGAAAAGTACGTGAATTAGA\n+GGCTCTTGTCCAGTCATTCATCCGCACTACACCTACGTTGCCGCCTAGGCCGCCTACCTT\n+TGCTTTAGGGAAATCAAGTACTATACTGTAGGTAAGCCATTGGGCAAAGATATCAAGGGA\n+CTCAAGATAATGGCGAGATCCGAAAAAAATAGATGGGAAAACACGCTAATGTCGTTCATT\n+GGTTAAGTGCCAAGTTTCACCTGCTTGGTAGTATCAGGCAATGCATCTGGTCTTGGTGCT\n+CGTAGAGGGggccgttgaagaagccttGAACTTTGTATAGGCTCTTGACTGCCCTGGCCA\n+CCAGGTATTCTATAATTTCCACACAGACTGCTTCTGCCTGGGCTGTAGCTCAACGCGGTT\n+ATTTCTTGCGGTGGCTCTGGCCCAGGCCGGGCCGGAAGTTCAGGCCTGCAGGTATCAGAC\n+ACACCTACCGGTGGCTGCCTTCACAACTACGTCGCCAACGGCAGCAACGGCGGCTTCGAG\n+AGCCACAGAGTACTGTTTACCTAAACCCGACCCAAGCTGTGTTTGTCGACCTAGTGAATC\n+TGCTGCCGGAAGCCAGATACCACGTGTTTCTCGACAAcctattctcttcttccaaccTAT\n+TCCGTCGGCTACGTCAGCTTGGGCACGGAGGCACAGGCACTGCCCGCCGGAACTGTGGTC\n+TTTACAGGCTGctcgtcaagctcaaagCTGGTGATAGTACTGCTGTTGGTAGTATTCCTT\n+TTAACTGCCTTAAAGCAATCCCAACAGCCGATAACCTAATTAGTCGAGGAGACAAATCTT\n+TACAGATGAGCCTTACTGATTCGACTTTGGATAGGTCAACCAGATCGCTTGGAAACACAA\n+TGCCCTTGTGTTGTTTTTATCCGCCGTATTTACAGGCAATGAACGGGTTGACCGTATAAG\n+GAAACGACCAACAACAGACCAACCCGCAGCACGGCGGCTTGAGGGCCCTAGCTTGGGACT\n+TCCTGCTAGAAATAGCCctgatcaacagcttcattctGCAGCAACGAGGAAACCCACGg
t\n+ggaagccagagaagtctCAAGCGGAGTGGCGGCAGCTTCTTTTTAATGGCCTTGTTGCAA\n+'..b'CATACGCGGTTTGGACAGAGTTTGAGTTCTGCTGTT\n+GCTTTCTGAAAGGCCTTCAGGAGCTCTTCGCTATCTGCACCCATGGTTGGTGAAGGTTAT\n+ACAGGCTGTTATTTCCAGTTGTAGTAGGCCTCGCGTTATATTTCGAATGTCCCTGATGCG\n+ATGGACGGCGCTTTTGTAAAGTAACGTCTTGCTATCGATAGTCTTAGCTTTCCCGAGATG\n+AGTCGCAGGTACCAAATATGGGAAAGTGGACGCCGCTACAGGATGGAGGGAGGTATTTGT\n+CAGACCGGCTCAGCCGCTGGGCGACGTGTCACATCGGGTTCAGCTTGGCTGTCTTGAGGA\n+CCGCTGCTGGGGTTGCGAATGTGACCAAGAAAGGCAGGCAAGAGACGACGAGTGGCAGAA\n+GGATTGAGAGCAGCGCGAAGCGAACCTATCATAATTCCGGTGCGGGGTTGTGCCAATACA\n+CCGGCTTATTTCAGAAGTCGTATGGGGTGCGGGTGTGTCTGCTTGGCGGTGCGCTATCTC\n+TCCTCTTCCTAAATTTCACTATACCATCTGGGACTAAACCTCTCGGTCAAGATCGCTCAT\n+TCAACTGCCTATTCAGGTACTGCATAGGACCTGTCTACTAAGCAACAGCATTACGATTAT\n+TCACCCATGTTGTGCGTACAAATCGCCTGTGCGATAGTGGCCGATTGACCGCGATTTATC\n+GGTCTCGTTGAAAATAATATGAACCAGCTCAGCGGTTTCTTTCTCACTTATGAACAACCC\n+AACATGCATTCAGCTGACGTGGCCTTCGGAGTCGGAGCATTCAATTGGTAATTGGCACAA\n+ATGAACAACAACCGTCGGCAAGGCCGCGGACCAACCATGAACCTGCATCTCGTTTGGGAA\n+TTCTTACTGGGTCGAAAAATACTTCCATAGTGATCAACTTTGCGGCTGAGCGATTTGTGT\n+CTGACCCCCTTTCGAAGCCCTTCACCGTCCAGTTCCGAAGAATCTAAGCTACTTTGGGCG\n+GGTAGTCTAACAAGTAGGTAAATTGTGCCACTACTATGCCCGTTTTCCAAATTGAGGAAC\n+CGACCATTACCCCAACCCCGCCAGCTCCGACGCGGCTGCTCGTTACCCAGCGGCGCCCAA\n+CGCTTCGAGTTTAGCCAAATTCTGGAGACGATGCAGGTGCAAGCGGGAAGAAGCCAGTTC\n+GGCGCTGGGCAAATAGCTGACCTGACAAAAGAGAATGGAAGCAAGGCTAAATTCAACGGT\n+GAATGACTGACGAAAAATAAATTTAAATTTTCTTTCGCCAGTGAGGATTCTGGTACAGAC\n+CAGATTACTTGTTTTAATCATGATAATTCAGCCTCGGATTTACCGTTGAAGTCTTTTTGA\n+CACTGTCTCTAAATAATCGAAAAACTAACTTAGactaacgtacatgataagcgaacCGGA\n+CAGATAAGCGAACCGgacaaaaatccctcataTTACATTATTactatctgatcaattgat\n+TCTCAACCAAATAACATGTCTCAATCTAATAATGAAGCTAGAAtgcttcttgcacttcag\n+gcctaTCAGGCagacccaaaattaagtctgCGACGAGCCGCAAAGATCTATGATATTAGC\n+CACCAGCGGCTCTTTGATCGAATGAATGGAGTACAGGCTCGCAGCGATTGTATCCCGAAC\n+TCACGGAAACTGAGTGATCTGGAGGAACAGGTTATAGTCCAGTatatccttgacctagat\n+tcgcgaggatttccttccCGGCTTCgtgatgttgaagaaatggcgaatcgactgcttgct\n+gaCCGCGATGCATCACtagttggcaagcgctgggctcacAATTTTATCAAGCGGCAACTA\n
+GAGCTTAAGACGCGTTTTTAGaggagatatgactatcagagggccaaatgcgaagatcca\n+atcgctattcgcaattggtttAGGCTCATACAgaatacaatcgcgaagtatggcatccga\n+tcagatgatatctggaactttgatgagaccggctttatgatgggcgtcATATCAAGCGCT\n+ATAGTTATTACTAGCTCAGAAAGGCGTGGACGGCCAAAATCAGTCCAGCCTGGAacccgg\n+gaatgggttacagcAATCcaagcgatcaatgcagaaggtcaggCGATTGATCCATTCATC\n+ATagttgcaggccaatatcaccttgctaattggtaccgagaaagcaacctcccggccacT\n+TGGGCTATTGCcacgacccaaaatggctggacagataatgagacaggccttgagtggcta\n+aagcactttaatCTATGTACAACCAaccgatcaactggtccctatcgtcttctgatcctt\n+gatggtcacgaaagccaccattcGGCCGACTTCCagatatattgtgaggagaacaatatc\n+atcacgctctgtatgccacctcattcttcccACCTGCTTCAGCCacttgatgtcgggtgc\n+tttgggctgctgaaaaaggcatatggtcgagaaatagagcatctgatcagaaggtctata\n+acccacatttccaagaccgagttcttcccagccttttatgccgcctttCAATTGACTATG\n+ACCGAGGCAAATATTaaagggggttttagaggagctggccTTGCTCCTTTTGACCCAGAA\n+GTtgtaatctcaaagcttgatgtgcagctacggactccaacgcctgttgaggaggaggcc\n+caacaagctcaatcttggacttcaaggaccccaagaaCAGTTCTTGAGGCTagatctcag\n+tctgaataccttcagagacgaatcagaagacaccatagtagctccccagagtcaattctT\n+GAAGTTTTGAGGTCTCTTGAGAAAGGAACTAAGGCAGTTATACATAAGGTCGCCTTACTT\n+GCAGCTGAGAACCGAAATCTTcgacaggcaaatgagatacttagcCGGCGGCGCAGGGCG\n+AGGAGGACACGCCTACAGAATAGAGGGAGTATGACTATACAGGAAGGTCAGgatctaatt\n+gatcagatggatgtagatataCAGGTCATAGCTGAATCatcaagaagtggtggtcaagga\n+agttcGGCGCGACTGAGAGTTCGGCGTTGCAGGACTTGCGGTAAGACTGGGCATAACGCA\n+agaacctgtcaggagggtattgaggcCTCTGAAGATGAGTGTAGTAGTTAATCTCAATTG\n+ATTAGATAGTCTGTTGCGTTTTTATTGTAATTTATCTtaagaaggttgagatttttgtcC\n+GGTTCGCTTATCTGTCCGgttcgcttatcatgtaacgtacatgataagcgagtggaaCAG\n+ACAAGCAAGTGGAACACTATACTGTACATATAAAAACAGTCTACCCTAGACAACTAAACA\n+CTAGATATAAAATTATTCAACTTATAAAAATATACTTTTTAAGCTTAGTACACTTAAATT\n+TTAAATATAGATTTTTTTAAATATTTTTAAATAAATTAATAAGCTAGATATGAAAGGGTA\n+CACAGTGCTAATAAGCTTTTTCTCAACAACTTGATAAGTAATAGTGTATATAACTAGAGA\n+AGTTTGTATCTTATATATTTTATCCTAATTAAGATATGTTTTTAATTTTATAAGATTAAA\n+GCTTGATTTAGATGTTTAAGAATTCTTTTAAAATCCCGCTTTAAGGTATAAAATTGAAGT\n+GTTCCGTACCCTTGTCTGTTCCACTCacttatcatacacgttaagcttattatataaata\n+aaagAGAGTTATATAATAGCCGCAAACTAAATTATAATCAGTCGAA
TTTTCCCTATATTT\n+TTACCCTAAGTTCTAGATAAGCTACTCTTATATTAGCTTAATTATTCTTGATGattcagc\n+cgaaggctgacTCTACATTAATTCATAAGTTTTCCCGAGTAATGAGGGTTTGGT\n' |
b |
diff -r 000000000000 -r 838fb3a1678f test-data/sample.bam |
b |
Binary file test-data/sample.bam has changed |