Galaxy |

Changeset 0:269c627ae9f4 (2018-06-20)

Next changeset 1:9a811adb714f (2023-01-25)

Commit message:
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/tools/fasta_regex_finder commit 8e118a4d24047e2c62912b962e854f789d6ff559

added:
fastaregexfinder.py
fastaregexfinder.xml
test-data/TestSeqGroup-G4-sub.bed
test-data/TestSeqGroup-G4.bed
test-data/TestSeqGroup-G4.fasta
test-data/test-1.bed
test-data/test-2.bed
test-data/test-3.bed
test-data/test-4.bed
test-data/test.fas

diff -r 000000000000 -r 269c627ae9f4 fastaregexfinder.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastaregexfinder.py Wed Jun 20 11:06:57 2018 -0400

[

b'@@ -0,0 +1,254 @@\n+#!/usr/bin/env python\n+\n+import re\n+import sys\n+import string\n+import argparse\n+import operator\n+\n+VERSION=\'0.1.1\'\n+\n+parser = argparse.ArgumentParser(description="""\n+\n+DESCRIPTION\n+ \n+ Search a fasta file for matches to a regex and return a bed file with the\n+ coordinates of the match and the matched sequence itself. \n+ \n+ Output bed file has columns:\n+ 1. Name of fasta sequence (e.g. chromosome)\n+ 2. Start of the match\n+ 3. End of the match\n+ 4. ID of the match\n+ 5. Length of the match\n+ 6. Strand \n+ 7. Matched sequence as it appears on the forward strand\n+ \n+ For matches on the reverse strand it is reported the start and end position on the\n+ forward strand and the matched string on the forward strand (so the G4 \'GGGAGGGT\'\n+ present on the reverse strand is reported as ACCCTCCC).\n+ \n+ Note: Fasta sequences (chroms) are read in memory one at a time along with the\n+ matches for that chromosome.\n+ The order of the output is: chroms as they are found in the inut fasta, matches\n+ sorted within chroms by positions.\n+\n+EXAMPLE:\n+ ## Test data:\n+ echo \'>mychr\' > /tmp/mychr.fa\n+ echo \'ACTGnACTGnACTGnTGAC\' >> /tmp/mychr.fa\n+ \n+ fastaRegexFinder.py -f /tmp/mychr.fa -r \'ACTG\'\n+ mychr\t0\t4\tmychr_0_4_for\t4\t+\tACTG\n+ mychr\t5\t9\tmychr_5_9_for\t4\t+\tACTG\n+ mychr\t10\t14\tmychr_10_14_for\t4\t+\tACTG\n+\n+ fastaRegexFinder.py -f /tmp/mychr.fa -r \'ACTG\' --maxstr 3\n+ mychr\t0\t4\tmychr_0_4_for\t4\t+\tACT[3,4]\n+ mychr\t5\t9\tmychr_5_9_for\t4\t+\tACT[3,4]\n+ mychr\t10\t14\tmychr_10_14_for\t4\t+\tACT[3,4]\n+ \n+ less /tmp/mychr.fa | fastaRegexFinder.py -f - -r \'A\\w\\wGn\'\n+ mychr\t0\t5\tmychr_0_5_for\t5\t+\tACTGn\n+ mychr\t5\t10\tmychr_5_10_for\t5\t+\tACTGn\n+ mychr\t10\t15\tmychr_10_15_for\t5\t+\tACTGn\n+\n+DOWNLOAD\n+ fastaRegexFinder.py is hosted at https://github.com/dariober/bioinformatics-cafe/tree/master/fastaRegexFinder\n+\n+ """, formatter_class= 
argparse.RawTextHelpFormatter)\n+\n+parser.add_argument(\'--fasta\', \'-f\',\n+ type= str,\n+ help=\'\'\'Input fasta file to search. Use \'-\' to read the file from stdin.\n+ \n+ \'\'\',\n+ required= True)\n+\n+parser.add_argument(\'--regex\', \'-r\',\n+ type= str,\n+ help=\'\'\'Regex to be searched in the fasta input.\n+Matches to the reverse complement will have - strand.\n+The default regex is \'([gG]{3,}\\w{1,7}){3,}[gG]{3,}\' which searches\n+for G-quadruplexes. \n+ \'\'\',\n+ default= \'([gG]{3,}\\w{1,7}){3,}[gG]{3,}\')\n+\n+parser.add_argument(\'--matchcase\', \'-m\',\n+ action= \'store_true\',\n+ help=\'\'\'Match case while searching for matches. Default is\n+to ignore case (I.e. \'ACTG\' will match \'actg\').\n+ \'\'\')\n+\n+parser.add_argument(\'--noreverse\',\n+ action= \'store_true\',\n+ help=\'\'\'Do not search the reverse complement of the input fasta.\n+Use this flag to search protein sequences. \n+ \'\'\')\n+\n+parser.add_argument(\'--maxstr\',\n+ type= int,\n+ required= False,\n+ default= 10000,\n+ help=\'\'\'Maximum length of the match to report in the 7th column of the output.\n+Default is to report up to 10000nt.\n+Truncated matches are reported as <ACTG...ACTG>[<maxstr>,<tot length>]\n+ \'\'\')\n+\n+parser.add_argument(\'--seqnames\', \'-s\',\n+ type= str,\n+ nargs= \'+\',\n+ default= [None],\n+ required= False,\n+\t\t help=\'\'\'List of fasta sequences in --fasta to\n+search. E.g. use --seqnames chr1 chr2 chrM to search only these crhomosomes.\n+Default is to search all the sequences in input.\n+ \'\'\')\n+parser.add_argument(\'--quiet\', \'-q\',\n+ '..b'+ table: a list of lists (or tuple of tuples) where each inner list \n+ represents a row\n+ cols: a list (or tuple) specifying the column numbers to sort by\n+ e.g. (1,0) would sort by column 1, then by column 0\n+ """\n+ for col in reversed(cols):\n+ table = sorted(table, key=operator.itemgetter(col))\n+ return(table)\n+\n+def trimMatch(x, n):\n+ """ Trim the string x to be at most length n. 
Trimmed matches will be reported\n+ with the syntax ACTG[a,b] where Ns are the beginning of x, a is the length of\n+ the trimmed strng (e.g 4 here) and b is the full length of the match\n+ EXAMPLE:\n+ trimMatch(\'ACTGNNNN\', 4)\n+ >>>\'ACTG[4,8]\'\n+ trimMatch(\'ACTGNNNN\', 8)\n+ >>>\'ACTGNNNN\'\n+ """\n+ if len(x) > n and n is not None:\n+ m= x[0:n] + \'[\' + str(n) + \',\' + str(len(x)) + \']\'\n+ else:\n+ m= x\n+ return(m)\n+\n+def revcomp(x):\n+ """Reverse complement string x. Ambiguity codes are handled and case conserved.\n+ \n+ Test\n+ x= \'ACGTRYSWKMBDHVNacgtryswkmbdhvn\'\n+ revcomp(x)\n+ """\n+ compdict= {\'A\':\'T\',\n+ \'C\':\'G\',\n+ \'G\':\'C\',\n+ \'T\':\'A\',\n+ \'R\':\'Y\',\n+ \'Y\':\'R\',\n+ \'S\':\'W\',\n+ \'W\':\'S\',\n+ \'K\':\'M\',\n+ \'M\':\'K\',\n+ \'B\':\'V\',\n+ \'D\':\'H\',\n+ \'H\':\'D\',\n+ \'V\':\'B\',\n+ \'N\':\'N\',\n+ \'a\':\'t\',\n+ \'c\':\'g\',\n+ \'g\':\'c\',\n+ \'t\':\'a\',\n+ \'r\':\'y\',\n+ \'y\':\'r\',\n+ \'s\':\'w\',\n+ \'w\':\'s\',\n+ \'k\':\'m\',\n+ \'m\':\'k\',\n+ \'b\':\'v\',\n+ \'d\':\'h\',\n+ \'h\':\'d\',\n+ \'v\':\'b\',\n+ \'n\':\'n\'}\n+ xrc= []\n+ for n in x:\n+ xrc.append(compdict[n])\n+ xrc= \'\'.join(xrc)[::-1]\n+ return(xrc)\n+# -----------------------------------------------------------------------------\n+\n+psq_re_f= re.compile(args.regex, flags= flag)\n+## psq_re_r= re.compile(regexrev)\n+\n+if args.fasta != \'-\':\n+ ref_seq_fh= open(args.fasta)\n+else:\n+ ref_seq_fh= sys.stdin \n+\n+ref_seq=[]\n+line= (ref_seq_fh.readline()).strip()\n+chr= re.sub(\'^>\', \'\', line)\n+line= (ref_seq_fh.readline()).strip()\n+gquad_list= []\n+while True:\n+ if not args.quiet:\n+ sys.stderr.write(\'Processing %s\\n\' %(chr))\n+ while line.startswith(\'>\') is False:\n+ ref_seq.append(line)\n+ line= (ref_seq_fh.readline()).strip()\n+ if line == \'\':\n+ break\n+ ref_seq= \'\'.join(ref_seq)\n+ if args.seqnames == [None] or chr in args.seqnames:\n+ for m in re.finditer(psq_re_f, ref_seq):\n+ matchstr= trimMatch(m.group(0), 
args.maxstr)\n+ quad_id= str(chr) + \'_\' + str(m.start()) + \'_\' + str(m.end()) + \'_for\'\n+ gquad_list.append([chr, m.start(), m.end(), quad_id, len(m.group(0)), \'+\', matchstr])\n+ if args.noreverse is False:\n+ ref_seq= revcomp(ref_seq)\n+ seqlen= len(ref_seq)\n+ for m in re.finditer(psq_re_f, ref_seq):\n+ matchstr= trimMatch(revcomp(m.group(0)), args.maxstr)\n+ mstart= seqlen - m.end()\n+ mend= seqlen - m.start()\n+ quad_id= str(chr) + \'_\' + str(mstart) + \'_\' + str(mend) + \'_rev\'\n+ gquad_list.append([chr, mstart, mend, quad_id, len(m.group(0)), \'-\', matchstr])\n+ gquad_sorted= sort_table(gquad_list, (1,2,3))\n+ gquad_list= []\n+ for xline in gquad_sorted:\n+ xline= \'\\t\'.join([str(x) for x in xline])\n+ print(xline)\n+ chr= re.sub(\'^>\', \'\', line)\n+ ref_seq= []\n+ line= (ref_seq_fh.readline()).strip()\n+ if line == \'\':\n+ break\n+\n+#gquad_sorted= sort_table(gquad_list, (0,1,2,3))\n+#\n+#for line in gquad_sorted:\n+# line= \'\\t\'.join([str(x) for x in line])\n+# print(line)\n+sys.exit()\n'

diff -r 000000000000 -r 269c627ae9f4 fastaregexfinder.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastaregexfinder.xml Wed Jun 20 11:06:57 2018 -0400

[

@@ -0,0 +1,161 @@
+<tool id="fasta_regex_finder" name="fasta_regex_finder" version="0.1.0">
+    <description>
+        Search in fasta for regexp match
+    </description>
+    <requirements>
+    </requirements>
+    <version_command>python $__tool_directory__/fastaregexfinder.py --version</version_command>
+    <!-- User-controlled values are wrapped in single quotes: the regex sanitizer
+         defined on the "regex" parameter maps ' to __sq__, so a value can never
+         close the quoting, and characters such as $, " and \ reach the script
+         unchanged instead of being interpreted by the shell. -->
+    <command detect_errors="exit_code"><![CDATA[
+python '$__tool_directory__/fastaregexfinder.py'
+--fasta '$input'
+--regex '$regex'
+#if $settings.advanced == "advanced"
+    $settings.matchcase
+    $settings.noreverse
+    --maxstr $settings.maxstr
+    ## Skip the option when the field is empty or whitespace only; otherwise
+    ## argparse would reject a bare option without any sequence name.
+    #if str($settings.seqnames).strip() != ""
+        --seqnames $settings.seqnames
+    #end if
+#end if
+--quiet
+> '$output'
+    ]]></command>
+    <inputs>
+        <param type="data" name="input" format="fasta" />
+        <param name="regex" size="30" type="text" value="([gG]{3,}\w{1,7}){3,}[gG]{3,}" label="Regular expression" help="(--regex)">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="'"/>
+                </valid>
+                <mapping initial="none">
+                    <add source="'" target="__sq__"/>
+                </mapping>
+            </sanitizer>
+        </param>
+        <conditional name="settings">
+            <param name="advanced" type="select" label="Specify advanced parameters">
+                <option value="simple" selected="true">No, use program defaults.</option>
+                <option value="advanced">Yes, see full parameter list.</option>
+            </param>
+            <when value="simple">
+            </when>
+            <when value="advanced">
+                <param name="matchcase" type="boolean" label="Match case" truevalue="--matchcase" falsevalue="" help="(--matchcase)" />
+                <param name="noreverse" type="boolean" label="Do not search the reverse complement" truevalue="--noreverse" falsevalue="" help="(--noreverse)" />
+                <param name="maxstr" type="integer" label="Maximum length of the match to report" value="10000" min="1" help="(--maxstr)" />
+                <param name="seqnames" size="30" type="text" value="" label="Space separated list of fasta sequences to search" help="--seqnames"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- The command redirects the script's stdout into $output, so no file has to be collected from the working directory. -->
+        <data name="output" format="bed" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="TestSeqGroup-G4.fasta"/>
+            <output name="output" file="TestSeqGroup-G4.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <output name="output" file="test-1.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <param name="advanced" value="advanced"/>
+            <param name="matchcase" value="--matchcase"/>
+            <output name="output" file="test-2.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <param name="advanced" value="advanced"/>
+            <param name="noreverse" value="--noreverse"/>
+            <output name="output" file="test-3.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <param name="advanced" value="advanced"/>
+            <param name="maxstr" value="3"/>
+            <output name="output" file="test-4.bed"/>
+        </test>
+        <test>
+            <param name="input" value="TestSeqGroup-G4.fasta"/>
+            <param name="advanced" value="advanced"/>
+            <param name="seqnames" value="HJ24-Shp2_oncogenicProtein2 HJ24-Shp2_oncogenicProtein"/>
+            <output name="output" file="TestSeqGroup-G4-sub.bed"/>
+        </test>
+</tests>
+    <help><![CDATA[
+DESCRIPTION
+
+Search a fasta file for matches to a regular expression and return a bed file with the
+coordinates of the match and the matched sequence itself.
+
+Output bed file has columns:
+
+1. Name of fasta sequence (e.g. chromosome)
+2. Start of the match
+3. End of the match
+4. ID of the match
+5. Length of the match
+6. Strand
+7. Matched sequence as it appears on the forward strand
+
+For matches on the reverse strand, the start and end positions and the matched
+string are reported as they appear on the forward strand (so the G4 'GGGAGGGT'
+present on the reverse strand is reported as ACCCTCCC).
+
+
+Note: Fasta sequences (chroms) are read into memory one at a time along with the
+matches for that chromosome.
+The order of the output is: chroms as they are found in the input fasta, matches
+sorted within chroms by position.
+
+ARGUMENTS:
+
+- regex Regex to be searched in the fasta input. Matches to the reverse complement will have - strand. The default regex is '([gG]{3,}\w{1,7}){3,}[gG]{3,}' which searches for G-quadruplexes.
+- matchcase Match case while searching for matches. Default is to ignore case (i.e. 'ACTG' will match 'actg').
+- noreverse           Do not search the reverse complement of the input fasta. Use this flag to search protein sequences.
+- maxstr Maximum length of the match to report in the 7th column of the output. Default is to report up to 10000nt. Truncated matches are reported as <ACTG...ACTG>[<maxstr>,<tot length>]
+- seqnames List of fasta sequences in the input to search. E.g. use --seqnames chr1 chr2 chrM to search only these chromosomes. Default is to search all the sequences in input.
+
+EXAMPLE:
+
+Test data::
+
+        >mychr
+        ACTGnACTGnACTGnTGAC
+
+Example1 regex=ACTG::
+
+        mychr 0 4 mychr_0_4_for 4 + ACTG
+        mychr 5 9 mychr_5_9_for 4 + ACTG
+        mychr 10 14 mychr_10_14_for 4 + ACTG
+
+Example2 regex=ACTG maxstr=3::
+
+        mychr 0 4 mychr_0_4_for 4 + ACT[3,4]
+        mychr 5 9 mychr_5_9_for 4 + ACT[3,4]
+        mychr 10 14 mychr_10_14_for 4 + ACT[3,4]
+
+Example3 regex=A\w\wGn::
+
+        mychr 0 5 mychr_0_5_for 5 + ACTGn
+        mychr 5 10 mychr_5_10_for 5 + ACTGn
+        mychr 10 15 mychr_10_15_for 5 + ACTGn
+
+   ]]></help>
+    <citations>
+        <citation type="bibtex">
+@misc{githubfastaRegexFinder,
+  author = {Dario Beraldi},
+  year = {2017},
+  title = {fastaRegexFinder},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/dariober/bioinformatics-cafe/tree/master/fastaRegexFinder},
+}</citation>
+    </citations>
+</tool>

diff -r 000000000000 -r 269c627ae9f4 test-data/TestSeqGroup-G4-sub.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TestSeqGroup-G4-sub.bed Wed Jun 20 11:06:57 2018 -0400

@@ -0,0 +1,2 @@
+HJ24-Shp2_oncogenicProtein 17 58 HJ24-Shp2_oncogenicProtein_17_58_for 41 + GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG
+HJ24-Shp2_oncogenicProtein2 17 58 HJ24-Shp2_oncogenicProtein2_17_58_for 41 + GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG

diff -r 000000000000 -r 269c627ae9f4 test-data/TestSeqGroup-G4.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TestSeqGroup-G4.bed Wed Jun 20 11:06:57 2018 -0400

@@ -0,0 +1,3 @@
+HJ24-Shp2_oncogenicProtein 17 58 HJ24-Shp2_oncogenicProtein_17_58_for 41 + GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG
+HJ24-Shp2_oncogenicProtein2 17 58 HJ24-Shp2_oncogenicProtein2_17_58_for 41 + GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG
+HJ24-Shp2_oncogenicProtein3 17 58 HJ24-Shp2_oncogenicProtein3_17_58_for 41 + GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG

diff -r 000000000000 -r 269c627ae9f4 test-data/TestSeqGroup-G4.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TestSeqGroup-G4.fasta Wed Jun 20 11:06:57 2018 -0400

@@ -0,0 +1,56 @@
+>PA#2/8
+ATACCAGCTTATTCAATTAGCAACATGAGGGGGATAGAGGGGGTGGGTTCTCTCGGCTACAATCGTAATCAGTTAG
+>PA#14/89
+ATACCAGCTTATTCAATTGGGCACCACGGGAGTCGGCCACATTTGGAGTTGTTTTTGCACAATCGTAATCAGTTAG
+>PA-C10
+ATACCAGCTTATTCAATTGCGCACCACGGGAGTTGGCCACATTTGGAGTTGTTTTTGCACAATCGTAATCAGTTAG
+>PA#4/22
+ATACCAGCTTATTCAATTGCAGTACTGATGAGTGTAGCCGTATGATTATCGTTTGTGGACAATCGTAATCAGTTAG
+>PA#4/34
+ATACCAGCTTATTCAATTCCCCAACGAGTCGATATGTAGCCCACACTCTGATTCGTCCACAATCGTAATCAGTTAG
+>PA#2/11
+ATACCAGCTTATTCAATTGGAGACGACAAACTATTACGTACTACGGCATGCACTTGGTACAATCGTAATCAGTTAG
+>PA-C8
+ATACCAGCTTATTCAATTAGGCCAGATGAGGGGTGCCCATGGCGGGGTGGCTGCTCCAACAATCGTAATCAGTTAG
+>PA#14/82
+ATACCAGCTTATTCAATTCCACAACCGAACTCGTAAGACGTATGTAGCCGCCAACTGTACAATCGTAATCAGTTAG
+>PA#2/3
+ATACCAGCTTATTCAATTCGACAAGTGGGCATTACGATTCTAGCCCTGATTATGTTCCACAATCGTAATCAGTTAG
+>PA-C9
+ATACCAGCTTATTCAATTACCGAGGAGATAACGTTGTAGCCGTCCATCATCTGATTCGACAATCGTAATCAGTTAG
+>PA#2/6
+ATACCAGCTTATTCAATTACCGATCACTAGCCGACTAATTGGTTTCCGATCGCAGTCCACAATCGTAATCAGTTAG
+>PA-C11
+ATACCAGCTTATTCAATTCGATGGAGCTGATGATTGTTGCCGATCTGACTGTTGTTCCACAATCGTAATCAGTTAG
+>PA-C13
+ATACCAGCTTATTCAATTCCCCTAACGTTACTGGATGTAGTCCGACTAACTTATGCGTACAATCGTAATCAGTTAG
+>PA-C42
+ATACCAGCTTATTCAATTGCAGATTACGCCTTGTAGCCCGCACTGATCTCGATGTTTGGACAATCGTAATCAGTTAG
+>PA-C16
+ATACCAGCTTATTCAATTCCCACGAGTGTAGCCGATTCTTCTGTACTCTTGTCCTCGTACAATCGTAATCAGTTAG
+>PA-C15
+ATACCAGCTTATTCAATTACGTGTTGTAGCCGACCCCTGTTGATTGTTTTCCTGTACCACAATCGTAATCAGTTAG
+>EA#14.3
+ATACCAGCTTATTCAATTTGAGGCGGGTGGGTGGGTTGAATACGCTGATTACCCCATCGGAGAACGTTAAGGCGCTTCAGATAGTAAGTGCAATCT
+>EA#9.4
+ATACCAGCTTATTCAATTGCTGCGAGGTGGGTGGGTGGGAGCAATTGATCCTCGCTTAGCTTCTACGGTGGGCTATCTAGATAGTAAGTGCAATCT
+>EA#5.10
+ATACCAGCTTATTCAATTCACCACACCTGCACCCCTGACTTCCCACTTATATCTACTACTCCGTCTCAAGCCCGTTTGAGATAGTAAGTGCAATCT
+>EA#14.5
+ATACCAGCTTATTCAATTCCGAGTTTGGGTGGGAGTGGTGGGTTCGGAATTGTTAGTTATTTGGGTTTATGCGAGGTGAGATAGTAAGTGCAATCT
+>EA#14.8
+ATACCAGCTTATTCAATTGACGGGGTGTTGTCGTATGCTGTAGAAGCCGTAATTTTTTTTGTTTTCCCTGCCCACCTAGATAGTAAGTGCAATCT
+>EA#14.4
+ATACCAGCTTATTCAATTCCACAGGTTGTATGGGGAATAAGGTGGGTGCGCGAGATAGTAAGTGCAATCT
+>EA#11.5
+ATACCAGCTTATTCAATTCCCACACCCTAACCGTAGAGCTAAGCTTTTCTTACTACTGACAGTGCTTTACCGTTTGCAAGATAGTAAGTGCAATCT
+>HJ24-Shp2_oncogenicProtein
+AGCGTCGAATACCACACGGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGGCTAATGGAGCTCGTGGTCAT
+>HJ24-Shp2_oncogenicProtein2
+AGCGTCGAATACCACACGGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGGCTAATGGAGCTCGTGGTCAT
+>HJ24-Shp2_oncogenicProtein3
+AGCGTCGAATACCACACGGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGGCTAATGGAGCTCGTGGTCAT
+>RT6-HIVRT
+ATCCGCCTGATTAGCGATACTCAGGCGTTAGGGAAGGGCGTCGAAAGCAGGGTGGGACTTGAGCAAAATCACCTGCAGGGG
+>AptG4-HumanRNaseH1
+CGGTCGCTCCGTGTGGCTTGGGTTGGGTGTGGCAGTGAC

diff -r 000000000000 -r 269c627ae9f4 test-data/test-1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-1.bed Wed Jun 20 11:06:57 2018 -0400

@@ -0,0 +1,3 @@
+mychr 0 4 mychr_0_4_for 4 + ACTG
+mychr 5 9 mychr_5_9_for 4 + actg
+mychr 10 14 mychr_10_14_rev 4 - CAGT

diff -r 000000000000 -r 269c627ae9f4 test-data/test-2.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-2.bed Wed Jun 20 11:06:57 2018 -0400

@@ -0,0 +1,2 @@
+mychr 0 4 mychr_0_4_for 4 + ACTG
+mychr 10 14 mychr_10_14_rev 4 - CAGT

diff -r 000000000000 -r 269c627ae9f4 test-data/test-3.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-3.bed Wed Jun 20 11:06:57 2018 -0400

@@ -0,0 +1,2 @@
+mychr 0 4 mychr_0_4_for 4 + ACTG
+mychr 5 9 mychr_5_9_for 4 + actg

diff -r 000000000000 -r 269c627ae9f4 test-data/test-4.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-4.bed Wed Jun 20 11:06:57 2018 -0400

[

@@ -0,0 +1,3 @@
+mychr 0 4 mychr_0_4_for 4 + ACT[3,4]
+mychr 5 9 mychr_5_9_for 4 + act[3,4]
+mychr 10 14 mychr_10_14_rev 4 - CAG[3,4]

diff -r 000000000000 -r 269c627ae9f4 test-data/test.fas
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.fas Wed Jun 20 11:06:57 2018 -0400

@@ -0,0 +1,2 @@
+>mychr
+ACTGnactgnCAGTnTGAC