Repository 'fasta_regex_finder'
hg clone https://toolshed.g2.bx.psu.edu/repos/mbernt/fasta_regex_finder

Changeset 0:269c627ae9f4 (2018-06-20)
Next changeset 1:9a811adb714f (2023-01-25)
Commit message:
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/tools/fasta_regex_finder commit 8e118a4d24047e2c62912b962e854f789d6ff559
added:
fastaregexfinder.py
fastaregexfinder.xml
test-data/TestSeqGroup-G4-sub.bed
test-data/TestSeqGroup-G4.bed
test-data/TestSeqGroup-G4.fasta
test-data/test-1.bed
test-data/test-2.bed
test-data/test-3.bed
test-data/test-4.bed
test-data/test.fas
b
diff -r 000000000000 -r 269c627ae9f4 fastaregexfinder.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastaregexfinder.py Wed Jun 20 11:06:57 2018 -0400
[
b'@@ -0,0 +1,254 @@\n+#!/usr/bin/env python\n+\n+import re\n+import sys\n+import string\n+import argparse\n+import operator\n+\n+VERSION=\'0.1.1\'\n+\n+parser = argparse.ArgumentParser(description="""\n+\n+DESCRIPTION\n+    \n+    Search a fasta file for matches to a regex and return a bed file with the\n+    coordinates of the match and the matched sequence itself. \n+    \n+    Output bed file has columns:\n+    1. Name of fasta sequence (e.g. chromosome)\n+    2. Start of the match\n+    3. End of the match\n+    4. ID of the match\n+    5. Length of the match\n+    6. Strand \n+    7. Matched sequence as it appears on the forward strand\n+    \n+    For matches on the reverse strand it is reported the start and end position on the\n+    forward strand and the matched string on the forward strand (so the G4 \'GGGAGGGT\'\n+    present on the reverse strand is reported as ACCCTCCC).\n+    \n+    Note: Fasta sequences (chroms) are read in memory one at a time along with the\n+    matches for that chromosome.\n+    The order of the output is: chroms as they are found in the inut fasta, matches\n+    sorted within chroms by positions.\n+\n+EXAMPLE:\n+    ## Test data:\n+    echo \'>mychr\' > /tmp/mychr.fa\n+    echo \'ACTGnACTGnACTGnTGAC\' >> /tmp/mychr.fa\n+    \n+    fastaRegexFinder.py -f /tmp/mychr.fa -r \'ACTG\'\n+        mychr\t0\t4\tmychr_0_4_for\t4\t+\tACTG\n+        mychr\t5\t9\tmychr_5_9_for\t4\t+\tACTG\n+        mychr\t10\t14\tmychr_10_14_for\t4\t+\tACTG\n+\n+    fastaRegexFinder.py -f /tmp/mychr.fa -r \'ACTG\' --maxstr 3\n+        mychr\t0\t4\tmychr_0_4_for\t4\t+\tACT[3,4]\n+        mychr\t5\t9\tmychr_5_9_for\t4\t+\tACT[3,4]\n+        mychr\t10\t14\tmychr_10_14_for\t4\t+\tACT[3,4]\n+    \n+    less /tmp/mychr.fa | fastaRegexFinder.py -f - -r \'A\\w\\wGn\'\n+        mychr\t0\t5\tmychr_0_5_for\t5\t+\tACTGn\n+        mychr\t5\t10\tmychr_5_10_for\t5\t+\tACTGn\n+        mychr\t10\t15\tmychr_10_15_for\t5\t+\tACTGn\n+\n+DOWNLOAD\n+    fastaRegexFinder.py is 
hosted at https://github.com/dariober/bioinformatics-cafe/tree/master/fastaRegexFinder\n+\n+    """, formatter_class= argparse.RawTextHelpFormatter)\n+\n+parser.add_argument(\'--fasta\', \'-f\',\n+                   type= str,\n+                   help=\'\'\'Input fasta file to search. Use \'-\' to read the file from stdin.\n+                                   \n+                   \'\'\',\n+                   required= True)\n+\n+parser.add_argument(\'--regex\', \'-r\',\n+                   type= str,\n+                   help=\'\'\'Regex to be searched in the fasta input.\n+Matches to the reverse complement will have - strand.\n+The default regex is \'([gG]{3,}\\w{1,7}){3,}[gG]{3,}\' which searches\n+for G-quadruplexes.                                   \n+                   \'\'\',\n+                   default= \'([gG]{3,}\\w{1,7}){3,}[gG]{3,}\')\n+\n+parser.add_argument(\'--matchcase\', \'-m\',\n+                   action= \'store_true\',\n+                   help=\'\'\'Match case while searching for matches. Default is\n+to ignore case (I.e. \'ACTG\' will match \'actg\').\n+                   \'\'\')\n+\n+parser.add_argument(\'--noreverse\',\n+                   action= \'store_true\',\n+                   help=\'\'\'Do not search the reverse complement of the input fasta.\n+Use this flag to search protein sequences.                                   
\n+                   \'\'\')\n+\n+parser.add_argument(\'--maxstr\',\n+                   type= int,\n+                   required= False,\n+                   default= 10000,\n+                   help=\'\'\'Maximum length of the match to report in the 7th column of the output.\n+Default is to report up to 10000nt.\n+Truncated matches are reported as <ACTG...ACTG>[<maxstr>,<tot length>]\n+                   \'\'\')\n+\n+parser.add_argument(\'--seqnames\', \'-s\',\n+                   type= str,\n+                   nargs= \'+\',\n+                   default= [None],\n+                   required= False,\n+\t\t   help=\'\'\'List of fasta sequences in --fasta to\n+search. E.g. use --seqnames chr1 chr2 chrM to search only these crhomosomes.\n+Default is to search all the sequences in input.\n+                   \'\'\')\n+parser.add_argument(\'--quiet\', \'-q\',\n+             '..b'+        table: a list of lists (or tuple of tuples) where each inner list \n+               represents a row\n+        cols:  a list (or tuple) specifying the column numbers to sort by\n+               e.g. (1,0) would sort by column 1, then by column 0\n+    """\n+    for col in reversed(cols):\n+        table = sorted(table, key=operator.itemgetter(col))\n+    return(table)\n+\n+def trimMatch(x, n):\n+    """ Trim the string x to be at most length n. Trimmed matches will be reported\n+    with the syntax ACTG[a,b] where Ns are the beginning of x, a is the length of\n+    the trimmed strng (e.g 4 here) and b is the full length of the match\n+    EXAMPLE:\n+        trimMatch(\'ACTGNNNN\', 4)\n+        >>>\'ACTG[4,8]\'\n+        trimMatch(\'ACTGNNNN\', 8)\n+        >>>\'ACTGNNNN\'\n+    """\n+    if len(x) > n and n is not None:\n+        m= x[0:n] + \'[\' + str(n) + \',\' + str(len(x)) + \']\'\n+    else:\n+        m= x\n+    return(m)\n+\n+def revcomp(x):\n+    """Reverse complement string x. 
Ambiguity codes are handled and case conserved.\n+    \n+    Test\n+    x= \'ACGTRYSWKMBDHVNacgtryswkmbdhvn\'\n+    revcomp(x)\n+    """\n+    compdict=  {\'A\':\'T\',\n+                \'C\':\'G\',\n+                \'G\':\'C\',\n+                \'T\':\'A\',\n+                \'R\':\'Y\',\n+                \'Y\':\'R\',\n+                \'S\':\'W\',\n+                \'W\':\'S\',\n+                \'K\':\'M\',\n+                \'M\':\'K\',\n+                \'B\':\'V\',\n+                \'D\':\'H\',\n+                \'H\':\'D\',\n+                \'V\':\'B\',\n+                \'N\':\'N\',\n+                \'a\':\'t\',\n+                \'c\':\'g\',\n+                \'g\':\'c\',\n+                \'t\':\'a\',\n+                \'r\':\'y\',\n+                \'y\':\'r\',\n+                \'s\':\'w\',\n+                \'w\':\'s\',\n+                \'k\':\'m\',\n+                \'m\':\'k\',\n+                \'b\':\'v\',\n+                \'d\':\'h\',\n+                \'h\':\'d\',\n+                \'v\':\'b\',\n+                \'n\':\'n\'}\n+    xrc= []\n+    for n in x:\n+        xrc.append(compdict[n])\n+    xrc= \'\'.join(xrc)[::-1]\n+    return(xrc)\n+# -----------------------------------------------------------------------------\n+\n+psq_re_f= re.compile(args.regex, flags= flag)\n+## psq_re_r= re.compile(regexrev)\n+\n+if args.fasta != \'-\':\n+    ref_seq_fh= open(args.fasta)\n+else:\n+    ref_seq_fh= sys.stdin    \n+\n+ref_seq=[]\n+line= (ref_seq_fh.readline()).strip()\n+chr= re.sub(\'^>\', \'\', line)\n+line= (ref_seq_fh.readline()).strip()\n+gquad_list= []\n+while True:\n+    if not args.quiet:\n+        sys.stderr.write(\'Processing %s\\n\' %(chr))\n+    while line.startswith(\'>\') is False:\n+        ref_seq.append(line)\n+        line= (ref_seq_fh.readline()).strip()\n+        if line == \'\':\n+            break\n+    ref_seq= \'\'.join(ref_seq)\n+    if args.seqnames == [None] or chr in args.seqnames:\n+        for m in 
re.finditer(psq_re_f, ref_seq):\n+            matchstr= trimMatch(m.group(0), args.maxstr)\n+            quad_id= str(chr) + \'_\' + str(m.start()) + \'_\' + str(m.end()) + \'_for\'\n+            gquad_list.append([chr, m.start(), m.end(), quad_id, len(m.group(0)), \'+\', matchstr])\n+        if args.noreverse is False:\n+            ref_seq= revcomp(ref_seq)\n+            seqlen= len(ref_seq)\n+            for m in re.finditer(psq_re_f, ref_seq):\n+                matchstr= trimMatch(revcomp(m.group(0)), args.maxstr)\n+                mstart= seqlen - m.end()\n+                mend= seqlen - m.start()\n+                quad_id= str(chr) + \'_\' + str(mstart) + \'_\' + str(mend) + \'_rev\'\n+                gquad_list.append([chr, mstart, mend, quad_id, len(m.group(0)), \'-\', matchstr])\n+        gquad_sorted= sort_table(gquad_list, (1,2,3))\n+        gquad_list= []\n+        for xline in gquad_sorted:\n+            xline= \'\\t\'.join([str(x) for x in xline])\n+            print(xline)\n+    chr= re.sub(\'^>\', \'\', line)\n+    ref_seq= []\n+    line= (ref_seq_fh.readline()).strip()\n+    if line == \'\':\n+        break\n+\n+#gquad_sorted= sort_table(gquad_list, (0,1,2,3))\n+#\n+#for line in gquad_sorted:\n+#    line= \'\\t\'.join([str(x) for x in line])\n+#    print(line)\n+sys.exit()\n'
b
diff -r 000000000000 -r 269c627ae9f4 fastaregexfinder.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastaregexfinder.xml Wed Jun 20 11:06:57 2018 -0400
[
@@ -0,0 +1,161 @@
+<tool id="fasta_regex_finder" name="fasta_regex_finder" version="0.1.0">
+    <description>
+        Search in fasta for regexp match
+    </description>
+    <requirements>
+    </requirements>
+    <version_command>python $__tool_directory__/fastaregexfinder.py --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+## Every substituted value is single-quoted so the shell passes it through
+## verbatim. The regex parameter's sanitizer maps single quotes to __sq__,
+## so the quoted regex cannot terminate its own quoting.
+python '$__tool_directory__/fastaregexfinder.py'
+--fasta '$input'
+--regex '$regex'
+#if $settings.advanced == "advanced"
+    $settings.matchcase
+    $settings.noreverse
+    --maxstr $settings.maxstr
+    ## Pass each whitespace-separated name as its own quoted argument and
+    ## skip the option entirely when the field is empty or blank.
+    #if str($settings.seqnames).strip() != ""
+        --seqnames
+        #for $seqname in str($settings.seqnames).split()
+            '$seqname'
+        #end for
+    #end if
+#end if
+--quiet
+> '$output'
+    ]]></command>
+    <inputs>
+        <param type="data" name="input" format="fasta" />
+        <param name="regex" size="30" type="text" value="([gG]{3,}\w{1,7}){3,}[gG]{3,}" label="Regular expression" help="(--regex)">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+                <mapping initial="none">
+                    <add source="&apos;" target="__sq__"/>
+                </mapping>
+            </sanitizer>
+        </param>
+        <conditional name="settings">
+            <param name="advanced" type="select" label="Specify advanced parameters">
+                <option value="simple" selected="true">No, use program defaults.</option>
+                <option value="advanced">Yes, see full parameter list.</option>
+            </param>
+            <when value="simple">
+            </when>
+            <when value="advanced">
+                <param name="matchcase" type="boolean" label="Match case" truevalue="--matchcase" falsevalue="" help="(--matchcase)" />
+                <param name="noreverse" type="boolean" label="Do not search the reverse complement" truevalue="--noreverse" falsevalue="" help="(--noreverse)" />
+                <param name="maxstr" type="integer" label="Maximum length of the match to report" value="10000" min="1" help="(--maxstr)" />
+                <param name="seqnames" size="30" type="text" value="" label="Space separated list of fasta sequences to search" help="--seqnames"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- The command redirects the script's stdout straight into $output,
+             so no from_work_dir lookup is needed. -->
+        <data name="output" format="bed" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="TestSeqGroup-G4.fasta"/>
+            <output name="output" file="TestSeqGroup-G4.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <output name="output" file="test-1.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <param name="advanced" value="advanced"/>
+            <param name="matchcase" value="--matchcase"/>
+            <output name="output" file="test-2.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <param name="advanced" value="advanced"/>
+            <param name="noreverse" value="--noreverse"/>
+            <output name="output" file="test-3.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <param name="advanced" value="advanced"/>
+            <param name="maxstr" value="3"/>
+            <output name="output" file="test-4.bed"/>
+        </test>
+        <test>
+            <param name="input" value="TestSeqGroup-G4.fasta"/>
+            <param name="advanced" value="advanced"/>
+            <param name="seqnames" value="HJ24-Shp2_oncogenicProtein2 HJ24-Shp2_oncogenicProtein"/>
+            <output name="output" file="TestSeqGroup-G4-sub.bed"/>
+        </test>
+</tests>
+    <help><![CDATA[
+DESCRIPTION
+    
+Search a fasta file for matches to a regular expression and return a bed file with the
+coordinates of the match and the matched sequence itself. 
+    
+Output bed file has columns:
+
+1. Name of fasta sequence (e.g. chromosome)
+2. Start of the match
+3. End of the match
+4. ID of the match
+5. Length of the match
+6. Strand 
+7. Matched sequence as it appears on the forward strand
+    
+For matches on the reverse strand, the start and end positions and the matched
+string are reported as they appear on the forward strand (so the G4 'GGGAGGGT'
+present on the reverse strand is reported as ACCCTCCC).
+    
+
+Note: Fasta sequences (chroms) are read in memory one at a time along with the
+matches for that chromosome.
+The order of the output is: chroms as they are found in the input fasta, matches
+sorted within chroms by positions.
+
+ARGUMENTS:
+
+- regex Regex to be searched in the fasta input. Matches to the reverse complement will have - strand. The default regex is '([gG]{3,}\w{1,7}){3,}[gG]{3,}' which searches for G-quadruplexes.
+- matchcase Match case while searching for matches. Default is to ignore case (i.e. 'ACTG' will match 'actg').
+- noreverse           Do not search the reverse complement of the input fasta. Use this flag to search protein sequences.                                   
+- maxstr Maximum length of the match to report in the 7th column of the output. Default is to report up to 10000nt. Truncated matches are reported as <ACTG...ACTG>[<maxstr>,<tot length>]
+- seqnames List of fasta sequences in the input to search. E.g. use --seqnames chr1 chr2 chrM to search only these chromosomes. Default is to search all the sequences in input.
+
+EXAMPLE:
+
+Test data::
+
+        >mychr
+        ACTGnACTGnACTGnTGAC
+
+Example1 regex=ACTG::
+
+        mychr 0 4 mychr_0_4_for 4 + ACTG
+        mychr 5 9 mychr_5_9_for 4 + ACTG
+        mychr 10 14 mychr_10_14_for 4 + ACTG
+
+Example2 regex=ACTG maxstr=3::
+
+        mychr 0 4 mychr_0_4_for 4 + ACT[3,4]
+        mychr 5 9 mychr_5_9_for 4 + ACT[3,4]
+        mychr 10 14 mychr_10_14_for 4 + ACT[3,4]
+    
+Example3 regex=A\w\wGn::
+
+        mychr 0 5 mychr_0_5_for 5 + ACTGn
+        mychr 5 10 mychr_5_10_for 5 + ACTGn
+        mychr 10 15 mychr_10_15_for 5 + ACTGn
+
+   ]]></help>
+    <citations>
+        <citation type="bibtex">
+@misc{githubfastaRegexFinder,
+  author = {Dario Beraldi},
+  year = {2017},
+  title = {fastaRegexFinder},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/dariober/bioinformatics-cafe/tree/master/fastaRegexFinder},
+}</citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 269c627ae9f4 test-data/TestSeqGroup-G4-sub.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TestSeqGroup-G4-sub.bed Wed Jun 20 11:06:57 2018 -0400
b
@@ -0,0 +1,2 @@
+HJ24-Shp2_oncogenicProtein 17 58 HJ24-Shp2_oncogenicProtein_17_58_for 41 + GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG
+HJ24-Shp2_oncogenicProtein2 17 58 HJ24-Shp2_oncogenicProtein2_17_58_for 41 + GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG
b
diff -r 000000000000 -r 269c627ae9f4 test-data/TestSeqGroup-G4.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TestSeqGroup-G4.bed Wed Jun 20 11:06:57 2018 -0400
b
@@ -0,0 +1,3 @@
+HJ24-Shp2_oncogenicProtein 17 58 HJ24-Shp2_oncogenicProtein_17_58_for 41 + GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG
+HJ24-Shp2_oncogenicProtein2 17 58 HJ24-Shp2_oncogenicProtein2_17_58_for 41 + GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG
+HJ24-Shp2_oncogenicProtein3 17 58 HJ24-Shp2_oncogenicProtein3_17_58_for 41 + GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG
b
diff -r 000000000000 -r 269c627ae9f4 test-data/TestSeqGroup-G4.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TestSeqGroup-G4.fasta Wed Jun 20 11:06:57 2018 -0400
b
@@ -0,0 +1,56 @@
+>PA#2/8
+ATACCAGCTTATTCAATTAGCAACATGAGGGGGATAGAGGGGGTGGGTTCTCTCGGCTACAATCGTAATCAGTTAG
+>PA#14/89
+ATACCAGCTTATTCAATTGGGCACCACGGGAGTCGGCCACATTTGGAGTTGTTTTTGCACAATCGTAATCAGTTAG
+>PA-C10
+ATACCAGCTTATTCAATTGCGCACCACGGGAGTTGGCCACATTTGGAGTTGTTTTTGCACAATCGTAATCAGTTAG
+>PA#4/22
+ATACCAGCTTATTCAATTGCAGTACTGATGAGTGTAGCCGTATGATTATCGTTTGTGGACAATCGTAATCAGTTAG
+>PA#4/34
+ATACCAGCTTATTCAATTCCCCAACGAGTCGATATGTAGCCCACACTCTGATTCGTCCACAATCGTAATCAGTTAG
+>PA#2/11
+ATACCAGCTTATTCAATTGGAGACGACAAACTATTACGTACTACGGCATGCACTTGGTACAATCGTAATCAGTTAG
+>PA-C8
+ATACCAGCTTATTCAATTAGGCCAGATGAGGGGTGCCCATGGCGGGGTGGCTGCTCCAACAATCGTAATCAGTTAG
+>PA#14/82
+ATACCAGCTTATTCAATTCCACAACCGAACTCGTAAGACGTATGTAGCCGCCAACTGTACAATCGTAATCAGTTAG
+>PA#2/3
+ATACCAGCTTATTCAATTCGACAAGTGGGCATTACGATTCTAGCCCTGATTATGTTCCACAATCGTAATCAGTTAG
+>PA-C9
+ATACCAGCTTATTCAATTACCGAGGAGATAACGTTGTAGCCGTCCATCATCTGATTCGACAATCGTAATCAGTTAG
+>PA#2/6
+ATACCAGCTTATTCAATTACCGATCACTAGCCGACTAATTGGTTTCCGATCGCAGTCCACAATCGTAATCAGTTAG
+>PA-C11
+ATACCAGCTTATTCAATTCGATGGAGCTGATGATTGTTGCCGATCTGACTGTTGTTCCACAATCGTAATCAGTTAG
+>PA-C13
+ATACCAGCTTATTCAATTCCCCTAACGTTACTGGATGTAGTCCGACTAACTTATGCGTACAATCGTAATCAGTTAG
+>PA-C42
+ATACCAGCTTATTCAATTGCAGATTACGCCTTGTAGCCCGCACTGATCTCGATGTTTGGACAATCGTAATCAGTTAG
+>PA-C16
+ATACCAGCTTATTCAATTCCCACGAGTGTAGCCGATTCTTCTGTACTCTTGTCCTCGTACAATCGTAATCAGTTAG
+>PA-C15
+ATACCAGCTTATTCAATTACGTGTTGTAGCCGACCCCTGTTGATTGTTTTCCTGTACCACAATCGTAATCAGTTAG
+>EA#14.3
+ATACCAGCTTATTCAATTTGAGGCGGGTGGGTGGGTTGAATACGCTGATTACCCCATCGGAGAACGTTAAGGCGCTTCAGATAGTAAGTGCAATCT 
+>EA#9.4
+ATACCAGCTTATTCAATTGCTGCGAGGTGGGTGGGTGGGAGCAATTGATCCTCGCTTAGCTTCTACGGTGGGCTATCTAGATAGTAAGTGCAATCT
+>EA#5.10
+ATACCAGCTTATTCAATTCACCACACCTGCACCCCTGACTTCCCACTTATATCTACTACTCCGTCTCAAGCCCGTTTGAGATAGTAAGTGCAATCT
+>EA#14.5
+ATACCAGCTTATTCAATTCCGAGTTTGGGTGGGAGTGGTGGGTTCGGAATTGTTAGTTATTTGGGTTTATGCGAGGTGAGATAGTAAGTGCAATCT
+>EA#14.8
+ATACCAGCTTATTCAATTGACGGGGTGTTGTCGTATGCTGTAGAAGCCGTAATTTTTTTTGTTTTCCCTGCCCACCTAGATAGTAAGTGCAATCT
+>EA#14.4
+ATACCAGCTTATTCAATTCCACAGGTTGTATGGGGAATAAGGTGGGTGCGCGAGATAGTAAGTGCAATCT
+>EA#11.5
+ATACCAGCTTATTCAATTCCCACACCCTAACCGTAGAGCTAAGCTTTTCTTACTACTGACAGTGCTTTACCGTTTGCAAGATAGTAAGTGCAATCT
+>HJ24-Shp2_oncogenicProtein
+AGCGTCGAATACCACACGGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGGCTAATGGAGCTCGTGGTCAT
+>HJ24-Shp2_oncogenicProtein2
+AGCGTCGAATACCACACGGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGGCTAATGGAGCTCGTGGTCAT
+>HJ24-Shp2_oncogenicProtein3
+AGCGTCGAATACCACACGGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGGCTAATGGAGCTCGTGGTCAT
+>RT6-HIVRT
+ATCCGCCTGATTAGCGATACTCAGGCGTTAGGGAAGGGCGTCGAAAGCAGGGTGGGACTTGAGCAAAATCACCTGCAGGGG
+>AptG4-HumanRNaseH1
+CGGTCGCTCCGTGTGGCTTGGGTTGGGTGTGGCAGTGAC
b
diff -r 000000000000 -r 269c627ae9f4 test-data/test-1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-1.bed Wed Jun 20 11:06:57 2018 -0400
b
@@ -0,0 +1,3 @@
+mychr 0 4 mychr_0_4_for 4 + ACTG
+mychr 5 9 mychr_5_9_for 4 + actg
+mychr 10 14 mychr_10_14_rev 4 - CAGT
b
diff -r 000000000000 -r 269c627ae9f4 test-data/test-2.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-2.bed Wed Jun 20 11:06:57 2018 -0400
b
@@ -0,0 +1,2 @@
+mychr 0 4 mychr_0_4_for 4 + ACTG
+mychr 10 14 mychr_10_14_rev 4 - CAGT
b
diff -r 000000000000 -r 269c627ae9f4 test-data/test-3.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-3.bed Wed Jun 20 11:06:57 2018 -0400
b
@@ -0,0 +1,2 @@
+mychr 0 4 mychr_0_4_for 4 + ACTG
+mychr 5 9 mychr_5_9_for 4 + actg
b
diff -r 000000000000 -r 269c627ae9f4 test-data/test-4.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-4.bed Wed Jun 20 11:06:57 2018 -0400
[
@@ -0,0 +1,3 @@
+mychr 0 4 mychr_0_4_for 4 + ACT[3,4]
+mychr 5 9 mychr_5_9_for 4 + act[3,4]
+mychr 10 14 mychr_10_14_rev 4 - CAG[3,4]
b
diff -r 000000000000 -r 269c627ae9f4 test-data/test.fas
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.fas Wed Jun 20 11:06:57 2018 -0400
b
@@ -0,0 +1,2 @@
+>mychr
+ACTGnactgnCAGTnTGAC