Repository 'quality_filter'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/quality_filter

Changeset 0:8d65bbc52dfe (2014-04-01)
Next changeset 1:64f5401327e8 (2014-10-30)
Commit message:
Imported from capsule None
added:
quality_filter.py
quality_filter.xml
test-data/6.maf
test-data/6_quality_filter.maf
tool_dependencies.xml
b
diff -r 000000000000 -r 8d65bbc52dfe quality_filter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/quality_filter.py Tue Apr 01 10:52:42 2014 -0400
[
b'@@ -0,0 +1,238 @@\n+#!/usr/bin/env python\n+#Guruprasad Ananda\n+"""\n+Filter based on nucleotide quality (PHRED score).\n+\n+usage: %prog input out_file primary_species mask_species score mask_char mask_region mask_region_length\n+"""\n+\n+\n+from __future__ import division\n+from galaxy import eggs\n+import pkg_resources\n+pkg_resources.require( "lrucache" )\n+import numpy\n+\n+import sys\n+import os, os.path\n+from UserDict import DictMixin\n+from bx.binned_array import FileBinnedArray\n+from bx.bitset import *\n+from bx.bitset_builders import *\n+from bx.cookbook import doc_optparse\n+from galaxy.tools.exception_handling import *\n+import bx.align.maf\n+\n+class FileBinnedArrayDir( DictMixin ):\n+    """\n+    Adapter that makes a directory of FileBinnedArray files look like\n+    a regular dict of BinnedArray objects.\n+    """\n+    def __init__( self, dir ):\n+        self.dir = dir\n+        self.cache = dict()\n+    def __getitem__( self, key ):\n+        value = None\n+        if key in self.cache:\n+            value = self.cache[key]\n+        else:\n+            fname = os.path.join( self.dir, "%s.qa.bqv" % key )\n+            if os.path.exists( fname ):\n+                value = FileBinnedArray( open( fname ) )\n+                self.cache[key] = value\n+        if value is None:\n+            raise KeyError( "File does not exist: " + fname )\n+        return value\n+\n+def stop_err(msg):\n+    sys.stderr.write(msg)\n+    sys.exit()\n+\n+def load_scores_ba_dir( dir ):\n+    """\n+    Return a dict-like object (keyed by chromosome) that returns\n+    FileBinnedArray objects created from "key.ba" files in `dir`\n+    """\n+    return FileBinnedArrayDir( dir )\n+\n+def bitwise_and ( string1, string2, maskch ):\n+    result = []\n+    for i, ch in enumerate(string1):\n+        try:\n+            ch = int(ch)\n+        except:\n+            pass\n+        if string2[i] == \'-\':\n+            ch = 1\n+        if ch and string2[i]:\n+            
result.append(string2[i])\n+        else:\n+            result.append(maskch)\n+    return \'\'.join(result)\n+\n+def main():\n+    # Parsing Command Line here\n+    options, args = doc_optparse.parse( __doc__ )\n+    \n+    try:\n+        #chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )\n+        inp_file, out_file, pri_species, mask_species, qual_cutoff, mask_chr, mask_region, mask_length, loc_file = args\n+        qual_cutoff = int(qual_cutoff)\n+        mask_chr = int(mask_chr)\n+        mask_region = int(mask_region)\n+        if mask_region != 3:\n+            mask_length = int(mask_length)\n+        else:\n+            mask_length_r = int(mask_length.split(\',\')[0])\n+            mask_length_l = int(mask_length.split(\',\')[1])\n+    except:\n+        stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )\n+    \n+    if pri_species == \'None\':\n+        stop_err( "No primary species selected, try again by selecting at least one primary species." )\n+    if mask_species == \'None\':\n+        stop_err( "No mask species selected, try again by selecting at least one species to mask." 
)\n+\n+    mask_chr_count = 0\n+    mask_chr_dict = {0:\'#\', 1:\'$\', 2:\'^\', 3:\'*\', 4:\'?\', 5:\'N\'}\n+    mask_reg_dict = {0:\'Current pos\', 1:\'Current+Downstream\', 2:\'Current+Upstream\', 3:\'Current+Both sides\'}\n+\n+    #ensure dbkey is present in the twobit loc file\n+    try:\n+        pspecies_all = pri_species.split(\',\')\n+        pspecies_all2 = pri_species.split(\',\')\n+        pspecies = []\n+        filepaths = []\n+        for line in open(loc_file):\n+            if pspecies_all2 == []:\n+                break\n+            if line[0:1] == "#":\n+                continue\n+            fields = line.split(\'\\t\')\n+            try:\n+                build = fields[0]\n+                for i, dbkey in enumerate(pspecies_all2):\n+                    if dbkey == build:\n+                        pspecies.append(build)\n+                        filepaths.append(fields[1])\n+                        del pspecies_all2[i]\n+  '..b'ex(dbkey)\n+                sequence = block.components[seq].text\n+                s_start = block.components[seq].start\n+                size = len(sequence)    #this includes the gaps too\n+                status_str = \'1\'*size\n+                status_list = list(status_str)\n+                if status_strings == []:\n+                    status_strings.append(status_str)\n+                ind = 0\n+                s_end = block.components[seq].end\n+                #Get scores for the entire sequence\n+                try:\n+                    scores = scores_by_chrom[index][chr][s_start:s_end]\n+                except:\n+                    continue\n+                pos = 0\n+                while pos < (s_end-s_start):\n+                    if sequence[ind] == \'-\':  #No score for GAPS\n+                        ind += 1\n+                        continue\n+                    score = scores[pos]\n+                    if score < qual_cutoff:\n+                        score = 0\n+                        \n+    
                if not(score):\n+                        if mask_region == 0:    #Mask Corresponding position only\n+                            status_list[ind] = \'0\'\n+                            ind += 1\n+                            pos += 1\n+                        elif mask_region == 1:    #Mask Corresponding position + downstream neighbors\n+                            for n in range(mask_length+1):\n+                                try:\n+                                    status_list[ind+n] = \'0\'\n+                                except:\n+                                    pass\n+                            ind = ind + mask_length + 1\n+                            pos = pos + mask_length + 1\n+                        elif mask_region == 2:    #Mask Corresponding position + upstream neighbors\n+                            for n in range(mask_length+1):\n+                                try:\n+                                    status_list[ind-n] = \'0\'\n+                                except:\n+                                    pass\n+                            ind += 1\n+                            pos += 1\n+                        elif mask_region == 3:    #Mask Corresponding position + neighbors on both sides\n+                            for n in range(-mask_length_l, mask_length_r+1):\n+                                try:\n+                                    status_list[ind+n] = \'0\'\n+                                except:\n+                                    pass\n+                            ind = ind + mask_length_r + 1\n+                            pos = pos + mask_length_r + 1\n+                    else:\n+                        pos += 1\n+                        ind += 1\n+                    \n+                status_strings.append(\'\'.join(status_list))\n+        \n+        if status_strings == []:    #this block has no primary species\n+            continue\n+        output_status_str = status_strings[0]\n+        for 
stat in status_strings[1:]:\n+            try:\n+                output_status_str = bitwise_and (status_strings[0], stat, \'0\')\n+            except Exception, e:\n+                break\n+            \n+        for seq in range (len(block.components)):\n+            src = block.components[seq].src\n+            dbkey = src.split(\'.\')[0]\n+            if dbkey not in mask_species.split(\',\'):\n+                continue\n+            sequence = block.components[seq].text\n+            sequence = bitwise_and (output_status_str, sequence, mask_chr_dict[mask_chr])\n+            block.components[seq].text = sequence\n+            mask_chr_count += output_status_str.count(\'0\')\n+        maf_writer.write(block)\n+        maf_count += 1\n+        \n+    maf_reader.close()\n+    maf_writer.close()\n+    print "No. of blocks = %d; No. of masked nucleotides = %s; Mask character = %s; Mask region = %s; Cutoff used = %d" % (maf_count, mask_chr_count, mask_chr_dict[mask_chr], mask_reg_dict[mask_region], qual_cutoff)\n+\n+\n+if __name__ == "__main__":\n+    main()\n'
b
diff -r 000000000000 -r 8d65bbc52dfe quality_filter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/quality_filter.xml Tue Apr 01 10:52:42 2014 -0400
b
@@ -0,0 +1,119 @@
+<tool id="qualityFilter" name="Filter nucleotides" version="1.0.1">
+  <description> based on quality scores</description>
+  <requirements>
+    <requirement type="package" version="0.7.1">bx-python</requirement>
+    <requirement type="package" version="1.7.1">numpy</requirement>
+  </requirements>
+  <command interpreter="python">
+   quality_filter.py 
+   $input 
+   $out_file1 
+   $primary_species 
+   $mask_species 
+   $score 
+   $mask_char 
+   ${mask_region.region} 
+   #if $mask_region.region == "3"
+   ${mask_region.lengthr},${mask_region.lengthl}
+   #elif $mask_region.region == "0"
+   1
+   #else
+   ${mask_region.length}
+ #end if
+   ${GALAXY_DATA_INDEX_DIR}/quality_scores.loc
+  </command>
+  <inputs>
+    <param format="maf" name="input" type="data" label="Select data"/>
+    <param name="primary_species" type="select" label="Use quality scores of" display="checkboxes" multiple="true">
+      <options>
+        <filter type="data_meta" ref="input" key="species" />
+      </options>  
+    </param>
+ <param name="mask_species" type="select" label="Mask Species" display="checkboxes" multiple="true">
+      <options>
+        <filter type="data_meta" ref="input" key="species" />
+      </options>  
+ </param>
+ <param name="score" size="10" type="integer" value="20" label="Quality score cut-off" help="Cut-off value of 20 means mask all nucleotides having quality score less than 20"/>
+ <param name="mask_char" size="5" type="select" label="Mask character">
+      <option value="0" selected="true">#</option>
+      <option value="1">$</option>
+      <option value="2">^</option>
+      <option value="3">*</option>
+      <option value="4">?</option>
+      <option value="5">N</option>
+    </param>
+ <conditional name="mask_region">
+      <param name="region" type="select" label="Mask region">
+        <option value="0" selected="true">Only the corresponding nucleotide </option>
+        <option value="1">Corresponding column + right-side neighbors</option>
+        <option value="2">Corresponding column + left-side neighbors</option>
+        <option value="3">Corresponding column + neighbors on both sides</option>
+      </param>
+      <when value="0">
+      </when>
+      <when value="1">
+        <param name="length" size="10" type="integer" value="2" label="Number of right-side neighbors"/>
+      </when>
+      <when value="2">
+        <param name="length" size="10" type="integer" value="2" label="Number of left-side neighbors"/>
+      </when>
+      <when value="3">
+        <param name="lengthr" size="10" type="integer" value="2" label="Number of neighbors on right-side" />
+        <param name="lengthl" size="10" type="integer" value="2" label="Number of neighbors on left-side" />
+      </when>
+    </conditional>
+  </inputs>
+  <outputs>
+    <data format="maf" name="out_file1" metadata_source="input"/>
+  </outputs>
+  <requirements>
+    <requirement type="python-module">numpy</requirement>
+  </requirements>
+  <tests>
+    <test>
+      <param name="input" value="6.maf"/>
+      <param name="primary_species" value="panTro2"/>
+      <param name="mask_species" value="hg18"/>
+      <param name="score" value="50"/>
+      <param name="mask_char" value="0"/>
+      <param name="region" value="0" />
+      <output name="out_file1" file="6_quality_filter.maf"/>
+    </test>
+  </tests>
+ <help> 
+
+.. class:: infomark
+
+**What it does**
+
+This tool takes a MAF file as input and filters nucleotides in every alignment block of the MAF file based on their quality/PHRED scores. 
+
+-----
+
+.. class:: warningmark
+
+**Note**
+
+Any blocks not containing the primary species (the species whose quality scores are to be used) will be omitted. 
+Also, any primary species whose quality scores are not available in Galaxy will be considered as a non-primary species. This info will appear as a message in the job history panel. 
+
+-----
+
+**Example**
+
+- For the following alignment block::
+
+   a score=4050.0
+   s hg18.chrX    3719221 48 - 154913754 tattttacatttaaaataaatatgtaaatatatattttatatttaaaa 
+   s panTro2.chrX 3560945 48 - 155361357 tattttatatttaaaataaagatgtaaatatatattttatatttaaaa 
+
+- running this tool with **Primary species as panTro2**, **Mask species as hg18, panTro2**, **Quality cutoff as 20**, **Mask character as #** and **Mask region as only the corresponding position** will return::
+
+   a score=4050.0
+   s hg18.chrX    3719221 48 - 154913754 ###tttac#####a###a#atatgtaaat###tattt#####ttaaaa 
+   s panTro2.chrX 3560945 48 - 155361357 ###tttat#####a###a#agatgtaaat###tattt#####ttaaaa 
+   
+   where the positions containing # represent panTro2 nucleotides having quality scores less than 20.
+  </help>  
+</tool>
b
diff -r 000000000000 -r 8d65bbc52dfe test-data/6.maf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/6.maf Tue Apr 01 10:52:42 2014 -0400
b
b'@@ -0,0 +1,57 @@\n+##maf version=1\n+a score=356676.0\n+s hg18.chr1      2817 1438 + 247249719 GTCATCCCCTTCACTCCCAGCTCAGAGCCCAGGCCAGGGGCCCCCAAGAAAGGCTCTGGTGGAGAACCTGTGCATGAAGGCTGTCAACCAGTCCATAGGCAAGCCTGGCTGCCTCCAGCTGGGTCGACAGACAGGGGCTGGAGAAGGGGAGAAGAGGAAAGTGAGGTTGCCTGCCCTGTCTCCTACCTGAGGCTGAGGAAGGAGAAGGGGATGCACTGTTGGGGAGGCAGCTGTAACTCAAAGCCTTAGCCTCTGTTCCCACGAAGGCAGGGCCATCAGGCACCAAAGGGATTCTGCCAGCATAGTGCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGACACGCTGTTGGCCTGGATCTGAGCCCTGGTGGAGGTCAAAGCCACCTTTGGTTCTGCCATTGCTGCTGTGTGGAAGTTCACTCCTGCCTTTTCCTTTCCCTAGAGCCTCCACCACCCCGAGATCACATTTCTCACTGCCTTTTGTCTGCCCAGTTTCACCAGAAGTAGGCCTCTTCCTGACAGGCAGCTGCACCACTGCCTGGCGCTGTGCCCTTCCTTTGCTCTGCCCGCTGGAGACGGTGTTTGTCATGGGCCTGGTCTGCAGGGATCCTGCTACAAAGGTGAAACCCAGGAGAGTGTGGAGTCCAGAGTGTTGCCAGGACCCAGGCACAGGCATTAGTGCCCGTTGGAGAAAACAGGGGAATCCCGAAGAAATGGTGGGTCCTGGCCATCCGTGAGATCTTCCCAGGGCAGCTCCCCTCTGTGGAATCCAATCTGTCTTCCATCCTGCGTGGCCGAGGGCCAGGCTTCTCACTGGGCCTCTGCAGGAGGCTGCCATT--TGTCCTGCCCACCTTCTTAGAAGCGAGACGGAGCAGACCCATCTGCTACTGCCCTTTCTATAATAACTAAAGTTAGCTGCCCTGGACTATTCACCCCCTAGTCTCAATTTAAGAAGATCCCCATGGCCACAGGGCCCCTGCCTGGGGGCTTGTCACCTCCCCCACCTTCTTCCTGAGTCATTCCTGCAGCCTTGCTCCCTAACCTGCCCCACAGCCTTGCCTGGATTTCTATCTCCCTGGCTTGGTGCCAGTTCCTCCAAGTCGATGGCACCTCCCTCCCTCTCAACCACTTGAGCAAACTCCAAGACATCTTCTACCCCAACACCAGCAATTGTGCCAAGGGCCATTAGGCTCTCAGCATGACTATTTTTAGAGACCCCGTGTCTGTCACTGAAACCTTTTTTGTGGGAGACTATTCCTCCCATCTGCAACAGCTGCCCCTGCTGACTGCCCTTCTCTCCTCCCTCTCATCCCAGAGAAACAGGTCAGCTGGGAGCTTCTGCCCCCACTGCCTAGGGACCAACAGGGGCAGGAGGCAGTCACTGACCCCGAGACGTTTGCATCCTGCACAGCTAGAGATCCTTTATTAAAAG \n+s panTro2.chr15 16051 1440 - 100063422 
GTCATCCCCTTCACTCCCAGCTCTGAGCCCAGGCCAGGGGCCCCCAAGAAAGGCTCTGGTGGAGAACCTGTGCATGAAGGCTGTCAACCAGTCCATAGGCAAGCCTGGCTGCCTCCAGCTGGGTGGACAGACAGGCGCTGGAGAAGGGGAGAAGAGGAAAGGGGGGCTGCCTGCCCTGTCTCCCACCTGAGGCTGAGGAAGGAGAGGGGGATGCACTGTTGGGGAGGCAGCTGTAACTCAAAGCCCTAGCCTCTGTTCCCACGAAGGCAGGGCCATCAGGCACCAAAGGGATTCTGCCAGCATAATGCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGACAAGCTGTTGGCCTGGATCTGAGCCCTCGTGGAGCTCAAAGCCACCTTTGGTTCTGCCATTGCTGCTGTGTGGAAGTTCAGTCCTGCCTTTTCCTTTCCCGAGAGGCTCCACCACCCCGAGATCGCACTTCTCACTGCCTTTTGTCTGCCCAGTTTCACCAGAAGTAGGCCTCTTCCTGACAGGCAGCTGCACCACTGCCTGGCGCTGCGCCCTTCCTTTGCTCTGCCCGCTGGAGACGGTGTTTGTCATGGGCCTGGTCTGCAGGGATCCTGCTACAAAGGTGAAACCCAGGAGAGTGTGGAGTCCAGAGTGTTGCCAGGACCCAGGCACAGGCATTAGTGCCCGTTGGAGAAAACAGGGGAATCCCAAAGAAATGGTGAGTCCTGGCCATCCGTGAGATCTTCCCAGGGCAGCTCCCCTCTGTGGAATCCAATCTGTCTTCCATCCTGCGTGGCAGAGGGCCAGGCTTCTCACTGGGCCTCTGCAGGAGGCTGCCATTTCTGTCCTGCCCACCTTCTAAGAAGCGAGACGGAGGACACCCATCTGCTTCTGCCCTTTCCATAATAACTAAAGTTAGCTGCCCTGGACTATTCACCCCCTAGTCTCTATTTAAAAAGATCCCCATGGCCACAGGGCCCCTGCCTGGGGGCTTGTCACCTCCCCCACCTTCTTCCTGAGTCACTTCTGCAGCCTTGCTCCCTAACCTGCCCCACAGCCTTGCCTGGATTTCTATCTCCCTGGCTTGGTGCCAGTTCCTCCAAGTCGATGGCACCTCCCTCCCTCTCAACTACTTGAGCAAACTTGAAGACATCTTCAACCCCAACACCAGCAATTGTGCCAAGGGCCATTAGGCTCTCAGCATGACTATTTTTAGAGACCCCGTGTCTGTCACTGAAACCTTTTTCGTGGGAGACTATTCCTCCCATCTGCAACAGCTGCCCCTGCTGACTGCCCTTCTCTCCTCCCTCTCATCCCAGAGAAACAGGTCAGCTGGGAGCTTCTGCCCCCACTGCCTAGGCACCAACAGGGGCAGGAGGCAGTCACTGACCCCGAGACGTTTGCATCCTGCACAGCTAGAGGTCCTTAATTAAAAG \n+\n+a score=33380.0\n+s hg18.chr1      4393 135 + 247249719 GTGTGGCCTCAAGCCAGCCTTCCGCTCCTTGAAGCTGGTCTCCACACAGTGCTGGTTCCGTCACCCCCTCCCAAGGAAGTAGGTCTGAGCAGCTTGTCCTGGCTGTGTCCATGTCAGAGCAACGGCCCAAGTCTG \n+s panTro2.chr15 17634 135 - 100063422 GTGTGGCCTCAAGCCAGCCTTCCGCTCCTTGAAGCTGGTCTCCACACAGTGCTGGTTCCGTCACCCCCTCCCAGGGAAGCAGGTCTCAGCAGCTTGTCCTGGCTGTGTCCGTGTCAGAGCAACGGCCCAAGTCTG \n+\n+a score=364526.0\n+s hg18.chr1      4528 1436 + 247249719 
GGTCTGGGGGGGAAGGTGTCATGGAGCCCCCTACGATTCCCAGTCGTCCTCGTCCTCCTCTGCCTGTGGCTGCTGCGGTGGCGGCAGAGGAGGGATGGAGTCTGACACGCGGGCAAAGGCTCCTCCGGGCCCCTCACCAGCCCCAGGTCCTTTCCCAGAGATGCCTGGAGGGAAAAGGCTGAGTGAGGGTGGTTGGTGGGAAACCCTGGTTCCCCCAGCCCCCGGAGACTTAAATACAGGAAGAAAAAGGCAGGACAGAATTACAAGGTGCTGGCCCAGGGCGGGCAGCGGCCCTGCCTCCTACCCTTGCGCCTCATGACCAGCTTGTTGAAGAGATCCGACATCAAGTGCCCACCTTGGCTCGTGGCTCTCACTGCAACGGGAAAGCCACAGACTGGGGTGAAGAGTTCAGTCACATGCGACCGGTGACTCCCTGTCCCCACCCCCATGACACTCCCCAGCCCTCCAAGGCCACTGTGTTTCCCAGTTAGCTCAGAGCCTCAGTCGATCCCTGACCCAGCACCGGGCACTGATGAGACAGCGGCTGTTTGA'..b'ttctctcctctgtcttttcccaccaagtgaggatgcgaagagaaggtggctgtctgcaaaccaggaagagagccctcaccgggaacccgtccagctgccaccttgaacttggacttccaagcctccagaactgtgagggataaatgtatgattttaaagtcgcccagtgtgtggtattttgttttgactaatacaaCCTGAAAACATTTTCCCCTCACTCCACCTGAGCAATATCTGAGTGGCTTAAGGTACTCAGGACACAACAAAGGAGAAATGTCCCATGCACAAGGTGCACCCATGCCTGGGTAAAGCAGCCTGGCACAGAGGGAAGCACACAGGCTCAGggatctgctattcattctttgtgtgaccctgggcaagccatgaatggagcttcagtcaccccatttgtaatgggatttaattgtgcttgccctgcctccttttgagggctgtagagaaaagatgtcaaagtattttgtaATCTggctgggcgtggtggctcatgcctgtaatcctagcactttggtaggctgacgcgagaggactgcttgagcccaagagtttgagatcagcctgggcaatattgtgagattccatctctacaaaaataaaataaaatagccagtcatggtgtcacacacctgtagtcccagctacatgggaggctgaggcgggaggatcacttgagcttgggagatcgaggctgcagtgagctatgattgtaccactgcactccaggctgggcgacagagagagaccctgtctcagaaaaaaaaaaaaaaGTACTTTGTAATCTGTAAGGTTTATTTCAACACACACAAAAAAAGTGTATATGCTCCACGATGCCTGTGAATATACACACACACCACATCATATACCAAGCCTGGCTGTGTCTTCTCACAAATGCACTGCTAGGCACCACCC \n+s panTro2.chrUn 57103829 1814 -  58616431 
tgtgttcaatgggtagagtttcaggctggggtgatggaagggtgctggaaatgagtggtagcgatggcggcgcaacagtgtgaatctacttaatcccactgaactgtatgcttaaaaatggtttagacggtgaattttaggttatgtatgttttaccacaatttttaaaaaGCTAGTGAAAAGCTGGTAAAAAGAAAGAAAAGAGGCTTTTTTAAAAAGTTAAATATATAAAAAGAGCATCATCAGTCCAAAGTCCAGCAGTTGTCCCTCCTGGAATCCGTTGGCTTGCCTCCGGCATTTTTGGCCCTTGCCTTTtagggttgccagattaaaatacaggatgcccagttagtttgaattttagataaacaacgaataatttcgtagcataaatatgtcccaagcttagtttgggacatacttatgctaaaaaacattattggttgtttatctgagattcaaaattaagcattttatattttatttgctgcctctggccaccctaCTCTCTTCCTGACACTCTCTCCCTCTCCCAGTTTTGTCCGCCTTCCTTGCCTCCTCTTCTGGGGGAGTTAGATCGAGTTGTAACAAGAACATGCCACTGTCTCACTGGCTGCAGCGTGTGGTCCCCTTACCAGAGGTAAGGAAGAGATGGATCTCCACTCAtgttgtagacagaatgtttatgtcctctccaaattcttatgttgaaaccctaacccctaatgtgatggtatgtggagatgggcctttggtaggtaattacggttagatgaggtcatggggtggggccctcattatagatctggtaagaaaagagagcattgtctctctgtctccctctc-----------------------tctctctatgtcatttctctctct----ctatctca--tttctctctctctctctctctcctctgtcttttcccaccaagtgaggatgcgaagagaaggtggctgtctgcaaaccaggaagagagccctcaccaggaacccgtccagctgccaccttgaacttggacttccaagcctccagaactgtgagggataaatgtatgattttaaagtcgcccagtgtgtggtattttgttttgactaatacaaCCTGAAAACATTTTCCCCTCACTCCACCTGAGGAATATCTGAGTGGCTTAATGTACTCAGGACACAACAA---AGAAATGTCCCATGCACAAGGTGCACCCATGCCtgggtaaagcagcctggcacagagggaagcacacaggctcagggctctgctattcattctttgtgtgaccctgggcaagccatgaatggagcttcagtcaccccatttgtaatgggatttaattgagcttgccctgcctccttctgagggctgtagagaaaagatgtcaaagtattttgtaatctggctgggcgtggtggctcatgcctgtaatcccagcactttggtaggctgacgcgagaggactgcttgagcccaagagtttgagatcagcctgggcaatattgtgagattccatctctacaaaaataaaataaaatagccagtcatggtgtcacacacctgtagtcccagctacatgggaggctgaggcgggaggatcacttgagcttgggagatcgaggctgcagtgagctatgattgtaccactgcactccaggctgggcgacagagagagaccctgtctcag--aaaaaagaaaaagtactttgtaatctgtaaggtttatttcaacacACACAAAAAAAGTGTATATGCTCCACGATGCCTGTGAATATACACACACACCACATCATATACCAAGCCTGGCTGTGTCTTCTCACAAATGCGCTGCTGGGCACCACCC \n+\n+a score=97796.0\n+s hg18.chr1        21738 409 + 247249719 
CCAGTTCTAGAATCACACCAGCCAGTTCACCCTCCAGATGGTTCACCCTCAACTTCATAAAAGTTCCCTACCTAATCTACT---GACAGGCTCATCCCCGACCTAATTTTAAAGATTTCCTAGGA----------------------------------GCTGCAGTGGGAATCCTGGACCTCAGCCTGGACAAAGAACAGCTGCAGGTCATTCTCATGTGTGGACACAGAAGCTCTGCCTGCCTTTGCTGGCCAGCTGGGCTGAGCGGGCCTGGGAA-TTAAGGCTGCAGGGTTGGTCCCAGGCAGTCTTGCTGAAGCTTGCCACATCCCCCAGCCTCCTGGATTTGCCAGGATCCAAGAGCATGGACTTTAGGAATTCCTGGTGGAGGAGTGAAGAAAATGTGACAGGGTGTCCTAAGCCCCGATCTACAGGAAGAAAACTGGAA \n+s panTro2.chrUn 57107006 446 -  58616431 CCAGTTCTAGAACCAGACCAGACAGTTCACCCTCCAGACGGTTCACCCTCAACTGTATAAAAGTTCTCTACCTAATCTACTACTGACAGACTCATCTCAGACCTAATTTTAAAGATTTCCTAGGAACTGAATTGCAAAAACTTTTACCCTCACCCTAGAGCTGCAATGGGAATCCTG-ACCTCAGCGTAGACAAAGAAAAGCTGCAGGTCATTCTCATGTGTGGACACGGAAGCCCTGCCTGCCTTTGCTGGCCAGCTGGGCTGAGTGGGCCTGGGAAATTAAGGCTGCAGGGTTGGTCCCAGGCAGTCTTGCTGAAGCTTGCCACATCCCCCAGCCTCCTGGATTTGCCAGGATCCAAGAGCATGGACTTTAGGAATTCCTGGTGGAGGAGTGAAGAAAATGTGACAGGGTGTCCTAGGCCCCAATCTACAGGAAGAAAACTGGAA \n+\n+a score=7233.0\n+s hg18.chr1        22147 35 + 247249719 ATAAGACTGAGGACTTAGTTTAAGATGTTCCTACT \n+s panTro2.chrUn 57107452 35 -  58616431 ATAAGACTGAGGACTTAGTTTAAGATGTTCCTACT \n+\n'
b
diff -r 000000000000 -r 8d65bbc52dfe test-data/6_quality_filter.maf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/6_quality_filter.maf Tue Apr 01 10:52:42 2014 -0400
b
b'@@ -0,0 +1,57 @@\n+##maf version=1\n+a score=356676.0\n+s hg18.chr1      2817 1438 + 247249719  \n+s panTro2.chr15 16051 1440 - 100063422 GTCATCCCCTTCACTCCCAGCTCTGAGCCCAGGCCAGGGGCCCCCAAGAAAGGCTCTGGTGGAGAACCTGTGCATGAAGGCTGTCAACCAGTCCATAGGCAAGCCTGGCTGCCTCCAGCTGGGTGGACAGACAGGCGCTGGAGAAGGGGAGAAGAGGAAAGGGGGGCTGCCTGCCCTGTCTCCCACCTGAGGCTGAGGAAGGAGAGGGGGATGCACTGTTGGGGAGGCAGCTGTAACTCAAAGCCCTAGCCTCTGTTCCCACGAAGGCAGGGCCATCAGGCACCAAAGGGATTCTGCCAGCATAATGCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGACAAGCTGTTGGCCTGGATCTGAGCCCTCGTGGAGCTCAAAGCCACCTTTGGTTCTGCCATTGCTGCTGTGTGGAAGTTCAGTCCTGCCTTTTCCTTTCCCGAGAGGCTCCACCACCCCGAGATCGCACTTCTCACTGCCTTTTGTCTGCCCAGTTTCACCAGAAGTAGGCCTCTTCCTGACAGGCAGCTGCACCACTGCCTGGCGCTGCGCCCTTCCTTTGCTCTGCCCGCTGGAGACGGTGTTTGTCATGGGCCTGGTCTGCAGGGATCCTGCTACAAAGGTGAAACCCAGGAGAGTGTGGAGTCCAGAGTGTTGCCAGGACCCAGGCACAGGCATTAGTGCCCGTTGGAGAAAACAGGGGAATCCCAAAGAAATGGTGAGTCCTGGCCATCCGTGAGATCTTCCCAGGGCAGCTCCCCTCTGTGGAATCCAATCTGTCTTCCATCCTGCGTGGCAGAGGGCCAGGCTTCTCACTGGGCCTCTGCAGGAGGCTGCCATTTCTGTCCTGCCCACCTTCTAAGAAGCGAGACGGAGGACACCCATCTGCTTCTGCCCTTTCCATAATAACTAAAGTTAGCTGCCCTGGACTATTCACCCCCTAGTCTCTATTTAAAAAGATCCCCATGGCCACAGGGCCCCTGCCTGGGGGCTTGTCACCTCCCCCACCTTCTTCCTGAGTCACTTCTGCAGCCTTGCTCCCTAACCTGCCCCACAGCCTTGCCTGGATTTCTATCTCCCTGGCTTGGTGCCAGTTCCTCCAAGTCGATGGCACCTCCCTCCCTCTCAACTACTTGAGCAAACTTGAAGACATCTTCAACCCCAACACCAGCAATTGTGCCAAGGGCCATTAGGCTCTCAGCATGACTATTTTTAGAGACCCCGTGTCTGTCACTGAAACCTTTTTCGTGGGAGACTATTCCTCCCATCTGCAACAGCTGCCCCTGCTGACTGCCCTTCTCTCCTCCCTCTCATCCCAGAGAAACAGGTCAGCTGGGAGCTTCTGCCCCCACTGCCTAGGCACCAACAGGGGCAGGAGGCAGTCACTGACCCCGAGACGTTTGCATCCTGCACAGCTAGAGGTCCTTAATTAAAAG \n+\n+a score=33380.0\n+s hg18.chr1      4393 135 + 247249719 ####################################################################################################################################### \n+s panTro2.chr15 17634 135 - 100063422 GTGTGGCCTCAAGCCAGCCTTCCGCTCCTTGAAGCTGGTCTCCACACAGTGCTGGTTCCGTCACCCCCTCCCAGGGAAGCAGGTCTCAGCAGCTTGTCCTGGCTGTGTCCGTGTCAGAGCAACGGCCCAAGTCTG \n+\n+a score=364526.0\n+s hg18.chr1      
4528 1436 + 247249719 b'ttctctcctctgtcttttc#cacca#gtgaggatgcgaagagaaggtggctgtctgc#a##c##gaagagag#c#tcac#ggg#ac##gtc###############a###g#######################g#g#ga###a###########a#######cca#t##################################################################################################AGG###################G############################################C####G################################c#t#############################################################################################################################################################################################################################################################################################################################################################################################################aa###########################################################TATATGC##############################CA##############################CAC#######CTGC#A##CACCACCC \n+s panTro2.chrUn 57103829 1814 -  58616431 tgtgttcaatgggtagagtttcaggctggggtgatggaagggtgctggaaatgagtggtagcgatggcggcgcaacagtgtgaatctacttaatcccactgaactgtatgcttaaaaatggtttagacggtgaattttaggttatgtatgttttaccacaatttttaaaaaGCTAGTGAAAAGCTGGTAAAAAGAAAGAAAAGAGGCTTTTTTAAAAAGTTAAATATATAAAAAGAGCATCATCAGTCCAAAGTCCAGCAGTTGTCCCTCCTGGAATCCGTTGGCTTGCCTCCGGCATTTTTGGCCCTTGCCTTTtagggttgccagattaaaatacaggatgcccagttagtttgaattttagataaacaacgaataatttcgtagcataaatatgtcccaagcttagtttgggacatacttatgctaaaaaacattattggttgtttatctgagattcaaaattaagcattttatattttatttgctgcctctggccaccctaCTCTCTTCCTGACACTCTCTCCCTCTCCCAGTTTTGTCCGCCTTCCTTGCCTCCTCTTCTGGGGGAGTTAGATCGAGTTGTAACAAGAACATGCCACTGTCTCACTGGCTGCAGCGTGTGGTCCCCTTACCAGAGGTAAGGAAGAGATGGATCTCCACTCAtgttgtagacagaatgtttatgtcctctccaaattcttatgttgaaaccctaacccctaatgtgatggtatgtggagatgggcctttggtaggtaattacggttagatgaggtcatggggtggggccctcattatagatctggtaagaaaagagagcattgtctctctgtctccctctc-----------------------tctctctatgtcatttctctctct----ctatctca--tttctctctctctctctctctcctctgtcttttcccaccaagtgaggatgcgaagagaaggtggctgtctgcaaaccaggaagagagccctcaccag
gaacccgtccagctgccaccttgaacttggacttccaagcctccagaactgtgagggataaatgtatgattttaaagtcgcccagtgtgtggtattttgttttgactaatacaaCCTGAAAACATTTTCCCCTCACTCCACCTGAGGAATATCTGAGTGGCTTAATGTACTCAGGACACAACAA---AGAAATGTCCCATGCACAAGGTGCACCCATGCCtgggtaaagcagcctggcacagagggaagcacacaggctcagggctctgctattcattctttgtgtgaccctgggcaagccatgaatggagcttcagtcaccccatttgtaatgggatttaattgagcttgccctgcctccttctgagggctgtagagaaaagatgtcaaagtattttgtaatctggctgggcgtggtggctcatgcctgtaatcccagcactttggtaggctgacgcgagaggactgcttgagcccaagagtttgagatcagcctgggcaatattgtgagattccatctctacaaaaataaaataaaatagccagtcatggtgtcacacacctgtagtcccagctacatgggaggctgaggcgggaggatcacttgagcttgggagatcgaggctgcagtgagctatgattgtaccactgcactccaggctgggcgacagagagagaccctgtctcag--aaaaaagaaaaagtactttgtaatctgtaaggtttatttcaacacACACAAAAAAAGTGTATATGCTCCACGATGCCTGTGAATATACACACACACCACATCATATACCAAGCCTGGCTGTGTCTTCTCACAAATGCGCTGCTGGGCACCACCC \n+\n+a score=97796.0\n+s hg18.chr1        21738 409 + 247249719 #################CCA##CA###########AGA###########################################---################ACC#AATTT################----------------------------------##################G#########################G#A######TCT#####G#################C#######TGC#######CTGGGCTGA###########AA-TTAAGGC###AGGGTTGG#########G############TT###ACATCCCCCA#CCTCCTG##TTTGCCA#####CAAGAGCA#G#A#TTTA#########G#####G####GAAGA##AT#T##CAGGGTGTCCTA#GCCCCGATCTACAGGAAGAAAACT#GA# \n+s panTro2.chrUn 57107006 446 -  58616431 CCAGTTCTAGAACCAGACCAGACAGTTCACCCTCCAGACGGTTCACCCTCAACTGTATAAAAGTTCTCTACCTAATCTACTACTGACAGACTCATCTCAGACCTAATTTTAAAGATTTCCTAGGAACTGAATTGCAAAAACTTTTACCCTCACCCTAGAGCTGCAATGGGAATCCTG-ACCTCAGCGTAGACAAAGAAAAGCTGCAGGTCATTCTCATGTGTGGACACGGAAGCCCTGCCTGCCTTTGCTGGCCAGCTGGGCTGAGTGGGCCTGGGAAATTAAGGCTGCAGGGTTGGTCCCAGGCAGTCTTGCTGAAGCTTGCCACATCCCCCAGCCTCCTGGATTTGCCAGGATCCAAGAGCATGGACTTTAGGAATTCCTGGTGGAGGAGTGAAGAAAATGTGACAGGGTGTCCTAGGCCCCAATCTACAGGAAGAAAACTGGAA \n+\n+a score=7233.0\n+s hg18.chr1        22147 35 + 247249719 #####ACTG#GG##T##GTTTAAGATGTTCCTACT \n+s panTro2.chrUn 57107452 35 -  58616431 
ATAAGACTGAGGACTTAGTTTAAGATGTTCCTACT \n+\n'
b
diff -r 000000000000 -r 8d65bbc52dfe tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Apr 01 10:52:42 2014 -0400
b
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="bx-python" version="0.7.1">
+      <repository changeset_revision="41eb9d9f667d" name="package_bx_python_0_7" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="numpy" version="1.7.1">
+      <repository changeset_revision="0c288abd2a1e" name="package_numpy_1_7" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>