changeset 3:33c067c3ae34

Deleted selected files
author xuebing
date Fri, 09 Mar 2012 19:47:53 -0500
parents c2a356708570
children 3b1e54dc14d4
files fimo2.xml mytools.zip tools/.DS_Store tools/._.DS_Store tools/._mytools tools/._tool_conf.xml tools/annotation_profiler/annotation_profiler.xml tools/annotation_profiler/annotation_profiler_for_interval.py tools/bedtools/._bedToBam.xml tools/bedtools/bedToBam.xml tools/data_destination/epigraph.xml tools/data_destination/epigraph_test.xml tools/data_source/access_libraries.xml tools/data_source/bed_convert.xml tools/data_source/biomart.xml tools/data_source/biomart_test.xml tools/data_source/bx_browser.xml tools/data_source/cbi_rice_mart.xml tools/data_source/data_source.py tools/data_source/echo.py tools/data_source/echo.xml tools/data_source/encode_db.xml tools/data_source/epigraph_import.xml tools/data_source/epigraph_import_test.xml tools/data_source/eupathdb.xml tools/data_source/fetch.py tools/data_source/fly_modencode.xml tools/data_source/flymine.xml tools/data_source/flymine_test.xml tools/data_source/genbank.py tools/data_source/genbank.xml tools/data_source/gramene_mart.xml tools/data_source/hapmapmart.xml tools/data_source/hbvar.xml tools/data_source/hbvar_filter.py tools/data_source/import.py tools/data_source/import.xml tools/data_source/metabolicmine.xml tools/data_source/microbial_import.py tools/data_source/microbial_import.xml tools/data_source/microbial_import_code.py tools/data_source/modmine.xml tools/data_source/ratmine.xml tools/data_source/ucsc_archaea.xml tools/data_source/ucsc_filter.py tools/data_source/ucsc_proxy.py tools/data_source/ucsc_proxy.xml tools/data_source/ucsc_tablebrowser.xml tools/data_source/ucsc_tablebrowser_archaea.xml tools/data_source/ucsc_tablebrowser_test.xml tools/data_source/ucsc_testproxy.xml tools/data_source/upload.py tools/data_source/upload.xml tools/data_source/worm_modencode.xml tools/data_source/wormbase.xml tools/data_source/wormbase_test.xml tools/data_source/yeastmine.xml tools/discreteWavelet/execute_dwt_IvC_all.pl tools/discreteWavelet/execute_dwt_IvC_all.xml tools/discreteWavelet/execute_dwt_cor_aVa_perClass.pl tools/discreteWavelet/execute_dwt_cor_aVa_perClass.xml tools/discreteWavelet/execute_dwt_cor_aVb_all.pl tools/discreteWavelet/execute_dwt_cor_aVb_all.xml tools/discreteWavelet/execute_dwt_var_perClass.pl tools/discreteWavelet/execute_dwt_var_perClass.xml tools/discreteWavelet/execute_dwt_var_perFeature.pl tools/discreteWavelet/execute_dwt_var_perFeature.xml tools/emboss_5/emboss_antigenic.xml tools/emboss_5/emboss_backtranseq.xml tools/emboss_5/emboss_banana.pl tools/emboss_5/emboss_banana.xml tools/emboss_5/emboss_biosed.xml tools/emboss_5/emboss_btwisted.xml tools/emboss_5/emboss_cai.xml tools/emboss_5/emboss_cai_custom.xml tools/emboss_5/emboss_chaos.xml tools/emboss_5/emboss_charge.xml tools/emboss_5/emboss_checktrans.xml tools/emboss_5/emboss_chips.xml tools/emboss_5/emboss_cirdna.xml tools/emboss_5/emboss_codcmp.xml tools/emboss_5/emboss_coderet.xml tools/emboss_5/emboss_compseq.xml tools/emboss_5/emboss_cpgplot.xml tools/emboss_5/emboss_cpgplot_wrapper.pl tools/emboss_5/emboss_cpgreport.xml tools/emboss_5/emboss_cusp.xml tools/emboss_5/emboss_cutseq.xml tools/emboss_5/emboss_dan.xml tools/emboss_5/emboss_degapseq.xml tools/emboss_5/emboss_descseq.xml tools/emboss_5/emboss_diffseq.xml tools/emboss_5/emboss_digest.xml tools/emboss_5/emboss_dotmatcher.xml tools/emboss_5/emboss_dotpath.xml tools/emboss_5/emboss_dottup.xml tools/emboss_5/emboss_dreg.xml tools/emboss_5/emboss_einverted.xml tools/emboss_5/emboss_epestfind.xml tools/emboss_5/emboss_equicktandem.xml tools/emboss_5/emboss_est2genome.xml 
tools/emboss_5/emboss_etandem.xml tools/emboss_5/emboss_extractfeat.xml tools/emboss_5/emboss_extractseq.xml tools/emboss_5/emboss_format_corrector.py tools/emboss_5/emboss_freak.xml tools/emboss_5/emboss_fuzznuc.xml tools/emboss_5/emboss_fuzzpro.xml tools/emboss_5/emboss_fuzztran.xml tools/emboss_5/emboss_garnier.xml tools/emboss_5/emboss_geecee.xml tools/emboss_5/emboss_getorf.xml tools/emboss_5/emboss_helixturnhelix.xml tools/emboss_5/emboss_hmoment.xml tools/emboss_5/emboss_iep.xml tools/emboss_5/emboss_infoseq.xml tools/emboss_5/emboss_infoseq_wrapper.pl tools/emboss_5/emboss_isochore.xml tools/emboss_5/emboss_lindna.xml tools/emboss_5/emboss_marscan.xml tools/emboss_5/emboss_maskfeat.xml tools/emboss_5/emboss_maskseq.xml tools/emboss_5/emboss_matcher.xml tools/emboss_5/emboss_megamerger.xml tools/emboss_5/emboss_merger.xml tools/emboss_5/emboss_msbar.xml tools/emboss_5/emboss_multiple_outputfile_wrapper.pl tools/emboss_5/emboss_needle.xml tools/emboss_5/emboss_newcpgreport.xml tools/emboss_5/emboss_newcpgseek.xml tools/emboss_5/emboss_newseq.xml tools/emboss_5/emboss_noreturn.xml tools/emboss_5/emboss_notseq.xml tools/emboss_5/emboss_nthseq.xml tools/emboss_5/emboss_octanol.xml tools/emboss_5/emboss_oddcomp.xml tools/emboss_5/emboss_palindrome.xml tools/emboss_5/emboss_pasteseq.xml tools/emboss_5/emboss_patmatdb.xml tools/emboss_5/emboss_pepcoil.xml tools/emboss_5/emboss_pepinfo.xml tools/emboss_5/emboss_pepnet.xml tools/emboss_5/emboss_pepstats.xml tools/emboss_5/emboss_pepwheel.xml tools/emboss_5/emboss_pepwindow.xml tools/emboss_5/emboss_pepwindowall.xml tools/emboss_5/emboss_plotcon.xml tools/emboss_5/emboss_plotorf.xml tools/emboss_5/emboss_polydot.xml tools/emboss_5/emboss_preg.xml tools/emboss_5/emboss_prettyplot.xml tools/emboss_5/emboss_prettyseq.xml tools/emboss_5/emboss_primersearch.xml tools/emboss_5/emboss_revseq.xml tools/emboss_5/emboss_seqmatchall.xml tools/emboss_5/emboss_seqret.xml tools/emboss_5/emboss_showfeat.xml tools/emboss_5/emboss_shuffleseq.xml tools/emboss_5/emboss_sigcleave.xml tools/emboss_5/emboss_single_outputfile_wrapper.pl tools/emboss_5/emboss_sirna.xml tools/emboss_5/emboss_sixpack.xml tools/emboss_5/emboss_skipseq.xml tools/emboss_5/emboss_splitter.xml tools/emboss_5/emboss_supermatcher.xml tools/emboss_5/emboss_syco.xml tools/emboss_5/emboss_tcode.xml tools/emboss_5/emboss_textsearch.xml tools/emboss_5/emboss_tmap.xml tools/emboss_5/emboss_tranalign.xml tools/emboss_5/emboss_transeq.xml tools/emboss_5/emboss_trimest.xml tools/emboss_5/emboss_trimseq.xml tools/emboss_5/emboss_twofeat.xml tools/emboss_5/emboss_union.xml tools/emboss_5/emboss_vectorstrip.xml tools/emboss_5/emboss_water.xml tools/emboss_5/emboss_wobble.xml tools/emboss_5/emboss_wordcount.xml tools/emboss_5/emboss_wordmatch.xml tools/encode/gencode_partition.xml tools/encode/random_intervals.xml tools/encode/random_intervals_no_bits.py tools/encode/split_by_partitions.py tools/evolution/add_scores.xml tools/evolution/codingSnps.pl tools/evolution/codingSnps.xml tools/evolution/codingSnps_filter.py tools/evolution/mutate_snp_codon.py tools/evolution/mutate_snp_codon.xml tools/extract/extract_genomic_dna.py tools/extract/extract_genomic_dna.xml tools/extract/liftOver_wrapper.py tools/extract/liftOver_wrapper.xml tools/extract/phastOdds/get_scores_galaxy.py tools/extract/phastOdds/phastOdds_tool.xml tools/fasta_tools/fasta_compute_length.py tools/fasta_tools/fasta_compute_length.xml tools/fasta_tools/fasta_concatenate_by_species.py tools/fasta_tools/fasta_concatenate_by_species.xml 
tools/fasta_tools/fasta_filter_by_length.py tools/fasta_tools/fasta_filter_by_length.xml tools/fasta_tools/fasta_to_tabular.py tools/fasta_tools/fasta_to_tabular.xml tools/fasta_tools/tabular_to_fasta.py tools/fasta_tools/tabular_to_fasta.xml tools/fastq/fastq_combiner.py tools/fastq/fastq_combiner.xml tools/fastq/fastq_filter.py tools/fastq/fastq_filter.xml tools/fastq/fastq_groomer.py tools/fastq/fastq_groomer.xml tools/fastq/fastq_manipulation.py tools/fastq/fastq_manipulation.xml tools/fastq/fastq_masker_by_quality.py tools/fastq/fastq_masker_by_quality.xml tools/fastq/fastq_paired_end_deinterlacer.py tools/fastq/fastq_paired_end_deinterlacer.xml tools/fastq/fastq_paired_end_interlacer.py tools/fastq/fastq_paired_end_interlacer.xml tools/fastq/fastq_paired_end_joiner.py tools/fastq/fastq_paired_end_joiner.xml tools/fastq/fastq_paired_end_splitter.py tools/fastq/fastq_paired_end_splitter.xml tools/fastq/fastq_stats.py tools/fastq/fastq_stats.xml tools/fastq/fastq_to_fasta.py tools/fastq/fastq_to_fasta.xml tools/fastq/fastq_to_tabular.py tools/fastq/fastq_to_tabular.xml tools/fastq/fastq_trimmer.py tools/fastq/fastq_trimmer.xml tools/fastq/fastq_trimmer_by_quality.py tools/fastq/fastq_trimmer_by_quality.xml tools/fastq/tabular_to_fastq.py tools/fastq/tabular_to_fastq.xml tools/fastx_toolkit/fasta_clipping_histogram.xml tools/fastx_toolkit/fasta_formatter.xml tools/fastx_toolkit/fasta_nucleotide_changer.xml tools/fastx_toolkit/fastq_quality_boxplot.xml tools/fastx_toolkit/fastq_quality_converter.xml tools/fastx_toolkit/fastq_quality_filter.xml tools/fastx_toolkit/fastq_to_fasta.xml tools/fastx_toolkit/fastx_artifacts_filter.xml tools/fastx_toolkit/fastx_barcode_splitter.xml tools/fastx_toolkit/fastx_barcode_splitter_galaxy_wrapper.sh tools/fastx_toolkit/fastx_clipper.xml tools/fastx_toolkit/fastx_collapser.xml tools/fastx_toolkit/fastx_nucleotides_distribution.xml tools/fastx_toolkit/fastx_quality_statistics.xml tools/fastx_toolkit/fastx_renamer.xml tools/fastx_toolkit/fastx_reverse_complement.xml tools/fastx_toolkit/fastx_trimmer.xml tools/filters/CreateInterval.pl tools/filters/CreateInterval.xml tools/filters/axt_to_concat_fasta.py tools/filters/axt_to_concat_fasta.xml tools/filters/axt_to_fasta.py tools/filters/axt_to_fasta.xml tools/filters/axt_to_lav.py tools/filters/axt_to_lav.xml tools/filters/axt_to_lav_code.py tools/filters/bed2gff.xml tools/filters/bed_to_bigbed.xml tools/filters/bed_to_gff_converter.py tools/filters/catWrapper.py tools/filters/catWrapper.xml tools/filters/changeCase.pl tools/filters/changeCase.xml tools/filters/commWrapper.pl tools/filters/commWrapper.xml tools/filters/compare.xml tools/filters/condense_characters.pl tools/filters/condense_characters.xml tools/filters/convert_characters.pl tools/filters/convert_characters.py tools/filters/convert_characters.xml tools/filters/cutWrapper.pl tools/filters/cutWrapper.xml tools/filters/fileGrep.xml tools/filters/fixedValueColumn.pl tools/filters/fixedValueColumn.xml tools/filters/gff/extract_GFF_Features.py tools/filters/gff/extract_GFF_Features.xml tools/filters/gff/gff_filter_by_attribute.py tools/filters/gff/gff_filter_by_attribute.xml tools/filters/gff/gff_filter_by_feature_count.py tools/filters/gff/gff_filter_by_feature_count.xml tools/filters/gff/gtf_filter_by_attribute_values_list.py tools/filters/gff/gtf_filter_by_attribute_values_list.xml tools/filters/gff2bed.xml tools/filters/gff_to_bed_converter.py tools/filters/grep.py tools/filters/grep.xml tools/filters/gtf2bedgraph.xml 
tools/filters/gtf_to_bedgraph_converter.py tools/filters/headWrapper.pl tools/filters/headWrapper.xml tools/filters/join.py tools/filters/joinWrapper.pl tools/filters/joinWrapper.py tools/filters/joiner.xml tools/filters/joiner2.xml tools/filters/lav_to_bed.py tools/filters/lav_to_bed.xml tools/filters/lav_to_bed_code.py tools/filters/mergeCols.py tools/filters/mergeCols.xml tools/filters/pasteWrapper.pl tools/filters/pasteWrapper.xml tools/filters/randomlines.py tools/filters/randomlines.xml tools/filters/remove_beginning.pl tools/filters/remove_beginning.xml tools/filters/sff_extract.py tools/filters/sff_extractor.xml tools/filters/sorter.py tools/filters/sorter.xml tools/filters/tailWrapper.pl tools/filters/tailWrapper.xml tools/filters/trimmer.py tools/filters/trimmer.xml tools/filters/ucsc_gene_bed_to_exon_bed.py tools/filters/ucsc_gene_bed_to_exon_bed.xml tools/filters/ucsc_gene_bed_to_intron_bed.py tools/filters/ucsc_gene_bed_to_intron_bed.xml tools/filters/ucsc_gene_table_to_intervals.py tools/filters/ucsc_gene_table_to_intervals.xml tools/filters/uniq.py tools/filters/uniq.xml tools/filters/wc_gnu.xml tools/filters/wig_to_bigwig.xml tools/filters/wiggle_to_simple.py tools/filters/wiggle_to_simple.xml tools/galaxy-loc.tar.gz tools/gatk/analyze_covariates.xml tools/gatk/count_covariates.xml tools/gatk/gatk_wrapper.py tools/gatk/indel_realigner.xml tools/gatk/realigner_target_creator.xml tools/gatk/table_recalibration.xml tools/gatk/unified_genotyper.xml tools/genetrack/genetrack_indexer.py tools/genetrack/genetrack_indexer.xml tools/genetrack/genetrack_peak_prediction.py tools/genetrack/genetrack_peak_prediction.xml tools/genome_diversity/cdblib.py tools/genome_diversity/extract_flanking_dna.py tools/genome_diversity/extract_flanking_dna.xml tools/genome_diversity/extract_primers.py tools/genome_diversity/extract_primers.xml tools/genome_diversity/genome_diversity.py tools/genome_diversity/select_restriction_enzymes.py tools/genome_diversity/select_restriction_enzymes.xml tools/genome_diversity/select_snps.py tools/genome_diversity/select_snps.xml tools/human_genome_variation/BEAM2_wrapper.sh tools/human_genome_variation/beam.xml tools/human_genome_variation/ctd.pl tools/human_genome_variation/ctd.xml tools/human_genome_variation/disease_ontology_gene_fuzzy_selector.pl tools/human_genome_variation/freebayes.xml tools/human_genome_variation/funDo.xml tools/human_genome_variation/gpass.pl tools/human_genome_variation/gpass.xml tools/human_genome_variation/hilbertvis.sh tools/human_genome_variation/hilbertvis.xml tools/human_genome_variation/ldtools.xml tools/human_genome_variation/ldtools_wrapper.sh tools/human_genome_variation/linkToDavid.pl tools/human_genome_variation/linkToDavid.xml tools/human_genome_variation/linkToGProfile.pl tools/human_genome_variation/linkToGProfile.xml tools/human_genome_variation/lped_to_geno.pl tools/human_genome_variation/lps.xml tools/human_genome_variation/lps_tool_wrapper.sh tools/human_genome_variation/mergeSnps.pl tools/human_genome_variation/pagetag.py tools/human_genome_variation/pass.xml tools/human_genome_variation/pass_wrapper.sh tools/human_genome_variation/senatag.py tools/human_genome_variation/sift.xml tools/human_genome_variation/sift_variants_wrapper.sh tools/human_genome_variation/snpFreq.xml tools/human_genome_variation/snpFreq2.pl tools/hyphy/hyphy_branch_lengths_wrapper.py tools/hyphy/hyphy_branch_lengths_wrapper.xml tools/hyphy/hyphy_dnds_wrapper.py tools/hyphy/hyphy_dnds_wrapper.xml tools/hyphy/hyphy_nj_tree_wrapper.py 
tools/hyphy/hyphy_nj_tree_wrapper.xml tools/ilmn_pacbio/abyss.xml tools/ilmn_pacbio/assembly_stats.py tools/ilmn_pacbio/assembly_stats.xml tools/ilmn_pacbio/cov_model.py tools/ilmn_pacbio/quake.py tools/ilmn_pacbio/quake.xml tools/ilmn_pacbio/quake_pe.xml tools/ilmn_pacbio/quake_wrapper.py tools/ilmn_pacbio/smrtpipe.py tools/ilmn_pacbio/smrtpipe_filter.xml tools/ilmn_pacbio/smrtpipe_galaxy.py tools/ilmn_pacbio/smrtpipe_hybrid.xml tools/ilmn_pacbio/soap_denovo.xml tools/indels/indel_analysis.py tools/indels/indel_analysis.xml tools/indels/indel_sam2interval.py tools/indels/indel_sam2interval.xml tools/indels/indel_table.py tools/indels/indel_table.xml tools/indels/sam_indel_filter.py tools/indels/sam_indel_filter.xml tools/maf/genebed_maf_to_fasta.xml tools/maf/interval2maf.py tools/maf/interval2maf.xml tools/maf/interval2maf_pairwise.xml tools/maf/interval_maf_to_merged_fasta.py tools/maf/interval_maf_to_merged_fasta.xml tools/maf/maf_by_block_number.py tools/maf/maf_by_block_number.xml tools/maf/maf_filter.py tools/maf/maf_filter.xml tools/maf/maf_limit_size.py tools/maf/maf_limit_size.xml tools/maf/maf_limit_to_species.py tools/maf/maf_limit_to_species.xml tools/maf/maf_reverse_complement.py tools/maf/maf_reverse_complement.xml tools/maf/maf_split_by_species.py tools/maf/maf_split_by_species.xml tools/maf/maf_stats.py tools/maf/maf_stats.xml tools/maf/maf_thread_for_species.py tools/maf/maf_thread_for_species.xml tools/maf/maf_to_bed.py tools/maf/maf_to_bed.xml tools/maf/maf_to_bed_code.py tools/maf/maf_to_fasta.xml tools/maf/maf_to_fasta_concat.py tools/maf/maf_to_fasta_multiple_sets.py tools/maf/maf_to_interval.py tools/maf/maf_to_interval.xml tools/maf/vcf_to_maf_customtrack.py tools/maf/vcf_to_maf_customtrack.xml tools/meme/._meme.xml tools/meme/fimo.xml tools/meme/fimo_wrapper.py tools/meme/meme.xml tools/metag_tools/blat_coverage_report.py tools/metag_tools/blat_coverage_report.xml tools/metag_tools/blat_mapping.py tools/metag_tools/blat_mapping.xml tools/metag_tools/blat_wrapper.py tools/metag_tools/blat_wrapper.xml tools/metag_tools/convert_SOLiD_color2nuc.py tools/metag_tools/convert_SOLiD_color2nuc.xml tools/metag_tools/fastqsolexa_to_fasta_qual.py tools/metag_tools/fastqsolexa_to_fasta_qual.xml tools/metag_tools/mapping_to_ucsc.py tools/metag_tools/mapping_to_ucsc.xml tools/metag_tools/megablast_wrapper.py tools/metag_tools/megablast_wrapper.xml tools/metag_tools/megablast_xml_parser.py tools/metag_tools/megablast_xml_parser.xml tools/metag_tools/rmap_wrapper.py tools/metag_tools/rmap_wrapper.xml tools/metag_tools/rmapq_wrapper.py tools/metag_tools/rmapq_wrapper.xml tools/metag_tools/short_reads_figure_high_quality_length.py tools/metag_tools/short_reads_figure_high_quality_length.xml tools/metag_tools/short_reads_figure_score.py tools/metag_tools/short_reads_figure_score.xml tools/metag_tools/short_reads_trim_seq.py tools/metag_tools/short_reads_trim_seq.xml tools/metag_tools/shrimp_color_wrapper.py tools/metag_tools/shrimp_color_wrapper.xml tools/metag_tools/shrimp_wrapper.py tools/metag_tools/shrimp_wrapper.xml tools/metag_tools/split_paired_reads.py tools/metag_tools/split_paired_reads.xml tools/multivariate_stats/cca.py tools/multivariate_stats/cca.xml tools/multivariate_stats/kcca.py tools/multivariate_stats/kcca.xml tools/multivariate_stats/kpca.py tools/multivariate_stats/kpca.xml tools/multivariate_stats/pca.py tools/multivariate_stats/pca.xml tools/mutation/visualize.py tools/mutation/visualize.xml tools/ncbi_blast_plus/blastxml_to_tabular.py 
tools/ncbi_blast_plus/blastxml_to_tabular.xml tools/ncbi_blast_plus/hide_stderr.py tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml tools/new_operations/basecoverage.xml tools/new_operations/cluster.xml tools/new_operations/column_join.py tools/new_operations/column_join.xml tools/new_operations/complement.xml tools/new_operations/concat.xml tools/new_operations/coverage.xml tools/new_operations/flanking_features.py tools/new_operations/flanking_features.xml tools/new_operations/get_flanks.py tools/new_operations/get_flanks.xml tools/new_operations/gops_basecoverage.py tools/new_operations/gops_cluster.py tools/new_operations/gops_complement.py tools/new_operations/gops_concat.py tools/new_operations/gops_coverage.py tools/new_operations/gops_intersect.py tools/new_operations/gops_join.py tools/new_operations/gops_merge.py tools/new_operations/gops_subtract.py tools/new_operations/intersect.xml tools/new_operations/join.xml tools/new_operations/merge.xml tools/new_operations/operation_filter.py tools/new_operations/subtract.xml tools/new_operations/subtract_query.py tools/new_operations/subtract_query.xml tools/new_operations/tables_arithmetic_operations.pl tools/new_operations/tables_arithmetic_operations.xml tools/next_gen_conversion/bwa_solid2fastq_modified.pl tools/next_gen_conversion/fastq_conversions.py tools/next_gen_conversion/fastq_conversions.xml tools/next_gen_conversion/fastq_gen_conv.py tools/next_gen_conversion/fastq_gen_conv.xml tools/next_gen_conversion/solid2fastq.py tools/next_gen_conversion/solid2fastq.xml tools/next_gen_conversion/solid_to_fastq.py tools/next_gen_conversion/solid_to_fastq.xml tools/ngs_rna/cuffcompare_wrapper.py tools/ngs_rna/cuffcompare_wrapper.xml tools/ngs_rna/cuffdiff_wrapper.py tools/ngs_rna/cuffdiff_wrapper.xml tools/ngs_rna/cufflinks_wrapper.py tools/ngs_rna/cufflinks_wrapper.xml tools/ngs_rna/filter_transcripts_via_tracking.py tools/ngs_rna/filter_transcripts_via_tracking.xml tools/ngs_rna/tophat_color_wrapper.xml tools/ngs_rna/tophat_wrapper.py tools/ngs_rna/tophat_wrapper.xml tools/ngs_rna/trinity_all.xml tools/ngs_simulation/ngs_simulation.py tools/ngs_simulation/ngs_simulation.xml tools/peak_calling/ccat_2_wrapper.xml tools/peak_calling/ccat_wrapper.py tools/peak_calling/ccat_wrapper.xml tools/peak_calling/macs_wrapper.py tools/peak_calling/macs_wrapper.xml tools/peak_calling/sicer_wrapper.py tools/peak_calling/sicer_wrapper.xml tools/picard/picard_AddOrReplaceReadGroups.xml tools/picard/picard_BamIndexStats.xml tools/picard/picard_MarkDuplicates.xml tools/picard/picard_ReorderSam.xml tools/picard/picard_ReplaceSamHeader.xml tools/picard/picard_wrapper.py tools/picard/rgPicardASMetrics.xml tools/picard/rgPicardFixMate.xml tools/picard/rgPicardGCBiasMetrics.xml tools/picard/rgPicardHsMetrics.xml tools/picard/rgPicardInsertSize.xml tools/picard/rgPicardLibComplexity.xml tools/picard/rgPicardMarkDups.xml tools/plotting/bar_chart.py tools/plotting/bar_chart.xml tools/plotting/boxplot.xml tools/plotting/histogram.py tools/plotting/histogram2.xml tools/plotting/plot_filter.py tools/plotting/plotter.py tools/plotting/r_wrapper.sh tools/plotting/scatterplot.py tools/plotting/scatterplot.xml tools/plotting/xy_plot.xml tools/regVariation/best_regression_subsets.py tools/regVariation/best_regression_subsets.xml tools/regVariation/categorize_elements_satisfying_criteria.pl 
tools/regVariation/categorize_elements_satisfying_criteria.xml tools/regVariation/compute_motif_frequencies_for_all_motifs.pl tools/regVariation/compute_motif_frequencies_for_all_motifs.xml tools/regVariation/compute_motifs_frequency.pl tools/regVariation/compute_motifs_frequency.xml tools/regVariation/compute_q_values.pl tools/regVariation/compute_q_values.xml tools/regVariation/delete_overlapping_indels.pl tools/regVariation/delete_overlapping_indels.xml tools/regVariation/draw_stacked_barplots.pl tools/regVariation/draw_stacked_barplots.xml tools/regVariation/featureCounter.py tools/regVariation/featureCounter.xml tools/regVariation/getIndelRates_3way.py tools/regVariation/getIndelRates_3way.xml tools/regVariation/getIndels.py tools/regVariation/getIndels_2way.xml tools/regVariation/getIndels_3way.xml tools/regVariation/linear_regression.py tools/regVariation/linear_regression.xml tools/regVariation/maf_cpg_filter.py tools/regVariation/maf_cpg_filter.xml tools/regVariation/microsatellite_birthdeath.pl tools/regVariation/microsatellite_birthdeath.xml tools/regVariation/microsats_alignment_level.py tools/regVariation/microsats_alignment_level.xml tools/regVariation/microsats_mutability.py tools/regVariation/microsats_mutability.xml tools/regVariation/multispecies_MicrosatDataGenerator_interrupted_GALAXY.pl tools/regVariation/multispecies_MicrosatDataGenerator_interrupted_GALAXY.xml tools/regVariation/parseMAF_smallIndels.pl tools/regVariation/quality_filter.py tools/regVariation/quality_filter.xml tools/regVariation/qv_to_bqv.py tools/regVariation/qv_to_bqv.xml tools/regVariation/rcve.py tools/regVariation/rcve.xml tools/regVariation/substitution_rates.py tools/regVariation/substitution_rates.xml tools/regVariation/substitutions.py tools/regVariation/substitutions.xml tools/regVariation/t_test_two_samples.pl tools/regVariation/t_test_two_samples.xml tools/regVariation/windowSplitter.py tools/regVariation/windowSplitter.xml tools/rgenetics/listFiles.py tools/rgenetics/plinkbinJZ.py tools/rgenetics/plinkbinJZ.pyc tools/rgenetics/rgCaCo.py tools/rgenetics/rgCaCo.xml tools/rgenetics/rgClean.py tools/rgenetics/rgClean.xml tools/rgenetics/rgClustalw.py tools/rgenetics/rgClustalw.xml tools/rgenetics/rgEigPCA.py tools/rgenetics/rgEigPCA.xml tools/rgenetics/rgFastQC.py tools/rgenetics/rgFastQC.xml tools/rgenetics/rgGLM.py tools/rgenetics/rgGLM.xml tools/rgenetics/rgGLM_code.py tools/rgenetics/rgGRR.py tools/rgenetics/rgGRR.xml tools/rgenetics/rgGTOOL.py tools/rgenetics/rgGTOOL.xml tools/rgenetics/rgHaploView.py tools/rgenetics/rgHaploView.xml tools/rgenetics/rgLDIndep.py tools/rgenetics/rgLDIndep.xml tools/rgenetics/rgLDIndep_code.py tools/rgenetics/rgManQQ.py tools/rgenetics/rgManQQ.xml tools/rgenetics/rgManQQ_code.py tools/rgenetics/rgPedSub.py tools/rgenetics/rgPedSub.xml tools/rgenetics/rgQC.py tools/rgenetics/rgQC.xml tools/rgenetics/rgQQ.py tools/rgenetics/rgQQ.xml tools/rgenetics/rgQQ_code.py tools/rgenetics/rgRegion.py tools/rgenetics/rgRegion.xml tools/rgenetics/rgTDT.py tools/rgenetics/rgTDT.xml tools/rgenetics/rgWebLogo3.py tools/rgenetics/rgWebLogo3.xml tools/rgenetics/rgfakePed.py tools/rgenetics/rgfakePed.xml tools/rgenetics/rgfakePhe.py tools/rgenetics/rgfakePhe.xml tools/rgenetics/rgtest.sh tools/rgenetics/rgtest_one_tool.sh tools/rgenetics/rgutils.py tools/rgenetics/rgutils.pyc tools/rgenetics/test tools/rgenetics/test.eps tools/rgenetics/test.pdf tools/rgenetics/test.png tools/samtools/bam_to_sam.py tools/samtools/bam_to_sam.xml tools/samtools/pileup_interval.py 
tools/samtools/pileup_interval.xml tools/samtools/pileup_parser.pl tools/samtools/pileup_parser.xml tools/samtools/sam2interval.py tools/samtools/sam2interval.xml tools/samtools/sam_bitwise_flag_filter.py tools/samtools/sam_bitwise_flag_filter.xml tools/samtools/sam_merge.py tools/samtools/sam_merge.xml tools/samtools/sam_merge_code.py tools/samtools/sam_pileup.py tools/samtools/sam_pileup.xml tools/samtools/sam_to_bam.py tools/samtools/sam_to_bam.xml tools/samtools/samtools_flagstat.xml tools/solid_tools/maq_cs_wrapper.py tools/solid_tools/maq_cs_wrapper.xml tools/solid_tools/maq_cs_wrapper_code.py tools/solid_tools/qualsolid_boxplot_graph.sh tools/solid_tools/solid_qual_boxplot.xml tools/solid_tools/solid_qual_stats.py tools/solid_tools/solid_qual_stats.xml tools/sr_assembly/velvetg.xml tools/sr_assembly/velvetg_wrapper.py tools/sr_assembly/velveth.xml tools/sr_assembly/velveth_wrapper.py tools/sr_mapping/PerM.xml tools/sr_mapping/bfast_wrapper.py tools/sr_mapping/bfast_wrapper.xml tools/sr_mapping/bowtie_color_wrapper.xml tools/sr_mapping/bowtie_wrapper.py tools/sr_mapping/bowtie_wrapper.xml tools/sr_mapping/bwa_color_wrapper.xml tools/sr_mapping/bwa_wrapper.py tools/sr_mapping/bwa_wrapper.xml tools/sr_mapping/fastq_statistics.xml tools/sr_mapping/lastz_paired_reads_wrapper.py tools/sr_mapping/lastz_paired_reads_wrapper.xml tools/sr_mapping/lastz_wrapper.py tools/sr_mapping/lastz_wrapper.xml tools/sr_mapping/mosaik.xml tools/sr_mapping/srma_wrapper.py tools/sr_mapping/srma_wrapper.xml tools/stats/aggregate_binned_scores_in_intervals.xml tools/stats/aggregate_scores_in_intervals.py tools/stats/column_maker.py tools/stats/column_maker.xml tools/stats/cor.py tools/stats/cor.xml tools/stats/correlation.pl tools/stats/correlation.xml tools/stats/count_gff_features.py tools/stats/count_gff_features.xml tools/stats/dna_filtering.py tools/stats/dna_filtering.xml tools/stats/filtering.py tools/stats/filtering.xml tools/stats/generate_matrix_for_pca_lda.pl tools/stats/generate_matrix_for_pca_lda.xml tools/stats/grouping.py tools/stats/grouping.xml tools/stats/gsummary.py tools/stats/gsummary.xml tools/stats/gsummary.xml.groups tools/stats/lda_analy.xml tools/stats/plot_from_lda.xml tools/stats/r_wrapper.sh tools/stats/wiggle_to_simple.py tools/stats/wiggle_to_simple.xml tools/taxonomy/find_diag_hits.py tools/taxonomy/find_diag_hits.xml tools/taxonomy/gi2taxonomy.py tools/taxonomy/gi2taxonomy.xml tools/taxonomy/lca.py tools/taxonomy/lca.xml tools/taxonomy/poisson2test.py tools/taxonomy/poisson2test.xml tools/taxonomy/t2ps_wrapper.py tools/taxonomy/t2ps_wrapper.xml tools/taxonomy/t2t_report.xml tools/tool_conf.xml tools/unix_tools/._awk_tool.xml tools/unix_tools/._awk_wrapper.sh tools/unix_tools/._cut_tool.xml tools/unix_tools/._cut_wrapper.sh tools/unix_tools/._find_and_replace.pl tools/unix_tools/._find_and_replace.xml tools/unix_tools/._grep_tool.xml tools/unix_tools/._grep_wrapper.sh tools/unix_tools/._grep_wrapper_old.sh tools/unix_tools/._join_tool.sh tools/unix_tools/._join_tool.xml tools/unix_tools/._remove_ending.sh tools/unix_tools/._remove_ending.xml tools/unix_tools/._sed_tool.xml tools/unix_tools/._sed_wrapper.sh tools/unix_tools/._sort_tool.xml tools/unix_tools/._uniq_tool.xml tools/unix_tools/._word_list_grep.pl tools/unix_tools/._word_list_grep.xml tools/unix_tools/awk_tool.xml tools/unix_tools/awk_wrapper.sh tools/unix_tools/cut_tool.xml tools/unix_tools/cut_wrapper.sh tools/unix_tools/find_and_replace.pl tools/unix_tools/find_and_replace.xml tools/unix_tools/grep_tool.xml 
tools/unix_tools/grep_wrapper.sh tools/unix_tools/grep_wrapper_old.sh tools/unix_tools/join_tool.sh tools/unix_tools/join_tool.xml tools/unix_tools/remove_ending.sh tools/unix_tools/remove_ending.xml tools/unix_tools/sed_tool.xml tools/unix_tools/sed_wrapper.sh tools/unix_tools/sort_tool.xml tools/unix_tools/uniq_tool.xml tools/unix_tools/word_list_grep.pl tools/unix_tools/word_list_grep.xml tools/validation/fix_errors.py tools/validation/fix_errors.xml tools/validation/fix_errors_code.py tools/validation/validate.py tools/vcf_tools/annotate.py tools/vcf_tools/annotate.xml tools/vcf_tools/bedClass.py tools/vcf_tools/extract.py tools/vcf_tools/extract.xml tools/vcf_tools/filter.py tools/vcf_tools/filter.xml tools/vcf_tools/intersect.py tools/vcf_tools/intersect.xml tools/vcf_tools/tools.py tools/vcf_tools/vcfClass.py tools/vcf_tools/vcfPytools.py tools/visualization/GMAJ.py tools/visualization/GMAJ.xml tools/visualization/LAJ.py tools/visualization/LAJ.xml tools/visualization/LAJ_code.py tools/visualization/build_ucsc_custom_track.py tools/visualization/build_ucsc_custom_track.xml tools/visualization/build_ucsc_custom_track_code.py
diffstat 827 files changed, 0 insertions(+), 107829 deletions(-)
--- a/fimo2.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-<tool id="fimo" name="motif search">
-  <description>using FIMO</description>
-  <command> fimo 
-    #if $background_select.bg_select == "fromfile":
-    -bgfile $bgfile
-    #end if
-    
-  $norc --max-stored-scores 5000000 --output-pthresh $pth --verbosity 1 $motif $database 
-  &amp;&amp; mv fimo_out/fimo.html ${html_outfile}
-  
-  &amp;&amp; mv fimo_out/fimo.txt ${txt_outfile}
-  
-  &amp;&amp; rm -rf fimo_out
-  
-  </command>
-  <inputs>
-    
-    <param name="motif" type="data" format="txt" label="Motif file" help="created using the tool create-motif-file, or imported from Shared Data"/>
-    <param name="database" type="data" format="fasta" label="Sequence file (FASTA)"/>
-      
-    <conditional name="background_select">
-    	<param name="bg_select" type="select" label="Background model" >
-		  <option value="uniform" selected="true">uniform</option>
-		  <option value="fromfile">load from file</option>
-	    </param>
-	    <when value="fromfile">
-		    <param name="bgfile" type="data" format="txt" label="File for background model"/>
-	    </when>
-    </conditional>
-      
-      <param name="pth" size="10" type="float" value="0.0001" label="p-value threshold"/>
-    <param name="norc" label="Do not score the reverse complement DNA strand. Both strands are scored by default" type="boolean" truevalue="-norc" falsevalue="" checked="False"/>
-  </inputs>
-  <outputs>
-    <data format="html" name="html_outfile" label="${tool.name} on ${on_string} (html)"/>
-    <data format="txt" name="txt_outfile" label="${tool.name} on ${on_string} (txt)"/>
-  </outputs>
-  <help>
-
-**What it does**
-
-This tool uses FIMO to find matches of a motif in a FASTA file. For more details, see:
-
-http://meme.sdsc.edu/meme/fimo-intro.html
- 
-  </help>
-</tool>
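
For reference, the command template in the deleted fimo2.xml expands to a single FIMO invocation followed by moving the outputs out of FIMO's default fimo_out directory. A minimal Python sketch of the equivalent call (an editorial illustration, not part of the repository; the function name and file paths are hypothetical)::

    import shutil
    import subprocess

    def run_fimo(motif, fasta, html_out, txt_out, pth=0.0001, bgfile=None, norc=False):
        cmd = ["fimo"]
        if bgfile:                      # "load from file" branch of the background conditional
            cmd += ["-bgfile", bgfile]
        if norc:                        # $norc expands to "-norc" when the box is checked
            cmd.append("-norc")
        cmd += ["--max-stored-scores", "5000000",
                "--output-pthresh", str(pth), "--verbosity", "1",
                motif, fasta]
        subprocess.check_call(cmd)      # FIMO writes fimo.html / fimo.txt under ./fimo_out
        shutil.move("fimo_out/fimo.html", html_out)
        shutil.move("fimo_out/fimo.txt", txt_out)
        shutil.rmtree("fimo_out", ignore_errors=True)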
Binary file mytools.zip has changed
Binary file tools/.DS_Store has changed
Binary file tools/._.DS_Store has changed
Binary file tools/._mytools has changed
Binary file tools/._tool_conf.xml has changed
--- a/tools/annotation_profiler/annotation_profiler.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,136 +0,0 @@
-<tool id="Annotation_Profiler_0" name="Profile Annotations" version="1.0.0">
-  <description>for a set of genomic intervals</description>
-  <command interpreter="python">annotation_profiler_for_interval.py -i $input1 -c ${input1.metadata.chromCol} -s ${input1.metadata.startCol} -e ${input1.metadata.endCol} -o $out_file1 $keep_empty -p ${GALAXY_DATA_INDEX_DIR}/annotation_profiler/$dbkey $summary -b 3 -t $table_names</command>
-  <inputs>
-    <param format="interval" name="input1" type="data" label="Choose Intervals">
-      <validator type="dataset_metadata_in_file" filename="annotation_profiler_valid_builds.txt" metadata_name="dbkey" metadata_column="0" message="Profiling is not currently available for this species."/>
-    </param>
-    <param name="keep_empty" type="select" label="Keep Region/Table Pairs with 0 Coverage">
-      <option value="-k">Keep</option>
-      <option value="" selected="true">Discard</option>
-    </param>
-    <param name="summary" type="select" label="Output per Region/Summary">
-      <option value="-S">Summary</option>
-      <option value="" selected="true">Per Region</option>
-    </param>
-    <param name="table_names" type="drill_down" display="checkbox" hierarchy="recurse" multiple="true" label="Choose Tables to Use" help="Selecting no tables will result in using all tables." from_file="annotation_profiler_options.xml"/>
-   </inputs>
-   <outputs>
-     <data format="input" name="out_file1">
-       <change_format>
-         <when input="summary" value="-S" format="tabular" />
-       </change_format>
-     </data>
-   </outputs>
-   <tests>
-     <test>
-       <param name="input1" value="4.bed" dbkey="hg18"/>
-       <param name="keep_empty" value=""/>
-       <param name="summary" value=""/>
-       <param name="table_names" value="acembly,affyGnf1h,knownAlt,knownGene,mrna,multiz17way,multiz28way,refGene,snp126"/>
-       <output name="out_file1" file="annotation_profiler_1.out" />
-     </test>
-     <test>
-       <param name="input1" value="3.bed" dbkey="hg18"/>
-       <param name="keep_empty" value=""/>
-       <param name="summary" value="Summary"/>
-       <param name="table_names" value="acembly,affyGnf1h,knownAlt,knownGene,mrna,multiz17way,multiz28way,refGene,snp126"/>
-       <output name="out_file1" file="annotation_profiler_2.out" />
-     </test>
-   </tests>
-   <help>
-**What it does**
-
-Takes an input set of intervals and for each interval determines the base coverage of the interval by a set of features (tables) available from UCSC. Genomic regions from the input feature data have been merged by overlap / direct adjacency (e.g. a table having ranges of: 1-10, 6-12, 12-20 and 25-28 results in two merged ranges of: 1-20 and 25-28).
-
-By default, this tool will check the coverage of your intervals against all available features; you may, however, choose to select only those tables that you want to include. Selecting a section heading will effectively cause all of its children to be selected.
-
-You may alternatively choose to receive a summary across all of the intervals that you provide.
-
------
-
-**Example**
-
-Using the interval below and selecting several tables::
-
- chr1 4558 14764 uc001aab.1 0 -
-
-results in::
-
- chr1 4558 14764 uc001aab.1 0 - snp126Exceptions 151 142
- chr1 4558 14764 uc001aab.1 0 - genomicSuperDups 10206 1
- chr1 4558 14764 uc001aab.1 0 - chainOryLat1 3718 1
- chr1 4558 14764 uc001aab.1 0 - multiz28way 10206 1
- chr1 4558 14764 uc001aab.1 0 - affyHuEx1 3553 32
- chr1 4558 14764 uc001aab.1 0 - netXenTro2 3050 1
- chr1 4558 14764 uc001aab.1 0 - intronEst 10206 1
- chr1 4558 14764 uc001aab.1 0 - xenoMrna 10203 1
- chr1 4558 14764 uc001aab.1 0 - ctgPos 10206 1
- chr1 4558 14764 uc001aab.1 0 - clonePos 10206 1
- chr1 4558 14764 uc001aab.1 0 - chainStrPur2Link 1323 29
- chr1 4558 14764 uc001aab.1 0 - affyTxnPhase3HeLaNuclear 9011 8
- chr1 4558 14764 uc001aab.1 0 - snp126orthoPanTro2RheMac2 61 58
- chr1 4558 14764 uc001aab.1 0 - snp126 205 192
- chr1 4558 14764 uc001aab.1 0 - chainEquCab1 10206 1
- chr1 4558 14764 uc001aab.1 0 - netGalGal3 3686 1
- chr1 4558 14764 uc001aab.1 0 - phastCons28wayPlacMammal 10172 3
-
-Where::
-
- The first added column is the table name.
- The second added column is the number of bases covered by the table.
- The third added column is the number of regions from the table that are covered by the interval.
-
-Alternatively, requesting a summary, using the intervals below and selecting several tables::
-
- chr1 4558 14764 uc001aab.1 0 -
- chr1 4558 19346 uc001aac.1 0 -
-
-results in::
-
- #tableName tableSize tableRegionCount allIntervalCount allIntervalSize allCoverage allTableRegionsOverlaped allIntervalsOverlapingTable nrIntervalCount nrIntervalSize nrCoverage nrTableRegionsOverlaped nrIntervalsOverlapingTable
- snp126Exceptions 133601 92469 2 24994 388 359 2 1 14788 237 217 1
- genomicSuperDups 12268847 657 2 24994 24994 2 2 1 14788 14788 1 1
- chainOryLat1 70337730 2542 2 24994 7436 2 2 1 14788 3718 1 1
- affyHuEx1 15703901 112274 2 24994 7846 70 2 1 14788 4293 38 1
- netXenTro2 111440392 1877 2 24994 6100 2 2 1 14788 3050 1 1
- snp126orthoPanTro2RheMac2 700436 690674 2 24994 124 118 2 1 14788 63 60 1
- intronEst 135796064 2332 2 24994 24994 2 2 1 14788 14788 1 1
- xenoMrna 129031327 1586 2 24994 20406 2 2 1 14788 10203 1 1
- snp126 956976 838091 2 24994 498 461 2 1 14788 293 269 1
- clonePos 224999719 39 2 24994 24994 2 2 1 14788 14788 1 1
- chainStrPur2Link 7948016 119841 2 24994 2646 58 2 1 14788 1323 29 1
- affyTxnPhase3HeLaNuclear 136797870 140244 2 24994 22601 17 2 1 14788 13590 9 1
- multiz28way 225928588 38 2 24994 24994 2 2 1 14788 14788 1 1
- ctgPos 224999719 39 2 24994 24994 2 2 1 14788 14788 1 1
- chainEquCab1 246306414 141 2 24994 24994 2 2 1 14788 14788 1 1
- netGalGal3 203351973 461 2 24994 7372 2 2 1 14788 3686 1 1
- phastCons28wayPlacMammal 221017670 22803 2 24994 24926 6 2 1 14788 14754 3 1
-
-Where::
- 
- tableName is the name of the table
- tableChromosomeCoverage is the number of positions existing in the table for only the chromosomes that were referenced by the interval file
- tableChromosomeCount is the number of regions existing in the table for only the chromosomes that were referenced by the interval file
- tableRegionCoverage is the number of positions existing in the table between the minimal and maximal bounding regions that were referenced by the interval file
- tableRegionCount is the number of regions existing in the table between the minimal and maximal bounding regions that were referenced by the interval file
- 
- allIntervalCount is the number of provided intervals
- allIntervalSize is the sum of the lengths of the provided interval file
- allCoverage is the sum of the coverage for each provided interval
- allTableRegionsOverlapped is the total, across all provided intervals, of the number of table regions (non-unique) overlapped by each interval
- allIntervalsOverlappingTable is the number of provided intervals which overlap the table
- 
- nrIntervalCount is the number of non-redundant intervals
- nrIntervalSize is the sum of the lengths of non-redundant intervals
- nrCoverage is the sum of the coverage of non-redundant intervals
- nrTableRegionsOverlapped is the number of regions of the table (unique) that were overlapped by the non-redundant intervals
- nrIntervalsOverlappingTable is the number of non-redundant intervals which overlap the table
- 
-
-.. class:: infomark
-
-**TIP:** non-redundant (nr) refers to the set of intervals that remains after the intervals provided have been merged to resolve overlaps
-
-  </help>
-</tool>
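
The overlap/adjacency merging described in the help above (ranges 1-10, 6-12, 12-20 and 25-28 collapse to 1-20 and 25-28) can be reproduced with a short sketch; this is an editorial illustration, not code from the deleted tool::

    def merge_ranges(ranges):
        """Collapse ranges that overlap or are directly adjacent."""
        merged = []
        for start, end in sorted(ranges):
            if merged and start <= merged[-1][1]:   # overlaps or touches the previous range
                merged[-1][1] = max(merged[-1][1], end)
            else:
                merged.append([start, end])
        return [tuple(r) for r in merged]

    print(merge_ranges([(1, 10), (6, 12), (12, 20), (25, 28)]))
    # -> [(1, 20), (25, 28)]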
--- a/tools/annotation_profiler/annotation_profiler_for_interval.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,360 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-#For a set of intervals, this tool returns the same set of intervals
-#with 3 additional fields: the name of a Table/Feature, the number of
-#bases covered, and the number of table regions overlapped. The original
-#intervals are repeated for each Table/Feature.
-
-import sys, struct, optparse, os, random
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-import bx.intervals.io
-import bx.bitset
-try:
-    import psyco
-    psyco.full()
-except:
-    pass
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-class CachedRangesInFile:
-    DEFAULT_STRUCT_FORMAT = '<I'
-    def __init__( self, filename, profiler_info ):
-        self.file_size = os.stat( filename ).st_size
-        self.file = open( filename, 'rb' )
-        self.filename = filename
-        self.fmt = profiler_info.get( 'profiler_struct_format', self.DEFAULT_STRUCT_FORMAT )
-        self.fmt_size = int( profiler_info.get( 'profiler_struct_size', struct.calcsize( self.fmt ) ) )
-        self.length = int( self.file_size / self.fmt_size / 2 )
-        self._cached_ranges = [ None for i in xrange( self.length ) ]
-    def __getitem__( self, i ):
-        if self._cached_ranges[i] is not None:
-            return self._cached_ranges[i]
-        if i < 0: i = self.length + i
-        offset = i * self.fmt_size * 2
-        self.file.seek( offset )
-        try:
-            start = struct.unpack( self.fmt, self.file.read( self.fmt_size ) )[0]
-            end = struct.unpack( self.fmt, self.file.read( self.fmt_size ) )[0]
-        except Exception, e:
-            raise IndexError, e
-        self._cached_ranges[i] = ( start, end )
-        return start, end
-    def __len__( self ):
-        return self.length
-
-class RegionCoverage:
-    def __init__( self, filename_base, profiler_info ):
-        try:
-            self._coverage = CachedRangesInFile( "%s.covered" % filename_base, profiler_info )
-        except Exception, e:
-            #print "Error loading coverage file %s: %s" % ( "%s.covered" % filename_base, e )
-            self._coverage = []
-        try: 
-            self._total_coverage = int( open( "%s.total_coverage" % filename_base ).read() )
-        except Exception, e:
-            #print "Error loading total coverage file %s: %s" % ( "%s.total_coverage" % filename_base, e )
-            self._total_coverage = 0
-    def get_start_index( self, start ):
-        #binary search: returns index of range closest to start
-        if start > self._coverage[-1][1]:
-            return len( self._coverage ) - 1
-        i = 0
-        j = len( self._coverage) - 1
-        while i < j:
-            k = ( i + j ) / 2
-            if start <= self._coverage[k][1]:
-                j = k
-            else:
-                i = k + 1
-        return i
-    def get_coverage( self, start, end ):
-        return self.get_coverage_regions_overlap( start, end )[0]
-    def get_coverage_regions_overlap( self, start, end ):
-        return self.get_coverage_regions_index_overlap( start, end )[0:2]
-    def get_coverage_regions_index_overlap( self, start, end ):
-        if len( self._coverage ) < 1 or start > self._coverage[-1][1] or end < self._coverage[0][0]:
-            return 0, 0, 0
-        if self._total_coverage and start <= self._coverage[0][0] and end >= self._coverage[-1][1]:
-            return self._total_coverage, len( self._coverage ), 0
-        coverage = 0
-        region_count = 0
-        start_index = self.get_start_index( start )
-        for i in xrange( start_index, len( self._coverage ) ):
-            c_start, c_end = self._coverage[i]
-            if c_start > end:
-                break
-            if c_start <= end and c_end >= start:
-                coverage += min( end, c_end ) - max( start, c_start )
-                region_count += 1
-        return coverage, region_count, start_index
-
-class CachedCoverageReader:
-    def __init__( self, base_file_path, buffer = 10, table_names = None, profiler_info = None ):
-        self._base_file_path = base_file_path
-        self._buffer = buffer #number of chromosomes to keep in memory at a time
-        self._coverage = {}
-        if table_names is None: table_names = [ table_dir for table_dir in os.listdir( self._base_file_path ) if os.path.isdir( os.path.join( self._base_file_path, table_dir ) ) ]
-        for tablename in table_names: self._coverage[tablename] = {}
-        if profiler_info is None: profiler_info = {}
-        self._profiler_info = profiler_info
-    def iter_table_coverage_by_region( self, chrom, start, end ):
-        for tablename, coverage, regions in self.iter_table_coverage_regions_by_region( chrom, start, end ):
-            yield tablename, coverage
-    def iter_table_coverage_regions_by_region( self, chrom, start, end ):
-        for tablename, coverage, regions, index in self.iter_table_coverage_regions_index_by_region( chrom, start, end ):
-            yield tablename, coverage, regions
-    def iter_table_coverage_regions_index_by_region( self, chrom, start, end ):
-        for tablename, chromosomes in self._coverage.iteritems():
-            if chrom not in chromosomes:
-                if len( chromosomes ) >= self._buffer:
-                    #randomly remove one chromosome from this table
-                    del chromosomes[ chromosomes.keys().pop( random.randint( 0, self._buffer - 1 ) ) ]
-                chromosomes[chrom] = RegionCoverage( os.path.join ( self._base_file_path, tablename, chrom ), self._profiler_info )
-            coverage, regions, index = chromosomes[chrom].get_coverage_regions_index_overlap( start, end )
-            yield tablename, coverage, regions, index
-
-class TableCoverageSummary:
-    def __init__( self, coverage_reader, chrom_lengths ):
-        self.coverage_reader = coverage_reader
-        self.chrom_lengths = chrom_lengths
-        self.chromosome_coverage = {} #dict of bitset by chromosome holding user's collapsed input intervals
-        self.total_interval_size = 0 #total size of user's input intervals
-        self.total_interval_count = 0 #total number of user's input intervals
-        self.table_coverage = {} #dict of total coverage by user's input intervals by table
-        self.table_chromosome_size = {} #dict of dict of table:chrom containing total coverage of table for a chrom
-        self.table_chromosome_count = {} #dict of dict of table:chrom containing total number of coverage ranges of table for a chrom
-        self.table_regions_overlaped_count = {} #total number of table regions overlapping user's input intervals (non-unique)
-        self.interval_table_overlap_count = {} #total number of user input intervals which overlap table
-        self.region_size_errors = {} #dictionary of lists of invalid ranges by chromosome
-    def add_region( self, chrom, start, end ):
-        chrom_length = self.chrom_lengths.get( chrom )
-        region_start = min( start, chrom_length )
-        region_end = min( end, chrom_length )
-        region_length = region_end - region_start
-        
-        if region_length < 1 or region_start != start or region_end != end:
-            if chrom not in self.region_size_errors:
-                self.region_size_errors[chrom] = []
-            self.region_size_errors[chrom].append( ( start, end ) )
-            if region_length < 1: return
-        
-        self.total_interval_size += region_length
-        self.total_interval_count += 1
-        if chrom not in self.chromosome_coverage:
-            self.chromosome_coverage[chrom] = bx.bitset.BitSet( chrom_length )
-        
-        self.chromosome_coverage[chrom].set_range( region_start, region_length )
-        for table_name, coverage, regions in self.coverage_reader.iter_table_coverage_regions_by_region( chrom, region_start, region_end ):
-            if table_name not in self.table_coverage:
-                self.table_coverage[table_name] = 0
-                self.table_chromosome_size[table_name] = {}
-                self.table_regions_overlaped_count[table_name] = 0
-                self.interval_table_overlap_count[table_name] = 0
-                self.table_chromosome_count[table_name] = {}
-            if chrom not in self.table_chromosome_size[table_name]:
-                self.table_chromosome_size[table_name][chrom] = self.coverage_reader._coverage[table_name][chrom]._total_coverage
-                self.table_chromosome_count[table_name][chrom] = len( self.coverage_reader._coverage[table_name][chrom]._coverage )
-            self.table_coverage[table_name] += coverage
-            if coverage:
-                self.interval_table_overlap_count[table_name] += 1
-            self.table_regions_overlaped_count[table_name] += regions
-    def iter_table_coverage( self ):
-        def get_nr_coverage():
-            #returns non-redundant coverage, where user's input intervals have been collapsed to resolve overlaps
-            table_coverage = {} #dictionary of tables containing number of table bases overlapped by nr intervals
-            interval_table_overlap_count = {} #dictionary of tables containing number of nr intervals overlapping the table
-            table_regions_overlap_count = {} #dictionary of tables containing number of regions overlapped (unique)
-            interval_count = 0 #total number of nr intervals
-            interval_size = 0 #holds total size of nr intervals
-            region_start_end = {} #holds absolute start,end for each user input chromosome
-            for chrom, chromosome_bitset in self.chromosome_coverage.iteritems():
-                #loop through user's collapsed input intervals
-                end = 0
-                last_end_index = {}
-                interval_size += chromosome_bitset.count_range()
-                while True:
-                    if end >= chromosome_bitset.size: break
-                    start = chromosome_bitset.next_set( end )
-                    if start >= chromosome_bitset.size: break
-                    end = chromosome_bitset.next_clear( start )
-                    interval_count += 1
-                    if chrom not in region_start_end:
-                        region_start_end[chrom] = [start, end]
-                    else:
-                        region_start_end[chrom][1] = end
-                    for table_name, coverage, region_count, start_index in self.coverage_reader.iter_table_coverage_regions_index_by_region( chrom, start, end ):
-                        if table_name not in table_coverage:
-                            table_coverage[table_name] = 0
-                            interval_table_overlap_count[table_name] = 0
-                            table_regions_overlap_count[table_name] = 0
-                        table_coverage[table_name] += coverage
-                        if coverage:
-                            interval_table_overlap_count[table_name] += 1
-                            table_regions_overlap_count[table_name] += region_count
-                            if table_name in last_end_index and last_end_index[table_name] == start_index:
-                                table_regions_overlap_count[table_name] -= 1
-                            last_end_index[table_name] = start_index + region_count - 1
-            table_region_coverage = {} #total coverage for tables by bounding nr interval region
-            table_region_count = {} #total number for tables by bounding nr interval region
-            for chrom, start_end in region_start_end.items():
-                for table_name, coverage, region_count in self.coverage_reader.iter_table_coverage_regions_by_region( chrom, start_end[0], start_end[1] ):
-                    if table_name not in table_region_coverage:
-                        table_region_coverage[table_name] = 0
-                        table_region_count[table_name] = 0
-                    table_region_coverage[table_name] += coverage
-                    table_region_count[table_name] += region_count
-            return table_region_coverage, table_region_count, interval_count, interval_size, table_coverage, table_regions_overlap_count, interval_table_overlap_count
-        table_region_coverage, table_region_count, nr_interval_count, nr_interval_size, nr_table_coverage, nr_table_regions_overlap_count, nr_interval_table_overlap_count = get_nr_coverage()
-        for table_name in self.table_coverage:
-            #TODO: determine a type of statistic, then calculate and report here
-            yield table_name, sum( self.table_chromosome_size.get( table_name, {} ).values() ), sum( self.table_chromosome_count.get( table_name, {} ).values() ), table_region_coverage.get( table_name, 0 ), table_region_count.get( table_name, 0 ), self.total_interval_count, self.total_interval_size,  self.table_coverage[table_name], self.table_regions_overlaped_count.get( table_name, 0), self.interval_table_overlap_count.get( table_name, 0 ), nr_interval_count, nr_interval_size, nr_table_coverage[table_name], nr_table_regions_overlap_count.get( table_name, 0 ), nr_interval_table_overlap_count.get( table_name, 0 )
-
-def profile_per_interval( interval_filename, chrom_col, start_col, end_col, out_filename, keep_empty, coverage_reader ):
-    out = open( out_filename, 'wb' )
-    for region in bx.intervals.io.NiceReaderWrapper( open( interval_filename, 'rb' ), chrom_col = chrom_col, start_col = start_col, end_col = end_col, fix_strand = True, return_header = False, return_comments = False ):
-        for table_name, coverage, region_count in coverage_reader.iter_table_coverage_regions_by_region( region.chrom, region.start, region.end ):
-            if keep_empty or coverage:
-                #only output regions that have at least 1 base covered, unless empty regions are requested
-                out.write( "%s\t%s\t%s\t%s\n" % ( "\t".join( region.fields ), table_name, coverage, region_count ) )
-    out.close()
-
-def profile_summary( interval_filename, chrom_col, start_col, end_col, out_filename, keep_empty, coverage_reader, chrom_lengths ):
-    out = open( out_filename, 'wb' )
-    table_coverage_summary = TableCoverageSummary( coverage_reader, chrom_lengths )
-    for region in bx.intervals.io.NiceReaderWrapper( open( interval_filename, 'rb' ), chrom_col = chrom_col, start_col = start_col, end_col = end_col, fix_strand = True, return_header = False, return_comments = False ):
-        table_coverage_summary.add_region( region.chrom, region.start, region.end )
-    
-    out.write( "#tableName\ttableChromosomeCoverage\ttableChromosomeCount\ttableRegionCoverage\ttableRegionCount\tallIntervalCount\tallIntervalSize\tallCoverage\tallTableRegionsOverlaped\tallIntervalsOverlapingTable\tnrIntervalCount\tnrIntervalSize\tnrCoverage\tnrTableRegionsOverlaped\tnrIntervalsOverlapingTable\n" )
-    for table_name, table_chromosome_size, table_chromosome_count, table_region_coverage, table_region_count, total_interval_count, total_interval_size, total_coverage, table_regions_overlaped_count, interval_region_overlap_count, nr_interval_count, nr_interval_size, nr_coverage, nr_table_regions_overlaped_count, nr_interval_table_overlap_count in table_coverage_summary.iter_table_coverage():
-        if keep_empty or total_coverage:
-            #only output tables that have at least 1 base covered, unless empty tables are requested
-            out.write( "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ( table_name, table_chromosome_size, table_chromosome_count, table_region_coverage, table_region_count, total_interval_count, total_interval_size, total_coverage, table_regions_overlaped_count, interval_region_overlap_count, nr_interval_count, nr_interval_size, nr_coverage, nr_table_regions_overlaped_count, nr_interval_table_overlap_count ) )
-    out.close()
-    
-    #report chrom size errors as needed:
-    if table_coverage_summary.region_size_errors:
-        print "Regions provided extended beyond known chromosome lengths, and have been truncated as necessary, for the following intervals:"
-        for chrom, regions in table_coverage_summary.region_size_errors.items():
-            if len( regions ) > 3:
-                extra_region_info = ", ... "
-            else:
-                extra_region_info = ""
-            print "%s has max length of %s, exceeded by %s%s." % ( chrom, chrom_lengths.get( chrom ), ", ".join( map( str, regions[:3] ) ), extra_region_info )
-
-class ChromosomeLengths:
-    def __init__( self, profiler_info ):
-        self.chroms = {}
-        self.default_bitset_size = int( profiler_info.get( 'bitset_size', bx.bitset.MAX ) )
-        chroms = profiler_info.get( 'chromosomes', None )
-        if chroms:
-            for chrom in chroms.split( ',' ):
-                fields = chrom.rsplit( '=', 1 )
-                if len( fields ) == 2:
-                    self.chroms[ fields[0] ] = int( fields[1] )
-                else:
-                    self.chroms[ fields[0] ] = self.default_bitset_size
-    def get( self, name ):
-        return self.chroms.get( name, self.default_bitset_size )
-
-def parse_profiler_info( filename ):
-    profiler_info = {}
-    try:
-        for line in open( filename ):
-            fields = line.rstrip( '\n\r' ).split( '\t', 1 )
-            if len( fields ) == 2:
-                if fields[0] in profiler_info:
-                    if not isinstance( profiler_info[ fields[0] ], list ):
-                        profiler_info[ fields[0] ] = [ profiler_info[ fields[0] ] ]
-                    profiler_info[ fields[0] ].append( fields[1] )
-                else:
-                    profiler_info[ fields[0] ] = fields[1]
-    except:
-        pass #likely missing file
-    return profiler_info
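For reference, this parser implies a plain tab-separated key/value format for the profiler_info file, with a repeated key collected into a list. A hypothetical file consistent with the keys read elsewhere in this script (all values illustrative):

    dbkey	hg18
    profiler_hash	0123abcd
    dump_time	2009-05-01
    bitset_size	536870912
    chromosomes	chr1=247249719,chr2=242951149,chrM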
-
-def __main__():
-    parser = optparse.OptionParser()
-    parser.add_option(
-        '-k','--keep_empty',
-        action="store_true",
-        dest='keep_empty',
-        default=False,
-        help='Keep tables with 0 coverage'
-    )
-    parser.add_option(
-        '-b','--buffer',
-        dest='buffer',
-        type='int',default=10,
-        help='Number of Chromosomes to keep buffered'
-    )
-    parser.add_option(
-        '-c','--chrom_col',
-        dest='chrom_col',
-        type='int',default=1,
-        help='Chromosome column'
-    )
-    parser.add_option(
-        '-s','--start_col',
-        dest='start_col',
-        type='int',default=2,
-        help='Start Column'
-    )
-    parser.add_option(
-        '-e','--end_col',
-        dest='end_col',
-        type='int',default=3,
-        help='End Column'
-    )
-    parser.add_option(
-        '-p','--path',
-        dest='path',
-        type='str',default='/galaxy/data/annotation_profiler/hg18',
-        help='Path to profiled data for this organism'
-    )
-    parser.add_option(
-        '-t','--table_names',
-        dest='table_names',
-        type='str',default='None',
-        help='Table names requested'
-    )
-    parser.add_option(
-        '-i','--input',
-        dest='interval_filename',
-        type='str',
-        help='Input Interval File'
-    )
-    parser.add_option(
-        '-o','--output',
-        dest='out_filename',
-        type='str',
-        help='Output Interval File'
-    )
-    parser.add_option(
-        '-S','--summary',
-        action="store_true",
-        dest='summary',
-        default=False,
-        help='Display Summary Results'
-    )
-    
-    options, args = parser.parse_args()
-    
-    assert os.path.isdir( options.path ), IOError( "Configuration error: Table directory is missing (%s)" % options.path )
-    
-    #get profiler_info
-    profiler_info = parse_profiler_info( os.path.join( options.path, 'profiler_info.txt' ) )
-    
-    table_names = options.table_names.split( "," )
-    if table_names == ['None']: table_names = None
-    coverage_reader = CachedCoverageReader( options.path, buffer = options.buffer, table_names = table_names, profiler_info = profiler_info )
-    
-    if options.summary:
-        profile_summary( options.interval_filename, options.chrom_col - 1, options.start_col - 1, options.end_col -1, options.out_filename, options.keep_empty, coverage_reader, ChromosomeLengths( profiler_info ) )
-    else:
-        profile_per_interval( options.interval_filename, options.chrom_col - 1, options.start_col - 1, options.end_col -1, options.out_filename, options.keep_empty, coverage_reader )
-    
-    #print out data version info
-    print 'Data version (%s:%s:%s)' % ( profiler_info.get( 'dbkey', 'unknown' ), profiler_info.get( 'profiler_hash', 'unknown' ), profiler_info.get( 'dump_time', 'unknown' ) )
-
-if __name__ == "__main__": __main__()
Binary file tools/bedtools/._bedToBam.xml has changed
--- a/tools/bedtools/bedToBam.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-<tool id="bedToBam" name="bedToBam">
-  <description>convert BED or GFF or VCF to BAM</description>
-  <command>bedToBam -i $input -g $genome $bed12 -mapq $mapq $ubam > $outfile </command>
-  <inputs>
-    <param name="input" format="bed,gff,vcf" type="data" label="Input file (BED,GFF,VCF)" help="BED files must be at least BED4 to be amenable to BAM (needs name field)"/>
-    <param name="genome" type="select" label="Select genome">
-     <option value="/Users/xuebing/tools/BEDTools-Version-2.13.3/genomes/mouse.mm9.genome" selected="true">mm9</option>
-     <option value="/Users/xuebing/tools/BEDTools-Version-2.13.3/genomes/mouse.mm8.genome">mm8</option>
-     <option value="/Users/xuebing/tools/BEDTools-Version-2.13.3/genomes/mouse.hg18.genome">hg18</option>
-     <option value="/Users/xuebing/tools/BEDTools-Version-2.13.3/genomes/mouse.hg19.genome">hg19</option>
-    </param>
-    <param name="mapq" size="10" type="integer" value="255" label="Set the mappinq quality for the BAM records"/>
-    <param name="bed12" label="The BED file is in BED12 format" help="The BAM CIGAR string will reflect BED blocks" type="boolean" truevalue="-bed12" falsevalue="" checked="False"/>
-    <param name="ubam" label="Write uncompressed BAM output" help="Default is to write compressed BAM" type="boolean" truevalue="-ubam" falsevalue="" checked="False"/>
-  </inputs>
-  <outputs>
-    <data format="bam" name="outfile" />
-  </outputs>
-  <help>
-
-**What it does**
-
-Program: bedToBam (v2.13.3)
-Author:  Aaron Quinlan (aaronquinlan@gmail.com)
-Summary: Converts feature records to BAM format.
-
-
-  </help>
-</tool>
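For clarity, with the -mapq flag restored the command template above renders to a shell call like the following; file names are illustrative, and -bed12/-ubam appear only when the corresponding checkboxes are ticked:

    bedToBam -i input.bed -g /Users/xuebing/tools/BEDTools-Version-2.13.3/genomes/mouse.mm9.genome -mapq 255 > output.bam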
--- a/tools/data_destination/epigraph.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-<?xml version="1.0"?>
-<tool name="Perform genome analysis" id="epigraph_export">
-    <description> and prediction with EpiGRAPH</description>
-    <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params>
-    <inputs>
-        <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH">
-            <validator type="unspecified_build" />
-        </param>
-        <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/DataImport.jsp" />
-        <param name="DATA_URL" type="baseurl" value="/datasets" />
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" />
-    </inputs>
-    <outputs/>
-    <help>
-
-.. class:: warningmark
-
-After clicking the **Execute** button, you will be redirected to the EpiGRAPH website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance.
-
------
-
-.. class:: infomark
-
-**What it does**
-
-This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods.
-
------
-
-.. class:: infomark
-
-**EpiGRAPH outline**
-
-The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties.
-
-.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/
-
-    </help>
-</tool>
-
-
--- a/tools/data_destination/epigraph_test.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-<?xml version="1.0"?>
-<tool name="Perform genome analysis" id="epigraph_test_export">
-    <description> and prediction with EpiGRAPH Test</description>
-    <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params>
-    <inputs>
-        <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH">
-            <validator type="unspecified_build" />
-        </param>
-        <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/DataImport.jsp" />
-        <param name="DATA_URL" type="baseurl" value="/datasets" />
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" />
-    </inputs>
-    <outputs/>
-    <help>
-
-.. class:: warningmark
-
-After clicking the **Execute** button, you will be redirected to the EpiGRAPH test website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance.
-
------
-
-.. class:: infomark
-
-**What it does**
-
-This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods.
-
------
-
-.. class:: infomark
-
-**EpiGRAPH outline**
-
-The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties.
-
-.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/
-
-    </help>
-</tool>
-
--- a/tools/data_source/access_libraries.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-<?xml version="1.0"?>
-<tool name="Access Libraries" id="library_access1">
-    <description>stored locally</description>
-    <inputs action="/library/index" method="get" target="_parent">
-        <param name="default_action" type="hidden" value="import_to_histories" />
-    </inputs>
-    <uihints minwidth="800"/>
-</tool>
--- a/tools/data_source/bed_convert.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-<tool id="BED File Converter1" name="BED File Converter">
-  <description>creates a BED or xBED file from a text query</description>
-  <command>noop</command>
-  <inputs>
-    <display>creates a BED or xBED file from the user-assigned input $input</display>
-    <param format="tabular" name="input" type="data" />
-    <param name="chrom" size="4" type="text" value="all" />
-  </inputs>
-  <outputs>
-    <data format="bed" name="out_file1" />
-  </outputs>
-  <help>User specifies delimiter, header information, and column assignments and the file will be converted to BED or xBED.
-</help>
-</tool>
\ No newline at end of file
--- a/tools/data_source/biomart.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
-
-    TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile
-    everything including and beyond the first '&' is truncated from URL.  They said they'll let us know when this is fixed at their end.
--->
-<tool name="BioMart" id="biomart" tool_type="data_source" version="1.0.1">
-	<description>Central server</description>
-	<command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-	<inputs action="http://www.biomart.org/biomart/martview" check_values="false" method="get" target="_top">
-		<display>go to BioMart Central $GALAXY_URL</display>
-		<param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL" remote_name="URL" missing="">
-            <append_param separator="&amp;" first_separator="?" join="=">
-                <value name="_export" missing="1" />
-                <value name="GALAXY_URL" missing="0" />
-            </append_param>
-        </request_param>
-        <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" >
-            <value_translation>
-                <value galaxy_value="tabular" remote_value="TSV" />
-            </value_translation>
-        </request_param>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
-        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" />
-        <request_param galaxy_name="organism" remote_name="organism" missing="" />
-        <request_param galaxy_name="table" remote_name="table" missing="" />
-        <request_param galaxy_name="description" remote_name="description" missing="" />
-        <request_param galaxy_name="name" remote_name="name" missing="Biomart query" />
-        <request_param galaxy_name="info" remote_name="info" missing="" />
-    </request_param_translation>
-	<uihints minwidth="800"/>
-	<outputs>
-		<data name="output" format="tabular" />
-	</outputs>
-	<options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/biomart_test.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
-
-    TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile
-    everything including and beyond the first '&' is truncated from URL.  They said they'll let us know when this is fixed at their end.
--->
-<tool name="BioMart" id="biomart_test" tool_type="data_source" version="1.0.1">
-	<description>Test server</description>
-	<command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-	<inputs action="http://test.biomart.org/biomart/martview" check_values="false" method="get" target="_top">
-		<display>go to BioMart Central $GALAXY_URL</display>
-		<param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" />
-	</inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL" remote_name="URL" missing="">
-            <append_param separator="&amp;" first_separator="?" join="=">
-                <value name="_export" missing="1" />
-                <value name="GALAXY_URL" missing="0" />
-            </append_param>
-        </request_param>
-        <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" >
-            <value_translation>
-                <value galaxy_value="tabular" remote_value="TSV" />
-            </value_translation>
-        </request_param>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
-        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" />
-        <request_param galaxy_name="organism" remote_name="organism" missing="" />
-        <request_param galaxy_name="table" remote_name="table" missing="" />
-        <request_param galaxy_name="description" remote_name="description" missing="" />
-        <request_param galaxy_name="name" remote_name="name" missing="Biomart test query" />
-        <request_param galaxy_name="info" remote_name="info" missing="" />
-    </request_param_translation>
-	<uihints minwidth="800"/>		
-	<outputs>
-		<data name="output" format="tabular" />
-	</outputs>
-	<options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/bx_browser.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
--->
-<tool name="BX main" id="bx_browser" tool_type="data_source">
-    <description>browser</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://main.genome-browser.bx.psu.edu/cgi-bin/hgTables" check_values="false" method="get">
-        <display>go to BX Browser $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
-        <param name="tool_id" type="hidden" value="bx_browser" />
-        <param name="sendToGalaxy" type="hidden" value="1" />
-        <param name="hgta_compressType" type="hidden" value="none" />
-        <param name="hgta_outputType" type="hidden" value="bed" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
-        <request_param galaxy_name="URL" remote_name="URL" missing="" />
-        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
-        <request_param galaxy_name="organism" remote_name="org" missing="unknown species" />
-        <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" />
-        <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" />
-        <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="tabular" >
-            <value_translation>
-                <value galaxy_value="tabular" remote_value="primaryTable" />
-                <value galaxy_value="tabular" remote_value="selectedFields" />
-                <value galaxy_value="wig" remote_value="wigData" />
-                <value galaxy_value="interval" remote_value="tab" />
-                <value galaxy_value="html" remote_value="hyperlinks" />
-                <value galaxy_value="fasta" remote_value="sequence" />
-            </value_translation>
-        </request_param>
-    </request_param_translation>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="tabular" />
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/cbi_rice_mart.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
--->
-<tool name="CBI Rice Mart" id="cbi_rice_mart" tool_type="data_source" version="1.0.1">
-    <description>rice mart</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://ricemart.cbi.edu.cn/biomart/martview/" check_values="false" method="get" target="_top">
-        <display>go to RMap rice mart $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL" remote_name="URL" missing="">
-            <append_param separator="&amp;" first_separator="?" join="=">
-                <value name="_export" missing="1" />
-                <value name="GALAXY_URL" missing="0" />
-            </append_param>
-        </request_param>
-        <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" >
-            <value_translation>
-                <value galaxy_value="tabular" remote_value="TSV" />
-            </value_translation>
-        </request_param>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
-        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" />
-        <request_param galaxy_name="organism" remote_name="organism" missing="" />
-        <request_param galaxy_name="table" remote_name="table" missing="" />
-        <request_param galaxy_name="description" remote_name="description" missing="" />
-        <request_param galaxy_name="name" remote_name="name" missing="Rice mart query" />
-        <request_param galaxy_name="info" remote_name="info" missing="" />
-    </request_param_translation>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="tabular" />
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/data_source.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,110 +0,0 @@
-#!/usr/bin/env python
-# Retrieves data from external data source applications and stores in a dataset file.
-# Data source application parameters are temporarily stored in the dataset file.
-import socket, urllib, sys, os
-from galaxy import eggs #eggs needs to be imported so that galaxy.util can find docutils egg...
-from galaxy.util.json import from_json_string, to_json_string
-import galaxy.model # need to import model before sniff to resolve a circular import dependency
-from galaxy.datatypes import sniff
-from galaxy.datatypes.registry import Registry
-from galaxy.jobs import TOOL_PROVIDED_JOB_METADATA_FILE
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-GALAXY_PARAM_PREFIX = 'GALAXY'
-GALAXY_ROOT_DIR = os.path.realpath( os.path.join( os.path.split( os.path.realpath( __file__ ) )[0], '..', '..' ) )
-GALAXY_DATATYPES_CONF_FILE = os.path.join( GALAXY_ROOT_DIR, 'datatypes_conf.xml' )
-
-def load_input_parameters( filename, erase_file = True ):
-    datasource_params = {}
-    try:
-        json_params = from_json_string( open( filename, 'r' ).read() )
-        datasource_params = json_params.get( 'param_dict' )
-    except:
-        json_params = None
-        for line in open( filename, 'r' ):
-            try:
-                line = line.strip()
-                fields = line.split( '\t' )
-                datasource_params[ fields[0] ] = fields[1]
-            except:
-                continue
-    if erase_file:
-        open( filename, 'w' ).close() #open file for writing, then close, removes params from file
-    return json_params, datasource_params
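So load_input_parameters accepts two on-disk formats. Hypothetical minimal examples of each, showing only keys this script actually reads: the newer JSON form,

    {"param_dict": {"URL": "http://example.org/export", "URL_method": "get"},
     "output_data": [{"out_data_name": "output", "ext": "data", "dataset_id": 1,
                      "file_name": "/tmp/out.dat", "extra_files_path": null}],
     "job_config": {"GALAXY_ROOT_DIR": "...", "GALAXY_DATATYPES_CONF_FILE": "...",
                    "TOOL_PROVIDED_JOB_METADATA_FILE": "..."}}

and the older tabular form, one tab-separated name/value pair per line:

    URL	http://example.org/export
    URL_method	get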
-
-def __main__():
-    filename = sys.argv[1]
-    try:
-        max_file_size = int( sys.argv[2] )
-    except:
-        max_file_size = 0
-    
-    job_params, params = load_input_parameters( filename )
-    if job_params is None: #using an older tabular file
-        enhanced_handling = False
-        job_params = dict( param_dict = params )
-        job_params[ 'output_data' ] =  [ dict( out_data_name = 'output',
-                                               ext = 'data',
-                                               file_name = filename,
-                                               extra_files_path = None ) ]
-        job_params[ 'job_config' ] = dict( GALAXY_ROOT_DIR=GALAXY_ROOT_DIR, GALAXY_DATATYPES_CONF_FILE=GALAXY_DATATYPES_CONF_FILE, TOOL_PROVIDED_JOB_METADATA_FILE = TOOL_PROVIDED_JOB_METADATA_FILE )
-    else:
-        enhanced_handling = True
-        json_file = open( job_params[ 'job_config' ][ 'TOOL_PROVIDED_JOB_METADATA_FILE' ], 'w' ) #specially named file for output junk to pass onto set metadata
-    
-    datatypes_registry = Registry( root_dir = job_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = job_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )
-    
-    URL = params.get( 'URL', None ) #a parameter named exactly 'URL' indicates that only one dataset is being downloaded
-    URL_method = params.get( 'URL_method', None )
-    
-    # The Python support for fetching resources from the web is layered. urllib uses the httplib
-    # library, which in turn uses the socket library.  As of Python 2.3 you can specify how long
-    # a socket should wait for a response before timing out. By default the socket module has no
-    # timeout and can hang. Currently, the socket timeout is not exposed at the httplib or urllib2
-    # levels. However, you can set the default timeout ( in seconds ) globally for all sockets by
-    # doing the following.
-    socket.setdefaulttimeout( 600 )
-    
-    for data_dict in job_params[ 'output_data' ]:
-        cur_filename =  data_dict.get( 'file_name', filename )
-        cur_URL =  params.get( '%s|%s|URL' % ( GALAXY_PARAM_PREFIX, data_dict[ 'out_data_name' ] ), URL )
-        if not cur_URL:
-            open( cur_filename, 'w' ).write( "" )
-            stop_err( 'The remote data source application has not sent back a URL parameter in the request.' )
-        
-        # The following calls to urllib.urlopen() will use the above default timeout
-        try:
-            if not URL_method or URL_method == 'get':
-                page = urllib.urlopen( cur_URL )
-            elif URL_method == 'post':
-                page = urllib.urlopen( cur_URL, urllib.urlencode( params ) )
-        except Exception, e:
-            stop_err( 'The remote data source application may be off line, please try again later. Error: %s' % str( e ) )
-        if max_file_size:
-            file_size = int( page.info().get( 'Content-Length', 0 ) )
-            if file_size > max_file_size:
-                stop_err( 'The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.' % ( file_size, max_file_size ) )
-        #stream the data to disk, sniffing it for multi-byte character content along the way
-        try:
-            cur_filename, is_multi_byte = sniff.stream_to_open_named_file( page, os.open( cur_filename, os.O_WRONLY | os.O_CREAT ), cur_filename )
-        except Exception, e:
-            stop_err( 'Unable to fetch %s:\n%s' % ( cur_URL, e ) )
-        
-        #perform the same checks on the imported data that the upload tool performs
-        if enhanced_handling:
-            try:
-                ext = sniff.handle_uploaded_dataset_file( filename, datatypes_registry, ext = data_dict[ 'ext' ], is_multi_byte = is_multi_byte )
-            except Exception, e:
-                stop_err( str( e ) )
-            info = dict( type = 'dataset',
-                         dataset_id = data_dict[ 'dataset_id' ],
-                         ext = ext)
-            
-            json_file.write( "%s\n" % to_json_string( info ) )
-    
-if __name__ == "__main__": __main__()
--- a/tools/data_source/echo.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Script that just echoes the command line.
-"""
-
-import sys
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-print '-' * 20, "<br>"
-for elem in sys.argv:
-    print elem, "<br>"
-print '-' * 20, "<br>"
\ No newline at end of file
--- a/tools/data_source/echo.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-<?xml version="1.0"?>
-
-<tool name="Echo" id="echo1">
-
-	<description>
-		echoes parameters  
-	</description>
-	
-	<command interpreter="python">echo.py $input $database $output </command>
-
-	<inputs>
-		<param format="tabular" name="input" type="data" label="Input stuff"/>
-        <param type="select" name="database" label="Database">
-            <option value="alignseq.loc">Human (hg18)</option>
-            <option value="faseq.loc">Fly (dm3)</option>
-        </param>
-	</inputs>
-
-	<outputs>
-		<data format="input" name="output" label="Blat on ${database.value_label}" />
-	</outputs>
-
-</tool>
--- a/tools/data_source/encode_db.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-<?xml version="1.0"?>
-
-<tool name="EncodeDB" id="encode_db1">
-
-	<description>
-		at NHGRI 
-	</description>
-
-	<command interpreter="python">
-		fetch.py "$url" $output
-	</command>
-
-	<inputs action="http://research.nhgri.nih.gov/projects/ENCODEdb/cgi-bin/power_query.cgi" target="_top"> 
-<!--	<inputs action="http://localhost:9000/prepared"> -->
-		<display>go to EncodeDB $GALAXY_URL</display>
-		<param name="GALAXY_URL" type="baseurl" value="/async/encode_db1" />
-	</inputs>
-	
-  <uihints minwidth="800"/>
-  
-  <outputs>
-    <data format="bed" name="output" />
-  </outputs>
-	
-	<options sanitize="False" refresh="True"/>
-
-</tool>
\ No newline at end of file
--- a/tools/data_source/epigraph_import.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
--->
-<tool name="EpiGRAPH" id="epigraph_import" tool_type="data_source">
-    <description> server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/Login.jsp" check_values="false" method="get">
-        <display>go to EpiGRAPH server $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
-        <request_param galaxy_name="URL" remote_name="URL" missing="" />
-        <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" />
-        <request_param galaxy_name="organism" remote_name="organism" missing="" />
-        <request_param galaxy_name="table" remote_name="table" missing="" />
-        <request_param galaxy_name="description" remote_name="description" missing="" />
-        <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" />
-        <request_param galaxy_name="info" remote_name="INFO" missing="" />
-        <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" />
-    </request_param_translation>
-    <uihints minwidth="800"/>  
-    <outputs>
-        <data name="output" format="txt" />
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/epigraph_import_test.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
--->
-<tool name="EpiGRAPH" id="epigraph_import_test" tool_type="data_source">
-    <description> test server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get">
-        <display>go to EpiGRAPH server $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import_test" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
-        <request_param galaxy_name="URL" remote_name="URL" missing="" />
-        <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" />
-        <request_param galaxy_name="organism" remote_name="organism" missing="" />
-        <request_param galaxy_name="table" remote_name="table" missing="" />
-        <request_param galaxy_name="description" remote_name="description" missing="" />
-        <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" />
-        <request_param galaxy_name="info" remote_name="INFO" missing="" />
-        <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" />
-    </request_param_translation>
-    <uihints minwidth="800"/>  
-    <outputs>
-        <data name="output" format="txt" />
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/eupathdb.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-<tool name="EuPathDB" id="eupathdb" tool_type="data_source" url_method="post">
-    <description>server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://eupathdb.org/eupathdb/queries_tools.jsp" check_values="false" method="get"> 
-        <display>go to EuPathDB server $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=eupathdb" />
-    </inputs>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="tabular" />
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/fetch.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Script that fetches the contents of a URL and writes them to a file.
-"""
-
-import sys, os, urllib
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-BUFFER = 1048576
-
-url      = sys.argv[1]
-out_name = sys.argv[2]
-
-out = open(out_name, 'wt')
-try:
-    page = urllib.urlopen(url)
-    while 1:
-        data = page.read(BUFFER)
-        if not data:
-            break
-        out.write(data)
-except Exception, e:
-    print 'Error getting the data -> %s' % e
-out.close()
--- a/tools/data_source/fly_modencode.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-<?xml version="1.0"?>
-<tool name="modENCODE fly" id="modENCODEfly" tool_type="data_source">
-    <description>server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://modencode.oicr.on.ca/fgb2/gbrowse/fly" check_values="false" target="_top"> 
-        <display>go to modENCODE fly server $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=modENCODEfly" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="dm2" >
-            <value_translation>
-                <value galaxy_value="dm2" remote_value="fly" />
-            </value_translation>
-        </request_param>
-        <request_param galaxy_name="URL" remote_name="URL" missing="">
-            <append_param separator="&amp;" first_separator="?" join="=">
-                <value name="d" missing="" />
-                <value name="dbkey" missing="dm2" />
-                <value name="q" missing="" />
-                <value name="s" missing="" />
-                <value name="t" missing="" />
-            </append_param>
-        </request_param>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
-        <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" />
-    </request_param_translation>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="txt" label="${tool.name} on $getVar( 'q', 'unknown position' )"/>
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/flymine.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
--->
-<tool name="Flymine" id="flymine" tool_type="data_source">
-    <description>server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://www.flymine.org" check_values="false" method="get"> 
-        <display>go to Flymine server $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
-        <request_param galaxy_name="URL" remote_name="URL" missing="" />
-        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
-        <request_param galaxy_name="organism" remote_name="organism" missing="" />
-        <request_param galaxy_name="table" remote_name="table" missing="" />
-        <request_param galaxy_name="description" remote_name="description" missing="" />
-        <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" />
-        <request_param galaxy_name="info" remote_name="info" missing="" />
-        <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" >
-            <value_translation>
-                <value galaxy_value="auto" remote_value="txt" /> <!-- intermine currently always provides 'txt', make this auto detect -->
-            </value_translation>
-        </request_param>
-    </request_param_translation>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="txt" />
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
-
--- a/tools/data_source/flymine_test.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
--->
-<tool name="Flymine test" id="flymine_test" tool_type="data_source">
-    <description>server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://preview.flymine.org/preview/begin.do" check_values="false" method="get"> 
-        <display>go to Flymine server $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
-        <request_param galaxy_name="URL" remote_name="URL" missing="" />
-        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
-        <request_param galaxy_name="organism" remote_name="organism" missing="" />
-        <request_param galaxy_name="table" remote_name="table" missing="" />
-        <request_param galaxy_name="description" remote_name="description" missing="" />
-        <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" />
-        <request_param galaxy_name="info" remote_name="info" missing="" />
-        <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" />
-    </request_param_translation>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="txt" />
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
-
--- a/tools/data_source/genbank.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-#!/usr/bin/env python
-from Bio import GenBank
-import sys, os, textwrap
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def make_fasta(rec):
-    '''Creates fasta format from a record'''
-    gi   = rec.annotations.get('gi','')
-    org  = rec.annotations.get('organism','')
-    date = rec.annotations.get('date','')
-    head = '>gi:%s, id:%s, org:%s, date:%s\n' % (gi, rec.id, org, date)
-    body = '\n'.join(textwrap.wrap(rec.seq.data, width=80))
-    return head, body
-    
-if __name__ == '__main__':
-    
-    mode  = sys.argv[1]
-    text  = sys.argv[2]
-    output_file = sys.argv[3]
-
-    print 'Searching for %s <br>' % text
-    
-    # check if inputs are all numbers
-    try:
-        gi_list = text.split()
-        tmp = map(int, gi_list)
-    except ValueError:
-        gi_list = GenBank.search_for(text, max_ids=10)
-    
-    fp = open(output_file, 'wt')
-    record_parser = GenBank.FeatureParser()
-    ncbi_dict = GenBank.NCBIDictionary(mode, 'genbank', parser = record_parser)
-    for gid in gi_list:
-        res = ncbi_dict[gid]
-        head, body =  make_fasta(res)
-        fp.write(head+body+'\n')
-        print head
-    fp.close()
-
-   
-
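A sketch of how this script is driven (the mode and accession mirror the defaults in the companion tool XML below; the output path is illustrative):

    python genbank.py nucleotide "6273291" output.fasta

Note that it relies on Biopython's old Bio.GenBank search/dictionary interfaces, which later Biopython releases replaced with Bio.Entrez.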
--- a/tools/data_source/genbank.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-<tool id="genbank" name="Connect to Genbank">
-<!--  <description>queries genbank</description> -->
-  <command interpreter="python">genbank.py $mode "$text" $output</command>
-  <inputs>
-    <param name="mode" type="select">
-      <option value="nucleotide">nucleotide database</option>
-      <option value="protein">proteins database</option>
-      <label>Get sequences from the</label>
-    </param>
-    <param name="text" size="40" type="text" value="6273291">
-      <label>with accession ID</label>
-    </param>   
-  </inputs>
-  <outputs>
-    <data format="fasta" name="output" />
-  </outputs>
-  <help>
-At the moment this tool allows the following simple searches:
-
-- by GI: **51594135**
-- by accession: **CF622840**
-- using text: **human hbb1** (this feature is experimental)
-  </help>
-
-</tool>
\ No newline at end of file
--- a/tools/data_source/gramene_mart.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
-
-    TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile
-    everything including and beyond the first '&' is truncated from URL.  They said they'll let us know when this is fixed at their end.
--->
-<tool name="GrameneMart" id="gramenemart" tool_type="data_source" version="1.0.1">
-    <description> Central server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://www.gramene.org/biomart/martview" check_values="false" method="get" target="_top">
-        <display>go to GrameneMart Central $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL" remote_name="URL" missing="">
-            <append_param separator="&amp;" first_separator="?" join="=">
-                <value name="_export" missing="1" />
-                <value name="GALAXY_URL" missing="0" />
-            </append_param>
-        </request_param>
-        <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular">
-            <value_translation>
-                <value galaxy_value="tabular" remote_value="TSV" />
-            </value_translation> 
-        </request_param>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
-        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" />
-        <request_param galaxy_name="organism" remote_name="organism" missing="" />
-        <request_param galaxy_name="table" remote_name="table" missing="" />
-        <request_param galaxy_name="description" remote_name="description" missing="" />
-        <request_param galaxy_name="name" remote_name="name" missing="Biomart query" />
-        <request_param galaxy_name="info" remote_name="info" missing="" />
-    </request_param_translation>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="tabular" />
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/hapmapmart.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    hacked from biomart.xml - testing hapmap biomart - problem is going to be converting these to lped/pbed
-    the data returned will be in all sorts of different shapes - and the sample ids need to be obtained separately
-    to create reliable pedigrees. eesh...
-
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
-
-    TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile
-    everything including and beyond the first '&' is truncated from URL.  They said they'll let us know when this is fixed at their end.
--->
-<tool name="HapMapMart" id="hapmapmart" tool_type="data_source" version="0.0.01">
-	<description>HapMap Biomart</description>
-	<command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-	<inputs action="http://hapmap.ncbi.nlm.nih.gov/biomart/martview" check_values="false" method="get" target="_top">
-		<display>go to HapMap BioMart $GALAXY_URL</display>
-		<param name="GALAXY_URL" type="baseurl" value="/tool_runner/hapmapmart" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL" remote_name="URL" missing="">
-            <append_param separator="&amp;" first_separator="?" join="=">
-                <value name="_export" missing="1" />
-                <value name="GALAXY_URL" missing="0" />
-            </append_param>
-        </request_param>
-        <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" >
-            <value_translation>
-                <value galaxy_value="tabular" remote_value="TSV" />
-            </value_translation>
-        </request_param>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
-        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="hg18" />
-        <request_param galaxy_name="organism" remote_name="organism" missing="human" />
-        <request_param galaxy_name="table" remote_name="table" missing="" />
-        <request_param galaxy_name="description" remote_name="description" missing="" />
-        <request_param galaxy_name="name" remote_name="name" missing="HapMap query" />
-        <request_param galaxy_name="info" remote_name="info" missing="" />
-    </request_param_translation>
-	<uihints minwidth="800"/>
-	<outputs>
-		<data name="output" format="tabular" />
-	</outputs>
-	<options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/hbvar.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-<?xml version="1.0"?>
-<tool name="HbVar" id="hbvar">
-	
-	<description>Human Hemoglobin Variants and Thalassemias</description>
-	
-	<command/>
-	
-	<inputs action="http://globin.bx.psu.edu/cgi-bin/hbvar/query_vars3" check_values="false" method="get" target="_top">
-		<display>go to HbVar database $GALAXY_URL $tool_id</display>
-		<param name="GALAXY_URL" type="baseurl" value="/tool_runner/hbvar" />
-		<param name="tool_id" type="hidden" value = "hbvar"/>
-	</inputs>
-	
-	<uihints minwidth="800"/>
-	
-	<code file="hbvar_filter.py"/>
-	
-	<outputs>
-		<data name="output" format="txt" />
-	</outputs>
-	
-	<options sanitize="False" refresh="True"/>
-	
-</tool>
-
--- a/tools/data_source/hbvar_filter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,77 +0,0 @@
-#TODO: Set dbkey to proper UCSC build, if known
-import urllib
-
-from galaxy import datatypes, config
-import tempfile, shutil
-
-def exec_before_job( app, inp_data, out_data, param_dict, tool=None):
-    """Sets the name of the data"""
-    data_name = param_dict.get( 'name', 'HbVar query' )
-    data_type = param_dict.get( 'type', 'txt' )
-    if data_type == 'txt': data_type='interval' #All data is TSV, assume interval
-    name, data = out_data.items()[0]
-    data = app.datatypes_registry.change_datatype(data, data_type)
-    data.name = data_name
-    out_data[name] = data
-
-def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
-    """Verifies the data after the run"""
-
-    URL = param_dict.get( 'URL', None )
-    if not URL:
-        raise Exception('Datasource has not sent back a URL parameter')
-    URL = URL + '&_export=1&GALAXY_URL=0'
-
-    CHUNK_SIZE = 2**20 # 1Mb 
-    MAX_SIZE   = CHUNK_SIZE * 100
-    
-    try:
-        page = urllib.urlopen(URL)
-    except Exception, exc:
-        raise Exception('Problems connecting to %s (%s)' % (URL, exc) )
-
-    name, data = out_data.items()[0]
-    
-    fp = open(data.file_name, 'wb')
-    size = 0
-    while 1:
-        chunk = page.read(CHUNK_SIZE)
-        if not chunk:
-            break
-        if size > MAX_SIZE:
-            raise Exception('----- maximum datasize exceeded ---')
-        size += len(chunk)
-        fp.write(chunk)
-
-    fp.close()
-    #Set meta data, format file to be valid interval type
-    if isinstance(data.datatype, datatypes.interval.Interval):
-        data.set_meta(first_line_is_header=True)
-        #check for missing meta data, if all there, comment first line and process file
-        if not data.missing_meta():
-            line_ctr = -1
-            temp = tempfile.NamedTemporaryFile('w')
-            temp_filename = temp.name
-            temp.close()
-            temp = open(temp_filename,'w')
-            chromCol = int(data.metadata.chromCol) - 1
-            startCol = int(data.metadata.startCol) - 1
-            strandCol = int(data.metadata.strandCol) - 1
-            
-            
-            for line in open(data.file_name, 'r'):
-                line_ctr += 1
-                fields = line.strip().split('\t')
-                if line_ctr == 0: #comment the header line, as intended above, so the output is valid interval data
-                    fields[0] = '#%s' % fields[0]
-                temp.write("%s\n" % '\t'.join(fields))
-            
-            temp.close()
-            shutil.move(temp_filename,data.file_name)
-            
-        else:
-            data = app.datatypes_registry.change_datatype(data, 'tabular')
-    data.set_size()
-    data.set_peek()
-    app.model.context.add( data )
-    app.model.context.flush()
--- a/tools/data_source/import.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Script that imports locally stored data as a new dataset for the user
-Usage: import id outputfile
-"""
-import sys, os
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-BUFFER = 1048576
-
-dataid   = sys.argv[1]
-out_name = sys.argv[2]
-
-
-id2name = {
-    'eryth'         : 'ErythPreCRMmm3_cusTrk.txt',
-    'cishg16'       : 'ReglRegHBBhg16CusTrk.txt',
-    'cishg17'       : 'ReglRegHBBhg17CusTrk.txt',
-    'exons'         : 'ExonsKnownGenes_mm3.txt',
-    'krhg16'        : 'known_regulatory_hg16.bed',
-    'krhg17'        : 'known_regulatory_hg17.bed',
-    'tARhg16mmc'    : 'hg16.mouse.t_AR.cold.bed',
-    'tARhg16mmm'    : 'hg16.mouse.t_AR.medium.bed',
-    'tARhg16mmh'    : 'hg16.mouse.t_AR.hot.bed',
-    'tARhg16rnc'    : 'hg16.rat.t_AR.cold.bed',
-    'tARhg16rnm'    : 'hg16.rat.t_AR.medium.bed',
-    'tARhg16rnh'    : 'hg16.rat.t_AR.hot.bed',
-    'phastConsHg16' : 'phastConsMost_hg16.bed',
-    'omimhg16'      : 'omimDisorders_hg16.tab',
-    'omimhg17'      : 'omimDisorders_hg17.tab',
-
-}
-
-fname = id2name.get(dataid, '')
-if not fname:
-    print 'Invalid data id: %s' % dataid
-    sys.exit()
-else:
-    print 'Imported %s' % fname
-
-# this path is hardcoded
-inp_name = os.path.join('database', 'import', fname)
-
-try:
-    inp = open(inp_name, 'rt')
-except:
-    print 'Could not find file %s' % inp_name
-    sys.exit()
-
-out = open(out_name, 'wt')
-
-while 1:
-    data = inp.read(BUFFER)
-    if not data:
-        break
-    out.write(data)
-
-inp.close()
-out.close()
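A sketch of an invocation, using one of the ids from the table above; the id is resolved against the hardcoded database/import directory, so the output path is the only free choice:

    python import.py eryth output.bed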
--- a/tools/data_source/import.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-<tool id="Featured datasets4" name="Featured datasets">
-  <description>(PSU prepared queries)</description>
-  <command interpreter="python">import.py $data $output</command>
-  <inputs>
-	<display>$data</display>		
-	<param name="data" type="select" display="radio">
-      <option value="eryth">Erythroid predicted cis-regulatory modules</option>
-      <option value="exons">Exons of protein-coding genes in the mouse genome, assembly mm3</option>
-      <option value="cishg16 ">Known cis-regulatory modules in the human HBB gene complex (hg16)</option>
-      <option value="cishg17">Known cis-regulatory modules in the human HBB gene complex (hg17)</option>
-      <option value="krhg16">Known regulatory regions (hg16)</option>
-      <option value="krhg17">Known regulatory regions (hg17)</option>
-      <option value="tARhg16mmc">Human (hg16) evolutionary cold region (vs mouse)</option>
-      <option value="tARhg16mmm">Human (hg16) evolutionary medium region (vs mouse)</option>
-      <option value="tARhg16mmh">Human (hg16) evolutionary hot region (vs mouse)</option>
-      <option value="tARhg16rnc">Human (hg16) evolutionary cold region (vs rat)</option>
-      <option value="tARhg16rnm">Human (hg16) evolutionary medium region (vs rat)</option>
-      <option value="tARhg16rnh">Human (hg16) evolutionary hot region (vs rat)</option>
-      <option value="phastConsHg16">phastCons hg16 (stringent, top ~5%) from UCSC</option>
-      <option value="omimhg16">OMIM disorders (hg16)</option>
-      <option value="omimhg17">OMIM disorders (hg17)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="bed" name="output" />
-  </outputs>
-</tool>
--- a/tools/data_source/metabolicmine.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-<?xml version="1.0"?>
-<tool name="metabolicMine" id="metabolicmine" tool_type="data_source">
-    <description>server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://www.metabolicmine.org/beta/begin.do" check_values="false" method="get"> 
-        <display>go to metabolicMine server $GALAXY_URL</display>
-    </inputs>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="txt" />
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/microbial_import.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,86 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Script that imports locally stored data as a new dataset for the user
-Usage: import id outputfile
-"""
-import sys, os
-from shutil import copyfile
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-BUFFER = 1048576
-
-uids = sys.argv[1].split(",")
-out_file1 = sys.argv[2]
-
-#remove 'None' entries from uids
-have_none = True
-while have_none:
-    try:
-        uids.remove('None')
-    except:
-        have_none = False
-
-
-#create a dictionary keyed by uid of (description, path, build, file_type, chr_acc) tuples for all available files
-available_files = {}
-try:
-    filename = sys.argv[-1]
-    for i, line in enumerate( file( filename ) ):
-        if not line or line[0:1] == "#" : continue
-        fields = line.split('\t')
-        try:
-            info_type = fields.pop(0)
-            
-            if info_type.upper()=="DATA":
-                uid = fields.pop(0)
-                org_num = fields.pop(0)
-                chr_acc = fields.pop(0)
-                feature = fields.pop(0)
-                filetype = fields.pop(0)
-                path = fields.pop(0).replace("\r","").replace("\n","")
-                
-                file_type = filetype
-                build = org_num
-                description = uid
-            else:
-                continue
-        except:
-            continue
-
-        available_files[uid]=(description,path,build,file_type,chr_acc)
-except:
-    print >>sys.stderr, "It appears that the configuration file for this tool is missing."
-
-#create list of tuples of (displayName,FileName,build) for desired files
-desired_files = []
-for uid in uids:
-    try:
-        desired_files.append(available_files[uid])
-    except:
-        continue
-
-#copy first file to contents of given output file
-file1_copied = False
-while not file1_copied:
-    try:
-        first_file = desired_files.pop(0)
-    except:
-        print >>sys.stderr, "There were no valid files requested."
-        sys.exit()
-    file1_desc, file1_path, file1_build, file1_type,file1_chr_acc = first_file
-    try:
-        copyfile(file1_path,out_file1)
-        print "#File1\t"+file1_desc+"\t"+file1_chr_acc+"\t"+file1_build+"\t"+file1_type
-        file1_copied = True
-    except:
-        print >>sys.stderr, "The file specified is missing."
-        continue
-    
-
-#Tell post-process filter where remaining files reside
-for extra_output in desired_files:
-    file_desc, file_path, file_build, file_type,file_chr_acc = extra_output
-    print "#NewFile\t"+file_desc+"\t"+file_chr_acc+"\t"+file_build+"\t"+file_path+"\t"+file_type
--- a/tools/data_source/microbial_import.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-<tool id="microbial_import1" name="Get Microbial Data">
-  <command interpreter="python">microbial_import.py $CDS,$tRNA,$rRNA,$sequence,$GeneMark,$GeneMarkHMM,$Glimmer3 $output ${GALAXY_DATA_INDEX_DIR}/microbial_data.loc</command>
-  <inputs>
-      <param name="kingdom" type="select" label="Select the Desired Kingdom">
-        <options from_file="microbial_data.loc" startswith="ORG">
-          <column name="name" index="3"/>
-          <column name="value" index="3"/>
-          <filter type="unique_value" name="unique" column="3"/>
-        </options>
-      </param>
-      <param name="org" type="select" label="Select the Desired Organism">
-        <options from_file="microbial_data.loc" startswith="ORG">
-          <column name="name" index="2"/>
-          <column name="value" index="1"/>
-          <filter type="param_value" ref="kingdom" name="kingdom" column="3"/>
-          <filter type="sort_by" column="2"/>
-        </options>
-      </param>
-      <param name="CDS" type="select" label="Select Desired Coding Sequences" display="checkboxes" multiple="True">
-        <options from_file="microbial_data.loc" startswith="DATA">
-          <column name="name" index="3"/>
-          <column name="value" index="1"/>
-          <column name="feature" index="4"/>
-          <filter type="param_value" ref="org" name="kingdom" column="2"/>
-          <filter type="static_value" name="feature" value="CDS" column="4"/>
-        </options>
-      </param>
-      <param name="tRNA" type="select" label="Select Desired tRNA" display="checkboxes" multiple="True">
-        <options from_file="microbial_data.loc" startswith="DATA">
-          <column name="name" index="3"/>
-          <column name="value" index="1"/>
-          <column name="feature" index="4"/>
-          <filter type="param_value" ref="org" name="kingdom" column="2"/>
-          <filter type="static_value" name="feature" value="tRNA" column="4"/>
-        </options>
-      </param>
-      <param name="rRNA" type="select" label="Select Desired rRNA" display="checkboxes" multiple="True">
-        <options from_file="microbial_data.loc" startswith="DATA">
-          <column name="name" index="3"/>
-          <column name="value" index="1"/>
-          <column name="feature" index="4"/>
-          <filter type="param_value" ref="org" name="kingdom" column="2"/>
-          <filter type="static_value" name="feature" value="rRNA" column="4"/>
-        </options>
-      </param>
-      <param name="sequence" type="select" label="Select Desired DNA Sequences" display="checkboxes" multiple="True">
-        <options from_file="microbial_data.loc" startswith="DATA">
-          <column name="name" index="3"/>
-          <column name="value" index="1"/>
-          <column name="feature" index="4"/>
-          <filter type="param_value" ref="org" name="kingdom" column="2"/>
-          <filter type="static_value" name="feature" value="sequence" column="4"/>
-        </options>
-      </param>
-      <param name="GeneMark" type="select" label="Select Desired GeneMark Annotations" display="checkboxes" multiple="True">
-        <options from_file="microbial_data.loc" startswith="DATA">
-          <column name="name" index="3"/>
-          <column name="value" index="1"/>
-          <column name="feature" index="4"/>
-          <filter type="param_value" ref="org" name="kingdom" column="2"/>
-          <filter type="static_value" name="feature" value="GeneMark" column="4"/>
-        </options>
-      </param>
-      <param name="GeneMarkHMM" type="select" label="Select Desired GeneMarkHMM Annotations" display="checkboxes" multiple="True">
-        <options from_file="microbial_data.loc" startswith="DATA">
-          <column name="name" index="3"/>
-          <column name="value" index="1"/>
-          <column name="feature" index="4"/>
-          <filter type="param_value" ref="org" name="kingdom" column="2"/>
-          <filter type="static_value" name="feature" value="GeneMarkHMM" column="4"/>
-        </options>
-      </param>
-      <param name="Glimmer3" type="select" label="Select Desired Glimmer3 Annotations" display="checkboxes" multiple="True">
-        <options from_file="microbial_data.loc" startswith="DATA">
-          <column name="name" index="3"/>
-          <column name="value" index="1"/>
-          <column name="feature" index="4"/>
-          <filter type="param_value" ref="org" name="kingdom" column="2"/>
-          <filter type="static_value" name="feature" value="Glimmer3" column="4"/>
-        </options>
-      </param>
-  </inputs>
-  <outputs>
-    <data format="bed" name="output"/>
-  </outputs>
-  <code file="microbial_import_code.py"/>
-  <help>
-
-This tool will allow you to obtain various genomic datasets for any completed Microbial Genome Project as listed at NCBI_.
-
-.. _NCBI: http://www.ncbi.nlm.nih.gov/genomes/lproks.cgi?view=1
-
-Current datasets available include
-  1. CDS
-  2. tRNA
-  3. rRNA
-  4. FASTA Sequences
-  5. GeneMark Annotations
-  6. GeneMarkHMM Annotations
-  7. Glimmer3 Annotations
-
------
-
-Organisms in **bold** are available at the UCSC Browser.
-
------
-
-.. class:: infomark
-
-**Note:** Having trouble locating your organism?  Click here_ for a list of available species and their location.
-
-.. _here: http://wiki.g2.bx.psu.edu/Main/Data%20Libraries/Microbes
-
-  </help>
-</tool>
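
The cascading selects above are all driven by column filters over microbial_data.loc. This is a conceptual sketch of what those filters compute for the DATA-backed parameters, not Galaxy's actual DynamicOptions machinery:

    def options_for( loc_path, org=None, feature=None ):
        # mirror the XML: name is column 3, value is column 1, the org filter
        # tests column 2 and the static feature filter tests column 4
        rows = []
        for line in open( loc_path ):
            fields = line.rstrip( '\r\n' ).split( '\t' )
            if not fields or fields[0] != 'DATA':
                continue
            if org is not None and fields[2] != org:
                continue
            if feature is not None and fields[4] != feature:
                continue
            rows.append( ( fields[3], fields[1] ) )  # (name, value)
        return rows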
--- a/tools/data_source/microbial_import_code.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,154 +0,0 @@
-
-def load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' ):
-    # FIXME: this function is duplicated in the DynamicOptions class.  It is used here only to
-    # set data.name in exec_after_process(). 
-    microbe_info= {}
-    orgs = {}
-    
-    filename = "%s/microbial_data.loc" % GALAXY_DATA_INDEX_DIR
-    for i, line in enumerate( open( filename ) ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ):
-            fields = line.split( sep )
-            #read each line, if not enough fields, go to next line
-            try:
-                info_type = fields.pop(0)
-                if info_type.upper() == "ORG":
-                    #ORG     12521   Clostridium perfringens SM101   bacteria        Firmicutes      CP000312,CP000313,CP000314,CP000315     http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=genomeprj&cmd=Retrieve&dopt=Overview&list_uids=12521
-                    org_num = fields.pop(0)
-                    name = fields.pop(0)
-                    kingdom = fields.pop(0)
-                    group = fields.pop(0)
-                    chromosomes = fields.pop(0)
-                    info_url = fields.pop(0)
-                    link_site = fields.pop(0)
-                    if org_num not in orgs:
-                        orgs[ org_num ] = {}
-                        orgs[ org_num ][ 'chrs' ] = {}
-                    orgs[ org_num ][ 'name' ] = name
-                    orgs[ org_num ][ 'kingdom' ] = kingdom
-                    orgs[ org_num ][ 'group' ] = group
-                    orgs[ org_num ][ 'chromosomes' ] = chromosomes
-                    orgs[ org_num ][ 'info_url' ] = info_url
-                    orgs[ org_num ][ 'link_site' ] = link_site
-                elif info_type.upper() == "CHR":
-                    #CHR     12521   CP000315        Clostridium perfringens phage phiSM101, complete genome 38092   110684521       CP000315.1
-                    org_num = fields.pop(0)
-                    chr_acc = fields.pop(0)
-                    name = fields.pop(0)
-                    length = fields.pop(0)
-                    gi = fields.pop(0)
-                    gb = fields.pop(0)
-                    info_url = fields.pop(0)
-                    chr = {}
-                    chr[ 'name' ] = name
-                    chr[ 'length' ] = length
-                    chr[ 'gi' ] = gi
-                    chr[ 'gb' ] = gb
-                    chr[ 'info_url' ] = info_url
-                    if org_num not in orgs:
-                        orgs[ org_num ] = {}
-                        orgs[ org_num ][ 'chrs' ] = {}
-                    orgs[ org_num ][ 'chrs' ][ chr_acc ] = chr
-                elif info_type.upper() == "DATA":
-                    #DATA    12521_12521_CDS 12521   CP000315        CDS     bed     /home/djb396/alignments/playground/bacteria/12521/CP000315.CDS.bed
-                    uid = fields.pop(0)
-                    org_num = fields.pop(0)
-                    chr_acc = fields.pop(0)
-                    feature = fields.pop(0)
-                    filetype = fields.pop(0)
-                    path = fields.pop(0)
-                    data = {}
-                    data[ 'filetype' ] = filetype
-                    data[ 'path' ] = path
-                    data[ 'feature' ] = feature
-
-                    if org_num not in orgs:
-                        orgs[ org_num ] = {}
-                        orgs[ org_num ][ 'chrs' ] = {}
-                    if 'data' not in orgs[ org_num ][ 'chrs' ][ chr_acc ]:
-                        orgs[ org_num ][ 'chrs' ][ chr_acc ][ 'data' ] = {}
-                    orgs[ org_num ][ 'chrs' ][ chr_acc ][ 'data' ][ uid ] = data
-                else: continue
-            except: continue
-    for org_num in orgs:
-        org = orgs[ org_num ]
-        if org[ 'kingdom' ] not in microbe_info:
-            microbe_info[ org[ 'kingdom' ] ] = {}
-        if org_num not in microbe_info[ org[ 'kingdom' ] ]:
-            microbe_info[ org[ 'kingdom' ] ][org_num] = org
-    return microbe_info
-
-#post processing, set build for data and add additional data to history
-from galaxy import datatypes, config, jobs, tools
-from shutil import copyfile
-
-def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
-    base_dataset = out_data.items()[0][1]
-    history = base_dataset.history
-    if history is None:
-        print "unknown history!"
-        return
-    kingdom = param_dict.get( 'kingdom', None )
-    #group = param_dict.get( 'group', None )
-    org = param_dict.get( 'org', None )
-    
-    #if not (kingdom or group or org):
-    if not (kingdom or org):
-        print "Parameters are not available."
-    #workflow passes galaxy.tools.parameters.basic.UnvalidatedValue instead of values
-    if isinstance( kingdom, tools.parameters.basic.UnvalidatedValue ):
-        kingdom = kingdom.value
-    if isinstance( org, tools.parameters.basic.UnvalidatedValue ):
-        org = org.value
-    
-    GALAXY_DATA_INDEX_DIR = app.config.tool_data_path
-    microbe_info = load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' )
-    new_stdout = ""
-    split_stdout = stdout.split("\n")
-    basic_name = ""
-    for line in split_stdout:
-        fields = line.split("\t")
-        if fields[0] == "#File1":
-            description = fields[1]
-            chr = fields[2]
-            dbkey = fields[3]
-            file_type = fields[4]
-            name, data = out_data.items()[0]
-            data.set_size()
-            basic_name = data.name
-            data.name = data.name + " (" + microbe_info[kingdom][org]['chrs'][chr]['data'][description]['feature'] +" for " + microbe_info[kingdom][org]['name'] + ":" + chr + ")"
-            data.dbkey = dbkey
-            data.info = data.name
-            data = app.datatypes_registry.change_datatype( data, file_type )
-            data.init_meta()
-            data.set_peek()
-            app.model.context.add( data )
-            app.model.context.flush()
-        elif fields[0] == "#NewFile":
-            description = fields[1]
-            chr = fields[2]
-            dbkey = fields[3]
-            filepath = fields[4]
-            file_type = fields[5]
-            newdata = app.model.HistoryDatasetAssociation( create_dataset = True, sa_session = app.model.context ) #This import should become a library
-            newdata.set_size()
-            newdata.extension = file_type
-            newdata.name = basic_name + " (" + microbe_info[kingdom][org]['chrs'][chr]['data'][description]['feature'] +" for "+microbe_info[kingdom][org]['name']+":"+chr + ")"
-            app.model.context.add( newdata )
-            app.model.context.flush()
-            app.security_agent.copy_dataset_permissions( base_dataset.dataset, newdata.dataset )
-            history.add_dataset( newdata )
-            app.model.context.add( history )
-            app.model.context.flush()
-            try:
-                copyfile(filepath,newdata.file_name)
-                newdata.info = newdata.name
-                newdata.state = jobs.JOB_OK
-            except:
-                newdata.info = "The requested file is missing from the system."
-                newdata.state = jobs.JOB_ERROR
-            newdata.dbkey = dbkey
-            newdata.init_meta()
-            newdata.set_peek()
-            app.model.context.flush()
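
With the sample ORG/CHR/DATA lines quoted in the comments above, the nested dictionary returned by load_microbial_data() has roughly this shape (abridged; other keys such as group and chromosomes are omitted):

    microbe_info = {
        'bacteria': {                                  # keyed by kingdom
            '12521': {                                 # then by org_num
                'name': 'Clostridium perfringens SM101',
                'kingdom': 'bacteria',
                'chrs': {
                    'CP000315': {                      # then by chr_acc
                        'name': 'Clostridium perfringens phage phiSM101, complete genome',
                        'data': {
                            '12521_12521_CDS': {       # then by uid
                                'feature': 'CDS',
                                'filetype': 'bed',
                                'path': '/home/djb396/alignments/playground/bacteria/12521/CP000315.CDS.bed',
                            },
                        },
                    },
                },
            },
        },
    }
    # which is exactly the chain exec_after_process() walks:
    feature = microbe_info['bacteria']['12521']['chrs']['CP000315']['data']['12521_12521_CDS']['feature']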
--- a/tools/data_source/modmine.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
--->
-<tool name="modENCODE modMine" id="modmine" tool_type="data_source">
-    <description>server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://intermine.modencode.org/" check_values="false" method="get"> 
-        <display>go to modENCODE modMine server $GALAXY_URL</display>
-    </inputs>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="txt" />
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
-
--- a/tools/data_source/ratmine.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,34 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
--->
-<tool name="Ratmine" id="ratmine" tool_type="data_source">
-    <description>server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://ratmine.mcw.edu/ratmine/begin.do" check_values="false" method="get"> 
-        <display>go to Ratmine server $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=ratmine" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
-        <request_param galaxy_name="URL" remote_name="URL" missing="" />
-        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
-        <request_param galaxy_name="organism" remote_name="organism" missing="" />
-        <request_param galaxy_name="table" remote_name="table" missing="" />
-        <request_param galaxy_name="description" remote_name="description" missing="" />
-        <request_param galaxy_name="name" remote_name="name" missing="Ratmine query" />
-        <request_param galaxy_name="info" remote_name="info" missing="" />
-        <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" >
-            <value_translation>
-                <value galaxy_value="auto" remote_value="txt" /> <!-- intermine currently always provides 'txt', make this auto detect -->
-            </value_translation>
-        </request_param>
-    </request_param_translation>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="txt" />
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
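
The URL_method convention described in the comment at the top of this tool amounts to a two-way dispatch; a minimal sketch using the era-appropriate Python 2 urllib, with illustrative names, not Galaxy's code:

    import urllib

    def handle_initial_response( url_method, url, extra_params ):
        if url_method == 'get':
            # 'get': request the returned URL as-is
            return urllib.urlopen( url )
        # 'post': encode any additional params and POST them to the URL
        return urllib.urlopen( url, urllib.urlencode( extra_params ) )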
--- a/tools/data_source/ucsc_archaea.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-<?xml version="1.0"?>
-<tool name="UCSC Archaea" id="ucsc_proxy">
-	
-	<description>table browser</description>
-	
-	<command interpreter="python">
-		ucsc_proxy.py $param_file $output
-	</command>
-	
-	<inputs action="/ucsc_proxy/index" check_values="false">
-		<display>go to UCSC $init $hgta_outputType</display>
-		<param type="hidden" name="init" value="3"/>
-		<param type="hidden" name="hgta_outputType" value="bed"/>
-	</inputs>
-	
-	<code file="ucsc_filter.py"/>
-	
-	<outputs>
-		<data name="output" format="bed" />
-	</outputs>
-
-</tool>
-
--- a/tools/data_source/ucsc_filter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-# runs after the job (and after the default post-filter)
-from galaxy import datatypes, jobs
-
-def validate(incoming):
-    """Validator"""
-    #raise Exception, 'not quite right'
-    pass
-
-def exec_before_job( app, inp_data, out_data, param_dict, tool=None):
-    """Sets the name of the data"""
-    outputType = param_dict.get( 'hgta_outputType', None )
-    if isinstance(outputType, list) and len(outputType)>0: outputType = outputType[-1]
-    items = out_data.items()
-    
-    for name, data in items:
-        data.name  = param_dict.get('display', data.name)
-        data.dbkey = param_dict.get('dbkey', '???')
-
-        if outputType == 'wigData':
-            ext = "wig"
-        elif outputType == 'maf':
-            ext = "maf"
-        elif outputType == 'gff':
-            ext = "gff"
-        elif outputType == 'gff3':
-            ext = "gff3"
-        else:
-            if 'hgta_doPrintSelectedFields' in param_dict:
-                ext = "interval"
-            elif 'hgta_doGetBed' in param_dict:
-                ext = "bed"
-            elif 'hgta_doGenomicDna' in param_dict:
-                ext = "fasta"
-            elif 'hgta_doGenePredSequence' in param_dict:
-                ext = "fasta"
-            else:
-                ext = "interval"
-        
-        data = app.datatypes_registry.change_datatype(data, ext)
-        out_data[name] = data
-        
-def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
-    """Verifies the data after the run"""
-    items = out_data.items()
-    for name, data in items:
-        data.set_size()
-        try:            
-            err_msg, err_flag = 'Errors:', False
-            line_count = 0
-            num_lines = len(file(data.file_name).readlines())
-            for line in file(data.file_name):
-                line_count += 1
-                if line and line[0] == '-':
-                    if line_count + 3 == num_lines and not err_flag:
-                        err_flag = True
-                        err_msg = "Warning: It appears that your results have been truncated by UCSC. View the bottom of your result file for details."
-                        break
-                    err_flag = True
-                    err_msg = err_msg +" (line "+str(line_count)+")"+line
-            data.set_peek()
-            if isinstance(data.datatype, datatypes.interval.Interval) and data.missing_meta():
-                data = app.datatypes_registry.change_datatype(data, 'tabular')
-                out_data[name] = data
-            if err_flag:
-                raise Exception(err_msg)
-        except Exception, exc:
-            data.info  = data.info + "\n" + str(exc)
-            data.blurb = "error"
--- a/tools/data_source/ucsc_proxy.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-#!/usr/bin/env python
-import urllib
-import sys, os
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-CHUNK   = 2**20 # 1Mb 
-MAXSIZE = CHUNK * 100
-if __name__ == '__main__':
-
-    if len(sys.argv) != 3:
-        print 'Usage: ucsc_proxy.py input_params output_file'
-        sys.exit()
-
-    inp_file = sys.argv[1]
-    out_file = sys.argv[2]
-
-    DEFAULT_URL = "http://genome.ucsc.edu/hgTables?"
-    
-    # this must stay a list to allow multiple selections for the same widget name (checkboxes)
-    params  = []
-    for line in file(inp_file):
-        line = line.strip()
-        if line:
-            parts = line.split('=', 1)
-            if len(parts) == 0:
-                key = ""
-                value = ""
-            elif len(parts) == 1:
-                key = parts[0]
-                value = ""
-            else:
-                key = parts[0]
-                value = parts[1]
-            if key == 'display':
-                print value
-            # get url from params, referred from proxy.py, initialized by the tool xml
-            elif key == 'proxy_url':
-                DEFAULT_URL = value
-            else:
-                params.append( (key, value) )
-    
-    #print params
-    
-    encoded_params = urllib.urlencode(params)
-    url = DEFAULT_URL + encoded_params
-
-    #print url
-
-    page = urllib.urlopen(url)
-
-    fp = open(out_file, 'wt')
-    size = 0
-    while 1:
-        data = page.read(CHUNK)
-        if not data:
-            break
-        if size > MAXSIZE:
-            fp.write('----- maximum datasize exceeded ---\n')
-            break
-        size += len(data)
-        fp.write(data)
-
-    fp.close()
-
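
A worked example of the parsing above: given a $param_file containing lines such as 'db=hg17' and 'hgta_table=knownGene' (values illustrative), the proxy builds:

    import urllib

    params = [ ( 'db', 'hg17' ), ( 'hgta_table', 'knownGene' ) ]
    url = 'http://genome.ucsc.edu/hgTables?' + urllib.urlencode( params )
    # -> http://genome.ucsc.edu/hgTables?db=hg17&hgta_table=knownGene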
--- a/tools/data_source/ucsc_proxy.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-<?xml version="1.0"?>
-<tool name="UCSC Main" id="ucsc_proxy">
-	
-	<description>table browser proxy</description>
-	
-	<command interpreter="python">
-		ucsc_proxy.py $param_file $output
-	</command>
-	
-	<inputs action="/ucsc_proxy/index" check_values="false">
-		<display>go to UCSC $init $hgta_outputType</display>
-		<param type="hidden" name="init" value="1"/>
-		<param type="hidden" name="hgta_outputType" value="bed"/>
-	</inputs>
-	
-	<code file="ucsc_filter.py"/>
-	
-	<outputs>
-		<data name="output" format="bed" />
-	</outputs>
-
-</tool>
-
--- a/tools/data_source/ucsc_tablebrowser.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
--->
-<tool name="UCSC Main" id="ucsc_table_direct1" tool_type="data_source">
-    <description>table browser</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://genome.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get">
-        <display>go to UCSC Table Browser $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
-        <param name="tool_id" type="hidden" value="ucsc_table_direct1" />
-        <param name="sendToGalaxy" type="hidden" value="1" />
-        <param name="hgta_compressType" type="hidden" value="none" />
-        <param name="hgta_outputType" type="hidden" value="bed" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
-        <request_param galaxy_name="URL" remote_name="URL" missing="" />
-        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
-        <request_param galaxy_name="organism" remote_name="org" missing="unknown species" />
-        <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" />
-        <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" />
-        <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="auto" >
-            <value_translation>
-                <value galaxy_value="auto" remote_value="primaryTable" />
-                <value galaxy_value="auto" remote_value="selectedFields" />
-                <value galaxy_value="wig" remote_value="wigData" />
-                <value galaxy_value="interval" remote_value="tab" />
-                <value galaxy_value="html" remote_value="hyperlinks" />
-                <value galaxy_value="fasta" remote_value="sequence" />
-                <value galaxy_value="gtf" remote_value="gff" />
-            </value_translation>
-        </request_param>
-    </request_param_translation>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="tabular" label="${tool.name} on ${organism}: ${table} (#if $description == 'range' then $getVar( 'position', 'unknown position' ) else $description#)"/>
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
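
The <value_translation> block above is equivalent to a lookup from the remote hgta_outputType value to a Galaxy datatype, with missing="auto" as the fallback; as a sketch:

    DATA_TYPE_TRANSLATION = {
        'primaryTable':   'auto',
        'selectedFields': 'auto',
        'wigData':        'wig',
        'tab':            'interval',
        'hyperlinks':     'html',
        'sequence':       'fasta',
        'gff':            'gtf',
    }

    def translate_data_type( remote_value ):
        return DATA_TYPE_TRANSLATION.get( remote_value, 'auto' )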
--- a/tools/data_source/ucsc_tablebrowser_archaea.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
--->
-<tool name="UCSC Archaea" id="ucsc_table_direct_archaea1" tool_type="data_source">
-    <description>table browser</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://archaea.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get">
-        <display>go to UCSC Table Browser $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
-        <param name="tool_id" type="hidden" value="ucsc_table_direct_archaea1" />
-        <param name="sendToGalaxy" type="hidden" value="1" />
-        <param name="hgta_compressType" type="hidden" value="none" />
-        <param name="hgta_outputType" type="hidden" value="bed" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
-        <request_param galaxy_name="URL" remote_name="URL" missing="" />
-        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
-        <request_param galaxy_name="organism" remote_name="org" missing="unknown species" />
-        <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" />
-        <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" />
-        <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="auto" >
-            <value_translation>
-                <value galaxy_value="auto" remote_value="primaryTable" />
-                <value galaxy_value="auto" remote_value="selectedFields" />
-                <value galaxy_value="wig" remote_value="wigData" />
-                <value galaxy_value="interval" remote_value="tab" />
-                <value galaxy_value="html" remote_value="hyperlinks" />
-                <value galaxy_value="fasta" remote_value="sequence" />
-                <value galaxy_value="gtf" remote_value="gff" />
-            </value_translation>
-        </request_param>
-    </request_param_translation>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="tabular" label="${tool.name} on ${organism}: ${table} (#if $description == 'range' then $getVar( 'position', 'unknown position' ) else $description#)"/>
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/ucsc_tablebrowser_test.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-<?xml version="1.0"?>
-<!--
-    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
-    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
-    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
--->
-<tool name="UCSC Test" id="ucsc_table_direct_test1" tool_type="data_source">
-    <description>table browser</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://genome-test.cse.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get">
-        <display>go to UCSC Table Browser $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
-        <param name="tool_id" type="hidden" value="ucsc_table_direct_test1" />
-        <param name="sendToGalaxy" type="hidden" value="1" />
-        <param name="hgta_compressType" type="hidden" value="none" />
-        <param name="hgta_outputType" type="hidden" value="bed" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
-        <request_param galaxy_name="URL" remote_name="URL" missing="" />
-        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
-        <request_param galaxy_name="organism" remote_name="org" missing="unknown species" />
-        <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" />
-        <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" />
-        <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="auto" >
-            <value_translation>
-                <value galaxy_value="auto" remote_value="primaryTable" />
-                <value galaxy_value="auto" remote_value="selectedFields" />
-                <value galaxy_value="wig" remote_value="wigData" />
-                <value galaxy_value="interval" remote_value="tab" />
-                <value galaxy_value="html" remote_value="hyperlinks" />
-                <value galaxy_value="fasta" remote_value="sequence" />
-                <value galaxy_value="gtf" remote_value="gff" />
-            </value_translation>
-        </request_param>
-    </request_param_translation>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="tabular" label="${tool.name} on ${organism}: ${table} (#if $description == 'range' then $getVar( 'position', 'unknown position' ) else $description#)"/>
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/ucsc_testproxy.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-<?xml version="1.0"?>
-<tool name="UCSC Test" id="ucsc_testproxy">
-	
-	<description>table browser proxy</description>
-	
-	<command interpreter="python">
-		ucsc_proxy.py $param_file $output
-	</command>
-	
-	<inputs action="/ucsc_proxy/index" check_values="false">
-		<display>go to UCSC genome-test $init $hgta_outputType</display>
-		<param type="hidden" name="init" value="2"/>
-		<param type="hidden" name="hgta_outputType" value="bed"/>
-	</inputs>
-	
-	<code file="ucsc_filter.py"/>
-	
-	<outputs>
-		<data name="output" format="bed" />
-	</outputs>
-
-</tool>
-
--- a/tools/data_source/upload.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,394 +0,0 @@
-#!/usr/bin/env python
-#Processes uploads from the user.
-
-# WARNING: Changes in this tool (particularly as related to parsing) may need
-# to be reflected in galaxy.web.controllers.tool_runner and galaxy.tools
-
-import urllib, sys, os, gzip, tempfile, shutil, re, gzip, zipfile, codecs, binascii
-from galaxy import eggs
-# need to import model before sniff to resolve a circular import dependency
-import galaxy.model
-from galaxy.datatypes.checkers import *
-from galaxy.datatypes import sniff
-from galaxy.datatypes.binary import *
-from galaxy.datatypes.images import Pdf
-from galaxy.datatypes.registry import Registry
-from galaxy import util
-from galaxy.datatypes.util.image_util import *
-from galaxy.util.json import *
-
-try:
-    import Image as PIL
-except ImportError:
-    try:
-        from PIL import Image as PIL
-    except:
-        PIL = None
-
-try:
-    import bz2
-except:
-    bz2 = None
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg, ret=1 ):
-    sys.stderr.write( msg )
-    sys.exit( ret )
-def file_err( msg, dataset, json_file ):
-    json_file.write( to_json_string( dict( type = 'dataset',
-                                           ext = 'data',
-                                           dataset_id = dataset.dataset_id,
-                                           stderr = msg ) ) + "\n" )
-    # never remove a server-side upload
-    if dataset.type in ( 'server_dir', 'path_paste' ):
-        return
-    try:
-        os.remove( dataset.path )
-    except:
-        pass
-def safe_dict(d):
-    """
-    Recursively clone json structure with UTF-8 dictionary keys
-    http://mellowmachines.com/blog/2009/06/exploding-dictionary-with-unicode-keys-as-python-arguments/
-    """
-    if isinstance(d, dict):
-        return dict([(k.encode('utf-8'), safe_dict(v)) for k,v in d.iteritems()])
-    elif isinstance(d, list):
-        return [safe_dict(x) for x in d]
-    else:
-        return d
-def check_bam( file_path ):
-    return Bam().sniff( file_path )
-def check_sff( file_path ):
-    return Sff().sniff( file_path )
-def check_pdf( file_path ):
-    return Pdf().sniff( file_path )
-def check_bigwig( file_path ):
-    return BigWig().sniff( file_path )
-def check_bigbed( file_path ):
-    return BigBed().sniff( file_path )
-def parse_outputs( args ):
-    rval = {}
-    for arg in args:
-        id, files_path, path = arg.split( ':', 2 )
-        rval[int( id )] = ( path, files_path )
-    return rval
-def add_file( dataset, registry, json_file, output_path ):
-    data_type = None
-    line_count = None
-    converted_path = None
-    stdout = None
-    link_data_only = dataset.get( 'link_data_only', 'copy_files' )
-
-    try:
-        ext = dataset.file_type
-    except AttributeError:
-        file_err( 'Unable to process uploaded file, missing file_type parameter.', dataset, json_file )
-        return
-
-    if dataset.type == 'url':
-        try:
-            temp_name, dataset.is_multi_byte = sniff.stream_to_file( urllib.urlopen( dataset.path ), prefix='url_paste' )
-        except Exception, e:
-            file_err( 'Unable to fetch %s\n%s' % ( dataset.path, str( e ) ), dataset, json_file )
-            return
-        dataset.path = temp_name
-    # See if we have an empty file
-    if not os.path.exists( dataset.path ):
-        file_err( 'Uploaded temporary file (%s) does not exist.' % dataset.path, dataset, json_file )
-        return
-    if not os.path.getsize( dataset.path ) > 0:
-        file_err( 'The uploaded file is empty', dataset, json_file )
-        return
-    if not dataset.type == 'url':
-        # Already set is_multi_byte above if type == 'url'
-        try:
-            dataset.is_multi_byte = util.is_multi_byte( codecs.open( dataset.path, 'r', 'utf-8' ).read( 100 ) )
-        except UnicodeDecodeError, e:
-            dataset.is_multi_byte = False
-    # Is dataset an image?
-    image = check_image( dataset.path )
-    if image:
-        if not PIL:
-            image = None
-        # get_image_ext() returns None if not a supported image type
-        ext = get_image_ext( dataset.path, image )
-        data_type = ext
-    # Is dataset content multi-byte?
-    elif dataset.is_multi_byte:
-        data_type = 'multi-byte char'
-        ext = sniff.guess_ext( dataset.path, is_multi_byte=True )
-    # Is dataset content supported sniffable binary?
-    elif check_bam( dataset.path ):
-        ext = 'bam'
-        data_type = 'bam'
-    elif check_sff( dataset.path ):
-        ext = 'sff'
-        data_type = 'sff'
-    elif check_pdf( dataset.path ):
-        ext = 'pdf'
-        data_type = 'pdf'
-    elif check_bigwig( dataset.path ):
-        ext = 'bigwig'
-        data_type = 'bigwig'
-    elif check_bigbed( dataset.path ):
-        ext = 'bigbed'
-        data_type = 'bigbed'
-    if not data_type:
-        # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress
-        is_gzipped, is_valid = check_gzip( dataset.path )
-        if is_gzipped and not is_valid:
-            file_err( 'The gzipped uploaded file contains inappropriate content', dataset, json_file )
-            return
-        elif is_gzipped and is_valid:
-            if link_data_only == 'copy_files':
-                # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format
-                CHUNK_SIZE = 2**20 # 1Mb   
-                fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False )
-                gzipped_file = gzip.GzipFile( dataset.path, 'rb' )
-                while 1:
-                    try:
-                        chunk = gzipped_file.read( CHUNK_SIZE )
-                    except IOError:
-                        os.close( fd )
-                        os.remove( uncompressed )
-                        file_err( 'Problem decompressing gzipped data', dataset, json_file )
-                        return
-                    if not chunk:
-                        break
-                    os.write( fd, chunk )
-                os.close( fd )
-                gzipped_file.close()
-                # Replace the gzipped file with the decompressed file if it's safe to do so
-                if dataset.type in ( 'server_dir', 'path_paste' ):
-                    dataset.path = uncompressed
-                else:
-                    shutil.move( uncompressed, dataset.path )
-            if dataset.name.endswith( '.gz' ):
-                dataset.name = dataset.name[ :-len( '.gz' ) ]
-            data_type = 'gzip'
-        if not data_type and bz2 is not None:
-            # See if we have a bz2 file, much like gzip
-            is_bzipped, is_valid = check_bz2( dataset.path )
-            if is_bzipped and not is_valid:
-                file_err( 'The bz2 compressed uploaded file contains inappropriate content', dataset, json_file )
-                return
-            elif is_bzipped and is_valid:
-                if link_data_only == 'copy_files':
-                    # We need to uncompress the temp_name file
-                    CHUNK_SIZE = 2**20 # 1Mb   
-                    fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_bunzip2_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False )
-                    bzipped_file = bz2.BZ2File( dataset.path, 'rb' )
-                    while 1:
-                        try:
-                            chunk = bzipped_file.read( CHUNK_SIZE )
-                        except IOError:
-                            os.close( fd )
-                            os.remove( uncompressed )
-                            file_err( 'Problem decompressing bz2 compressed data', dataset, json_file )
-                            return
-                        if not chunk:
-                            break
-                        os.write( fd, chunk )
-                    os.close( fd )
-                    bzipped_file.close()
-                    # Replace the bzipped file with the decompressed file if it's safe to do so
-                    if dataset.type in ( 'server_dir', 'path_paste' ):
-                        dataset.path = uncompressed
-                    else:
-                        shutil.move( uncompressed, dataset.path )
-                if dataset.name.endswith( '.bz2' ):
-                    dataset.name = dataset.name[ :-len( '.bz2' ) ]
-                data_type = 'bz2'
-        if not data_type:
-            # See if we have a zip archive
-            is_zipped = check_zip( dataset.path )
-            if is_zipped:
-                if link_data_only == 'copy_files':
-                    CHUNK_SIZE = 2**20 # 1Mb
-                    uncompressed = None
-                    uncompressed_name = None
-                    unzipped = False
-                    z = zipfile.ZipFile( dataset.path )
-                    for name in z.namelist():
-                        if name.endswith('/'):
-                            continue
-                        if unzipped:
-                            stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
-                            break
-                        fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False )
-                        if sys.version_info[:2] >= ( 2, 6 ):
-                            zipped_file = z.open( name )
-                            while 1:
-                                try:
-                                    chunk = zipped_file.read( CHUNK_SIZE )
-                                except IOError:
-                                    os.close( fd )
-                                    os.remove( uncompressed )
-                                    file_err( 'Problem decompressing zipped data', dataset, json_file )
-                                    return
-                                if not chunk:
-                                    break
-                                os.write( fd, chunk )
-                            os.close( fd )
-                            zipped_file.close()
-                            uncompressed_name = name
-                            unzipped = True
-                        else:
-                            # python < 2.6 doesn't have a way to read members in chunks(!)
-                            try:
-                                outfile = open( uncompressed, 'wb' )
-                                outfile.write( z.read( name ) )
-                                outfile.close()
-                                uncompressed_name = name
-                                unzipped = True
-                            except IOError:
-                                os.close( fd )
-                                os.remove( uncompressed )
-                                file_err( 'Problem decompressing zipped data', dataset, json_file )
-                                return
-                    z.close()
-                    # Replace the zipped file with the decompressed file if it's safe to do so
-                    if uncompressed is not None:
-                        if dataset.type in ( 'server_dir', 'path_paste' ):
-                            dataset.path = uncompressed
-                        else:
-                            shutil.move( uncompressed, dataset.path )
-                        dataset.name = uncompressed_name
-                data_type = 'zip'
-        if not data_type:
-            if check_binary( dataset.path ):
-                # We have a binary dataset, but it is not Bam, Sff or Pdf
-                data_type = 'binary'
-                #binary_ok = False
-                parts = dataset.name.split( "." )
-                if len( parts ) > 1:
-                    ext = parts[1].strip().lower()
-                    if ext not in unsniffable_binary_formats:
-                        file_err( 'The uploaded binary file contains inappropriate content', dataset, json_file )
-                        return
-                    elif ext in unsniffable_binary_formats and dataset.file_type != ext:
-                        err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % ( ext.capitalize(), ext )
-                        file_err( err_msg, dataset, json_file )
-                        return
-        if not data_type:
-            # We must have a text file
-            if check_html( dataset.path ):
-                file_err( 'The uploaded file contains inappropriate HTML content', dataset, json_file )
-                return
-        if data_type != 'binary':
-            if link_data_only == 'copy_files':
-                in_place = True
-                if dataset.type in ( 'server_dir', 'path_paste' ) and data_type not in [ 'gzip', 'bz2', 'zip' ]:
-                    in_place = False
-                if dataset.space_to_tab:
-                    line_count, converted_path = sniff.convert_newlines_sep2tabs( dataset.path, in_place=in_place )
-                else:
-                    line_count, converted_path = sniff.convert_newlines( dataset.path, in_place=in_place )
-            if dataset.file_type == 'auto':
-                ext = sniff.guess_ext( dataset.path, registry.sniff_order )
-            else:
-                ext = dataset.file_type
-            data_type = ext
-    # Save job info for the framework
-    if ext == 'auto' and dataset.ext:
-        ext = dataset.ext
-    if ext == 'auto':
-        ext = 'data'
-    datatype = registry.get_datatype_by_extension( ext )
-    if dataset.type in ( 'server_dir', 'path_paste' ) and link_data_only == 'link_to_files':
-        # Never alter a file that will not be copied to Galaxy's local file store.
-        if datatype.dataset_content_needs_grooming( dataset.path ):
-            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
-                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
-            file_err( err_msg, dataset, json_file )
-            return
-    if link_data_only == 'copy_files' and dataset.type in ( 'server_dir', 'path_paste' ) and data_type not in [ 'gzip', 'bz2', 'zip' ]:
-        # Move the dataset to its "real" path
-        if converted_path is not None:
-            shutil.copy( converted_path, output_path )
-            try:
-                os.remove( converted_path )
-            except:
-                pass
-        else:
-            # This should not happen, but it's here just in case
-            shutil.copy( dataset.path, output_path )
-    elif link_data_only == 'copy_files':
-        shutil.move( dataset.path, output_path )
-    # Write the job info
-    stdout = stdout or 'uploaded %s file' % data_type
-    info = dict( type = 'dataset',
-                 dataset_id = dataset.dataset_id,
-                 ext = ext,
-                 stdout = stdout,
-                 name = dataset.name,
-                 line_count = line_count )
-    json_file.write( to_json_string( info ) + "\n" )
-    if link_data_only == 'copy_files' and datatype.dataset_content_needs_grooming( output_path ):
-        # Groom the dataset content if necessary
-        datatype.groom_dataset_content( output_path )
-def add_composite_file( dataset, registry, json_file, output_path, files_path ):
-        if dataset.composite_files:
-            os.mkdir( files_path )
-            for name, value in dataset.composite_files.iteritems():
-                value = util.bunch.Bunch( **value )
-                if dataset.composite_file_paths[ value.name ] is None and not value.optional:
-                    file_err( 'A required composite data file was not provided (%s)' % name, dataset, json_file )
-                    break
-                elif dataset.composite_file_paths[value.name] is not None:
-                    dp = dataset.composite_file_paths[value.name][ 'path' ]
-                    isurl = dp.find('://') != -1 # todo fixme
-                    if isurl:
-                       try:
-                           temp_name, dataset.is_multi_byte = sniff.stream_to_file( urllib.urlopen( dp ), prefix='url_paste' )
-                       except Exception, e:
-                           file_err( 'Unable to fetch %s\n%s' % ( dp, str( e ) ), dataset, json_file )
-                           return
-                       dataset.path = temp_name
-                       dp = temp_name
-                    if not value.is_binary:
-                        if dataset.composite_file_paths[ value.name ].get( 'space_to_tab', value.space_to_tab ):
-                            sniff.convert_newlines_sep2tabs( dp )
-                        else:
-                            sniff.convert_newlines( dp )
-                    shutil.move( dp, os.path.join( files_path, name ) )
-        # Move the dataset to its "real" path
-        shutil.move( dataset.primary_file, output_path )
-        # Write the job info
-        info = dict( type = 'dataset',
-                     dataset_id = dataset.dataset_id,
-                     stdout = 'uploaded %s file' % dataset.file_type )
-        json_file.write( to_json_string( info ) + "\n" )
-
-def __main__():
-
-    if len( sys.argv ) < 4:
-        print >>sys.stderr, 'usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...'
-        sys.exit( 1 )
-
-    output_paths = parse_outputs( sys.argv[4:] )
-    json_file = open( 'galaxy.json', 'w' )
-
-    registry = Registry( sys.argv[1], sys.argv[2] )
-
-    for line in open( sys.argv[3], 'r' ):
-        dataset = from_json_string( line )
-        dataset = util.bunch.Bunch( **safe_dict( dataset ) )
-        try:
-            output_path = output_paths[int( dataset.dataset_id )][0]
-        except:
-            print >>sys.stderr, 'Output path for dataset %s not found on command line' % dataset.dataset_id
-            sys.exit( 1 )
-        if dataset.type == 'composite':
-            files_path = output_paths[int( dataset.dataset_id )][1]
-            add_composite_file( dataset, registry, json_file, output_path, files_path )
-        else:
-            add_file( dataset, registry, json_file, output_path )
-    # clean up paramfile
-    try:
-        os.remove( sys.argv[3] )
-    except:
-        pass
-
-if __name__ == '__main__':
-    __main__()
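
Two of the small interfaces above are worth seeing with concrete values. parse_outputs() maps each '<id>:<files_path>:<path>' spec from the command line to a lookup table, and each line written to galaxy.json is a single JSON object per dataset (all values below are illustrative):

    def parse_outputs( args ):
        # same shape as the helper in upload.py above
        rval = {}
        for arg in args:
            id, files_path, path = arg.split( ':', 2 )
            rval[ int( id ) ] = ( path, files_path )
        return rval

    outputs = parse_outputs( [ '42:/tmp/job42_files:/tmp/dataset_42.dat' ] )
    # outputs == { 42: ( '/tmp/dataset_42.dat', '/tmp/job42_files' ) }

    # one galaxy.json line for a successful upload looks roughly like:
    # {"type": "dataset", "dataset_id": 42, "ext": "bed",
    #  "stdout": "uploaded bed file", "name": "a.bed", "line_count": 100}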
--- a/tools/data_source/upload.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,213 +0,0 @@
-<?xml version="1.0"?>
-
-<tool name="Upload File" id="upload1" version="1.1.3">
-  <description>
-    from your computer  
-  </description>
-  <action module="galaxy.tools.actions.upload" class="UploadToolAction"/>
-  <command interpreter="python">
-      upload.py $GALAXY_ROOT_DIR $GALAXY_DATATYPES_CONF_FILE $paramfile
-    #set $outnum = 0
-    #while $varExists('output%i' % $outnum):
-        #set $output = $getVar('output%i' % $outnum)
-        #set $outnum += 1
-        #set $file_name = $output.file_name
-        ## FIXME: This is not future-proof for other uses of external_filename (other than for use by the library upload's "link data" feature)
-        #if $output.dataset.dataset.external_filename:
-            #set $file_name = "None"
-        #end if
-        ${output.dataset.dataset.id}:${output.files_path}:${file_name}
-    #end while
-  </command>
-  <inputs nginx_upload="true">
-    <param name="file_type" type="select" label="File Format" help="Which format? See help below">
-      <options from_parameter="tool.app.datatypes_registry.upload_file_formats" transform_lines="[ &quot;%s%s%s&quot; % ( line, self.separator, line ) for line in obj ]">
-        <column name="value" index="1"/>
-        <column name="name" index="0"/>
-        <filter type="sort_by" column="0"/>
-        <filter type="add_value" name="Auto-detect" value="auto" index="0"/>
-      </options>
-    </param>
-    <param name="async_datasets" type="hidden" value="None"/>
-    <upload_dataset name="files" title="Specify Files for Dataset" file_type_name="file_type" metadata_ref="files_metadata">
-        <param name="file_data" type="file" size="30" label="File" ajax-upload="true" help="TIP: Due to browser limitations, uploading files larger than 2GB is guaranteed to fail.  To upload large files, use the URL method (below) or FTP (if enabled by the site administrator).">
-        <validator type="expression" message="You will need to reselect the file you specified (%s)." substitute_value_in_message="True">not ( ( isinstance( value, unicode ) or isinstance( value, str ) ) and value != "" )</validator> <!-- use validator to post message to user about needing to reselect the file, since most browsers won't accept the value attribute for file inputs -->
-      </param>
-      <param name="url_paste" type="text" area="true" size="5x35" label="URL/Text" help="Here you may specify a list of URLs (one per line) or paste the contents of a file."/> 
-      <param name="ftp_files" type="ftpfile" label="Files uploaded via FTP"/>
-      <param name="space_to_tab" type="select" display="checkboxes" multiple="True" label="Convert spaces to tabs" help="Use this option if you are entering intervals by hand."> 
-        <option value="Yes">Yes</option>
-      </param>
-    </upload_dataset>
-    <param name="dbkey" type="genomebuild" label="Genome" />
-    <conditional name="files_metadata" title="Specify metadata" value_from="self:app.datatypes_registry.get_upload_metadata_params" value_ref="file_type" value_ref_in_group="False" />
-    <!-- <param name="other_dbkey" type="text" label="Or user-defined Genome" /> -->
-  </inputs>
-  <help>
-  
-**Auto-detect**
-
-The system will attempt to detect Axt, Fasta, Fastqsolexa, Gff, Gff3, Html, Lav, Maf, Tabular, Wiggle, Bed and Interval (Bed with headers) formats. If your file is not detected properly as one of the known formats, it most likely means that it has some format problems (e.g., different number of columns on different rows). You can still coerce the system to set your data to the format you think it should be.  You can also upload compressed files, which will automatically be decompressed. 
-
------
-
-**Ab1**
-
-A binary sequence file in 'ab1' format with a '.ab1' file extension.  You must manually select this 'File Format' when uploading the file.
-
------
-
-**Axt**
-
-blastz pairwise alignment format.  Each alignment block in an axt file contains three lines: a summary line and 2 sequence lines.  Blocks are separated from one another by blank lines.  The summary line contains chromosomal position and size information about the alignment. It consists of 9 required fields.
-
------
-
-**Bam**
-
-A binary file compressed in the BGZF format with a '.bam' file extension.
-
------
-
-**Bed**
-
-* Tab delimited format (tabular)
-* Does not require header line
-* Contains 3 required fields:
-
-  - chrom - The name of the chromosome (e.g. chr3, chrY, chr2_random) or contig (e.g. ctgY1).
-  - chromStart - The starting position of the feature in the chromosome or contig. The first base in a chromosome is numbered 0.
-  - chromEnd - The ending position of the feature in the chromosome or contig. The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99.
-
-* May contain 9 additional optional BED fields:
-
-  - name - Defines the name of the BED line. This label is displayed to the left of the BED line in the Genome Browser window when the track is open to full display mode or directly to the left of the item in pack mode.
-  - score - A score between 0 and 1000. If the track line useScore attribute is set to 1 for this annotation data set, the score value will determine the level of gray in which this feature is displayed (higher numbers = darker gray).
-  - strand - Defines the strand - either '+' or '-'.
-  - thickStart - The starting position at which the feature is drawn thickly (for example, the start codon in gene displays).
-  - thickEnd - The ending position at which the feature is drawn thickly (for example, the stop codon in gene displays).
-  - itemRgb - An RGB value of the form R,G,B (e.g. 255,0,0). If the track line itemRgb attribute is set to "On", this RGB value will determine the display color of the data contained in this BED line. NOTE: It is recommended that a simple color scheme (eight colors or less) be used with this attribute to avoid overwhelming the color resources of the Genome Browser and your Internet browser.
-  - blockCount - The number of blocks (exons) in the BED line.
-  - blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount.
-  - blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount.
-
-* Example::
-
-    chr22 1000 5000 cloneA 960 + 1000 5000 0 2 567,488, 0,3512
-    chr22 2000 6000 cloneB 900 - 2000 6000 0 2 433,399, 0,3601
-
------
-
-**Fasta**
-
-A sequence in FASTA format consists of a single-line description, followed by lines of sequence data.  The first character of the description line is a greater-than (">") symbol in the first column.  All lines should be shorter than 80 characters::
-
-    >sequence1
-    atgcgtttgcgtgc
-    gtcggtttcgttgc
-    >sequence2
-    tttcgtgcgtatag
-    tggcgcggtga
-
------
-
-**FastqSolexa**
-
-FastqSolexa is the Illumina (Solexa) variant of the Fastq format, which stores sequences and quality scores in a single file::
-
-    @seq1  
-    GACAGCTTGGTTTTTAGTGAGTTGTTCCTTTCTTT  
-    +seq1  
-    hhhhhhhhhhhhhhhhhhhhhhhhhhPW@hhhhhh  
-    @seq2  
-    GCAATGACGGCAGCAATAAACTCAACAGGTGCTGG  
-    +seq2  
-    hhhhhhhhhhhhhhYhhahhhhWhAhFhSIJGChO
-    
-Or:: 
-
-    @seq1
-    GAATTGATCAGGACATAGGACAACTGTAGGCACCAT
-    +seq1
-    40 40 40 40 35 40 40 40 25 40 40 26 40 9 33 11 40 35 17 40 40 33 40 7 9 15 3 22 15 30 11 17 9 4 9 4
-    @seq2
-    GAGTTCTCGTCGCCTGTAGGCACCATCAATCGTATG
-    +seq2
-    40 15 40 17 6 36 40 40 40 25 40 9 35 33 40 14 14 18 15 17 19 28 31 4 24 18 27 14 15 18 2 8 12 8 11 9
-    
------
-
-**Gff**
-
-GFF lines have nine required fields that must be tab-separated.
-
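-A hypothetical line showing the nine tab-separated fields (seqname, source, feature, start, end, score, strand, frame, group)::
-
-    chr22	TeleGene	enhancer	1000000	1001000	500	+	.	touch1
-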
------
-
-**Gff3**
-
-The GFF3 format addresses the most common extensions to GFF, while preserving backward compatibility with previous formats.
-
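-A hypothetical line; in GFF3 the ninth column holds tag=value attributes such as ID, Name, and Parent::
-
-    ctg123	example	gene	1000	9000	.	+	.	ID=gene00001;Name=EDEN
-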
------
-
-**Interval (Genomic Intervals)**
-
-- Tab delimited format (tabular)
-- File must start with a definition line in the following format (columns may be in any order)::
-
-    #CHROM START END STRAND
-
-- CHROM - The name of the chromosome (e.g. chr3, chrY, chr2_random) or contig (e.g. ctgY1).
-- START - The starting position of the feature in the chromosome or contig. The first base in a chromosome is numbered 0.
-- END - The ending position of the feature in the chromosome or contig. The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99.
-- STRAND - Defines the strand - either '+' or '-'.
-
-- Example::
-
-    #CHROM START END   STRAND NAME COMMENT
-    chr1   10    100   +      exon myExon
-    chrX   1000  10050 -      gene myGene
-
------
-
-**Lav**
-
-Lav is the primary output format for BLASTZ.  The first line of a .lav file begins with #:lav.
-
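-A sketch of the opening of a .lav file (stanza contents elided; real files continue with further stanzas describing the sequences and alignments)::
-
-    #:lav
-    d {
-      ...
-    }
-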
------
-
-**MAF**
-
-TBA and multiz multiple alignment format.  The first line of a .maf file begins with ##maf. This word is followed by white-space-separated "variable=value" pairs. There should be no white space surrounding the "=".
-
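-An illustrative fragment (assemblies, coordinates, and score are invented for this sketch)::
-
-    ##maf version=1 scoring=blastz
-    a score=23262.0
-    s hg16.chr7    27578828 38 + 158545518 AAAGGGAATGTTAACCAAATGAATTGTCTCTTACGGTG
-    s panTro1.chr6 28741140 38 + 161576975 AAAGGGAATGTTAACCAAATGAATTGTCTCTTACGGTG
-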
------
-
-**Scf**
-
-A binary sequence file in 'scf' format with a '.scf' file extension.  You must manually select this 'File Format' when uploading the file.
-
------
-
-**Sff**
-
-A binary file in 'Standard Flowgram Format' with a '.sff' file extension.
-
------
-
-**Tabular (tab delimited)**
-
-Any data in tab delimited format (tabular)
-
------
-
-**Wig**
-
-The wiggle format is line-oriented.  Wiggle data is preceded by a track definition line, which adds a number of options for controlling the default display of this track.
-
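-An illustrative track (values invented); the track line is followed by a declaration line and data lines::
-
-    track type=wiggle_0 name="example" description="sketch"
-    variableStep chrom=chr19 span=150
-    49304701 10.0
-    49304901 12.5
-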
------
-
-**Other text type**
-
-Any text file
-
-  </help>
-</tool>
--- a/tools/data_source/worm_modencode.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-<?xml version="1.0"?>
-<tool name="modENCODE worm" id="modENCODEworm" tool_type="data_source">
-    <description>server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://modencode.oicr.on.ca/fgb2/gbrowse/worm" check_values="false" target="_top"> 
-        <display>go to modENCODE worm server $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=modENCODEworm" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="ce6" >
-            <value_translation>
-                <value galaxy_value="ce6" remote_value="worm" />
-            </value_translation>
-        </request_param>
-        <request_param galaxy_name="URL" remote_name="URL" missing="">
-            <append_param separator="&amp;" first_separator="?" join="=">
-                <value name="d" missing="" />
-                <value name="dbkey" missing="ce6" />
-                <value name="q" missing="" />
-                <value name="s" missing="" />
-                <value name="t" missing="" />
-            </append_param>
-        </request_param>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
-        <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" />
-    </request_param_translation>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="txt" label="${tool.name} on $getVar( 'q', 'unknown position' )"/>
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/wormbase.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-<?xml version="1.0"?>
-<tool name="Wormbase" id="wormbase" tool_type="data_source">
-    <description>server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://www.wormbase.org/db/seq/gbgff/c_elegans/" check_values="false" target="_top"> 
-        <display>go to Wormbase server $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=wormbase" />
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL" remote_name="URL" missing="">
-            <append_param separator="&amp;" first_separator="?" join="=">
-                <value name="d" missing="" />
-                <value name="dbkey" missing="" />
-                <value name="q" missing="" />
-                <value name="s" missing="" />
-                <value name="t" missing="" />
-            </append_param>
-        </request_param>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
-        <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" />
-    </request_param_translation>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="txt" label="${tool.name} on $getVar( 'q', 'unknown position' )"/>
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/wormbase_test.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-<?xml version="1.0"?>
-<tool name="Wormbase" id="wormbase_test" tool_type="data_source">
-	<description>test server</description>
-	<command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-	<inputs action="http://dev.wormbase.org/db/seq/gbrowse/c_elegans/" check_values="false" target="_top"> 
-		<display>go to Wormbase test server $GALAXY_URL</display>
-		<param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=wormbase_test" />
-	</inputs>
-    <request_param_translation>
-        <request_param galaxy_name="URL" remote_name="URL" missing="">
-            <append_param separator="&amp;" first_separator="?" join="=">
-                <value name="d" missing="" />
-                <value name="dbkey" missing="" />
-                <value name="q" missing="" />
-                <value name="s" missing="" />
-                <value name="t" missing="" />
-            </append_param>
-        </request_param>
-        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
-        <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" />
-    </request_param_translation>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="txt" label="${tool.name} on $getVar( 'q', 'unknown position' )"/>
-	</outputs>
-	<options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/yeastmine.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-<?xml version="1.0"?>
-<tool name="YeastMine" id="yeastmine" tool_type="data_source">
-    <description>server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://yeastmine.yeastgenome.org/yeastmine/begin.do" check_values="false" method="get"> 
-        <display>go to yeastMine server $GALAXY_URL</display>
-    </inputs>
-    <request_param_translation>
-        <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" >
-            <value_translation>
-                <value galaxy_value="auto" remote_value="txt" /> <!-- intermine currently always provides 'txt', make this auto detect -->
-            </value_translation>
-        </request_param>
-    </request_param_translation>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="txt" />
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/discreteWavelet/execute_dwt_IvC_all.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,210 +0,0 @@
-#!/usr/bin/perl -w
-use warnings;
-use IO::Handle;
-
-$usage = "execute_dwt_IvC_all.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] [PDF.out]  \n";
-die $usage unless @ARGV == 4;
-
-#get the input arguments
-my $firstInputFile = $ARGV[0];
-my $secondInputFile = $ARGV[1];
-my $firstOutputFile = $ARGV[2];
-my $secondOutputFile = $ARGV[3];
-
-open (INPUT1, "<", $firstInputFile) || die("Could not open file $firstInputFile \n");
-open (INPUT2, "<", $secondInputFile) || die("Could not open file $secondInputFile \n");
-open (OUTPUT1, ">", $firstOutputFile) || die("Could not open file $firstOutputFile \n");
-open (OUTPUT2, ">", $secondOutputFile) || die("Could not open file $secondOutputFile \n");
-open (ERROR,  ">", "error.txt")  or die ("Could not open file error.txt \n");
-
-#save all error messages into the error file error.txt using the error file handle ERROR
-STDERR -> fdopen( \*ERROR,  "w" ) or die ("Could not direct errors to the error file error.txt \n");
-
-
-print "There are two input data files: \n";
-print "The input data file is: $firstInputFile \n";
-print "The control data file is: $secondInputFile \n";
-
-# IvC test
-$test = "IvC";
-
-# construct an R script to implement the IvC test
-print "\n";
-
-$r_script = "get_dwt_IvC_test.r"; 
-print "$r_script \n";
-
-# R script
-open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n";
-print Rcmd "
-        ###########################################################################################
-        # code to do wavelet Indel vs. Control
-        # signal is the difference I-C; function is second moment i.e. variance from zero not mean
-        # to perform wavelet transf. of signal, scale-by-scale analysis of the function 
-        # create null bands by permuting the original data series
-        # generate plots and table matrix of correlation coefficients including p-values
-        ############################################################################################
-        library(\"Rwave\");
-        library(\"wavethresh\");
-        library(\"waveslim\");
-        
-        options(echo = FALSE)
-        
-        # normalize data
-        norm <- function(data){
-            v <- (data - mean(data))/sd(data);
-            if(sum(is.na(v)) >= 1){
-                v <- data;
-            }
-            return(v);
-        }
-        
-        dwt_cor <- function(data.short, names.short, data.long, names.long, test, pdf, table, filter = 4, bc = \"symmetric\", wf = \"haar\", boundary = \"reflection\") {
-            print(test);
-            print(pdf);
-            print(table);
-            
-            pdf(file = pdf);
-            final_pvalue = NULL;
-            title = NULL;
-                
-            short.levels <- wd(data.short[, 1], filter.number = filter, bc = bc)\$nlevels;
-            title <- c(\"motif\");
-            for (i in 1:short.levels){
-            	title <- c(title, paste(i, \"moment2\", sep = \"_\"), paste(i, \"pval\", sep = \"_\"), paste(i, \"test\", sep = \"_\"));
-            }
-            print(title);
-        
-            # loop to compare a vs a
-            for(i in 1:length(names.short)){
-        		wave1.dwt = NULL;
-        		m2.dwt = diff = var.dwt = NULL;
-        		out = NULL;
-                out <- vector(length = length(title));
-        
-        		print(names.short[i]);
-        		print(names.long[i]);
-                        
-        		# need exit if not comparing motif(a) vs motif(a)
-        		if (names.short[i] != names.long[i]){
-                	stop(paste(\"motif\", names.short[i], \"is not the same as\", names.long[i], sep = \" \"));
-        		}
-        		else {
-                	# signal is the difference I-C data sets
-                    diff<-data.short[,i]-data.long[,i];
-        
-                    # normalize the signal
-                    diff<-norm(diff);
-        
-                    # function is the 2nd moment:
-                    # m_j = (1/N) * sum_N (W_j + V_J)^2 = (1/N) * sum_N (W_j)^2 + (X_bar)^2
-            		wave1.dwt <- dwt(diff, wf = wf, short.levels, boundary = boundary);
-            		var.dwt <- wave.variance(wave1.dwt);
-                	m2.dwt <- vector(length = short.levels)
-                    for(level in 1:short.levels){
-                    	m2.dwt[level] <- var.dwt[level, 1] + (mean(diff)^2);
-                    }
-                                
-            		# CI bands by permutation of time series
-            		feature1 = feature2 = NULL;
-            		feature1 = data.short[, i];
-            		feature2 = data.long[, i];
-            		null = results = med = NULL; 
-            		m2_25 = m2_975 = NULL;
-            
-            		for (k in 1:1000) {
-                		nk_1 = nk_2 = NULL;
-                		m2_null = var_null = NULL;
-                		null.levels = null_wave1 = null_diff = NULL;
-                		nk_1 <- sample(feature1, length(feature1), replace = FALSE);
-                		nk_2 <- sample(feature2, length(feature2), replace = FALSE);
-                		null.levels <- wd(nk_1, filter.number = filter, bc = bc)\$nlevels;
-                		null_diff <- nk_1-nk_2;
-                		null_diff <- norm(null_diff);
-                		null_wave1 <- dwt(null_diff, wf = wf, short.levels, boundary = boundary);
-                        var_null <- wave.variance(null_wave1);
-                		m2_null <- vector(length = null.levels);
-                		for(level in 1:null.levels){
-                        	m2_null[level] <- var_null[level, 1] + (mean(null_diff)^2);
-                		}
-                		null= rbind(null, m2_null);
-            		}
-                
-            		null <- apply(null, 2, sort, na.last = TRUE);
-            		m2_25 <- null[25,];
-            		m2_975 <- null[975,];
-            		med <- apply(null, 2, median, na.rm = TRUE);
-
-            		# plot
-            		results <- cbind(m2.dwt, m2_25, m2_975);
-            		matplot(results, type = \"b\", pch = \"*\", lty = 1, col = c(1, 2, 2), xlab = \"Wavelet Scale\", ylab = c(\"Wavelet 2nd Moment\", test), main = (names.short[i]), cex.main = 0.75);
-            		abline(h = 1);
-
-            		# get pvalues by comparison to null distribution
-            		out <- c(names.short[i]);
-            		for (m in 1:length(m2.dwt)){
-                    	print(paste(\"scale\", m, sep = \" \"));
-                        print(paste(\"m2\", m2.dwt[m], sep = \" \"));
-                        print(paste(\"median\", med[m], sep = \" \"));
-                        out <- c(out, format(m2.dwt[m], digits = 4));	
-                        pv = NULL;
-                        if(is.na(m2.dwt[m])){
-                        	pv <- \"NA\"; 
-                        } 
-                        else {
-                        	if (m2.dwt[m] >= med[m]){
-                            	# R tail test
-                                tail <- \"R\";
-                                pv <- (length(which(null[, m] >= m2.dwt[m])))/(length(na.exclude(null[, m])));
-                            }
-                            else{
-                                if (m2.dwt[m] < med[m]){
-                                	# L tail test
-                                    tail <- \"L\";
-                                    pv <- (length(which(null[, m] <= m2.dwt[m])))/(length(na.exclude(null[, m])));
-                                }
-                            }
-                        }
-                        out <- c(out, pv);
-                        print(pv);  
-                        out <- c(out, tail);
-                    }
-                    final_pvalue <-rbind(final_pvalue, out);
-                    print(out);
-                }
-            }
-            
-            colnames(final_pvalue) <- title;
-            write.table(final_pvalue, file = table, sep = \"\\t\", quote = FALSE, row.names = FALSE);
-            dev.off();
-        }\n";
-
-print Rcmd "
-        # execute
-        # read in data 
-        
-        inputData <- read.delim(\"$firstInputFile\");
-        inputDataNames <- colnames(inputData);
-        
-        controlData <- read.delim(\"$secondInputFile\");
-        controlDataNames <- colnames(controlData);
-        
-        # call the test function to implement IvC test
-        dwt_cor(inputData, inputDataNames, controlData, controlDataNames, test = \"$test\", pdf = \"$secondOutputFile\", table = \"$firstOutputFile\");
-        print (\"done with the correlation test\");
-\n";
-
-print Rcmd "#eof\n";
-
-close Rcmd;
-
-system("echo \"wavelet IvC test started on \`hostname\` at \`date\`\"\n");
-system("R --no-restore --no-save --no-readline < $r_script > $r_script.out\n");
-system("echo \"wavelet IvC test ended on \`hostname\` at \`date\`\"\n");
-
-#close the input and output and error files
-close(ERROR);
-close(OUTPUT2);
-close(OUTPUT1);
-close(INPUT2);
-close(INPUT1);
\ No newline at end of file
--- a/tools/discreteWavelet/execute_dwt_IvC_all.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-<tool id="compute_p-values_second_moments_feature_occurrences_between_two_datasets_using_discrete_wavelet_transfom" name="Compute P-values and Second Moments for Feature Occurrences" version="1.0.0">
-  <description>between two datasets using Discrete Wavelet Transforms</description>
-  
-  <command interpreter="perl">
-  	execute_dwt_IvC_all.pl $inputFile1 $inputFile2 $outputFile1 $outputFile2
-  </command>
-
-  <inputs>
-  	<param format="tabular" name="inputFile1" type="data" label="Select the first input file"/>	
-  	<param format="tabular" name="inputFile2" type="data" label="Select the second input file"/>
-  </inputs>
-  
-  <outputs>
-    <data format="tabular" name="outputFile1"/> 
-    <data format="pdf" name="outputFile2"/>
-  </outputs>
-  	
-  <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This program generates plots and computes a table (matrix) of second moments, p-values, and test orientations at multiple scales for comparing the occurrences of features in one (input) dataset with their occurrences in another (control) dataset, using a multiscale wavelet analysis technique.
-
-The program assumes that the user has two sets of DNA sequences, S1 and S2, each of which consists of one or more sequences of equal length. Each sequence in each set is divided into the same number of intervals n such that n = 2^k, where k is a positive integer and k >= 1. Thus, n can be any value in the set {2, 4, 8, 16, 32, 64, 128, ...}, and k represents the number of scales.
-
-The program has two input files obtained as follows:
-
-For a given set of features, say motifs, the user counts the number of occurrences of each feature in each interval of each sequence in S1 and S2, and builds two tabular files representing the counts per interval for S1 and S2. These are the input files of the program.
-
-The program gives two output files:
-
-- The first output file is a TABULAR format file representing the second moments, p-values, and test orientations for each feature at each scale.
-- The second output file is a PDF file consisting of as many figures as the number of features, such that each figure represents the values of the second moment for that feature at every scale.
-
------
-
-.. class:: warningmark
-
-**Note**
-
-To obtain empirical p-values, the program runs a random permutation test, so it gives slightly different results each time it is run on the same input files.
-
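-For intuition, here is a minimal R sketch of that permutation logic (the data and names are invented; the actual script applies the same idea to wavelet second moments at each scale)::
-
-    # empirical p-value of an observed statistic against a permutation null
-    set.seed(1)                       # for reproducibility; the script itself does not set a seed
-    x <- rnorm(16); y <- rnorm(16)    # stand-ins for one feature's interval counts in S1 and S2
-    obs <- mean((x - y)^2)            # observed statistic: a second moment of the difference
-    null <- replicate(1000, mean((sample(x) - sample(y))^2))
-    # right- or left-tail p-value, depending on which side of the null median obs falls
-    pv <- if (obs >= median(null)) mean(null >= obs) else mean(null <= obs)
-    pv
-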
------
-
-**Example**
-
-Counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S1 gives the following tabular file::
-
-	deletionHoptspot	insertionHoptspot	dnaPolPauseFrameshift	topoisomeraseCleavageSite	translinTarget	
-		226			403			416			221				1165
-		236			444			380			241				1223
-		242			496			391			195				1116
-		243			429			364			191				1118
-		244			410			371			236				1063
-		230			386			370			217				1087
-		275			404			402			214				1044
-		265			443			365			231				1086
-		255			390			354			246				1114
-		281			384			406			232				1102
-		263			459			369			251				1135
-		280			433			400			251				1159
-		278			385			382			231				1147
-		248			393			389			211				1162
-		251			403			385			246				1114
-		239			383			347			227				1172
-
-And counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S2 gives the following tabular file:: 
-
-	deletionHoptspot	insertionHoptspot	dnaPolPauseFrameshift	topoisomeraseCleavageSite	translinTarget
-		235			374			407			257				1159
-		244			356			353			212				1128
-		233			343			322			204				1110
-		222			329			398			253				1054
-		216			325			328			253				1129
-		257			368			352			221				1115
-		238			360			346			224				1102
-		225			350			377			248				1107
-		230			330			365			236				1132
-		241			389			357			220				1120
-		274			354			392			235				1120
-		250			379			354			210				1102
-		254			329			320			251				1080
-		221			355			406			279				1127
-		224			330			390			249				1129
-		246			366			364			218				1176
-
-  
-We notice that the number of scales here is 4 because 16 = 2^4. Running the program on the above input files gives the following output:
-
-The first output file::
-
-	motif				1_moment2	1_pval	1_test	2_moment2	2_pval	2_test	3_moment2	3_pval	3_test	4_moment2	4_pval	4_test
-	
-	deletionHoptspot		0.8751		0.376	L	1.549		0.168	R	0.6152		0.434	L	0.5735		0.488	R
-	insertionHoptspot		0.902		0.396	L	1.172		0.332	R	0.6843		0.456	L	1.728		0.213	R
-	dnaPolPauseFrameshift		1.65		0.013	R	0.267		0.055	L	0.1387		0.124	L	0.4516		0.498	L
-	topoisomeraseCleavageSite	0.7443		0.233	L	1.023		0.432	R	1.933		0.155	R	1.09		0.3	R
-	translinTarget			0.5084		0.057	L	0.8219		0.446	L	3.604		0.019	R	0.4377		0.492	L
-
-The second output file:
-
-.. image:: ./static/operation_icons/dwt_IvC_1.png
-.. image:: ./static/operation_icons/dwt_IvC_2.png
-.. image:: ./static/operation_icons/dwt_IvC_3.png
-.. image:: ./static/operation_icons/dwt_IvC_4.png
-.. image:: ./static/operation_icons/dwt_IvC_5.png
-
-  </help>  
-  
-</tool>
--- a/tools/discreteWavelet/execute_dwt_cor_aVa_perClass.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,221 +0,0 @@
-#!/usr/bin/perl -w
-
-use warnings;
-use IO::Handle;
-
-$usage = "execute_dwt_cor_aVa_perClass.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] [PDF.out]  \n";
-die $usage unless @ARGV == 4;
-
-#get the input arguments
-my $firstInputFile = $ARGV[0];
-my $secondInputFile = $ARGV[1];
-my $firstOutputFile = $ARGV[2];
-my $secondOutputFile = $ARGV[3];
-
-open (INPUT1, "<", $firstInputFile) || die("Could not open file $firstInputFile \n");
-open (INPUT2, "<", $secondInputFile) || die("Could not open file $secondInputFile \n");
-open (OUTPUT1, ">", $firstOutputFile) || die("Could not open file $firstOutputFile \n");
-open (OUTPUT2, ">", $secondOutputFile) || die("Could not open file $secondOutputFile \n");
-open (ERROR,  ">", "error.txt")  or die ("Could not open file error.txt \n");
-
-#save all error messages into the error file error.txt using the error file handle ERROR
-STDERR -> fdopen( \*ERROR,  "w" ) or die ("Could not direct errors to the error file error.txt \n");
-
-print "There are two input data files: \n";
-print "The input data file is: $firstInputFile \n";
-print "The control data file is: $secondInputFile \n";
-
-# cor_aVa test
-$test = "cor_aVa";
-
-# construct an R script to implement the cor_aVa test
-print "\n";
-
-$r_script = "get_dwt_cor_aVa_test.r"; 
-print "$r_script \n";
-
-open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n";
-print Rcmd "
-	##################################################################################
-	# code to do all correlation tests of form: motif(a) vs. motif(a)
-	# add code to create null bands by permuting the original data series
-	# generate plots and table matrix of correlation coefficients including p-values
-	##################################################################################
-	library(\"Rwave\");
-	library(\"wavethresh\");
-	library(\"waveslim\");
-	
-	options(echo = FALSE)
-	
-	# normalize data
-	norm <- function(data){
-        v <- (data - mean(data))/sd(data);
-        if(sum(is.na(v)) >= 1){
-        	v <- data;
-        }
-        return(v);
-	}
-	
-	dwt_cor <- function(data.short, names.short, data.long, names.long, test, pdf, table, filter = 4, bc = \"symmetric\", method = \"kendall\", wf = \"haar\", boundary = \"reflection\") {
-		print(test);
-	    print(pdf);
-		print(table);
-		
-	    pdf(file = pdf);   
-	    final_pvalue = NULL;
-		title = NULL;
-		
-	    short.levels <- wd(data.short[, 1], filter.number = filter, bc = bc)\$nlevels;
-		title <- c(\"motif\");
-        for (i in 1:short.levels){
-	        title <- c(title, paste(i, \"cor\", sep = \"_\"), paste(i, \"pval\", sep = \"_\"));
-        }
-        print(title);
-	
-        # normalize the raw data
-        data.short <- apply(data.short, 2, norm);
-        data.long <- apply(data.long, 2, norm);
-        
-        for(i in 1:length(names.short)){
-        	# Kendall Tau
-            # DWT wavelet correlation function
-            # include significance to compare
-            wave1.dwt = wave2.dwt = NULL;
-            tau.dwt = NULL;
-            out = NULL;
-
-            print(names.short[i]);
-            print(names.long[i]);
-            
-            # need exit if not comparing motif(a) vs motif(a)
-            if (names.short[i] != names.long[i]){
-            	stop(paste(\"motif\", names.short[i], \"is not the same as\", names.long[i], sep = \" \"));
-            }
-            else {
-            	wave1.dwt <- dwt(data.short[, i], wf = wf, short.levels, boundary = boundary);
-                wave2.dwt <- dwt(data.long[, i], wf = wf, short.levels, boundary = boundary);
-                tau.dwt <- vector(length=short.levels)
-                       
-				#perform cor test on wavelet coefficients per scale 
-				for(level in 1:short.levels){
-                	w1_level = w2_level = NULL;
-                    w1_level <- (wave1.dwt[[level]]);
-                    w2_level <- (wave2.dwt[[level]]);
-                    tau.dwt[level] <- cor.test(w1_level, w2_level, method = method)\$estimate;
-                }
-                
-                # CI bands by permutation of time series
-                feature1 = feature2 = NULL;
-                feature1 = data.short[, i];
-                feature2 = data.long[, i];
-                null = results = med = NULL; 
-                cor_25 = cor_975 = NULL;
-                
-                for (k in 1:1000) {
-                	nk_1 = nk_2 = NULL;
-                    null.levels = NULL;
-                    cor = NULL;
-                    null_wave1 = null_wave2 = NULL;
-                    
-                    nk_1 <- sample(feature1, length(feature1), replace = FALSE);
-                    nk_2 <- sample(feature2, length(feature2), replace = FALSE);
-                    null.levels <- wd(nk_1, filter.number = filter, bc = bc)\$nlevels;
-                    cor <- vector(length = null.levels);
-                    null_wave1 <- dwt(nk_1, wf = wf, short.levels, boundary = boundary);
-                    null_wave2 <- dwt(nk_2, wf = wf, short.levels, boundary = boundary);
-
-                    for(level in 1:null.levels){
-                    	null_level1 = null_level2 = NULL;
-                        null_level1 <- (null_wave1[[level]]);
-                        null_level2 <- (null_wave2[[level]]);
-                        cor[level] <- cor.test(null_level1, null_level2, method = method)\$estimate;
-                    }
-                    null = rbind(null, cor);
-                }
-                
-                null <- apply(null, 2, sort, na.last = TRUE);
-                print(paste(\"NAs\", length(which(is.na(null))), sep = \" \"));
-                cor_25 <- null[25,];
-                cor_975 <- null[975,];
-                med <- (apply(null, 2, median, na.rm = TRUE));
-
-				# plot
-                results <- cbind(tau.dwt, cor_25, cor_975);
-                matplot(results, type = \"b\", pch = \"*\" , lty = 1, col = c(1, 2, 2), ylim = c(-1, 1), xlab = \"Wavelet Scale\", ylab = \"Wavelet Correlation Kendall's Tau\", main = (paste(test, names.short[i], sep = \" \")), cex.main = 0.75);
-                abline(h = 0);
-
-                # get pvalues by comparison to null distribution
- 			    ### modify pval calculation for error type II of T test ####
-                out <- (names.short[i]);
-                for (m in 1:length(tau.dwt)){
-                	print(paste(\"scale\", m, sep = \" \"));
-                    print(paste(\"tau\", tau.dwt[m], sep = \" \"));
-                    print(paste(\"med\", med[m], sep = \" \"));
-					out <- c(out, format(tau.dwt[m], digits = 3));	
-                    pv = NULL;
-                    if(is.na(tau.dwt[m])){
-                    	pv <- \"NA\"; 
-                    } 
-                    else {
-                    	if (tau.dwt[m] >= med[m]){
-                        	# R tail test
-                            print(paste(\"R\"));
-                            ### per sv ok to use inequality not strict
-                            pv <- (length(which(null[, m] >= tau.dwt[m])))/(length(na.exclude(null[, m])));
-                            if (tau.dwt[m] == med[m]){
-								print(\"tau == med\");
-                                print(summary(null[, m]));
-                            }
-                    	}
-                        else if (tau.dwt[m] < med[m]){
-                        	# L tail test
-                            print(paste(\"L\"));
-                            pv <- (length(which(null[, m] <= tau.dwt[m])))/(length(na.exclude(null[, m])));
-                        }
-					}
-					out <- c(out, pv);
-                    print(paste(\"pval\", pv, sep = \" \"));
-                }
-                final_pvalue <- rbind(final_pvalue, out);
-				print(out);
-        	}
-        }
-        colnames(final_pvalue) <- title;
-        write.table(final_pvalue, file = table, sep = \"\\t\", quote = FALSE, row.names = FALSE)
-        dev.off();
-	}\n";
-
-print Rcmd "
-	# execute
-	# read in data 
-		
-	inputData1 = inputData2 = NULL;
-	inputData.short1 = inputData.short2 = NULL;
-	inputDataNames.short1 = inputDataNames.short2 = NULL;
-		
-	inputData1 <- read.delim(\"$firstInputFile\");
-	inputData.short1 <- inputData1[, +c(1:ncol(inputData1))];
-	inputDataNames.short1 <- colnames(inputData.short1);
-		
-	inputData2 <- read.delim(\"$secondInputFile\");
-	inputData.short2 <- inputData2[, +c(1:ncol(inputData2))];
-	inputDataNames.short2 <- colnames(inputData.short2);
-	
-	# cor test for motif(a) in inputData1 vs motif(a) in inputData2
-	dwt_cor(inputData.short1, inputDataNames.short1, inputData.short2, inputDataNames.short2, test = \"$test\", pdf = \"$secondOutputFile\", table = \"$firstOutputFile\");
-	print (\"done with the correlation test\");
-	
-	#eof\n";
-close Rcmd;
-
-system("echo \"wavelet IvC test started on \`hostname\` at \`date\`\"\n");
-system("R --no-restore --no-save --no-readline < $r_script > $r_script.out\n");
-system("echo \"wavelet IvC test ended on \`hostname\` at \`date\`\"\n");
-
-#close the input and output and error files
-close(ERROR);
-close(OUTPUT2);
-close(OUTPUT1);
-close(INPUT2);
-close(INPUT1);
-
--- a/tools/discreteWavelet/execute_dwt_cor_aVa_perClass.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-<tool id="compute_p-values_correlation_coefficients_feature_occurrences_between_two_datasets_using_discrete_wavelet_transfom" name="Compute P-values and Correlation Coefficients for Feature Occurrences" version="1.0.0">
-  <description>between two datasets using Discrete Wavelet Transforms</description>
-  
-  <command interpreter="perl">
-  	execute_dwt_cor_aVa_perClass.pl $inputFile1 $inputFile2 $outputFile1 $outputFile2
-  </command>
-
-  <inputs>
-  	<param format="tabular" name="inputFile1" type="data" label="Select the first input file"/>	
-  	<param format="tabular" name="inputFile2" type="data" label="Select the second input file"/>
-  </inputs>
-  
-  <outputs>
-    <data format="tabular" name="outputFile1"/> 
-    <data format="pdf" name="outputFile2"/>
-  </outputs>
-  	
-  <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This program generates plots and computes a table (matrix) of correlation coefficients and p-values at multiple scales for the correlation between the occurrences of features in one dataset and their occurrences in another, using a multiscale wavelet analysis technique.
-
-The program assumes that the user has two sets of DNA sequences, S1 and S2, each of which consists of one or more sequences of equal length. Each sequence in each set is divided into the same number of intervals n such that n = 2^k, where k is a positive integer and k >= 1. Thus, n can be any value in the set {2, 4, 8, 16, 32, 64, 128, ...}, and k represents the number of scales.
-
-The program has two input files obtained as follows:
-
-For a given set of features, say motifs, the user counts the number of occurrences of each feature in each interval of each sequence in S1 and S2, and builds two tabular files representing the counts per interval for S1 and S2. These are the input files of the program.
-
-The program gives two output files:
-
-- The first output file is a TABULAR format file representing the correlation coefficients and p-values for each feature at each scale.
-- The second output file is a PDF file consisting of as many figures as the number of features, such that each figure represents the values of the correlation coefficient for that feature at every scale.
-
------
-
-.. class:: warningmark
-
-**Note**
-
-To obtain empirical p-values, the program runs a random permutation test, so it gives slightly different results each time it is run on the same input files.
-
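-For intuition, here is a minimal R sketch of that permutation logic for Kendall's tau (data invented; the actual script computes tau on wavelet coefficients at each scale)::
-
-    set.seed(1)                       # for reproducibility; the script itself does not set a seed
-    x <- rnorm(16); y <- rnorm(16)    # stand-ins for one feature's interval counts in S1 and S2
-    obs <- cor(x, y, method = "kendall")
-    null <- replicate(1000, cor(sample(x), sample(y), method = "kendall"))
-    pv <- if (obs >= median(null)) mean(null >= obs) else mean(null <= obs)
-    pv
-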
------
-
-**Example**
-
-Counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S1 gives the following tabular file::
-
-	deletionHoptspot	insertionHoptspot	dnaPolPauseFrameshift	topoisomeraseCleavageSite	translinTarget	
-		269			366			330			238				1129
-		239			328			327			283				1188
-		254			351			358			297				1151
-		262			371			355			256				1107
-		254			361			352			234				1192
-		265			354			367			240				1182
-		255			359			333			235				1217
-		271			389			387			272				1241
-		240			305			341			249				1159
-		272			351			337			257				1169
-		275			351			337			233				1158
-		305			331			361			253				1172
-		277			341			343			253				1113
-		266			362			355			267				1162
-		235			326			329			241				1230
-		254			335			360			251				1172
-
-And counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S2 gives the following tabular file::
-
-	deletionHoptspot	insertionHoptspot	dnaPolPauseFrameshift	topoisomeraseCleavageSite	translinTarget
-		104			146			142			113				478
-		89			146			151			94				495
-		100			176			151			88				435
-		96			163			128			114				468
-		99			138			144			91				513
-		112			126			162			106				468
-		86			127			145			83				491
-		104			145			171			110				496
-		91			121			147			104				469
-		103			141			145			98				458
-		92			134			142			117				468
-		97			146			145			107				471
-		115			121			136			109				470
-		113			135			138			101				491
-		111			150			138			102				451
-		94			128			151			138				481
-
-  
-We notice that the number of scales here is 4 because 16 = 2^4. Running the program on the above input files gives the following output:
-
-The first output file::
-
-	motif				1_cor		1_pval		2_cor		2_pval		3_cor		3_pval		4_cor		4_pval
-	
-	deletionHoptspot		0.4		0.072		0.143		0.394		-0.667		0.244		1		0.491
-	insertionHoptspot		0.343		0.082		-0.0714		0.446		-1		0.12		1		0.502
-	dnaPolPauseFrameshift		0.617		0.004		-0.5		0.13		0.667		0.234		1		0.506
-	topoisomeraseCleavageSite	-0.183		0.242		-0.286		0.256		0.333		0.353		-1		0.489
-	translinTarget			0.0167		0.503		-0.0714		0.469		1		0.136		1		0.485
-
-The second output file:
-
-.. image:: ./static/operation_icons/dwt_cor_aVa_1.png
-.. image:: ./static/operation_icons/dwt_cor_aVa_2.png
-.. image:: ./static/operation_icons/dwt_cor_aVa_3.png
-.. image:: ./static/operation_icons/dwt_cor_aVa_4.png
-.. image:: ./static/operation_icons/dwt_cor_aVa_5.png
-
-  </help>  
-  
-</tool>
--- a/tools/discreteWavelet/execute_dwt_cor_aVb_all.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,223 +0,0 @@
-#!/usr/bin/perl -w
-
-use warnings;
-use IO::Handle;
-
-$usage = "execute_dwt_cor_aVb_all.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] [PDF.out]  \n";
-die $usage unless @ARGV == 4;
-
-#get the input arguments
-my $firstInputFile = $ARGV[0];
-my $secondInputFile = $ARGV[1];
-my $firstOutputFile = $ARGV[2];
-my $secondOutputFile = $ARGV[3];
-
-open (INPUT1, "<", $firstInputFile) || die("Could not open file $firstInputFile \n");
-open (INPUT2, "<", $secondInputFile) || die("Could not open file $secondInputFile \n");
-open (OUTPUT1, ">", $firstOutputFile) || die("Could not open file $firstOutputFile \n");
-open (OUTPUT2, ">", $secondOutputFile) || die("Could not open file $secondOutputFile \n");
-open (ERROR,  ">", "error.txt")  or die ("Could not open file error.txt \n");
-
-#save all error messages into the error file error.txt using the error file handle ERROR
-STDERR -> fdopen( \*ERROR,  "w" ) or die ("Could not direct errors to the error file error.txt \n");
-
-print "There are two input data files: \n";
-print "The input data file is: $firstInputFile \n";
-print "The control data file is: $secondInputFile \n";
-
-# cor_aVb test
-$test = "cor_aVb_all";
-
-# construct an R script to implement the cor_aVb test
-print "\n";
-
-$r_script = "get_dwt_cor_aVb_test.r"; 
-print "$r_script \n";
-
-
-# R script
-open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n";
-print Rcmd "
-	#################################################################################
-	# code to do all correlation tests of form: motif(a) vs. motif(b)
-	# add code to create null bands by permuting the original data series
-	# generate plots and table matrix of correlation coefficients including p-values
-	#################################################################################
-	library(\"Rwave\");
-	library(\"wavethresh\");
-	library(\"waveslim\");
-	
-	options(echo = FALSE)
-	
-	# normalize data
-	norm <- function(data){
-		v <- (data - mean(data))/sd(data);
-		if(sum(is.na(v)) >= 1){
-			v <- data;
-		}
-		return(v);
-	}
-	
-	dwt_cor <- function(data.short, names.short, data.long, names.long, test, pdf, table, filter = 4, bc = \"symmetric\", method = \"kendall\", wf = \"haar\", boundary = \"reflection\") {
-		print(test);
-		print(pdf);
-		print(table);
-		
-		pdf(file = pdf);
-		final_pvalue = NULL;
-		title = NULL;
-		
-		short.levels <- wd(data.short[, 1], filter.number = filter, bc = bc)\$nlevels;
-		title <- c(\"motif1\", \"motif2\");
-		for (i in 1:short.levels){
-			title <- c(title, paste(i, \"cor\", sep = \"_\"), paste(i, \"pval\", sep = \"_\"));
-		}
-		print(title);
-	
-		# normalize the raw data
-		data.short <- apply(data.short, 2, norm);
-		data.long <- apply(data.long, 2, norm);
-	
-		# loop to compare a vs b
-		for(i in 1:length(names.short)){
-			for(j in 1:length(names.long)){
-				if(i >= j){
-					next;
-				} 
-				else { 
-					# Kendall Tau
-					# DWT wavelet correlation function
-					# include significance to compare
-					wave1.dwt = wave2.dwt = NULL;
-					tau.dwt = NULL;
-					out = NULL;
-	
-					print(names.short[i]);
-					print(names.long[j]);
-					
-					# need exit if comparing motif(a) vs motif(a), i.e. if the two motif names are the same
-					if (names.short[i] == names.long[j]){
-						stop(paste(\"motif\", names.short[i], \"is the same as\", names.long[j], sep = \" \"));
-					}
-					else {
-						wave1.dwt <- dwt(data.short[, i], wf = wf, short.levels, boundary = boundary);
-						wave2.dwt <- dwt(data.long[, j], wf = wf, short.levels, boundary = boundary);
-						tau.dwt <-vector(length = short.levels)
-				   
-						# perform cor test on wavelet coefficients per scale 
-						for(level in 1:short.levels){
-							w1_level = w2_level = NULL;
-							w1_level <- (wave1.dwt[[level]]);
-							w2_level <- (wave2.dwt[[level]]);
-							tau.dwt[level] <- cor.test(w1_level, w2_level, method = method)\$estimate;
-						}
-						
-						# CI bands by permutation of time series
-						feature1 = feature2 = NULL;
-						feature1 = data.short[, i];
-						feature2 = data.long[, j];
-						null = results = med = NULL; 
-						cor_25 = cor_975 = NULL;
-						
-						for (k in 1:1000) {
-							nk_1 = nk_2 = NULL;
-							null.levels = NULL;
-							cor = NULL;
-							null_wave1 = null_wave2 = NULL;
-							
-							nk_1 <- sample(feature1, length(feature1), replace = FALSE);
-							nk_2 <- sample(feature2, length(feature2), replace = FALSE);
-							null.levels <- wd(nk_1, filter.number = filter, bc = bc)\$nlevels;
-							cor <- vector(length = null.levels);
-							null_wave1 <- dwt(nk_1, wf = wf, short.levels, boundary = boundary);
-							null_wave2 <- dwt(nk_2, wf = wf, short.levels, boundary = boundary);
-
-							for(level in 1:null.levels){
-								null_level1 = null_level2 = NULL;
-								null_level1 <- (null_wave1[[level]]);
-								null_level2 <- (null_wave2[[level]]);
-								cor[level] <- cor.test(null_level1, null_level2, method = method)\$estimate;
-							}
-							null = rbind(null, cor);
-						}
-							
-						null <- apply(null, 2, sort, na.last = TRUE);
-						cor_25 <- null[25, ];
-						cor_975 <- null[975, ];
-						med <- (apply(null, 2, median, na.rm = TRUE));
-
-						# plot
-						results <- cbind(tau.dwt, cor_25, cor_975);
-						matplot(results, type = \"b\", pch = \"*\", lty = 1, col = c(1, 2, 2), ylim = c(-1, 1), xlab = \"Wavelet Scale\", ylab = \"Wavelet Correlation Kendall's Tau\", main = (paste(test, names.short[i], \"vs.\", names.long[j], sep = \" \")), cex.main = 0.75);
-						abline(h = 0);
-	
-						# get pvalues by comparison to null distribution
-						### modify pval calculation for error type II of T test ####
-						out <- c(names.short[i],names.long[j]);
-						for (m in 1:length(tau.dwt)){
-							print(m);
-							print(tau.dwt[m]);
-							out <- c(out, format(tau.dwt[m], digits = 3));	
-							pv = NULL;
-							if(is.na(tau.dwt[m])){
-								pv <- \"NA\"; 
-							} 
-							else{
-								if (tau.dwt[m] >= med[m]){
-									# R tail test
-									pv <- (length(which(null[, m] >= tau.dwt[m])))/(length(na.exclude(null[, m])));
-								}
-								else{
-									if (tau.dwt[m] < med[m]){
-										# L tail test
-										pv <- (length(which(null[, m] <= tau.dwt[m])))/(length(na.exclude(null[, m])));
-									}
-								}
-							}
-							out <- c(out, pv);
-							print(pv);
-						}
-						final_pvalue <-rbind(final_pvalue, out);
-						print(out);
-					}
-				}
-			}
-		}
-		colnames(final_pvalue) <- title;
-		write.table(final_pvalue, file = table, sep = \"\\t\", quote = FALSE, row.names = FALSE)
-		dev.off();
-	}\n";
-	
-print Rcmd "
-	# execute
-	# read in data 
-
-	inputData1 = inputData2 = NULL;
-	inputData.short1 = inputData.short2 = NULL;
-	inputDataNames.short1 = inputDataNames.short2 = NULL;
-	
-	inputData1 <- read.delim(\"$firstInputFile\");
-	inputData.short1 <- inputData1[, +c(1:ncol(inputData1))];
-	inputDataNames.short1 <- colnames(inputData.short1);
-		
-	inputData2 <- read.delim(\"$secondInputFile\");
-	inputData.short2 <- inputData2[, +c(1:ncol(inputData2))];
-	inputDataNames.short2 <- colnames(inputData.short2);
-	
-	# cor test for motif(a) in inputData1 vs motif(b) in inputData2
-	dwt_cor(inputData.short1, inputDataNames.short1, inputData.short2, inputDataNames.short2, test = \"$test\", pdf = \"$secondOutputFile\", table = \"$firstOutputFile\");
-	print (\"done with the correlation test\");
-
-	#eof\n";
-close Rcmd;
-
-system("echo \"wavelet IvC test started on \`hostname\` at \`date\`\"\n");
-system("R --no-restore --no-save --no-readline < $r_script > $r_script.out\n");
-system("echo \"wavelet IvC test ended on \`hostname\` at \`date\`\"\n");
-
-#close the input and output and error files
-close(ERROR);
-close(OUTPUT2);
-close(OUTPUT1);
-close(INPUT2);
-close(INPUT1);
--- a/tools/discreteWavelet/execute_dwt_cor_aVb_all.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
-<tool id="compute_p-values_correlation_coefficients_featureA_featureB_occurrences_between_two_datasets_using_discrete_wavelet_transfom" name="Compute P-values and Correlation Coefficients for Occurrences of Two Set of Features" version="1.0.0">
-  <description>between two datasets using Discrete Wavelet Transforms</description>
-  
-  <command interpreter="perl">
-  	execute_dwt_cor_aVb_all.pl $inputFile1 $inputFile2 $outputFile1 $outputFile2
-  </command>
-
-  <inputs>
-  	<param format="tabular" name="inputFile1" type="data" label="Select the first input file"/>	
-  	<param format="tabular" name="inputFile2" type="data" label="Select the second input file"/>
-  </inputs>
-  
-  <outputs>
-    <data format="tabular" name="outputFile1"/> 
-    <data format="pdf" name="outputFile2"/>
-  </outputs>
-  	
-  <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This program generates plots and computes a table (matrix) of correlation coefficients and p-values at multiple scales for the correlation between the occurrences of features in one dataset and their occurrences in another, using a multiscale wavelet analysis technique.
-
-The program assumes that the user has two sets of DNA sequences, S1 and S2, each of which consists of one or more sequences of equal length. Each sequence in each set is divided into the same number of intervals n such that n = 2^k, where k is a positive integer and k >= 1. Thus, n can be any value in the set {2, 4, 8, 16, 32, 64, 128, ...}, and k represents the number of scales.
-
-The program has two input files obtained as follows:
-
-For a given set of features, say motifs, the user counts the number of occurrences of each feature in each interval of each sequence in S1 and S2, and builds two tabular files representing the counts per interval for S1 and S2. These are the input files of the program.
-
-The program gives two output files:
-
-- The first output file is a TABULAR format file representing the correlation coefficients and p-values for each pair of features at each scale.
-- The second output file is a PDF file consisting of as many figures as the number of feature pairs, such that each figure represents the values of the correlation coefficient for that pair at every scale.
-
------
-
-.. class:: warningmark
-
-**Note**
-
-To obtain empirical p-values, the program runs a random permutation test, so it gives slightly different results each time it is run on the same input files.
-
------
-
-**Example**
-
-Counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S1 gives the following tabular file::
-
-	deletionHoptspot	insertionHoptspot	dnaPolPauseFrameshift	topoisomeraseCleavageSite	translinTarget	
-		82			162			158			79				459
-		111			196			154			75				459
-		98			178			160			79				475
-		113			201			170			113				436
-		113			173			147			95				446
-		107			150			155			84				436
-		106			166			175			96				448
-		113			176			135			106				514
-		113			170			152			87				450
-		95			152			167			93				467
-		91			171			169			118				426
-		84			139			160			100				459
-		92			154			164			104				440
-		100			145			154			98				472
-		91			161			152			71				461
-		117			164			139			97				463
-
-And counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S2 gives the following tabular file::
-
-	deletionHoptspot	insertionHoptspot	dnaPolPauseFrameshift	topoisomeraseCleavageSite	translinTarget
-		269			366			330			238				1129
-		239			328			327			283				1188
-		254			351			358			297				1151
-		262			371			355			256				1107
-		254			361			352			234				1192
-		265			354			367			240				1182
-		255			359			333			235				1217
-		271			389			387			272				1241
-		240			305			341			249				1159
-		272			351			337			257				1169
-		275			351			337			233				1158
-		305			331			361			253				1172
-		277			341			343			253				1113
-		266			362			355			267				1162
-		235			326			329			241				1230
-		254			335			360			251				1172
-
-  
-We notice that the number of scales here is 4 because 16 = 2^4. Running the program on the above input files gives the following output:
-
-The first output file::
-
-	motif1				motif2				1_cor		1_pval		2_cor		2_pval		3_cor		3_pval		4_cor		4_pval
-	
-	deletionHoptspot		insertionHoptspot		-0.1		0.346		-0.214		0.338		1		0.127		1		0.467
-	deletionHoptspot		dnaPolPauseFrameshift		0.167		0.267		-0.214		0.334		1		0.122		1		0.511
-	deletionHoptspot		topoisomeraseCleavageSite	0.167		0.277		0.143		0.412		-0.667		0.243		1		0.521
-	deletionHoptspot		translinTarget			0		0.505		0.0714		0.441		1		0.124		1		0.518
-	insertionHoptspot		dnaPolPauseFrameshift		-0.202		0.238		0.143		0.379		-1		0.122		1		0.517
-	insertionHoptspot		topoisomeraseCleavageSite	-0.0336		0.457		0.214		0.29		0.667		0.252		1		0.503
-	insertionHoptspot		translinTarget			0.0672		0.389		0.429		0.186		-1		0.119		1		0.506
-	dnaPolPauseFrameshift		topoisomeraseCleavageSite	-0.353		0.101		0.357		0.228		0		0.612		-1		0.49
-	dnaPolPauseFrameshift		translinTarget			-0.151		0.303		-0.571		0.09		-0.333		0.37		-1		1
-	topoisomeraseCleavageSite	translinTarget			-0.37		0.077		-0.222		0.297		0.667		0.234		-1		0.471
-
-The second output file:
-
-.. image:: ./static/operation_icons/dwt_cor_aVb_all_1.png
-.. image:: ./static/operation_icons/dwt_cor_aVb_all_2.png
-.. image:: ./static/operation_icons/dwt_cor_aVb_all_3.png
-.. image:: ./static/operation_icons/dwt_cor_aVb_all_4.png
-.. image:: ./static/operation_icons/dwt_cor_aVb_all_5.png
-.. image:: ./static/operation_icons/dwt_cor_aVb_all_6.png
-.. image:: ./static/operation_icons/dwt_cor_aVb_all_7.png
-.. image:: ./static/operation_icons/dwt_cor_aVb_all_8.png
-.. image:: ./static/operation_icons/dwt_cor_aVb_all_9.png
-.. image:: ./static/operation_icons/dwt_cor_aVb_all_10.png
-
-
-  </help>  
-  
-</tool>
--- a/tools/discreteWavelet/execute_dwt_var_perClass.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,320 +0,0 @@
-#!/usr/bin/perl -w
-
-use warnings;
-use IO::Handle;
-use POSIX qw(floor ceil);
-
-# example: perl execute_dwt_var_perClass.pl hg18_NCNR_10bp_3flanks_deletionHotspot_data_del.txt deletionHotspot 3flanks del
-
-$usage = "execute_dwt_var_perClass.pl [TABULAR.in] [TABULAR.out] [TABULAR.out] [PDF.out] \n";
-die $usage unless @ARGV == 4;
-
-#get the input arguments
-my $inputFile = $ARGV[0];
-my $firstOutputFile = $ARGV[1];
-my $secondOutputFile = $ARGV[2];
-my $thirdOutputFile = $ARGV[3];
-
-open (INPUT, "<", $inputFile) || die("Could not open file $inputFile \n");
-open (OUTPUT1, ">", $firstOutputFile) || die("Could not open file $firstOutputFile \n");
-open (OUTPUT2, ">", $secondOutputFile) || die("Could not open file $secondOutputFile \n");
-open (OUTPUT3, ">", $thirdOutputFile) || die("Could not open file $thirdOutputFile \n");
-open (ERROR,  ">", "error.txt")  or die ("Could not open file error.txt \n");
-
-#save all error messages into the error file error.txt using the error file handle ERROR
-STDERR -> fdopen( \*ERROR,  "w" ) or die ("Could not direct errors to the error file error.txt \n");
-
-# choosing meaningful names for the output files
-$max_dwt = $firstOutputFile; 
-$pvalue = $secondOutputFile; 
-$pdf = $thirdOutputFile; 
-
-# count the number of columns in the input file
-while($buffer = <INPUT>){
-	#if ($buffer =~ m/interval/){
-		chomp($buffer);
-		$buffer =~ s/^#\s*//;
-		@contrl = split(/\t/, $buffer);
-		last;
-	#}
-}
-print "The number of columns in the input file is: " . (@contrl) . "\n";
-print "\n";
-
-# count the number of motifs in the input file
-$count = 0;
-for ($i = 0; $i < @contrl; $i++){
-	$count++;
-	print "# $contrl[$i]\n";
-}
-print "The number of motifs in the input file is:  $count \n";
-
-# check if the number of motifs is not a multiple of 12, and round up if so
-$count2 = ($count/12);
-if ($count2 =~ m/(\D)/){
-	print "the number of motifs is not a multiple of 12 \n";
-	$count2 = ceil($count2);
-}
-else {
-	print "the number of motifs is a multiple of 12 \n";
-}
-print "There will be $count2 subfiles\n\n";
-
-# split infile into subfiles only 12 motif per file for R plotting
-for ($x = 1; $x <= $count2; $x++){
-	$a = (($x - 1) * 12 + 1);
-	$b = $x * 12;
-	
-	if ($x < $count2){
-		print "# data.short $x <- data_test[, +c($a:$b)]; \n"; 
-	}
-	else{
-		print "# data.short $x <- data_test[, +c($a:ncol(data_test))]; \n";
-	}
-}
-
-print "\n";
-print "There are 4 output files: \n";
-print "The first output file is a pdf file\n";
-print "The second output file is a max_dwt file\n";
-print "The third output file is a pvalues file\n";
-print "The fourth output file is a test_final_pvalues file\n";
-
-# write R script
-$r_script = "get_dwt_varPermut_getMax.r"; 
-print "The R file name is: $r_script \n";
-
-open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n";
-
-print Rcmd "
-	######################################################################
-	# plot power spectra, i.e. wavelet variance by class
-	# add code to create null bands by permuting the original data series
-	# get class of maximum significant variance per feature
-	# generate plots and table matrix of variance including p-values
-	######################################################################
-	library(\"Rwave\");
-	library(\"wavethresh\");
-	library(\"waveslim\");
-
-	options(echo = FALSE)
-
-	# normalize data
-	norm <- function(data){
-		v <- (data-mean(data))/sd(data);
-    	if(sum(is.na(v)) >= 1){
-    		v<-data;
-    	}
-    	return(v);
-	}
-
-	dwt_var_permut_getMax <- function(data, names, filter = 4, bc = \"symmetric\", method = \"kendall\", wf = \"haar\", boundary = \"reflection\") {
-		max_var = NULL;
-    	matrix = NULL;
-		title = NULL;
-    	final_pvalue = NULL;
-		short.levels = NULL;
-		scale = NULL;
-	
-    	print(names);
-    	
-   	 	par(mfcol = c(length(names), length(names)), mar = c(0, 0, 0, 0), oma = c(4, 3, 3, 2), xaxt = \"s\", cex = 1, las = 1);
-   	 	
-    	short.levels <- wd(data[, 1], filter.number = filter, bc = bc)\$nlevels;
-    	
-    	title <- c(\"motif\");
-    	for (i in 1:short.levels){
-    		title <- c(title, paste(i, \"var\", sep = \"_\"), paste(i, \"pval\", sep = \"_\"), paste(i, \"test\", sep = \"_\"));
-    	}
-    	print(title);
-        
-		# normalize the raw data
-    	data<-apply(data,2,norm);
-
-    	for(i in 1:length(names)){
-    		for(j in 1:length(names)){
-				temp = NULL;
-				results = NULL;
-				wave1.dwt = NULL;
-				out = NULL;
-				
-				out <- vector(length = length(title));
-            	temp <- vector(length = short.levels);
-            	
-            	if(i < j) {
-            		plot(temp, type = \"n\", axes = FALSE, xlab = NA, ylab = NA);
-                	box(col = \"grey\"); 
-                	grid(ny = 0, nx = NULL);
-            	} else {
-            		if (i > j){
-                		plot(temp, type = \"n\", axes = FALSE, xlab = NA, ylab = NA);
-                    	box(col = \"grey\"); 
-                    	grid(ny = 0, nx = NULL);
-                 	} else {
-                 	
-                 		wave1.dwt <- dwt(data[, i], wf = wf, short.levels, boundary = boundary); 
-                		
-                		temp_row = (short.levels + 1 ) * -1;
-                		temp_col = 1;
-                    	temp <- wave.variance(wave1.dwt)[temp_row, temp_col];
-
-                    	#permutations code :
-                    	feature1 = NULL;
-						null = NULL;
-						var_25 = NULL;
-						var_975 = NULL;
-						med = NULL;
-
-                    	feature1 = data[, i];
-                    	for (k in 1:1000) {
-							nk_1 = NULL;
-							null.levels = NULL;
-							var = NULL;
-							null_wave1 = NULL;
-
-                        	nk_1 = sample(feature1, length(feature1), replace = FALSE);
-                        	null.levels <- wd(nk_1, filter.number = filter, bc = bc)\$nlevels;
-                        	var <- vector(length = length(null.levels));
-                        	null_wave1 <- dwt(nk_1, wf = wf, short.levels, boundary = boundary);
-                        	var <- wave.variance(null_wave1)[-(null.levels + 1), 1]; # drop the scaling row (was hard-coded as -8)
-                        	null= rbind(null, var);
-                    	}
-                    	null <- apply(null, 2, sort, na.last = TRUE);
-                    	var_25 <- null[25, ];
-                    	var_975 <- null[975, ];
-                    	med <- (apply(null, 2, median, na.rm = TRUE));
-
-                    	# plot
-                    	results <- cbind(temp, var_25, var_975);
-                    	matplot(results, type = \"b\", pch = \"*\", lty = 1, col = c(1, 2, 2), axes = F);
-
-                    	# get pvalues by comparison to null distribution
-                    	out <- (names[i]);
-                    	for (m in 1:length(temp)){
-                    		print(paste(\"scale\", m, sep = \" \"));
-                        	print(paste(\"var\", temp[m], sep = \" \"));
-                        	print(paste(\"med\", med[m], sep = \" \"));
-                        	pv = tail = NULL;
-							out <- c(out, format(temp[m], digits = 3));	
-                        	if (temp[m] >= med[m]){
-                        		# R tail test
-                            	print(\"R\");
-	                        	tail <- \"R\";
-                            	pv <- (length(which(null[, m] >= temp[m])))/(length(na.exclude(null[, m])));
-
-                        	} else {
-                        		if (temp[m] < med[m]){
-                                	# L tail test
-                                	print(\"L\");
-	                            	tail <- \"L\";
-                                	pv <- (length(which(null[, m] <= temp[m])))/(length(na.exclude(null[, m])));
-                        		}
-							}
-							out <- c(out, pv);
-							print(pv);
-							out <- c(out, tail);
-                    	}
-                    	final_pvalue <-rbind(final_pvalue, out);
-                 	
-                 
-                    	# get variances outside null bands by comparing temp to null
-                    	## temp stores variance for each scale, and null stores permuted variances for null bands
-                    	for (n in 1:length(temp)){
-                    		if (temp[n] <= var_975[n]){
-                        		temp[n] <- NA;
-                        	} else {
-                        		temp[n] <- temp[n];
-                        	}
-                    	}
-                    	matrix <- rbind(matrix, temp)
-            		}
-            	}
-	        	# labels
-	        	if (i == 1){
-	        		mtext(names[j], side = 2, line = 0.5, las = 3, cex = 0.25);
-	        	}
-	        	if (j == 1){
-	        		mtext(names[i], side = 3, line = 0.5, cex = 0.25);
-	        	}
-	        	if (j == length(names)){
-	        		axis(1, at = (1:short.levels), las = 3, cex.axis = 0.5);
-	        	}
-    		}
-    	}
-		colnames(final_pvalue) <- title;
-    	#write.table(final_pvalue, file = \"test_final_pvalue.txt\", sep = \"\\t\", quote = FALSE, row.names = FALSE, append = TRUE);
-
-		# get maximum variance larger than expectation by comparison to null bands
-    	varnames <- vector();
-    	for(i in 1:length(names)){
-    		name1 = paste(names[i], \"var\", sep = \"_\")
-        	varnames <- c(varnames, name1)
-    	}
-   		rownames(matrix) <- varnames;
-    	colnames(matrix) <- (1:short.levels);
-    	max_var <- names;
-    	scale <- vector(length = length(names));
-    	for (x in 1:nrow(matrix)){
-        	if (length(which.max(matrix[x, ])) == 0){
-            	scale[x] <- NA;
-        	}
-        	else{
-        		scale[x] <- colnames(matrix)[which.max(matrix[x, ])];
-        	}
-    	}
-    	max_var <- cbind(max_var, scale);
-    	write.table(max_var, file = \"$max_dwt\", sep = \"\\t\", quote = FALSE, row.names = FALSE, append = TRUE);
-    	return(final_pvalue);
-	}\n";
-
-print Rcmd "
-	# execute
-	# read in data 
-	
-	data_test = NULL;
-	data_test <- read.delim(\"$inputFile\");
-	
-	pdf(file = \"$pdf\", width = 11, height = 8);
-	
-	# loop to read and execute on all $count2 subfiles
-	final = NULL;
-	for (x in 1:$count2){
-		sub = NULL;
-		sub_names = NULL;
-		a = NULL;
-		b = NULL;
-		
-    	a = ((x - 1) * 12 + 1);
-    	b = x * 12;
-    
-    	if (x < $count2){
-    		sub <- data_test[, +c(a:b)];
-			sub_names <- colnames(data_test)[a:b];
-			final <- rbind(final, dwt_var_permut_getMax(sub, sub_names));
-    	}
-    	else{
-    		sub <- data_test[, +c(a:ncol(data_test))];
-			sub_names <- colnames(data_test)[a:ncol(data_test)];
-			final <- rbind(final, dwt_var_permut_getMax(sub, sub_names));
-			
-    	}
-	}
-
-	dev.off();
-
-	write.table(final, file = \"$pvalue\", sep = \"\\t\", quote = FALSE, row.names = FALSE);
-
-	#eof\n";
-
-close Rcmd;
-
-system("echo \"wavelet ANOVA started on \`hostname\` at \`date\`\"\n");
-system("R --no-restore --no-save --no-readline < $r_script > $r_script.out");
-system("echo \"wavelet ANOVA ended on \`hostname\` at \`date\`\"\n");
-
-#close the input and output and error files
-close(ERROR);
-close(OUTPUT3);
-close(OUTPUT2);
-close(OUTPUT1);
-close(INPUT);
\ No newline at end of file
--- a/tools/discreteWavelet/execute_dwt_var_perClass.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-<tool id="compute_p-values_max_variances_feature_occurrences_in_one_dataset_using_discrete_wavelet_transfom" name="Compute P-values and Max Variances for Feature Occurrences" version="1.0.0">
-  <description>in one dataset using Discrete Wavelet Transforms</description>
-  
-  <command interpreter="perl">
-  	execute_dwt_var_perClass.pl $inputFile $outputFile1 $outputFile2 $outputFile3
-  </command>
-  
-  <inputs>
-  	<param format="tabular" name="inputFile" type="data" label="Select the input file"/>	
-  </inputs>
-  
-  <outputs>
-    <data format="tabular" name="outputFile1"/> 
-    <data format="tabular" name="outputFile2"/>
-    <data format="pdf" name="outputFile3"/>
-  </outputs>
-  	
-  <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This program generates plots and computes a table matrix of maximum variances, p-values, and test orientations at multiple scales for the occurrences of a class of features in one dataset of DNA sequences, using a multiscale wavelet analysis technique.
-
-The program assumes that the user has one set of DNA sequences, S, consisting of one or more sequences of equal length. Each sequence in S is divided into the same number of intervals n, where n = 2^k and k is a positive integer (k >= 1). Thus, n can be any value in the set {2, 4, 8, 16, 32, 64, 128, ...}; k represents the number of scales.
-
-The program has one input file obtained as follows:
-
-For a given set of features, say motifs, the user counts the number of occurrences of each feature in each interval of each sequence in S, and builds a tabular file holding the counts for each interval of S. This tabular file is the program's input (a sketch of this step follows below).
-
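-For illustration only, here is a minimal R sketch of this counting step; the feature names, hit positions, and sizes are invented, and real counts would come from the user's own data::
-
-	# count invented feature occurrences in 16 equal intervals of one sequence
-	n.intervals = 16                                 # must be a power of 2
-	seq.len     = 16000
-	breaks      = seq(0, seq.len, length.out = n.intervals + 1)
-	hits        = list(motifA = sample(seq.len, 500),   # invented hit positions
-	                   motifB = sample(seq.len, 300))
-	counts      = sapply(hits, function(p) table(cut(p, breaks)))
-	write.table(counts, "dwt_input.txt", sep = "\t", quote = FALSE, row.names = FALSE)
-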
-The program gives three output files:
-
-- The first output file is a TABULAR format file giving the scale at which each feature has its maximum variance.
-- The second output file is a TABULAR format file giving the variances, p-values, and test orientations for the occurrences of features at each scale, based on a random permutation test using the multiscale wavelet analysis technique.
-- The third output file is a PDF file plotting the wavelet variances of each feature at each scale.
-
------
-
-.. class:: warningmark
-
-**Note**
-
-- If the number of features is greater than 12, the program divides each output file into subfiles, each holding the results for a group of 12 features, except the last subfile, which holds the results for the remaining features. For example, if the number of features is 17, the p-values file consists of two subfiles, the first for features 1-12 and the second for features 13-17. The PDF file likewise consists of two pages in this case.
-- To obtain empirical p-values, the program implements a random permutation test, so it gives slightly different results each time it is run on the same input file; the tail rule behind these p-values is sketched below.
-
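-For illustration only, the tail rule behind the p-values reduces to the following sketch, where obs is one observed per-scale variance and null.m holds its 1000 permuted counterparts (both names are invented for this sketch)::
-
-	# right-tail test if the observed variance sits above the null median,
-	# left-tail test otherwise; the p-value is the fraction of permuted
-	# variances at least as extreme as the observed one
-	if (obs >= median(null.m)) {
-		pv = mean(null.m >= obs)   # "R" tail: fraction of null at or above obs
-	} else {
-		pv = mean(obs >= null.m)   # "L" tail: fraction of null at or below obs
-	}
-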
------
-
-
-**Example**
-
-Counting the occurrences of 8 features (motifs) in 16 intervals (one line per interval) of a set of DNA sequences S gives the following tabular file::
-
-	deletionHoptspot	insertionHoptspot	dnaPolPauseFrameshift	indelHotspot	topoisomeraseCleavageSite	translinTarget		vDjRecombinationSignal		x-likeSite
-		226			403			416			221		1165			832				749			1056		
-		236			444			380			241		1223			746				782			1207	
-		242			496			391			195		1116			643				770			1219	
-		243			429			364			191		1118			694				783			1223	
-		244			410			371			236		1063			692				805			1233	
-		230			386			370			217		1087			657				787			1215	
-		275			404			402			214		1044			697				831			1188	
-		265			443			365			231		1086			694				782			1184	
-		255			390			354			246		1114			642				773			1176	
-		281			384			406			232		1102			719				787			1191	
-		263			459			369			251		1135			643				810			1215	
-		280			433			400			251		1159			701				777			1151	
-		278			385			382			231		1147			697				707			1161	
-		248			393			389			211		1162			723				759			1183	
-		251			403			385			246		1114			752				776			1153	
-		239			383			347			227		1172			759				789			1141	
-  
-We notice that the number of scales here is 4 because 16 = 2^4. Running the program on the above input file gives the following 3 output files:
-
-The first output file::
-
-	motifs			max_var	at scale
-	deletionHoptspot		NA
-	insertionHoptspot		NA
-	dnaPolPauseFrameshift		NA
-	indelHotspot			NA
-	topoisomeraseCleavageSite	3
-	translinTarget			NA
-	vDjRecombinationSignal		NA
-	x.likeSite			NA
-	
-The second output file::
-
-	motif				1_var		1_pval		1_test		2_var		2_pval		2_test		3_var		3_pval		3_test		4_var		4_pval		4_test
-	
-	deletionHoptspot		0.457		0.048		L		1.18		0.334		R		1.61		0.194		R		3.41		0.055		R
-	insertionHoptspot		0.556		0.109		L		1.34		0.272		R		1.59		0.223		R		2.02		0.157		R
-	dnaPolPauseFrameshift		1.42		0.089		R		0.66		0.331		L		0.421		0.305		L		0.121		0.268		L
-	indelHotspot			0.373		0.021		L		1.36		0.254		R		1.24		0.301		R		4.09		0.047		R
-	topoisomeraseCleavageSite	0.305		0.002		L		0.936		0.489		R		3.78		0.01		R		1.25		0.272		R
-	translinTarget			0.525		0.061		L		1.69		0.11		R		2.02		0.131		R		0.00891		0.069		L
-	vDjRecombinationSignal		0.68		0.138		L		0.957		0.46		R		2.35		0.071		R		1.03		0.357		R
-	x.likeSite			0.928		0.402		L		1.33		0.261		R		0.735		0.431		L		0.783		0.422		R
-
-The third output file:
-
-.. image:: ./static/operation_icons/dwt_var_perClass.png
-
-  </help>  
-  
-</tool>
--- a/tools/discreteWavelet/execute_dwt_var_perFeature.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,199 +0,0 @@
-#!/usr/bin/perl -w
-# Author: Erika Kvikstad
-
-use warnings;
-use IO::Handle;
-use POSIX qw(floor ceil);
-
-$usage = "execute_dwt_var_perFeature.pl [TABULAR.in] [FEATURE] [ALPHA] [TABULAR.out] [PDF.out] \n";
-die $usage unless @ARGV == 5;
-
-#get the input arguments
-my $inputFile = $ARGV[0];
-my @features = split(/,/,$ARGV[1]);
-my $features_count = scalar(@features);
-my $alpha = $ARGV[2];
-my $outFile1 = $ARGV[3];
-my $outFile2 = $ARGV[4];
-
-open (INPUT, "<", $inputFile) || die("Could not open file $inputFile \n");
-open (OUTPUT2, ">", $outFile1) || die("Could not open file $outFile1 \n");
-open (OUTPUT3, ">", $outFile2) || die("Could not open file $outFile2 \n");
-#open (ERROR,  ">", "error.txt")  or die ("Could not open file error.txt \n");
-
-# choosing meaningful names for the output files
-$pvalue = $outFile1; 
-$pdf = $outFile2; 
-
-# write R script
-$r_script = "get_dwt_varPermut.r"; 
-
-open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n";
-
-print Rcmd "
-	######################################################################
-	# plot multiscale wavelet variance 
-	# create null bands by permuting the original data series
-	# generate plots and table of wavelet variance including p-values
-	######################################################################
-	options(echo = FALSE)
-	#library(\"Rwave\");
-	#library(\"wavethresh\");
-	#library(\"waveslim\");
-	# diagnostics are turned off here (packages load quietly); turn them back on for de-bugging and functional tests
-	require(\"Rwave\",quietly=TRUE,warn.conflicts = FALSE);
-	require(\"wavethresh\",quietly=TRUE,warn.conflicts = FALSE);
-	require(\"waveslim\",quietly=TRUE,warn.conflicts = FALSE);
-	require(\"bitops\",quietly=TRUE,warn.conflicts = FALSE);
-
-	# to determine if data is properly formatted 2^N observations
-	is.power2<- function(x){x && !(bitAnd(x,x - 1));}
-
-	# dwt : discrete wavelet transform using Haar wavelet filter, simplest wavelet function but later can modify to let user-define the wavelet filter function
-	dwt_var_permut_getMax <- function(data, names, alpha, filter = 1,family=\"DaubExPhase\", bc = \"symmetric\", method = \"kendall\", wf = \"haar\", boundary = \"reflection\") {
-		max_var = NULL;
-    		matrix = NULL;
-		title = NULL;
-    		final_pvalue = NULL;
-		J = NULL;
-		scale = NULL;
-		out = NULL;
-	
-	print(class(data));	
-    	print(names);
-	print(alpha);
-    	
-	par(mar=c(5,4,4,3),oma = c(4, 4, 3, 2), xaxt = \"s\", cex = 1, las = 1);
-   
-	title<-c(\"Wavelet\",\"Variance\",\"Pvalue\",\"Test\");
-	print(title);
-
-    	for(i in 1:length(names)){
-		temp = NULL;
-		results = NULL;
-		wave1.dwt = NULL;
-	
-		# if data fails formatting check, do something
-				
-		print(is.numeric(as.matrix(data)[, i]));
-		if(!is.numeric(as.matrix(data)[, i]))
-			stop(\"data must be a numeric vector\");
-		
-		print(length(as.matrix(data)[, i]));
-		print(is.power2(length(as.matrix(data)[, i])));
-		if(!is.power2(length(as.matrix(data)[, i])))	
-			stop(\"data length must be a power of two\");
-
-
-    		J <- wd(as.matrix(data)[, i], filter.number = filter, family=family, bc = bc)\$nlevels;
-		print(J);
-            	temp <- vector(length = J);
-               	wave1.dwt <- dwt(as.matrix(data)[, i], wf = wf, J, boundary = boundary); 
-		#print(wave1.dwt);
-                		
-                temp <- wave.variance(wave1.dwt)[-(J+1), 1];
-		print(temp);
-
-                #permutations code :
-                feature1 = NULL;
-		null = NULL;
-		var_lower=limit_lower=NULL;
-		var_upper=limit_upper=NULL;
-		med = NULL;
-
-		limit_lower = alpha/2*1000;
-		print(limit_lower);
-		limit_upper = (1-alpha/2)*1000;
-		print(limit_upper);
-		
-		feature1 = as.matrix(data)[,i];
-                for (k in 1:1000) {
-			nk_1 = NULL;
-			null.levels = NULL;
-			var = NULL;
-			null_wave1 = NULL;
-
-                       	nk_1 = sample(feature1, length(feature1), replace = FALSE);
-                       	null.levels <- wd(nk_1, filter.number = filter,family=family ,bc = bc)\$nlevels;
-                       	var <- vector(length = length(null.levels));
-                       	null_wave1 <- dwt(nk_1, wf = wf, J, boundary = boundary);
-                       	var<- wave.variance(null_wave1)[-(null.levels+1), 1];
-                       	null= rbind(null, var);
-               }
-               null <- apply(null, 2, sort, na.last = TRUE);
-               var_lower <- null[limit_lower, ];
-               var_upper <- null[limit_upper, ];
-               med <- (apply(null, 2, median, na.rm = TRUE));
-
-               # plot
-               results <- cbind(temp, var_lower, var_upper);
-		print(results);
-                matplot(results, type = \"b\", pch = \"*\", lty = 1, col = c(1, 2, 2),xaxt='n',xlab=\"Wavelet Scale\",ylab=\"Wavelet variance\" );
-		mtext(names[i], side = 3, line = 0.5, cex = 1);
-		axis(1, at = 1:J , labels=c(2^(0:(J-1))), las = 3, cex.axis = 1);
-
-                # get pvalues by comparison to null distribution
-		#out <- (names[i]);
-                for (m in 1:length(temp)){
-                    	print(paste(\"scale\", m, sep = \" \"));
-                       	print(paste(\"var\", temp[m], sep = \" \"));
-                       	print(paste(\"med\", med[m], sep = \" \"));
-                       	pv = tail =scale = NULL;
-			scale=2^(m-1);
-			#out <- c(out, format(temp[m], digits = 3));	
-                       	if (temp[m] >= med[m]){
-                       		# R tail test
-                           	print(\"R\");
-	                       	tail <- \"R\";
-                            	pv <- (length(which(null[, m] >= temp[m])))/(length(na.exclude(null[, m])));
-
-                       	} else {
-                       		if (temp[m] < med[m]){
-                               		# L tail test
-                               		print(\"L\");
-	                            	tail <- \"L\";
-                                	pv <- (length(which(null[, m] <= temp[m])))/(length(na.exclude(null[, m])));
-                        	}
-			}
-			print(pv);
-			out<-rbind(out,c(paste(\"Scale\", scale, sep=\"_\"),format(temp[m], digits = 3),pv,tail));
-                }
-		final_pvalue <-rbind(final_pvalue, out);
-  	}
-	colnames(final_pvalue) <- title;
-    	return(final_pvalue);
-}\n";
-
-print Rcmd "
-# execute
-# read in data 
-data_test = final = NULL;
-sub = sub_names = NULL;
-data_test <- read.delim(\"$inputFile\",header=FALSE);
-pdf(file = \"$pdf\", width = 11, height = 8)\n";
-
-for ($x=0;$x<$features_count;$x++){	
-	$feature=$features[$x];
-print Rcmd "
-	if ($feature > ncol(data_test))
-		stop(\"column $feature doesn't exist\");	
-	sub<-data_test[,$feature];
-	#sub_names <- colnames(data_test);
-	sub_names<-colnames(data_test)[$feature];
-	final <- rbind(final,dwt_var_permut_getMax(sub, sub_names,$alpha));\n";
-}
-
-print Rcmd "
-
-	dev.off();
-	write.table(final, file = \"$pvalue\", sep = \"\\t\", quote = FALSE, row.names = FALSE);
-
-#eof\n";
-
-close Rcmd;
-system("R --no-restore --no-save --no-readline < $r_script > $r_script.out");
-
-#close the input and output and error files
-close(OUTPUT3);
-close(OUTPUT2);
-close(INPUT);
--- a/tools/discreteWavelet/execute_dwt_var_perFeature.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-<tool id="dwt_var1" name="Wavelet variance" version="1.0.0">
-  <description>using Discrete Wavelet Transforms</description>
-  
-  <command interpreter="perl">
-  	execute_dwt_var_perFeature.pl $inputFile $feature $alpha $outputFile1 $outputFile2
-  </command>
-  
-  <inputs>
-  	<param format="tabular" name="inputFile" type="data" label="Select data"/>	
-	<param name="feature" label="Feature column" type="data_column" data_ref="inputFile" multiple="true" help="Please select at least one column"/>
-	<param name="alpha" size="10" type="float" value="0.05" label="alpha (significance level)" />
-  </inputs>
-  
-  <outputs>
-    <data format="tabular" name="outputFile1"/>
-    <data format="pdf" name="outputFile2"/>
-  </outputs>
-  <tests>
-    <test>
-	<param name="inputFile" value="discreteWavelet/dwt_var1/dwt_var_in.interval"/>
-	<param name="feature" value="4"/>
-	<param name="alpha" value="0.05"/>
-	<output name="outputFile1" file="discreteWavelet/dwt_var1/dwt_var_out1.tabular" compare="re_match"/>
-	<output name="outputFile2" file="discreteWavelet/dwt_var1/dwt_var_out2.pdf" compare="sim_size"/>
-    </test>
-  </tests>	    
-  	
-  <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This tool computes the scale-specific variance in wavelet coefficients obtained from the discrete wavelet transform of a feature of interest.
-
-Input data consist of an ordered, equispaced series S of sample size N, where N = 2^k and the positive integer k is the number of levels of wavelet decomposition. S could be a time series, or a set of DNA sequences. The user calculates a statistic of interest for each feature in each interval of S: say, the expression level of a particular gene in a time course, or the number of LINE elements per window across a chromosome. This tool then performs a discrete wavelet transform of the feature of interest and plots the resulting variance in wavelet coefficients per wavelet scale. In addition, the statistical significance of the variances is determined by 1,000 random permutations of the intervals in S, which generate null bands (at the user-provided alpha value) corresponding to the empirical distribution of wavelet variances under the null hypothesis of no inherent order to the series in S. A minimal sketch of this computation appears under the Example section below.
-
-This tool generates two output files:
-
-- The first output file is a TABULAR format file giving the variances, p-values, and test orientations for the features at each wavelet scale, based on a random permutation test.
-- The second output file is a PDF image plotting the wavelet variances of each feature at each scale.
-
------
-
-.. class:: warningmark
-
-**Note**
-
-To obtain empirical p-values, the tool implements a random permutation scheme, so results may vary slightly each time it is run on the same input file.
-
------
-
-
-**Example**
-
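-As a minimal sketch of what the tool computes internally (for illustration only: it assumes the waveslim package, and the series x is invented random data rather than real feature counts)::
-
-	# wavelet variance with permutation null bands, mirroring the tool's R code
-	require("waveslim", quietly = TRUE)
-	x   = rnorm(128)              # series length must be 2^k (here k = 7)
-	J   = log2(length(x))         # number of decomposition levels
-	obs = wave.variance(dwt(x, wf = "haar", J, boundary = "reflection"))[1:J, 1]
-	# empirical null: wavelet variances of 1000 random permutations of x
-	null = t(replicate(1000, wave.variance(dwt(sample(x), wf = "haar", J,
-	             boundary = "reflection"))[1:J, 1]))
-	alpha = 0.05
-	bands = apply(null, 2, quantile, probs = c(alpha/2, 1 - alpha/2))
-	# per-scale empirical p-value, using the same tail rule as the tool
-	pv = sapply(1:J, function(m)
-		if (obs[m] >= median(null[, m])) mean(null[, m] >= obs[m])
-		else mean(obs[m] >= null[, m]))
-
-The tool's tabular output reports the same quantities per scale, under the column headings Wavelet, Variance, Pvalue, and Test.
-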
-  </help>  
-  
-</tool>
--- a/tools/emboss_5/emboss_antigenic.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-<tool id="EMBOSS: antigenic1" name="antigenic" version="5.0.0">
-  <description>Predicts potentially antigenic regions of a protein sequence, using the method of Kolaskar and Tongaonkar.</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>antigenic -sequence $input1 -outfile $out_file1 -minlen $minlen -rformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="minlen" size="4" type="text" value="6">
-      <label>Minimum Length of region</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output format</label>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">diffseq</option>
-      <option value="excel">Excel (TAB Delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="nametable">NameTable</option>
-      <option value="regions">Regions</option>
-      <option value="seqtable">SeqTable</option>
-      <option value="simple">SRS simple</option>
-      <option value="srs">SRS</option>
-      <option value="table">Table</option>
-      <option value="tagseq">Tagseq</option>
-      <option value="antigenic">Antigenic Output File</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="antigenic" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="minlen" value="6"/>
-      <param name="out_format1" value="excel"/>
-      <output name="out_file1" file="emboss_antigenic_out.tabular"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/antigenic.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_backtranseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,220 +0,0 @@
-<tool id="EMBOSS: backtranseq2" name="backtranseq" version="5.0.0">
-  <description>Back translate a protein sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>backtranseq -sequence $input1 -outfile $out_file1 -cfile $cfile -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="cfile" type="select">
-      <label>Codon Usage File</label>
-      <option value="Ehum.cut">Ehum.cut</option>
-      <option value="Eacc.cut">Eacc.cut</option>
-      <option value="Eadenovirus5.cut">Eadenovirus5.cut</option>
-      <option value="Eadenovirus7.cut">Eadenovirus7.cut</option>
-      <option value="Eaidlav.cut">Eaidlav.cut</option>
-      <option value="Eanasp.cut">Eanasp.cut</option>
-      <option value="Eani.cut">Eani.cut</option>
-      <option value="Eani_h.cut">Eani_h.cut</option>
-      <option value="Eanidmit.cut">Eanidmit.cut</option>
-      <option value="Easn.cut">Easn.cut</option>
-      <option value="Eath.cut">Eath.cut</option>
-      <option value="Eatu.cut">Eatu.cut</option>
-      <option value="Eavi.cut">Eavi.cut</option>
-      <option value="Ebja.cut">Ebja.cut</option>
-      <option value="Ebly.cut">Ebly.cut</option>
-      <option value="Ebme.cut">Ebme.cut</option>
-      <option value="Ebmo.cut">Ebmo.cut</option>
-      <option value="Ebna.cut">Ebna.cut</option>
-      <option value="Ebov.cut">Ebov.cut</option>
-      <option value="Ebovsp.cut">Ebovsp.cut</option>
-      <option value="Ebst.cut">Ebst.cut</option>
-      <option value="Ebsu.cut">Ebsu.cut</option>
-      <option value="Ebsu_h.cut">Ebsu_h.cut</option>
-      <option value="Ecac.cut">Ecac.cut</option>
-      <option value="Ecal.cut">Ecal.cut</option>
-      <option value="Eccr.cut">Eccr.cut</option>
-      <option value="Ecel.cut">Ecel.cut</option>
-      <option value="Echi.cut">Echi.cut</option>
-      <option value="Echicken.cut">Echicken.cut</option>
-      <option value="Echisp.cut">Echisp.cut</option>
-      <option value="Echk.cut">Echk.cut</option>
-      <option value="Echmp.cut">Echmp.cut</option>
-      <option value="Echnt.cut">Echnt.cut</option>
-      <option value="Echos.cut">Echos.cut</option>
-      <option value="Echzm.cut">Echzm.cut</option>
-      <option value="Echzmrubp.cut">Echzmrubp.cut</option>
-      <option value="Ecpx.cut">Ecpx.cut</option>
-      <option value="Ecre.cut">Ecre.cut</option>
-      <option value="Ecrisp.cut">Ecrisp.cut</option>
-      <option value="Ectr.cut">Ectr.cut</option>
-      <option value="Edayhoff.cut">Edayhoff.cut</option>
-      <option value="Eddi.cut">Eddi.cut</option>
-      <option value="Eddi_h.cut">Eddi_h.cut</option>
-      <option value="Edog.cut">Edog.cut</option>
-      <option value="Edro.cut">Edro.cut</option>
-      <option value="Edro_h.cut">Edro_h.cut</option>
-      <option value="Edrosophila.cut">Edrosophila.cut</option>
-      <option value="Eeca.cut">Eeca.cut</option>
-      <option value="Eeco.cut">Eeco.cut</option>
-      <option value="Eeco_h.cut">Eeco_h.cut</option>
-      <option value="Eecoli.cut">Eecoli.cut</option>
-      <option value="Ef1.cut">Ef1.cut</option>
-      <option value="Efish.cut">Efish.cut</option>
-      <option value="Efmdvpolyp.cut">Efmdvpolyp.cut</option>
-      <option value="Eham.cut">Eham.cut</option>
-      <option value="Ehha.cut">Ehha.cut</option>
-      <option value="Ehin.cut">Ehin.cut</option>
-      <option value="Ehma.cut">Ehma.cut</option>
-      <option value="Ehuman.cut">Ehuman.cut</option>
-      <option value="Ekla.cut">Ekla.cut</option>
-      <option value="Ekpn.cut">Ekpn.cut</option>
-      <option value="Ella.cut">Ella.cut</option>
-      <option value="Emac.cut">Emac.cut</option>
-      <option value="Emaize.cut">Emaize.cut</option>
-      <option value="Emam_h.cut">Emam_h.cut</option>
-      <option value="Emixlg.cut">Emixlg.cut</option>
-      <option value="Emouse.cut">Emouse.cut</option>
-      <option value="Emsa.cut">Emsa.cut</option>
-      <option value="Emse.cut">Emse.cut</option>
-      <option value="Emta.cut">Emta.cut</option>
-      <option value="Emtu.cut">Emtu.cut</option>
-      <option value="Emus.cut">Emus.cut</option>
-      <option value="Emussp.cut">Emussp.cut</option>
-      <option value="Emva.cut">Emva.cut</option>
-      <option value="Emze.cut">Emze.cut</option>
-      <option value="Emzecp.cut">Emzecp.cut</option>
-      <option value="Encr.cut">Encr.cut</option>
-      <option value="Eneu.cut">Eneu.cut</option>
-      <option value="Engo.cut">Engo.cut</option>
-      <option value="Eoncsp.cut">Eoncsp.cut</option>
-      <option value="Epae.cut">Epae.cut</option>
-      <option value="Epea.cut">Epea.cut</option>
-      <option value="Epet.cut">Epet.cut</option>
-      <option value="Epfa.cut">Epfa.cut</option>
-      <option value="Ephix174.cut">Ephix174.cut</option>
-      <option value="Ephv.cut">Ephv.cut</option>
-      <option value="Ephy.cut">Ephy.cut</option>
-      <option value="Epig.cut">Epig.cut</option>
-      <option value="Epolyomaa2.cut">Epolyomaa2.cut</option>
-      <option value="Epombe.cut">Epombe.cut</option>
-      <option value="Epombecai.cut">Epombecai.cut</option>
-      <option value="Epot.cut">Epot.cut</option>
-      <option value="Eppu.cut">Eppu.cut</option>
-      <option value="Epse.cut">Epse.cut</option>
-      <option value="Epsy.cut">Epsy.cut</option>
-      <option value="Epvu.cut">Epvu.cut</option>
-      <option value="Erab.cut">Erab.cut</option>
-      <option value="Erabbit.cut">Erabbit.cut</option>
-      <option value="Erabsp.cut">Erabsp.cut</option>
-      <option value="Erat.cut">Erat.cut</option>
-      <option value="Eratsp.cut">Eratsp.cut</option>
-      <option value="Erca.cut">Erca.cut</option>
-      <option value="Erhm.cut">Erhm.cut</option>
-      <option value="Eric.cut">Eric.cut</option>
-      <option value="Erle.cut">Erle.cut</option>
-      <option value="Erme.cut">Erme.cut</option>
-      <option value="Ersp.cut">Ersp.cut</option>
-      <option value="Esalsp.cut">Esalsp.cut</option>
-      <option value="Esau.cut">Esau.cut</option>
-      <option value="Esco.cut">Esco.cut</option>
-      <option value="Esgi.cut">Esgi.cut</option>
-      <option value="Eshp.cut">Eshp.cut</option>
-      <option value="Eshpsp.cut">Eshpsp.cut</option>
-      <option value="Esli.cut">Esli.cut</option>
-      <option value="Eslm.cut">Eslm.cut</option>
-      <option value="Esma.cut">Esma.cut</option>
-      <option value="Esmi.cut">Esmi.cut</option>
-      <option value="Esmu.cut">Esmu.cut</option>
-      <option value="Esoy.cut">Esoy.cut</option>
-      <option value="Espi.cut">Espi.cut</option>
-      <option value="Espn.cut">Espn.cut</option>
-      <option value="Espo.cut">Espo.cut</option>
-      <option value="Espo_h.cut">Espo_h.cut</option>
-      <option value="Espu.cut">Espu.cut</option>
-      <option value="Esta.cut">Esta.cut</option>
-      <option value="Esty.cut">Esty.cut</option>
-      <option value="Esus.cut">Esus.cut</option>
-      <option value="Esv40.cut">Esv40.cut</option>
-      <option value="Esyhsp.cut">Esyhsp.cut</option>
-      <option value="Esynsp.cut">Esynsp.cut</option>
-      <option value="Etbr.cut">Etbr.cut</option>
-      <option value="Etcr.cut">Etcr.cut</option>
-      <option value="Eter.cut">Eter.cut</option>
-      <option value="Etetsp.cut">Etetsp.cut</option>
-      <option value="Etob.cut">Etob.cut</option>
-      <option value="Etobcp.cut">Etobcp.cut</option>
-      <option value="Etom.cut">Etom.cut</option>
-      <option value="Etrb.cut">Etrb.cut</option>
-      <option value="Evco.cut">Evco.cut</option>
-      <option value="Ewht.cut">Ewht.cut</option>
-      <option value="Exel.cut">Exel.cut</option>
-      <option value="Exenopus.cut">Exenopus.cut</option>
-      <option value="Eyeast.cut">Eyeast.cut</option>
-      <option value="Eyeastcai.cut">Eyeastcai.cut</option>
-      <option value="Eyen.cut">Eyen.cut</option>
-      <option value="Eysc.cut">Eysc.cut</option>
-      <option value="Eysc_h.cut">Eysc_h.cut</option>
-      <option value="Eyscmt.cut">Eyscmt.cut</option>
-      <option value="Eysp.cut">Eysp.cut</option>
-      <option value="Ezebrafish.cut">Ezebrafish.cut</option>
-      <option value="Ezma.cut">Ezma.cut</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwissProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="txt" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="cfile" value="Ehum.cut"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_backtranseq_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark 
-
-The input dataset needs to be sequences. 
-
------ 
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/backtranseq.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_banana.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-#! /usr/bin/perl -w
-use strict;
-
-my $cmd_string = join (" ",@ARGV);
-#my $cmd_string = "/home/djb396/temp/emboss/bin/banana -sequence /home/djb396/universe-prototype/test.fasta -outfile result.txt -graph png -goutfile results -auto";
-my $results = `$cmd_string`;
-my @files = split("\n",$results);
-foreach my $thisLine (@files)
-{
-	if ($thisLine =~ /Created /i)
-	{
-		# capture the created file name: the trailing run of word/dot characters
-		$thisLine =~ /[\w.]+$/;
-		$thisLine = $&;
-		print "outfile: $thisLine\n";
-	}
-}
--- a/tools/emboss_5/emboss_banana.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-<tool id="EMBOSS: banana3" name="banana" version="5.0.0">
-  <description>Bending and curvature plot in B-DNA</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>banana -sequence $input1 -outfile $out_file1 -graph none -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="txt" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <output name="out_file1" file="emboss_banana_out.txt"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/banana.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_biosed.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-<tool id="EMBOSS: biosed4" name="biosed" version="5.0.0">
-  <description>Replace or delete sequence sections</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>biosed -sequence $input1 -outseq $out_file1 -target $target -replace $replace -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="target" size="6" type="text" value="atg">
-      <label>Replace all</label>
-    </param>
-    <param name="replace" size="6" type="text" value="atg">
-      <label>with</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwissProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="txt" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="target" value="atg"/>
-      <param name="replace" value="agt"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_biosed_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark 
-
-The input dataset needs to be sequences. 
-
------ 
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/biosed.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_btwisted.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-<tool id="EMBOSS: btwisted5" name="btwisted" version="5.0.0">
-  <description>Calculates the twisting in a B-DNA sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>btwisted -sequence $input1 -outfile $out_file1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="btwisted" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <output name="out_file1" file="emboss_btwisted_out.btwisted"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/btwisted.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_cai.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,185 +0,0 @@
-<tool id="EMBOSS: cai6" name="cai" version="5.0.0">
-  <description>CAI codon adaptation index</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>cai -seqall $input1 -outfile $out_file1 -cfile $cfile -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="cfile" type="select">
-      <label>Codon Usage File</label>
-      <option value="Eyeastcai.cut">Eyeastcai.cut</option>
-      <option value="Ehum.cut">Ehum.cut</option>
-      <option value="Eacc.cut">Eacc.cut</option>
-      <option value="Eadenovirus5.cut">Eadenovirus5.cut</option>
-      <option value="Eadenovirus7.cut">Eadenovirus7.cut</option>
-      <option value="Eaidlav.cut">Eaidlav.cut</option>
-      <option value="Eanasp.cut">Eanasp.cut</option>
-      <option value="Eani.cut">Eani.cut</option>
-      <option value="Eani_h.cut">Eani_h.cut</option>
-      <option value="Eanidmit.cut">Eanidmit.cut</option>
-      <option value="Easn.cut">Easn.cut</option>
-      <option value="Eath.cut">Eath.cut</option>
-      <option value="Eatu.cut">Eatu.cut</option>
-      <option value="Eavi.cut">Eavi.cut</option>
-      <option value="Ebja.cut">Ebja.cut</option>
-      <option value="Ebly.cut">Ebly.cut</option>
-      <option value="Ebme.cut">Ebme.cut</option>
-      <option value="Ebmo.cut">Ebmo.cut</option>
-      <option value="Ebna.cut">Ebna.cut</option>
-      <option value="Ebov.cut">Ebov.cut</option>
-      <option value="Ebovsp.cut">Ebovsp.cut</option>
-      <option value="Ebst.cut">Ebst.cut</option>
-      <option value="Ebsu.cut">Ebsu.cut</option>
-      <option value="Ebsu_h.cut">Ebsu_h.cut</option>
-      <option value="Ecac.cut">Ecac.cut</option>
-      <option value="Ecal.cut">Ecal.cut</option>
-      <option value="Eccr.cut">Eccr.cut</option>
-      <option value="Ecel.cut">Ecel.cut</option>
-      <option value="Echi.cut">Echi.cut</option>
-      <option value="Echicken.cut">Echicken.cut</option>
-      <option value="Echisp.cut">Echisp.cut</option>
-      <option value="Echk.cut">Echk.cut</option>
-      <option value="Echmp.cut">Echmp.cut</option>
-      <option value="Echnt.cut">Echnt.cut</option>
-      <option value="Echos.cut">Echos.cut</option>
-      <option value="Echzm.cut">Echzm.cut</option>
-      <option value="Echzmrubp.cut">Echzmrubp.cut</option>
-      <option value="Ecpx.cut">Ecpx.cut</option>
-      <option value="Ecre.cut">Ecre.cut</option>
-      <option value="Ecrisp.cut">Ecrisp.cut</option>
-      <option value="Ectr.cut">Ectr.cut</option>
-      <option value="Edayhoff.cut">Edayhoff.cut</option>
-      <option value="Eddi.cut">Eddi.cut</option>
-      <option value="Eddi_h.cut">Eddi_h.cut</option>
-      <option value="Edog.cut">Edog.cut</option>
-      <option value="Edro.cut">Edro.cut</option>
-      <option value="Edro_h.cut">Edro_h.cut</option>
-      <option value="Edrosophila.cut">Edrosophila.cut</option>
-      <option value="Eeca.cut">Eeca.cut</option>
-      <option value="Eeco.cut">Eeco.cut</option>
-      <option value="Eeco_h.cut">Eeco_h.cut</option>
-      <option value="Eecoli.cut">Eecoli.cut</option>
-      <option value="Ef1.cut">Ef1.cut</option>
-      <option value="Efish.cut">Efish.cut</option>
-      <option value="Efmdvpolyp.cut">Efmdvpolyp.cut</option>
-      <option value="Eham.cut">Eham.cut</option>
-      <option value="Ehha.cut">Ehha.cut</option>
-      <option value="Ehin.cut">Ehin.cut</option>
-      <option value="Ehma.cut">Ehma.cut</option>
-      <option value="Ehuman.cut">Ehuman.cut</option>
-      <option value="Ekla.cut">Ekla.cut</option>
-      <option value="Ekpn.cut">Ekpn.cut</option>
-      <option value="Ella.cut">Ella.cut</option>
-      <option value="Emac.cut">Emac.cut</option>
-      <option value="Emaize.cut">Emaize.cut</option>
-      <option value="Emam_h.cut">Emam_h.cut</option>
-      <option value="Emixlg.cut">Emixlg.cut</option>
-      <option value="Emouse.cut">Emouse.cut</option>
-      <option value="Emsa.cut">Emsa.cut</option>
-      <option value="Emse.cut">Emse.cut</option>
-      <option value="Emta.cut">Emta.cut</option>
-      <option value="Emtu.cut">Emtu.cut</option>
-      <option value="Emus.cut">Emus.cut</option>
-      <option value="Emussp.cut">Emussp.cut</option>
-      <option value="Emva.cut">Emva.cut</option>
-      <option value="Emze.cut">Emze.cut</option>
-      <option value="Emzecp.cut">Emzecp.cut</option>
-      <option value="Encr.cut">Encr.cut</option>
-      <option value="Eneu.cut">Eneu.cut</option>
-      <option value="Engo.cut">Engo.cut</option>
-      <option value="Eoncsp.cut">Eoncsp.cut</option>
-      <option value="Epae.cut">Epae.cut</option>
-      <option value="Epea.cut">Epea.cut</option>
-      <option value="Epet.cut">Epet.cut</option>
-      <option value="Epfa.cut">Epfa.cut</option>
-      <option value="Ephix174.cut">Ephix174.cut</option>
-      <option value="Ephv.cut">Ephv.cut</option>
-      <option value="Ephy.cut">Ephy.cut</option>
-      <option value="Epig.cut">Epig.cut</option>
-      <option value="Epolyomaa2.cut">Epolyomaa2.cut</option>
-      <option value="Epombe.cut">Epombe.cut</option>
-      <option value="Epombecai.cut">Epombecai.cut</option>
-      <option value="Epot.cut">Epot.cut</option>
-      <option value="Eppu.cut">Eppu.cut</option>
-      <option value="Epse.cut">Epse.cut</option>
-      <option value="Epsy.cut">Epsy.cut</option>
-      <option value="Epvu.cut">Epvu.cut</option>
-      <option value="Erab.cut">Erab.cut</option>
-      <option value="Erabbit.cut">Erabbit.cut</option>
-      <option value="Erabsp.cut">Erabsp.cut</option>
-      <option value="Erat.cut">Erat.cut</option>
-      <option value="Eratsp.cut">Eratsp.cut</option>
-      <option value="Erca.cut">Erca.cut</option>
-      <option value="Erhm.cut">Erhm.cut</option>
-      <option value="Eric.cut">Eric.cut</option>
-      <option value="Erle.cut">Erle.cut</option>
-      <option value="Erme.cut">Erme.cut</option>
-      <option value="Ersp.cut">Ersp.cut</option>
-      <option value="Esalsp.cut">Esalsp.cut</option>
-      <option value="Esau.cut">Esau.cut</option>
-      <option value="Esco.cut">Esco.cut</option>
-      <option value="Esgi.cut">Esgi.cut</option>
-      <option value="Eshp.cut">Eshp.cut</option>
-      <option value="Eshpsp.cut">Eshpsp.cut</option>
-      <option value="Esli.cut">Esli.cut</option>
-      <option value="Eslm.cut">Eslm.cut</option>
-      <option value="Esma.cut">Esma.cut</option>
-      <option value="Esmi.cut">Esmi.cut</option>
-      <option value="Esmu.cut">Esmu.cut</option>
-      <option value="Esoy.cut">Esoy.cut</option>
-      <option value="Espi.cut">Espi.cut</option>
-      <option value="Espn.cut">Espn.cut</option>
-      <option value="Espo.cut">Espo.cut</option>
-      <option value="Espo_h.cut">Espo_h.cut</option>
-      <option value="Espu.cut">Espu.cut</option>
-      <option value="Esta.cut">Esta.cut</option>
-      <option value="Esty.cut">Esty.cut</option>
-      <option value="Esus.cut">Esus.cut</option>
-      <option value="Esv40.cut">Esv40.cut</option>
-      <option value="Esyhsp.cut">Esyhsp.cut</option>
-      <option value="Esynsp.cut">Esynsp.cut</option>
-      <option value="Etbr.cut">Etbr.cut</option>
-      <option value="Etcr.cut">Etcr.cut</option>
-      <option value="Eter.cut">Eter.cut</option>
-      <option value="Etetsp.cut">Etetsp.cut</option>
-      <option value="Etob.cut">Etob.cut</option>
-      <option value="Etobcp.cut">Etobcp.cut</option>
-      <option value="Etom.cut">Etom.cut</option>
-      <option value="Etrb.cut">Etrb.cut</option>
-      <option value="Evco.cut">Evco.cut</option>
-      <option value="Ewht.cut">Ewht.cut</option>
-      <option value="Exel.cut">Exel.cut</option>
-      <option value="Exenopus.cut">Exenopus.cut</option>
-      <option value="Eyeast.cut">Eyeast.cut</option>
-      <option value="Eyen.cut">Eyen.cut</option>
-      <option value="Eysc.cut">Eysc.cut</option>
-      <option value="Eysc_h.cut">Eysc_h.cut</option>
-      <option value="Eyscmt.cut">Eyscmt.cut</option>
-      <option value="Eysp.cut">Eysp.cut</option>
-      <option value="Ezebrafish.cut">Ezebrafish.cut</option>
-      <option value="Ezma.cut">Ezma.cut</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="cai" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="cfile" value="Eyeastcai.cut"/>
-      <output name="out_file1" file="emboss_cai_out.cai"/>
-    </test>
-  </tests>
-  <help>
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/cai.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_cai_custom.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-<tool id="EMBOSS: cai_custom6" name="cai custom" version="5.0.0">
-  <description>CAI codon adaptation index using custom codon usage file</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>cai -seqall $input1 -outfile $out_file1 -cfile $input2 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param format="txt" name="input2" type="data">
-      <label>Codon Usage File</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="txt" name="out_file1" />
-  </outputs>
-  <help>
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/cai_custom.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_chaos.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-<tool id="EMBOSS: chaos7" name="chaos" version="5.0.0">
-  <description>Create a chaos game representation plot for a sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl chaos -sequence $input1 -graph png -goutfile $out_file1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-<!--    <tests>
-    <test>
-         puts name of file into the png
-    </test>
-  </tests> -->
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/chaos.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_charge.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-<tool id="EMBOSS: charge8" name="charge" version="5.0.0">
-  <description>Protein charge plot</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>charge -seqall $input1 -outfile $out_file1 -window $window -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="window" size="4" type="text" value="5">
-      <label>Window Size</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="charge" name="out_file1" />
-  </outputs>
- <!--   <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="window" value="5"/>
-      <output name="out_file1" file="emboss_charge_out.charge"/>
-    </test>
-  </tests>--> 
-  <help>
-
-.. class:: warningmark 
-
-The input dataset needs to be sequences. 
-
------ 
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/charge.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_checktrans.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-<tool id="EMBOSS: checktrans9" name="checktrans" version="5.0.0">
-  <description>Reports STOP codons and ORF statistics of a protein</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>checktrans -sequence $input1 -outfile $out_file1 -outseq $out_file2 -osformat3 $out_format2 -outfeat $out_file3 -offormat4 $out_format3 -orfml $orfml -addlast $addlast -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="orfml" size="4" type="text" value="100">
-      <label>Minimum ORF Length to report</label>
-    </param>
-    <param name="addlast" type="select">
-      <label>An asterisk in the protein sequence indicates the position of a STOP codon. Checktrans assumes that all ORFs end in a STOP codon. Forcing the sequence to end with an asterisk, if there
-      is not one there already, makes checktrans treat the end as a potential ORF. If an asterisk is added, it is not included in the reported count of STOPs</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="out_format2" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-    <param name="out_format3" type="select">
-      <label>Output Feature File Format</label>
-      <option value="gff">GFF</option>
-      <option value="embl">EMBL</option>
-      <option value="swiss">SwissProt</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="checktrans" name="out_file1" />
-    <data format="fasta" name="out_file2" />
-    <data format="gff" name="out_file3" />
-  </outputs>
- <!--   <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="orfml" value="100"/>
-      <param name="addlast" value="yes"/>
-      <param name="out_format2" value="fasta"/>
-      <param name="out_format3" value="gff"/>
-      <output name="out_file1" file="emboss_checktrans_out1.txt"/>
-      <output name="out_file2" file="emboss_checktrans_out2.fasta"/>
-      <output name="out_file3" file="emboss_checktrans_out3.gff"/>
-    </test>
-  </tests> -->
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark 
-
-The input dataset must contain sequences.
-
------ 
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/checktrans.html
-  </help>
-</tool>
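Stated as code, the -addlast rule described in the long label above is simply this (a sketch of the rule as written, not of checktrans itself)::

    #!/usr/bin/perl -w
    use strict;

    # '*' marks a STOP codon; an asterisk appended to force a terminal
    # STOP (addlast=yes) is excluded from the reported STOP count.
    my $protein = "MKTAYIAK";                  # made-up translation
    my $stops   = () = $protein =~ /\*/g;      # count pre-existing STOPs only
    $protein .= '*' unless $protein =~ /\*\z/; # treat the end as a potential ORF
    print "sequence: $protein\nreported STOPs: $stops\n";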
--- a/tools/emboss_5/emboss_chips.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-<tool id="EMBOSS: chips10" name="chips" version="5.0.0">
-  <description>Codon usage statistics</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>chips -seqall $input1 -outfile $out_file1 -sum $sum -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="sum" type="select">
-      <label>Sum codons over all sequences</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="chips" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="sum" value="yes"/>
-      <output name="out_file1" file="emboss_chips_out.chips"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/chips.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_cirdna.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-<tool id="EMBOSS: cirdna11" name="cirdna" version="5.0.0">
-  <description>Draws circular maps of DNA constructs</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl cirdna -infile $input1 -graphout png -goutfile $out_file1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <!-- functional tests not possible since the tool puts the name of the
-       input file into the png output -->
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/cirdna.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_codcmp.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,330 +0,0 @@
-<tool id="EMBOSS: codcmp12" name="codcmp" version="5.0.0">
-  <description>Codon usage table comparison</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>codcmp -first $cfile1 -second $cfile2 -outfile $out_file1 -auto</command>
-  <inputs>
-    <param name="cfile1" type="select">
-      <label>Codon Usage File 1</label>
-      <option value="Ehum.cut">Ehum.cut</option>
-      <option value="Eacc.cut">Eacc.cut</option>
-      <option value="Eadenovirus5.cut">Eadenovirus5.cut</option>
-      <option value="Eadenovirus7.cut">Eadenovirus7.cut</option>
-      <option value="Eaidlav.cut">Eaidlav.cut</option>
-      <option value="Eanasp.cut">Eanasp.cut</option>
-      <option value="Eani.cut">Eani.cut</option>
-      <option value="Eani_h.cut">Eani_h.cut</option>
-      <option value="Eanidmit.cut">Eanidmit.cut</option>
-      <option value="Easn.cut">Easn.cut</option>
-      <option value="Eath.cut">Eath.cut</option>
-      <option value="Eatu.cut">Eatu.cut</option>
-      <option value="Eavi.cut">Eavi.cut</option>
-      <option value="Ebja.cut">Ebja.cut</option>
-      <option value="Ebly.cut">Ebly.cut</option>
-      <option value="Ebme.cut">Ebme.cut</option>
-      <option value="Ebmo.cut">Ebmo.cut</option>
-      <option value="Ebna.cut">Ebna.cut</option>
-      <option value="Ebov.cut">Ebov.cut</option>
-      <option value="Ebovsp.cut">Ebovsp.cut</option>
-      <option value="Ebst.cut">Ebst.cut</option>
-      <option value="Ebsu.cut">Ebsu.cut</option>
-      <option value="Ebsu_h.cut">Ebsu_h.cut</option>
-      <option value="Ecac.cut">Ecac.cut</option>
-      <option value="Ecal.cut">Ecal.cut</option>
-      <option value="Eccr.cut">Eccr.cut</option>
-      <option value="Ecel.cut">Ecel.cut</option>
-      <option value="Echi.cut">Echi.cut</option>
-      <option value="Echicken.cut">Echicken.cut</option>
-      <option value="Echisp.cut">Echisp.cut</option>
-      <option value="Echk.cut">Echk.cut</option>
-      <option value="Echmp.cut">Echmp.cut</option>
-      <option value="Echnt.cut">Echnt.cut</option>
-      <option value="Echos.cut">Echos.cut</option>
-      <option value="Echzm.cut">Echzm.cut</option>
-      <option value="Echzmrubp.cut">Echzmrubp.cut</option>
-      <option value="Ecpx.cut">Ecpx.cut</option>
-      <option value="Ecre.cut">Ecre.cut</option>
-      <option value="Ecrisp.cut">Ecrisp.cut</option>
-      <option value="Ectr.cut">Ectr.cut</option>
-      <option value="Edayhoff.cut">Edayhoff.cut</option>
-      <option value="Eddi.cut">Eddi.cut</option>
-      <option value="Eddi_h.cut">Eddi_h.cut</option>
-      <option value="Edog.cut">Edog.cut</option>
-      <option value="Edro.cut">Edro.cut</option>
-      <option value="Edro_h.cut">Edro_h.cut</option>
-      <option value="Edrosophila.cut">Edrosophila.cut</option>
-      <option value="Eeca.cut">Eeca.cut</option>
-      <option value="Eeco.cut">Eeco.cut</option>
-      <option value="Eeco_h.cut">Eeco_h.cut</option>
-      <option value="Eecoli.cut">Eecoli.cut</option>
-      <option value="Ef1.cut">Ef1.cut</option>
-      <option value="Efish.cut">Efish.cut</option>
-      <option value="Efmdvpolyp.cut">Efmdvpolyp.cut</option>
-      <option value="Eham.cut">Eham.cut</option>
-      <option value="Ehha.cut">Ehha.cut</option>
-      <option value="Ehin.cut">Ehin.cut</option>
-      <option value="Ehma.cut">Ehma.cut</option>
-      <option value="Ehuman.cut">Ehuman.cut</option>
-      <option value="Ekla.cut">Ekla.cut</option>
-      <option value="Ekpn.cut">Ekpn.cut</option>
-      <option value="Ella.cut">Ella.cut</option>
-      <option value="Emac.cut">Emac.cut</option>
-      <option value="Emaize.cut">Emaize.cut</option>
-      <option value="Emam_h.cut">Emam_h.cut</option>
-      <option value="Emixlg.cut">Emixlg.cut</option>
-      <option value="Emouse.cut">Emouse.cut</option>
-      <option value="Emsa.cut">Emsa.cut</option>
-      <option value="Emse.cut">Emse.cut</option>
-      <option value="Emta.cut">Emta.cut</option>
-      <option value="Emtu.cut">Emtu.cut</option>
-      <option value="Emus.cut">Emus.cut</option>
-      <option value="Emussp.cut">Emussp.cut</option>
-      <option value="Emva.cut">Emva.cut</option>
-      <option value="Emze.cut">Emze.cut</option>
-      <option value="Emzecp.cut">Emzecp.cut</option>
-      <option value="Encr.cut">Encr.cut</option>
-      <option value="Eneu.cut">Eneu.cut</option>
-      <option value="Engo.cut">Engo.cut</option>
-      <option value="Eoncsp.cut">Eoncsp.cut</option>
-      <option value="Epae.cut">Epae.cut</option>
-      <option value="Epea.cut">Epea.cut</option>
-      <option value="Epet.cut">Epet.cut</option>
-      <option value="Epfa.cut">Epfa.cut</option>
-      <option value="Ephix174.cut">Ephix174.cut</option>
-      <option value="Ephv.cut">Ephv.cut</option>
-      <option value="Ephy.cut">Ephy.cut</option>
-      <option value="Epig.cut">Epig.cut</option>
-      <option value="Epolyomaa2.cut">Epolyomaa2.cut</option>
-      <option value="Epombe.cut">Epombe.cut</option>
-      <option value="Epombecai.cut">Epombecai.cut</option>
-      <option value="Epot.cut">Epot.cut</option>
-      <option value="Eppu.cut">Eppu.cut</option>
-      <option value="Epse.cut">Epse.cut</option>
-      <option value="Epsy.cut">Epsy.cut</option>
-      <option value="Epvu.cut">Epvu.cut</option>
-      <option value="Erab.cut">Erab.cut</option>
-      <option value="Erabbit.cut">Erabbit.cut</option>
-      <option value="Erabsp.cut">Erabsp.cut</option>
-      <option value="Erat.cut">Erat.cut</option>
-      <option value="Eratsp.cut">Eratsp.cut</option>
-      <option value="Erca.cut">Erca.cut</option>
-      <option value="Erhm.cut">Erhm.cut</option>
-      <option value="Eric.cut">Eric.cut</option>
-      <option value="Erle.cut">Erle.cut</option>
-      <option value="Erme.cut">Erme.cut</option>
-      <option value="Ersp.cut">Ersp.cut</option>
-      <option value="Esalsp.cut">Esalsp.cut</option>
-      <option value="Esau.cut">Esau.cut</option>
-      <option value="Esco.cut">Esco.cut</option>
-      <option value="Esgi.cut">Esgi.cut</option>
-      <option value="Eshp.cut">Eshp.cut</option>
-      <option value="Eshpsp.cut">Eshpsp.cut</option>
-      <option value="Esli.cut">Esli.cut</option>
-      <option value="Eslm.cut">Eslm.cut</option>
-      <option value="Esma.cut">Esma.cut</option>
-      <option value="Esmi.cut">Esmi.cut</option>
-      <option value="Esmu.cut">Esmu.cut</option>
-      <option value="Esoy.cut">Esoy.cut</option>
-      <option value="Espi.cut">Espi.cut</option>
-      <option value="Espn.cut">Espn.cut</option>
-      <option value="Espo.cut">Espo.cut</option>
-      <option value="Espo_h.cut">Espo_h.cut</option>
-      <option value="Espu.cut">Espu.cut</option>
-      <option value="Esta.cut">Esta.cut</option>
-      <option value="Esty.cut">Esty.cut</option>
-      <option value="Esus.cut">Esus.cut</option>
-      <option value="Esv40.cut">Esv40.cut</option>
-      <option value="Esyhsp.cut">Esyhsp.cut</option>
-      <option value="Esynsp.cut">Esynsp.cut</option>
-      <option value="Etbr.cut">Etbr.cut</option>
-      <option value="Etcr.cut">Etcr.cut</option>
-      <option value="Eter.cut">Eter.cut</option>
-      <option value="Etetsp.cut">Etetsp.cut</option>
-      <option value="Etob.cut">Etob.cut</option>
-      <option value="Etobcp.cut">Etobcp.cut</option>
-      <option value="Etom.cut">Etom.cut</option>
-      <option value="Etrb.cut">Etrb.cut</option>
-      <option value="Evco.cut">Evco.cut</option>
-      <option value="Ewht.cut">Ewht.cut</option>
-      <option value="Exel.cut">Exel.cut</option>
-      <option value="Exenopus.cut">Exenopus.cut</option>
-      <option value="Eyeast.cut">Eyeast.cut</option>
-      <option value="Eyeastcai.cut">Eyeastcai.cut</option>
-      <option value="Eyen.cut">Eyen.cut</option>
-      <option value="Eysc.cut">Eysc.cut</option>
-      <option value="Eysc_h.cut">Eysc_h.cut</option>
-      <option value="Eyscmt.cut">Eyscmt.cut</option>
-      <option value="Eysp.cut">Eysp.cut</option>
-      <option value="Ezebrafish.cut">Ezebrafish.cut</option>
-      <option value="Ezma.cut">Ezma.cut</option>
-    </param>
-    <param name="cfile2" type="select">
-      <label>Codon Usage File 2</label>
-      <option value="Ehum.cut">Ehum.cut</option>
-      <option value="Eacc.cut">Eacc.cut</option>
-      <option value="Eadenovirus5.cut">Eadenovirus5.cut</option>
-      <option value="Eadenovirus7.cut">Eadenovirus7.cut</option>
-      <option value="Eaidlav.cut">Eaidlav.cut</option>
-      <option value="Eanasp.cut">Eanasp.cut</option>
-      <option value="Eani.cut">Eani.cut</option>
-      <option value="Eani_h.cut">Eani_h.cut</option>
-      <option value="Eanidmit.cut">Eanidmit.cut</option>
-      <option value="Easn.cut">Easn.cut</option>
-      <option value="Eath.cut">Eath.cut</option>
-      <option value="Eatu.cut">Eatu.cut</option>
-      <option value="Eavi.cut">Eavi.cut</option>
-      <option value="Ebja.cut">Ebja.cut</option>
-      <option value="Ebly.cut">Ebly.cut</option>
-      <option value="Ebme.cut">Ebme.cut</option>
-      <option value="Ebmo.cut">Ebmo.cut</option>
-      <option value="Ebna.cut">Ebna.cut</option>
-      <option value="Ebov.cut">Ebov.cut</option>
-      <option value="Ebovsp.cut">Ebovsp.cut</option>
-      <option value="Ebst.cut">Ebst.cut</option>
-      <option value="Ebsu.cut">Ebsu.cut</option>
-      <option value="Ebsu_h.cut">Ebsu_h.cut</option>
-      <option value="Ecac.cut">Ecac.cut</option>
-      <option value="Ecal.cut">Ecal.cut</option>
-      <option value="Eccr.cut">Eccr.cut</option>
-      <option value="Ecel.cut">Ecel.cut</option>
-      <option value="Echi.cut">Echi.cut</option>
-      <option value="Echicken.cut">Echicken.cut</option>
-      <option value="Echisp.cut">Echisp.cut</option>
-      <option value="Echk.cut">Echk.cut</option>
-      <option value="Echmp.cut">Echmp.cut</option>
-      <option value="Echnt.cut">Echnt.cut</option>
-      <option value="Echos.cut">Echos.cut</option>
-      <option value="Echzm.cut">Echzm.cut</option>
-      <option value="Echzmrubp.cut">Echzmrubp.cut</option>
-      <option value="Ecpx.cut">Ecpx.cut</option>
-      <option value="Ecre.cut">Ecre.cut</option>
-      <option value="Ecrisp.cut">Ecrisp.cut</option>
-      <option value="Ectr.cut">Ectr.cut</option>
-      <option value="Edayhoff.cut">Edayhoff.cut</option>
-      <option value="Eddi.cut">Eddi.cut</option>
-      <option value="Eddi_h.cut">Eddi_h.cut</option>
-      <option value="Edog.cut">Edog.cut</option>
-      <option value="Edro.cut">Edro.cut</option>
-      <option value="Edro_h.cut">Edro_h.cut</option>
-      <option value="Edrosophila.cut">Edrosophila.cut</option>
-      <option value="Eeca.cut">Eeca.cut</option>
-      <option value="Eeco.cut">Eeco.cut</option>
-      <option value="Eeco_h.cut">Eeco_h.cut</option>
-      <option value="Eecoli.cut">Eecoli.cut</option>
-      <option value="Ef1.cut">Ef1.cut</option>
-      <option value="Efish.cut">Efish.cut</option>
-      <option value="Efmdvpolyp.cut">Efmdvpolyp.cut</option>
-      <option value="Eham.cut">Eham.cut</option>
-      <option value="Ehha.cut">Ehha.cut</option>
-      <option value="Ehin.cut">Ehin.cut</option>
-      <option value="Ehma.cut">Ehma.cut</option>
-      <option value="Ehuman.cut">Ehuman.cut</option>
-      <option value="Ekla.cut">Ekla.cut</option>
-      <option value="Ekpn.cut">Ekpn.cut</option>
-      <option value="Ella.cut">Ella.cut</option>
-      <option value="Emac.cut">Emac.cut</option>
-      <option value="Emaize.cut">Emaize.cut</option>
-      <option value="Emam_h.cut">Emam_h.cut</option>
-      <option value="Emixlg.cut">Emixlg.cut</option>
-      <option value="Emouse.cut">Emouse.cut</option>
-      <option value="Emsa.cut">Emsa.cut</option>
-      <option value="Emse.cut">Emse.cut</option>
-      <option value="Emta.cut">Emta.cut</option>
-      <option value="Emtu.cut">Emtu.cut</option>
-      <option value="Emus.cut">Emus.cut</option>
-      <option value="Emussp.cut">Emussp.cut</option>
-      <option value="Emva.cut">Emva.cut</option>
-      <option value="Emze.cut">Emze.cut</option>
-      <option value="Emzecp.cut">Emzecp.cut</option>
-      <option value="Encr.cut">Encr.cut</option>
-      <option value="Eneu.cut">Eneu.cut</option>
-      <option value="Engo.cut">Engo.cut</option>
-      <option value="Eoncsp.cut">Eoncsp.cut</option>
-      <option value="Epae.cut">Epae.cut</option>
-      <option value="Epea.cut">Epea.cut</option>
-      <option value="Epet.cut">Epet.cut</option>
-      <option value="Epfa.cut">Epfa.cut</option>
-      <option value="Ephix174.cut">Ephix174.cut</option>
-      <option value="Ephv.cut">Ephv.cut</option>
-      <option value="Ephy.cut">Ephy.cut</option>
-      <option value="Epig.cut">Epig.cut</option>
-      <option value="Epolyomaa2.cut">Epolyomaa2.cut</option>
-      <option value="Epombe.cut">Epombe.cut</option>
-      <option value="Epombecai.cut">Epombecai.cut</option>
-      <option value="Epot.cut">Epot.cut</option>
-      <option value="Eppu.cut">Eppu.cut</option>
-      <option value="Epse.cut">Epse.cut</option>
-      <option value="Epsy.cut">Epsy.cut</option>
-      <option value="Epvu.cut">Epvu.cut</option>
-      <option value="Erab.cut">Erab.cut</option>
-      <option value="Erabbit.cut">Erabbit.cut</option>
-      <option value="Erabsp.cut">Erabsp.cut</option>
-      <option value="Erat.cut">Erat.cut</option>
-      <option value="Eratsp.cut">Eratsp.cut</option>
-      <option value="Erca.cut">Erca.cut</option>
-      <option value="Erhm.cut">Erhm.cut</option>
-      <option value="Eric.cut">Eric.cut</option>
-      <option value="Erle.cut">Erle.cut</option>
-      <option value="Erme.cut">Erme.cut</option>
-      <option value="Ersp.cut">Ersp.cut</option>
-      <option value="Esalsp.cut">Esalsp.cut</option>
-      <option value="Esau.cut">Esau.cut</option>
-      <option value="Esco.cut">Esco.cut</option>
-      <option value="Esgi.cut">Esgi.cut</option>
-      <option value="Eshp.cut">Eshp.cut</option>
-      <option value="Eshpsp.cut">Eshpsp.cut</option>
-      <option value="Esli.cut">Esli.cut</option>
-      <option value="Eslm.cut">Eslm.cut</option>
-      <option value="Esma.cut">Esma.cut</option>
-      <option value="Esmi.cut">Esmi.cut</option>
-      <option value="Esmu.cut">Esmu.cut</option>
-      <option value="Esoy.cut">Esoy.cut</option>
-      <option value="Espi.cut">Espi.cut</option>
-      <option value="Espn.cut">Espn.cut</option>
-      <option value="Espo.cut">Espo.cut</option>
-      <option value="Espo_h.cut">Espo_h.cut</option>
-      <option value="Espu.cut">Espu.cut</option>
-      <option value="Esta.cut">Esta.cut</option>
-      <option value="Esty.cut">Esty.cut</option>
-      <option value="Esus.cut">Esus.cut</option>
-      <option value="Esv40.cut">Esv40.cut</option>
-      <option value="Esyhsp.cut">Esyhsp.cut</option>
-      <option value="Esynsp.cut">Esynsp.cut</option>
-      <option value="Etbr.cut">Etbr.cut</option>
-      <option value="Etcr.cut">Etcr.cut</option>
-      <option value="Eter.cut">Eter.cut</option>
-      <option value="Etetsp.cut">Etetsp.cut</option>
-      <option value="Etob.cut">Etob.cut</option>
-      <option value="Etobcp.cut">Etobcp.cut</option>
-      <option value="Etom.cut">Etom.cut</option>
-      <option value="Etrb.cut">Etrb.cut</option>
-      <option value="Evco.cut">Evco.cut</option>
-      <option value="Ewht.cut">Ewht.cut</option>
-      <option value="Exel.cut">Exel.cut</option>
-      <option value="Exenopus.cut">Exenopus.cut</option>
-      <option value="Eyeast.cut">Eyeast.cut</option>
-      <option value="Eyeastcai.cut">Eyeastcai.cut</option>
-      <option value="Eyen.cut">Eyen.cut</option>
-      <option value="Eysc.cut">Eysc.cut</option>
-      <option value="Eysc_h.cut">Eysc_h.cut</option>
-      <option value="Eyscmt.cut">Eyscmt.cut</option>
-      <option value="Eysp.cut">Eysp.cut</option>
-      <option value="Ezebrafish.cut">Ezebrafish.cut</option>
-      <option value="Ezma.cut">Ezma.cut</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="codcmp" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="cfile1" value="Ehum.cut"/>
-      <param name="cfile2" value="Eacc.cut"/>
-      <output name="out_file1" file="emboss_codcmp_out.codcmp"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/codcmp.html
-  </help>
-</tool>
\ No newline at end of file
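codcmp takes two of the bundled .cut tables and reports how far apart their codon usages are. The sum-of-squared-differences below illustrates the kind of distance involved; it is one plausible measure, not necessarily the exact statistic codcmp prints::

    #!/usr/bin/perl -w
    use strict;

    # Distance between two codon-usage tables given as codon => fraction
    # hashes (real .cut files also carry counts and amino-acid columns).
    my %a = ( TTT => 0.45, TTC => 0.55 );      # made-up toy tables
    my %b = ( TTT => 0.60, TTC => 0.40 );

    my $ssd = 0;
    for my $codon ( keys %a ) {
        my $d = $a{$codon} - ( $b{$codon} // 0 );
        $ssd += $d * $d;
    }
    printf "sum of squared differences: %.4f\n", $ssd;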
--- a/tools/emboss_5/emboss_coderet.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-<tool id="EMBOSS: coderet13" name="coderet" version="5.0.0">
-  <description>Extract CDS, mRNA and translations from feature tables</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <!--  <command>coderet -seqall $input1 -outfile $out_file1 -osformat2 $out_format1 -cds $cds -mrna $mrna -translation $translation -auto</command>-->
-  <command>coderet -seqall $input1 -outfile $out_file1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <!--
-    <param name="cds" type="boolean" truevalue="yes" falsevalue="no" checked="true">
-      <label>Extract CDS sequences</label>
-    </param>
-    <param name="mrna" type="select">
-      <label>Extract mRNA sequences</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="translation" type="select">
-      <label>Extract translated sequences</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param> 
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option> 
-    </param> 
-    -->
-  </inputs>
-  <outputs>
-    <data format="coderet" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <output name="out_file1" file="emboss_coderet_out.coderet"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/coderet.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_compseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-<tool id="EMBOSS: compseq14" name="compseq" version="5.0.0">
-  <description>Count composition of dimer/trimer/etc words in a sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>compseq -sequence $input1 -outfile $out_file1 -word $word -frame $frame -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="word" size="4" type="text" value="2">
-      <label>Size of word (window) to count</label>
-    </param>
-    <param name="frame" type="select">
-      <label>Frame to inspect</label>
-      <option value="0">All Frames</option>
-      <option value="1">Frame 1</option>
-      <option value="2">Frame 2</option>
-      <option value="3">Frame 3</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="compseq" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="word" value="2"/>
-      <param name="frame" value="0"/>
-      <output name="out_file1" file="emboss_compseq_out.compseq"/>
-    </test>
-  </tests>
-  <help>
-.. class:: warningmark
-
-The input dataset must contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/compseq.html
-  </help>
-</tool>
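compseq's word parameter is the size of the sliding window whose contents get tallied; with frame 0, every offset is counted. The counting core, sketched::

    #!/usr/bin/perl -w
    use strict;

    # Tally overlapping words of size $word across a sequence
    # (frame 0, i.e. all offsets), the table compseq reports.
    my ( $seq, $word ) = ( "ACGTACGTAC", 2 );  # made-up inputs
    my %count;
    $count{ substr( $seq, $_, $word ) }++ for 0 .. length($seq) - $word;
    print "$_\t$count{$_}\n" for sort keys %count;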
--- a/tools/emboss_5/emboss_cpgplot.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-<tool id="EMBOSS: cpgplot15" name="cpgplot" version="5.0.0">
-  <description>Plot CpG rich areas</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_cpgplot_wrapper.pl cpgplot -sequence $input1 -window $window -minlen $minlen -minpc $minpc -outfile $outfile -graph png -goutfile $goutfile -outfeat $outfeat -minoe $minoe -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="window" size="4" type="integer" value="100">
-      <label>Window Size</label>
-    </param>
-    <param name="minlen" size="4" type="integer" value="200">
-      <label>Minimum length</label>
-    </param>
-    <param name="minoe" size="4" type="float" value="0.6">
-      <label>Minimum average observed to expected ratio</label>
-    </param>
-    <param name="minpc" size="4" type="float" value="50.0">
-      <label>Minimum average percentage of G plus C</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="cpgplot" name="outfile" />
-    <data format="png" name="goutfile" />
-    <data format="gff" name="outfeat" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/cpgplot.html
-  </help>
-</tool>
\ No newline at end of file
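The minoe and minpc defaults above (0.6 and 50.0) are the classic CpG-island criteria. The observed-to-expected ratio thresholded by -minoe is conventionally N(CG) * L / (N(C) * N(G)) over the window (the Gardiner-Garden and Frommer definition; that cpgplot uses exactly this form is assumed here)::

    #!/usr/bin/perl -w
    use strict;

    # Observed/expected CpG ratio and %GC for one window, the two
    # statistics cpgplot thresholds with -minoe and -minpc.
    my $win = "GCGCGGCATCGCGG";                # made-up window
    my $c   = () = $win =~ /C/g;
    my $g   = () = $win =~ /G/g;
    my $cg  = () = $win =~ /CG/g;
    my $oe  = ( $c && $g ) ? $cg * length($win) / ( $c * $g ) : 0;
    printf "obs/exp CpG = %.2f, GC%% = %.1f\n", $oe, 100 * ( $c + $g ) / length($win);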
--- a/tools/emboss_5/emboss_cpgplot_wrapper.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-#! /usr/bin/perl -w
-use strict;
-use File::Copy;
-
-# Re-join the wrapped EMBOSS command line and run it.
-my $cmd_string = join (" ",@ARGV);
-my $results = `$cmd_string`;
-my @files = split("\n",$results);
-# $ARGV[14] is the value passed to -goutfile in the command line built
-# by emboss_cpgplot.xml; cpgplot appends ".1.png" to that path, so
-# rename the graph back to the file Galaxy expects.
-my $fileNameOut = $ARGV[14];
-move($fileNameOut.".1.png",$fileNameOut);
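With the command line defined in emboss_cpgplot.xml above, $ARGV[14] lands exactly on the value passed to -goutfile (argument 0 is "cpgplot" itself), so the move() renames the suffixed graph back onto the dataset path Galaxy expects; the command output captured in @files is collected but never used.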
--- a/tools/emboss_5/emboss_cpgreport.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-<tool id="EMBOSS: cpgreport16" name="cpgreport" version="5.0.0">
-  <description>Reports all CpG rich regions</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>cpgreport -sequence $input1 -outfile $out_file1 -outfeat $out_file2 -offormat3 $out_format2 -score $score -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="score" size="4" type="text" value="17">
-      <label>Score for each CG sequence found (1-200)</label>
-    </param>
-    <param name="out_format2" type="select">
-      <label>Output Feature File Format</label>
-      <option value="gff">GFF</option>
-      <option value="embl">EMBL</option>
-      <option value="swiss">SwissProt</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="cpgreport" name="out_file1" />
-    <data format="gff" name="out_file2" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="score" value="17"/>
-      <param name="out_format2" value="gff"/>
-      <output name="out_file2" file="emboss_cpgreport_out2.cpgreport"/>
-    </test>
-    <!-- <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="score" value="17"/>
-      <param name="out_format2" value="gff"/>
-      <output name="out_file1" file="emboss_cpgreport_out1.gff"/>
-    </test>  -->
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-.. class:: warningmark
-
-The input dataset must contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/cpgreport.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_cusp.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-<tool id="EMBOSS: cusp17" name="cusp" version="5.0.0">
-  <description>Create a codon usage table</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>cusp -sequence $input1 -outfile $out_file1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="cusp" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <output name="out_file1" file="emboss_cusp_out.cusp"/>
-    </test>
-  </tests>
-  <help>
-.. class:: warningmark
-
-The input dataset must contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/cusp.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_cutseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-<tool id="EMBOSS: cutseq18" name="cutseq" version="5.0.0">
-  <description>Removes a specified section from a sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>cutseq -sequence $input1 -outseq $out_file1 -from $from -to $to -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="from" size="8" type="text" value="1">
-      <label>Start of region to delete</label>
-    </param>
-    <param name="to" size="8" type="text" value="1">
-      <label>End of region to delete</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="from" value="1"/>
-      <param name="to" value="1"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_cutseq_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-.. class:: warningmark
-
-The input dataset must contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/cutseq.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_dan.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,84 +0,0 @@
-<tool id="EMBOSS: dan19" name="dan" version="5.0.0">
-  <description>Calculates melting temperature of DNA and RNA/DNA duplexes</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl dan -sequence $input1 -windowsize $window -goutfile $out_file1 -graph png -plot $plot1 -shiftincrement $shift -dnaconc $dnaconc
-  -saltconc $saltconc -product $product -formamide $formamide -mismatch $mismatch -prodlen $prodlen -thermo $thermo -temperature $temperature -rna $rna -outfile $out_file1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="window" size="4" type="text" value="20">
-      <label>Window Size</label>
-    </param>
-    <param name="shift" size="4" type="text" value="1">
-      <label>Step size (shift increment)</label>
-    </param>
-    <param name="dnaconc" size="4" type="text" value="50.0">
-      <label>DNA Concentration (nM)</label>
-    </param>
-    <param name="saltconc" size="4" type="text" value="50.0">
-      <label>Salt concentration (mM)</label>
-    </param>
-    <param name="thermo" type="select">
-      <label>Output the DeltaG, DeltaH and DeltaS values</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="temperature" size="4" type="text" value="25 ">
-      <label>Temperature at which to calculate the DeltaG, DeltaH and DeltaS values</label>
-    </param>
-    <param name="rna" type="select">
-      <label>Sequence is RNA</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="product" type="select">
-      <label>Include percent formamide, percent of mismatches allowed and product length</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="formamide" size="4" type="text" value="0 ">
-      <label>Formamide concentration (nM)</label>
-    </param>
-    <param name="mismatch" size="4" type="text" value="0 ">
-      <label>Percent mismatch to be used in calculations</label>
-    </param>
-    <param name="prodlen" size="4" type="text" value="20">
-      <label>Product length to be used in calculations</label>
-    </param>
-    <param name="plot1" type="select">
-      <label>Create a graph</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="dan" name="out_file1" />
-  </outputs>
-  <!--
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="window" value="20"/>
-      <param name="shift" value="1"/>
-      <param name="dnaconc" value="50"/>
-      <param name="saltconc" value="50"/>
-      <param name="thermo" value="yes"/>
-      <param name="temperature" value="25"/>
-      <param name="rna" value="no"/>
-      <param name="product" value="no"/>
-      <param name="formamide" value="0"/>
-      <param name="mismatch" value="0"/>
-      <param name="prodlen" value="20"/>
-      <param name="plot1" value="yes"/>
-      <output name="out_file1" file="emboss_dan_out.png"/>
-    </test>
-  </tests>
-  -->
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/dan.html
-  </help>
-</tool>
\ No newline at end of file
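dan derives melting temperature from nearest-neighbour thermodynamics (hence the DeltaG/DeltaH/DeltaS options above). For a sense of the quantity, the crude GC-content approximation below is the one often quoted for primers longer than about 13 nt; it is not the formula dan uses::

    #!/usr/bin/perl -w
    use strict;

    # Rough GC-content melting-temperature estimate:
    #   Tm = 64.9 + 41 * (G + C - 16.4) / length
    my $seq = "ACGTGCGCATTTACGGCAT";           # made-up primer
    my $gc  = () = $seq =~ /[GC]/g;
    my $tm  = 64.9 + 41 * ( $gc - 16.4 ) / length($seq);
    printf "GC count: %d, Tm ~ %.1f C\n", $gc, $tm;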
--- a/tools/emboss_5/emboss_degapseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-<tool id="EMBOSS: degapseq20" name="degapseq" version="5.0.0">
-  <description>Removes gap characters from sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>degapseq -sequence $input1 -outseq $out_file1 -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_degapseq_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/degapseq.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_descseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-<tool id="EMBOSS: descseq21" name="descseq" version="5.0.0">
-  <description>Alter the name or description of a sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>descseq -sequence $input1 -outseq $out_file1 -name "$seqname" -description "$desc" -append $append -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="seqname" size="50" type="text" value="">
-      <label>Name of the sequence</label>
-    </param>
-    <param name="desc" size="50" type="text" value="">
-      <label>Description of the sequence</label>
-    </param>
-    <param name="append" type="select">
-      <label>Append the name or description on to the end of the existing name or description</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="seqname" value="test"/>
-      <param name="desc" value="test"/>
-      <param name="append" value="yes"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_descseq_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/descseq.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_diffseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-<tool id="EMBOSS: diffseq22" name="diffseq" version="5.0.0">
-  <description>Find differences between nearly identical sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>diffseq -asequence $input1 -bsequence $input2 -outfile $out_file1 -aoutfeat $out_file2 -boutfeat $out_file3 -wordsize $wordsize -globaldifferences $globaldifferences -rformat3
-  $out_format1 -offormat4 $out_format2 -offormat5 $out_format3 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence 1</label>
-    </param>
-    <param format="data" name="input2" type="data">
-      <label>Sequence 2</label>
-    </param>
-    <param name="wordsize" size="4" type="text" value="20">
-      <label>Wordsize</label>
-    </param>
-    <param name="globaldifferences" type="select">
-      <label>Report differences at the ends</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="diffseq">Diffseq</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="regions">Regions</option>
-      <option value="seqtable">SeqTable</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="table">Table</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-    <param name="out_format2" type="select">
-      <label>Sequence 1 Output Feature File Format</label>
-      <option value="gff">GFF</option>
-      <option value="embl">EMBL</option>
-      <option value="swiss">SwissProt</option>
-    </param>
-    <param name="out_format3" type="select">
-      <label>Sequence 2 Output Feature File Format</label>
-      <option value="gff">GFF</option>
-      <option value="embl">EMBL</option>
-      <option value="swiss">SwissProt</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="diffseq" name="out_file1" />
-    <data format="gff" name="out_file2" />
-    <data format="gff" name="out_file3" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/diffseq.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_digest.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-<tool id="EMBOSS: digest23" name="digest" version="5.0.0">
-  <description>Protein proteolytic enzyme or reagent cleavage digest</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>digest -seqall $input1 -outfile $out_file1 -menu $menu -unfavoured $unfavoured -overlap $overlap -allpartials $allpartials -rformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="menu" type="select">
-      <label>Enzyme/Reagent</label>
-      <option value="1">Trypsin</option>
-      <option value="2">Lys-C</option>
-      <option value="3">Arg-C</option>
-      <option value="4">Asp-N</option>
-      <option value="5">V8-bicarb</option>
-      <option value="6">V8-phosph</option>
-      <option value="7">Chymotrypsin</option>
-      <option value="8">CNBr</option>
-    </param>
-    <param name="unfavoured" type="select">
-      <label>Trypsin will not normally cut after a K if it is followed by (e.g.) another K or a P. Specifying this shows those cuts, as well as the favoured ones.</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="overlap" type="select">
-      <label>Used for partial digestion. Shows all cuts from favoured cut sites plus 1..3, 2..4, 3..5 etc but not (e.g.) 2..5. Overlaps are therefore fragments with exactly one potential cut site
-      within them.</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="allpartials" type="select">
-      <label>As for overlap, but fragments containing more than one potential cut site are included.</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="seqtable">SeqTable</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="regions">Regions</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="table">Table</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="digest" name="out_file1" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/digest.html
-  </help>
-</tool>
\ No newline at end of file
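The unfavoured option's label above spells out the trypsin rule. The favoured cuts alone follow the usual Keil simplification (cut after K or R unless the next residue is P), which a single split can express; digest's own site lists are richer, e.g. treating K followed by K as unfavoured::

    #!/usr/bin/perl -w
    use strict;

    # Favoured trypsin cleavage only: split after K or R unless the
    # next residue is P (a simplification of what digest implements).
    my $protein   = "MKWVTFISLLLLFSSAYSRGVFRRDTHKPSEIAHRFK";  # made-up
    my @fragments = split /(?<=[KR])(?!P)/, $protein;
    print join( "\n", @fragments ), "\n";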
--- a/tools/emboss_5/emboss_dotmatcher.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-<tool id="EMBOSS: dotmatcher24" name="dotmatcher" version="5.0.0">
-  <description>Displays a thresholded dotplot of two sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl dotmatcher -asequence $input1 -bsequence $input2 -goutfile $out_file1 -windowsize $windowsize -threshold $threshold -graph png -xygraph png
-  -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence 1</label>
-    </param>
-    <param format="data" name="input2" type="data">
-      <label>Sequence 2</label>
-    </param>
-    <param name="windowsize" size="4" type="text" value="10">
-      <label>Window size</label>
-    </param>
-    <param name="threshold" size="4" type="text" value="23">
-      <label>Threshold</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <!-- functional tests not possible since image output contains file name information and timestamp -->
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/dotmatcher.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_dotpath.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-<tool id="EMBOSS: dotpath25" name="dotpath" version="5.0.0">
-  <description>Non-overlapping wordmatch dotplot of two sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl dotpath -asequence $input1 -bsequence $input2 -goutfile $out_file1 -wordsize $wordsize -overlaps $overlaps -boxit $boxit -graph png
-  -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence 1</label>
-    </param>
-    <param format="data" name="input2" type="data">
-      <label>Sequence 2</label>
-    </param>
-    <param name="wordsize" size="4" type="text" value="4">
-      <label>Word size (Integer 2 or more)</label>
-    </param>
-    <param name="overlaps" type="select">
-      <label>Display the overlapping matches (in red) as well as the minimal set of non-overlapping matches</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="boxit" type="select">
-      <label>Draw a box around dotplot</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <!-- functional tests not possible since image output contains file name information and timestamp -->
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/dotpath.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_dottup.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-<tool id="EMBOSS: dottup26" name="dottup" version="5.0.0">
-  <description>Displays a wordmatch dotplot of two sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl dottup -asequence $input1 -bsequence $input2 -goutfile $out_file1 -wordsize $wordsize -boxit $boxit -graph png  -xygraph png -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence 1</label>
-    </param>
-    <param format="data" name="input2" type="data">
-      <label>Sequence 2</label>
-    </param>
-    <param name="wordsize" size="4" type="text" value="4">
-      <label>Word size</label>
-    </param>
-    <param name="boxit" type="select">
-      <label>Draw a box around dotplot</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <!-- functional tests not possible since image output contains file name information and timestamp -->
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/dottup.html
-  </help>
-</tool>
\ No newline at end of file
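dotmatcher, dotpath and dottup above all share one core step: find the coordinates at which a word from one sequence matches a word in the other, then plot those points (thresholded, minimal non-overlapping, or all of them, respectively). The exact-word variant, sketched::

    #!/usr/bin/perl -w
    use strict;

    # Emit (x, y) coordinates of exact word matches between two
    # sequences, the points a dottup-style dotplot draws.
    my ( $s1, $s2, $k ) = ( "ACGTACGT", "TTACGTAA", 4 );  # made-up inputs
    my %where;                                            # word => positions in $s2
    push @{ $where{ substr( $s2, $_, $k ) } }, $_ for 0 .. length($s2) - $k;
    for my $x ( 0 .. length($s1) - $k ) {
        my $w = substr( $s1, $x, $k );
        print "match '$w' at ($x, $_)\n" for @{ $where{$w} || [] };
    }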
--- a/tools/emboss_5/emboss_dreg.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-<tool id="EMBOSS: dreg27" name="dreg" version="5.0.0">
-  <description>Regular expression search of a nucleotide sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>dreg -sequence $input1 -outfile $out_file1 -pattern "$pattern" -raccshow3 "no" -rusashow3 "no" -rdesshow3 "no" -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence 1</label>
-    </param>
-    <param name="pattern" size="50" type="text" value="(AUG)">
-      <label>Regular expression pattern</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="dreg" name="out_file1" />
-  </outputs>
-  <!-- tests not possible since dreg timestamps output file -->  
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/dreg.html
-  </help>
-</tool>
\ No newline at end of file
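dreg is essentially a regular-expression scanner over the nucleotide sequence (the default pattern above, (AUG), finds start codons in RNA). The matching core, reporting 1-based coordinates::

    #!/usr/bin/perl -w
    use strict;

    # Report every match of a pattern with 1-based start..end
    # coordinates, the heart of a dreg search.
    my $seq     = "GGAUGCCAUGAA";              # made-up RNA sequence
    my $pattern = qr/AUG/;
    while ( $seq =~ /$pattern/g ) {
        printf "match %s at %d..%d\n", $&, $-[0] + 1, $+[0];
    }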
--- a/tools/emboss_5/emboss_einverted.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-<tool id="EMBOSS: einverted28" name="einverted" version="5.0.0">
-  <description>Finds DNA inverted repeats</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>einverted -sequence $input1 -outfile $out_file1 -gap $gap -threshold $threshold -match $match -mismatch $mismatch -maxrepeat $maxrepeat -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="gap" size="4" type="text" value="12">
-      <label>Gap penalty</label>
-    </param>
-    <param name="threshold" size="4" type="text" value="50">
-      <label>Minimum score threshold</label>
-    </param>
-    <param name="match" size="4" type="text" value="3">
-      <label>Match score</label>
-    </param>
-    <param name="mismatch" size="4" type="text" value="-4">
-      <label>Mismatch score</label>
-    </param>
-    <param name="maxrepeat" size="4" type="text" value="2000">
-      <label>Maximum separation between the start of repeat and the end of the inverted repeat</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="einverted" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="1.fasta"/>
-      <param name="gap" value="12"/>
-      <param name="threshold" value="50"/>
-      <param name="match" value="3"/>
-      <param name="mismatch" value="-4"/>
-      <param name="maxrepeat" value="2000"/>
-      <output name="out_file1" file="emboss_einverted_out.einverted"/>
-    </test>
-  </tests>
-  <help>
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/einverted.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_epestfind.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-<tool id="EMBOSS: epestfind29" name="epestfind" version="5.0.0">
-  <description>Finds PEST motifs as potential proteolytic cleavage sites</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl epestfind -sequence $input1 -goutfile $ofile2 -outfile $ofile1 -window $window -order $order -potential $potential -poor $poor
-  -invalid $invalid -map $map -graph png -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="window" size="4" type="text" value="10">
-      <label>Minimal distance between positively charged amino acids</label>
-    </param>
-    <param name="order" type="select">
-      <label>Sort by</label>
-      <option value="3">Score</option>
-      <option value="1">Length</option>
-      <option value="2">Position</option>
-    </param>
-    <param name="threshold" size="4" type="text" value="5.0">
-      <label>Threshold value to discriminate weak from potential PEST motifs.</label>
-    </param>
-    <param name="potential" type="select">
-      <label>Decide whether potential PEST motifs should be printed</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="poor" type="select">
-      <label>Decide whether poor PEST motifs should be printed</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="invalid" type="select">
-      <label>Decide whether invalid PEST motifs should be printed</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="map" type="select">
-      <label>Decide whether PEST motifs should be mapped to sequence</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="ofile2" />
-    <data format="epestfind" name="ofile1" />
-  </outputs>
-<!--    <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="window" value="10"/>
-      <param name="order" value="3"/>
-      <param name="threshold" value="5.0"/>
-      <param name="potential" value="yes"/>
-      <param name="poor" value="yes"/>
-      <param name="invalid" value="no"/>
-      <param name="map" value="yes"/>
-      <output name="ofile1" file="emboss_epestfind_out.epestfind"/>
-    </test>
-  </tests>
-  functional test disabled: the output file contains file location information -->
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/epestfind.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_equicktandem.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-<tool id="EMBOSS: equicktandem31" name="equicktandem" version="5.0.0">
-  <description>Finds tandem repeats</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>equicktandem -sequence $input1 -outfile $out_file1 -origfile $ofile2 -maxrepeat $maxrepeat -threshold $threshold -rformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="maxrepeat" size="4" type="text" value="600">
-      <label>Maximum repeat size</label>
-    </param>
-    <param name="threshold" size="4" type="text" value="20">
-      <label>Threshold score</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="table">Table</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="regions">Regions</option>
-      <option value="seqtable">SeqTable</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="table" name="out_file1" />
-    <data format="equicktandem" name="ofile2" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="1.fasta"/>
-      <param name="maxrepeat" value="600"/>
-      <param name="threshold" value="20"/>
-      <param name="out_format1" value="table"/>
-      <output name="ofile2" file="emboss_equicktandem_out.equicktandem"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/equicktandem.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_est2genome.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,103 +0,0 @@
-<tool id="EMBOSS: est2genome32" name="est2genome" version="5.0.0">
-  <description>Align EST and genomic DNA sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>est2genome -estsequence $input1 -genomesequence $input2 -outfile $out_file1 -match $match -mismatch $mismatch -gappenalty $gappenalty -intronpenalty $intronpenalty -splicepenalty
-  $splicepenalty -minscore $minscore -reverse $reverse -splice $splice -mode $mode -best $best -shuffle $shuffle -seed $seed -align $align -width $width -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>EST sequence(s)</label>
-    </param>
-    <param format="data" name="input2" type="data">
-      <label>Genomic sequence</label>
-    </param>
-    <param name="match" size="4" type="text" value="1">
-      <label>Score for matching two bases</label>
-    </param>
-    <param name="mismatch" size="4" type="text" value="1">
-      <label>Cost for mismatching two bases</label>
-    </param>
-    <param name="gappenalty" size="4" type="text" value="2">
-      <label>Cost for deleting a single base in either sequence, excluding introns</label>
-    </param>
-    <param name="intronpenalty" size="4" type="text" value="40">
-      <label>Cost for an intron, independent of length</label>
-    </param>
-    <param name="splicepenalty" size="4" type="text" value="20">
-      <label>Cost for an intron, independent of length and starting/ending on donor-acceptor sites</label>
-    </param>
-    <param name="minscore" size="4" type="text" value="30">
-      <label>Exclude alignments with scores below this threshold score</label>
-    </param>
-    <param name="reverse" type="select">
-      <label>Reverse the orientation of the EST sequence</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="splice" type="select">
-      <label>Use donor and acceptor splice sites</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="mode" type="select">
-      <label>Comparison mode</label>
-      <option value="both">Both strands</option>
-      <option value="forward">Forward strand only</option>
-      <option value="reverse">Reverse strand only</option>
-    </param>
-    <param name="best" type="select">
-      <label>Only best comparisons</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="shuffle" size="4" type="text" value="0">
-      <label>Shuffle</label>
-    </param>
-    <param name="seed" size="4" type="text" value="20825">
-      <label>Random number seed</label>
-    </param>
-    <param name="align" type="select">
-      <label>Show the alignment</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="width" size="4" type="text" value="50">
-      <label>Alignment width</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="est2genome" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="input2" value="1.fasta"/>
-      <param name="match" value="1"/>
-      <param name="mismatch" value="1"/>
-      <param name="match" value="1"/>
-      <param name="gappenalty" value="2"/>
-      <param name="intronpenalty" value="40"/>
-      <param name="splicepenalty" value="20"/>
-      <param name="minscore" value="30"/>
-      <param name="reverse" value="no"/>
-      <param name="splice" value="yes"/>
-      <param name="mode" value="both"/>
-      <param name="best" value="yes"/>
-      <param name="shuffle" value="0"/>
-      <param name="seed" value="20825"/>
-      <param name="align" value="no"/>
-      <param name="width" value="50"/>
-      <output name="out_file1" file="emboss_est2genome_out.est2genome"/>
-    </test>
-  </tests>
-  <help>
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/est2genome.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_etandem.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-<tool id="EMBOSS: etandem33" name="etandem" version="5.0.0">
-  <description>Looks for tandem repeats in a nucleotide sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>etandem -sequence $input1 -outfile $out_file1 -origfile $ofile2 -minrepeat $minrepeat -maxrepeat $maxrepeat -threshold $threshold -mismatch $mismatch -uniform $uniform -rformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="minrepeat" size="4" type="text" value="10">
-      <label>Minimum repeat size</label>
-    </param>
-    <param name="maxrepeat" size="4" type="text" value="10">
-      <label>Maximum repeat size</label>
-    </param>
-    <param name="threshold" size="4" type="text" value="20">
-      <label>Threshold score</label>
-    </param>
-    <param name="mismatch" type="select">
-      <label>Allow N as a mismatch</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="uniform" type="select">
-      <label>Allow uniform consensus</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="table">Table</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="regions">Regions</option>
-      <option value="seqtable">SeqTable</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="etandem" name="out_file1" />
-    <data format="table" name="ofile2" />    
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="1.fasta"/>
-      <param name="minrepeat" value="10"/>
-      <param name="maxrepeat" value="10"/>
-      <param name="threshold" value="20"/>
-      <param name="mismatch" value="no"/>
-       <param name="uniform" value="no"/>
-      <param name="out_format1" value="table"/>
-      <output name="ofile2" file="emboss_etandem_out.table"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/etandem.html
-  </help>
-</tool>
-
-
--- a/tools/emboss_5/emboss_extractfeat.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-<tool id="EMBOSS: extractfeat34" name="extractfeat" version="5.0.0">
-  <!-- tool verified against the documentation; no functional test because the available test input sequences yield empty output files -->
-  <description>Extract features from a sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>extractfeat -sequence $input1 -outseq $out_file1 -before $before -after $after -source "$source" -type "$type" -sense $sense -minscore $minscore -maxscore $maxscore -tag "$tag" -value
-  "$value" -join $join -featinname $featinname -describe "$describe" -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="before" size="4" type="text" value="0">
-      <label>Number of bases or residues before the feature to include in the extracted sequence</label>
-    </param>
-    <param name="after" size="4" type="text" value="0">
-      <label>Number of bases or residues after the feature to include in the extracted sequence</label>
-    </param>
-    <param name="source" size="4" type="text" value="*">
-      <label>Feature source</label>
-    </param>
-    <param name="type" size="4" type="text" value="*">
-      <label>Feature type</label>
-    </param>
-    <param name="sense" type="select">
-      <label>Feature sense</label>
-      <option value="0">Any sense</option>
-      <option value="1">Forward sense</option>
-      <option value="-1">Reverse sense</option>
-    </param>
-    <param name="minscore" size="4" type="text" value="0.0">
-      <label>Minimum score</label>
-    </param>
-    <param name="maxscore" size="4" type="text" value="0.0">
-      <label>Maximum score</label>
-    </param>
-    <param name="tag" size="4" type="text" value="*">
-      <label>Feature tags</label>
-    </param>
-    <param name="value" size="4" type="text" value="*">
-      <label>Tag values</label>
-    </param>
-    <param name="join" type="select">
-      <label>Join features</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="featinname" type="select">
-      <label>Put feature type in sequence name</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="describe" size="4" type="text" value="">
-      <label>Specify one or more tag names that should be added to the output sequence Description text</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/extractfeat.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_extractseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-<tool id="EMBOSS: extractseq35" name="extractseq" version="5.0.0">
-  <description>Extract regions from a sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>extractseq -sequence $input1 -outseq $out_file1 -regions $regions -separate $separate -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="regions" size="20" type="text" value="1-9999999">
-      <label>Regions to extract</label>
-    </param>
-    <param name="separate" type="select">
-      <label>Write each specified region as a separate sequence</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="regions" value="1-9999999"/>
-      <param name="separate" value="no"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_extractseq_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/extractseq.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_format_corrector.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-#EMBOSS format corrector
-
-import operator
-
-#Properly set file formats after job run
-def exec_after_process( app, inp_data, out_data, param_dict, tool, stdout, stderr ):
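-    #Galaxy invokes this hook (wired up by each tool's <code> tag) after the job
-    #finishes, so the declared output datatypes can be corrected to match the
-    #format the user actually selected.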
-    #out_data is an unordered mapping, so sort the outputs by name for a stable order
-    items = sorted( out_data.items(), key=operator.itemgetter(0) )
-    
-    #normal filetype correction
-    data_count=1
-    for name, data in items:
-        outputType = param_dict.get( 'out_format' + str(data_count), None )
-        if outputType is not None:
-            if outputType == 'ncbi':
-                outputType = "fasta"
-            elif outputType == 'excel':
-                outputType = "tabular"
-            elif outputType == 'text':
-                outputType = "txt"
-            data = app.datatypes_registry.change_datatype(data, outputType)
-            app.model.context.add( data )
-            app.model.context.flush()
-        data_count+=1
-    
-    #html filetype correction
-    data_count=1
-    for name, data in items:
-        wants_plot = param_dict.get( 'html_out'+str(data_count), None )
-        ext = "html"
-        if wants_plot == "yes":
-            data = app.datatypes_registry.change_datatype(data, ext)
-            app.model.context.add( data )
-            app.model.context.flush()
-        data_count+=1
-    
-    #png file correction
-    data_count=1
-    for name, data in items:
-        wants_plot = param_dict.get( 'plot'+str(data_count), None )
-        ext = "png"
-        if wants_plot == "yes":
-            data = app.datatypes_registry.change_datatype(data, ext)
-            app.model.context.add( data )
-            app.model.context.flush()
-        data_count+=1
--- a/tools/emboss_5/emboss_freak.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-<tool id="EMBOSS: freak36" name="freak" version="5.0.0">
-  <description>Residue/base frequency table or plot</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>freak -seqall $input1 -outfile $out_file1 -window $window -letters $letters -graph png -step $step -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="letters" size="5" type="text" value="gc">
-      <label>Residue letters</label>
-    </param>
-    <param name="step" size="5" type="text" value="1">
-      <label>Stepping value</label>
-    </param>
-    <param name="window" size="5" type="text" value="30">
-      <label>Averaging window</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="freak" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="letters" value="gc"/>
-      <param name="step" value="1"/>
-      <param name="window" value="30"/>
-      <output name="out_file1" file="emboss_freak_out.freak"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/freak.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_fuzznuc.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-<tool id="EMBOSS: fuzznuc37" name="fuzznuc" version="5.0.1">
-  <description>Nucleic acid pattern search</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>fuzznuc -sequence $input1 -outfile $out_file1 -pattern '$pattern' -pmismatch $mismatch -complement $complement -rformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="pattern" size="5" type="text" value="">
-      <label>Search pattern</label>
-      <sanitizer>
-        <valid initial="string.printable">
-         <remove value="&apos;"/>
-        </valid>
-        <mapping initial="none">
-          <add source="&apos;" target=""/>
-        </mapping>
-      </sanitizer>
-    </param>
-    <param name="mismatch" size="5" type="text" value="0">
-      <label>Number of mismatches</label>
-    </param>
-    <param name="complement" type="select">
-      <label>Search complementary strand</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="seqtable">SeqTable</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="regions">Regions</option>
-      <option value="simple">SRS Simple</option>
-      <option value="fuzznuc">Fuzznuc Output File</option>
-      <option value="srs">SRS</option>
-      <option value="table">Table</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fuzznuc" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="pattern" value="AA"/>
-      <param name="mismatch" value="0"/>
-      <param name="complement" value="no"/>
-      <param name="out_format1" value="excel"/>
-      <output name="out_file1" file="emboss_fuzznuc_out.tabular"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/fuzznuc.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_fuzzpro.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-<tool id="EMBOSS: fuzzpro38" name="fuzzpro" version="5.0.0">
-  <description>Protein pattern search</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>fuzzpro -sequence $input1 -outfile $out_file1 -pattern "$pattern" -pmismatch $mismatch -rformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="pattern" size="30" type="text" value="">
-      <label>Search pattern</label>
-    </param>
-    <param name="mismatch" size="5" type="text" value="0">
-      <label>Number of mismatches</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="seqtable">SeqTable</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="regions">Regions</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="table">Table</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fuzzpro" name="out_file1" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/fuzzpro.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_fuzztran.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,95 +0,0 @@
-<tool id="EMBOSS: fuzztran39" name="fuzztran" version="5.0.0">
-  <description>Protein pattern search after translation</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>fuzztran -sequence $input1 -outfile $out_file1 -pattern "$pattern" -pmismatch $mismatch -frame $frame -table $table -rformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="pattern" size="5" type="text" value="">
-      <label>Search pattern</label>
-    </param>
-    <param name="mismatch" size="5" type="text" value="0">
-      <label>Number of mismatches</label>
-    </param>
-    <param name="frame" type="select">
-      <label>Frame(s) to translate</label>
-      <option value="1">Frame 1</option>
-      <option value="2">Frame 2</option>
-      <option value="3">Frame 3</option>
-      <option value="F">Forward three frames</option>
-      <option value="-1">Frame -1</option>
-      <option value="-2">Frame -2</option>
-      <option value="-3">Frame -3</option>
-      <option value="R">Reverse three frames</option>
-      <option value="6">All six frames</option>
-    </param>
-    <param name="table" type="select">
-      <label>Code to use</label>
-      <option value="0">Standard</option>
-      <option value="1">Standard (with alternative initiation codons)</option>
-      <option value="2">Vertebrate Mitochondrial</option>
-      <option value="3">Yeast Mitochondrial</option>
-      <option value="4">Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma</option>
-      <option value="5">Invertebrate Mitochondrial</option>
-      <option value="6">Ciliate Macronuclear and Dasycladacean</option>
-      <option value="9">Echinoderm Mitochondrial</option>
-      <option value="10">Euplotid Nuclear</option>
-      <option value="11">Bacterial</option>
-      <option value="12">Alternative Yeast Nuclear</option>
-      <option value="13">Ascidian Mitochondrial</option>
-      <option value="14">Flatworm Mitochondrial</option>
-      <option value="15">Blepharisma Macronuclear</option>
-      <option value="16">Chlorophycean Mitochondrial</option>
-      <option value="21">Trematode Mitochondrial</option>
-      <option value="22">Scenedesmus obliquus</option>
-      <option value="23">Thraustochytrium Mitochondrial</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="table">Table</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="regions">Regions</option>
-      <option value="seqtable">SeqTable</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fuzztran" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="1.fasta"/>
-      <param name="pattern" value="AA"/>
-      <param name="mismatch" value="0"/>
-      <param name="frame" value="6"/>
-      <param name="table" value="0"/>
-      <param name="out_format1" value="excel"/>
-      <output name="out_file1" file="emboss_fuzztran_out.tabular"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/fuzztran.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_garnier.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-<tool id="EMBOSS: garnier40" name="garnier" version="5.0.0">
-  <description>Predicts protein secondary structure</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>garnier -sequence $input1 -outfile $out_file1 -idc $idc -rformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="idc" type="select">
-      <label>Decision constant index (idc). GOR note in their paper that prior knowledge of a protein's secondary structure content improves prediction. 'idc' indexes the arrays dharr[] and
-      dsarr[], which hold 'decision constants' (dch, dcs): offsets applied to the weights of the helix and sheet (extend) terms. idc=0 disables the offsets; idc=1 to 6 selects different
-      combinations of dch and dcs</label>
-      <option value="0">idc 0</option>
-      <option value="1">idc 1</option>
-      <option value="2">idc 2</option>
-      <option value="3">idc 3</option>
-      <option value="4">idc 4</option>
-      <option value="5">idc 5</option>
-      <option value="6">idc 6</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="tagseq">TagSeq</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="regions">Regions</option>
-      <option value="seqtable">SeqTable</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="table">Table</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="garnier" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="idc" value="0"/>
-      <param name="out_format1" value="excel"/>
-      <output name="out_file1" file="emboss_garnier_out.tabular"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/garnier.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_geecee.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-<tool id="EMBOSS: geecee41" name="geecee" version="5.0.0">
-  <description>Calculates fractional GC content of nucleic acid sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>geecee -sequence $input1 -outfile $out_file1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="geecee" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <output name="out_file1" file="emboss_geecee_out.geecee"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/geecee.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_getorf.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,129 +0,0 @@
-<tool id="EMBOSS: getorf42" name="getorf" version="5.0.0">
-  <description>Finds and extracts open reading frames (ORFs)</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>getorf -sequence $input1 -outseq $out_file1 -table $table -minsize $minsize -maxsize $maxsize -find $find -methionine $methionine -circular $circular -reverse $reverse -flanking $flanking
-  -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="table" type="select">
-      <label>Code to use</label>
-      <option value="0">Standard</option>
-      <option value="1">Standard (with alternative initiation codons)</option>
-      <option value="2">Vertebrate Mitochondrial</option>
-      <option value="3">Yeast Mitochondrial</option>
-      <option value="4">Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma</option>
-      <option value="5">Invertebrate Mitochondrial</option>
-      <option value="6">Ciliate Macronuclear and Dasycladacean</option>
-      <option value="9">Echinoderm Mitochondrial</option>
-      <option value="10">Euplotid Nuclear</option>
-      <option value="11">Bacterial</option>
-      <option value="12">Alternative Yeast Nuclear</option>
-      <option value="13">Ascidian Mitochondrial</option>
-      <option value="14">Flatworm Mitochondrial</option>
-      <option value="15">Blepharisma Macronuclear</option>
-      <option value="16">Chlorophycean Mitochondrial</option>
-      <option value="21">Trematode Mitochondrial</option>
-      <option value="22">Scenedesmus obliquus</option>
-      <option value="23">Thraustochytrium Mitochondrial</option>
-    </param>
-    <param name="minsize" size="10" type="text" value="30">
-      <label>Minimum nucleotide size of ORF to report</label>
-    </param>
-    <param name="maxsize" size="10" type="text" value="1000000">
-      <label>Maximum nucleotide size of ORF to report</label>
-    </param>
-    <param name="find" type="select">
-      <label>What to output</label>
-      <option value="0">Translation of regions between STOP codons</option>
-      <option value="1">Translation of regions between START and STOP codons</option>
-      <option value="2">Nucleic sequences between STOP codons</option>
-      <option value="3">Nucleic sequences between START and STOP codons</option>
-      <option value="4">Nucleotides flanking START codons</option>
-      <option value="5">Nucleotides flanking initial STOP codons</option>
-      <option value="6">Nucleotides flanking ending STOP codons</option>
-    </param>
-    <param name="methionine" type="select">
-      <label>All START codons to code for Methionine</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="circular" type="select">
-      <label>Circular sequence</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="reverse" type="select">
-      <label>Find ORFs in the reverse complement</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="flanking" size="10" type="text" value="100">
-      <label>Number of flanking nucleotides to output</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <!-- <option value="gff">GFF (m)</option> -->
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="minsize" value="30"/>
-      <param name="maxsize" value="1000000"/>
-      <param name="find" value="0"/>
-      <param name="methionine" value="yes"/>
-      <param name="circular" value="no"/>
-      <param name="reverse" value="yes"/>
-      <param name="table" value="0"/>
-      <param name="flanking" value="100"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_getorf_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/getorf.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_helixturnhelix.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-<tool id="EMBOSS: helixturnhelix43" name="helixturnhelix" version="5.0.0">
-  <description>Report nucleic acid binding motifs</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>helixturnhelix -sequence $input1 -outfile $out_file1 -mean $mean -sd $sd -minsd $minsd -eightyseven $eightyseven -rformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="mean" size="10" type="text" value="238.71">
-      <label>Mean value</label>
-    </param>
-    <param name="sd" size="10" type="text" value="293.61">
-      <label>Standard Deviation value</label>
-    </param>
-    <param name="minsd" size="10" type="text" value="2.5">
-      <label>Minimum SD</label>
-    </param>
-    <param name="eightyseven" type="select">
-      <label>Use the old (1987) weight data</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="motif">Motif</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="regions">Regions</option>
-      <option value="seqtable">SeqTable</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="table">Table</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="motif" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="mean" value="238.71"/>
-      <param name="sd" value="293.61"/>
-      <param name="minsd" value="2.5"/>
-      <param name="eightyseven" value="no"/>
-      <param name="out_format1" value="excel"/>
-      <output name="out_file1" file="emboss_helixturnhelix_out.tabular"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/helixturnhelix.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_hmoment.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-<tool id="EMBOSS: hmoment44" name="hmoment" version="5.0.0">
-  <description>Hydrophobic moment calculation</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>hmoment -seqall $input1 -outfile $out_file1 -window $window -aangle $aangle -graph png -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="window" size="5" type="text" value="10">
-      <label>Window</label>
-    </param>
-    <param name="aangle" size="5" type="text" value="100">
-      <label>Alpha helix angle (degrees)</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="hmoment" name="out_file1" />
-  </outputs>
-    <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="window" value="10"/>
-      <param name="aangle" value="100"/>
-      <output name="out_file1" file="emboss_hmoment_out.hmoment"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/hmoment.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_iep.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-<tool id="EMBOSS: iep45" name="iep" version="5.0.0">
-  <description>Calculates the isoelectric point of a protein</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>iep -sequence $input1 -outfile $out_file1 -step $step -amino $amino -graph png -termini $termini -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="step" size="5" type="text" value=".5">
-      <label>Step value for pH</label>
-    </param>
-    <param name="amino" size="5" type="text" value="1">
-      <label>Number of N-termini</label>
-    </param>
-    <param name="termini" type="select">
-      <label>Include charge at N and C terminus</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="iep" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="step" value="0.5"/>
-      <param name="amino" value="1"/>
-      <param name="termini" value="yes"/>
-      <output name="out_file1" file="emboss_iep_out.iep"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/iep.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_infoseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-<tool id="EMBOSS: infoseq46" name="infoseq" version="5.0.0">
-  <!-- info contains file information always -->
-  <description>Displays some simple information about sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>infoseq -sequence $input1 -outfile $out_file1 -html $html_out1 -heading $heading -usa $usa -name $disname -accession $accession -gi $gi -version $version -type $type -length $length -pgc
-  $pgc -description $description -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="html_out1" type="select">
-      <label>Format output as an HTML table</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="heading" type="select">
-      <label>Display column headings</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="usa" type="select">
-      <label>Display the USA of the sequence</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="disname" type="select">
-      <label>Display 'name' column</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="accession" type="select">
-      <label>Display 'accession' column</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="gi" type="select">
-      <label>Display 'GI' column</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="version" type="select">
-      <label>Display 'version' column</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="type" type="select">
-      <label>Display 'type' column</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="length" type="select">
-      <label>Display 'length' column</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="pgc" type="select">
-      <label>Display 'percent GC content' column</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="description" type="select">
-      <label>Display 'description' column</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="txt" name="out_file1" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/infoseq.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_infoseq_wrapper.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-#! /usr/bin/perl -w
-use strict;
-
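-# The wrapper is handed the full infoseq command line as its arguments;
-# run it, then report an html extension to Galaxy when $ARGV[6] (the
-# value passed to -html) is 'yes'.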
-my $cmd_string = join (" ",@ARGV);
-my $results = `$cmd_string`;
-if ($ARGV[6]=~/yes/)
-{
-	print "Extension: html\n";
-}
--- a/tools/emboss_5/emboss_isochore.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-<tool id="EMBOSS: isochore47" name="isochore" version="5.0.0">
-  <description>Plots isochores in large DNA sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl isochore -sequence $input1 -outfile $ofile2 -goutfile $ofile1 -graph png -window $window -shift $shift -auto</command>
-  <!--  <command interpreter="perl">emboss_single_outputfile_wrapper.pl isochore -sequence $input1 -goutfile $ofile1 -graph png -window $window -shift $shift -auto</command>-->
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="window" size="4" type="text" value="1000">
-      <label>Window size</label>
-    </param>
-    <param name="shift" size="4" type="text" value="100">
-      <label>Shift increment</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="ofile1" />
-    <data format="isochore" name="ofile2" />
-  </outputs>
-  <!-- <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="window" value="1000"/>
-      <param name="shift" value="100"/>
-      <output name="ofile1" file="emboss_isochore_out.isochore"/> 
-      <output name="ofile2" file="emboss_isochore_out.isochore"/>
-    </test>
-         <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="window" value="1000"/>
-      <param name="shift" value="100"/>
-      <output name="ofile2" file="emboss_isochore_out.isochore"/>
-    </test> 
-  </tests>-->
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
-**Syntax**
-
-This application plots GC content over a sequence. It is intended for large sequences such as complete chromosomes or large genomic contigs, although interesting results can also be obtained from shorter sequences. You can view the original documentation here_.    
-
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/isochore.html
-
-- Both **Window size** and **Shift increment** are integers.
-
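-A minimal sketch of the same windowed G+C calculation (illustrative only, not
-part of EMBOSS; the function and its parameter names are hypothetical)::
-
-    def gc_fraction(seq, window=1000, shift=100):
-        """Yield (window centre, G+C fraction) for each window."""
-        seq = seq.upper()
-        for start in range(0, len(seq) - window + 1, shift):
-            win = seq[start:start + window]
-            yield start + window // 2, (win.count('G') + win.count('C')) / float(window)
-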
------
-
-**Example**
-
-- Input sequences::
-
-    >hg18_dna range=chrX:151073054-151073376 5'pad=0 3'pad=0 revComp=FALSE strand=? repeatMasking=none
-    TTTATGTCTATAATCCTTACCAAAAGTTACCTTGGAATAAGAAGAAGTCA
-    GTAAAAAGAAGGCTGTTGTTCCGTGAAATACTGTCTTTATGCCTCAGATT
-    TGGAGTGCTCAGAGCCTCTGCAGCAAAGATTTGGCATGTGTCCTAGGCCT
-    GCTCAGAGCAGCAAATCCCACCCTCTTGGAGAATGAGACTCATAGAGGGA
-    CAGCTCCCTCCTCAGAGGCTTCTCTAATGGGACTCCAAAGAGCAAACACT
-    CAGCCCCATGAGGACTGGCCAGGCCAAGTGGTGTGTGGGAACAGGGAGCA
-    GCGGTTTCCAAGAGGATACAGTA
-
-- Output data file::
-
-    Position	Percent G+C 1 .. 323
-    80	0.422
-    112	0.460
-    144	0.509
-    176	0.534
-    208	0.553
-    240	0.553
-
-- Output graphics file:
-
-.. image:: ./static/emboss_icons/isochore.png
-
-  </help>
-</tool>
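
The quantity isochore tabulates is simply the G+C fraction of a sliding window. A minimal standalone sketch of that calculation in Python, assuming a plain sequence string (the function name and the 1-based start positions are illustrative; the real tool adjusts window placement before reporting positions, but the core statistic is this)::

    def gc_windows(seq, window=1000, shift=100):
        """Yield (position, G+C fraction) pairs over a sliding window."""
        seq = seq.upper()
        for start in range(0, len(seq) - window + 1, shift):
            win = seq[start:start + window]
            gc = sum(base in "GC" for base in win)  # True counts as 1
            yield start + 1, gc / window
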
--- a/tools/emboss_5/emboss_lindna.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-<tool id="EMBOSS: lindna48" name="lindna" version="5.0.0">
-  <!-- tool produces memory error in ajmem.c -->
-  <description>Draws linear maps of DNA constructs</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>lindna -infile $input1 -graphout png -goutfile $out_file1 -ruler $ruler -blocktype $blocktype -maxgroups $maxgroups -maxlabels $maxlabels -intersymbol $intersymbol -intercolour $intercolour
-  -interticks $interticks -gapsize $gapsize -ticklines $ticklines -textheight $textheight -textlength $textlength -margin $margin -tickheight $tickheight -blockheight $blockheight -rangeheight
-  $rangeheight -gapgroup $gapgroup -postext $postext -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="ruler" type="select">
-      <label>Draw a ruler</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="blocktype" type="select">
-      <label>Type of blocks</label>
-      <option value="Filled">Filled blocks</option>
-      <option value="Open">Open blocks</option>
-      <option value="Outline">Black border</option>
-    </param>
-    <param name="maxgroups" size="4" type="text" value="20">
-      <label>Maximum number of groups</label>
-    </param>
-    <param name="maxlabels" size="6" type="text" value="10000">
-      <label>Maximum number of labels</label>
-    </param>
-    <param name="intersymbol" type="select">
-      <label>Type of junctions between blocks</label>
-      <option value="Straight">Straight</option>
-    </param>
-    <param name="intercolour" type="select">
-      <label>Colour of junctions between blocks</label>
-      <option value="1">Red</option>
-      <option value="0">Black</option>
-      <option value="2">Yellow</option>
-      <option value="3">Green</option>
-      <option value="4">Aquamarine</option>
-      <option value="5">Pink</option>
-      <option value="6">Wheat</option>
-      <option value="7">Grey</option>
-      <option value="8">Brown</option>
-      <option value="9">Blue</option>
-      <option value="10">Blue-violet</option>
-      <option value="11">Cyan</option>
-      <option value="12">Turquoise</option>
-      <option value="13">Magenta</option>
-      <option value="14">Salmon</option>
-      <option value="15">White</option>
-    </param>
-    <param name="interticks" type="select">
-      <label>Horizontal junctions between ticks</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="gapsize" size="6" type="text" value="500">
-      <label>Interval between ticks in the ruler</label>
-    </param>
-    <param name="ticklines" type="select">
-      <label>Vertical lines at the ruler's ticks</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="textheight" size="4" type="text" value="1.0">
-      <label>Height of text. Enter a number &lt;1.0 or &gt;1.0 to decrease or increase the size, respectively</label>
-    </param>
-    <param name="textlength" size="4" type="text" value="1.0">
-      <label>Length of text. Enter a number &lt;1.0 or &gt;1.0 to decrease or increase the size, respectively</label>
-    </param>
-    <param name="margin" size="4" type="text" value="1.0">
-      <label>Width of left margin. This is the region to the left of the groups where the names of the groups are displayed. Enter a number &lt;1.0 or &gt;1.0 to decrease or increase the size,
-      respectively</label>
-    </param>
-    <param name="tickheight" size="4" type="text" value="1.0">
-      <label>Height of ticks. Enter a number &lt;1.0 or &gt;1.0 to decrease or increase the size, respectively</label>
-    </param>
-    <param name="blockheight" size="4" type="text" value="1.0">
-      <label>Height of blocks. Enter a number &lt;1.0 or &gt;1.0 to decrease or increase the size, respectively</label>
-    </param>
-    <param name="rangeheight" size="4" type="text" value="1.0">
-      <label>Height of range ends. Enter a number &lt;1.0 or &gt;1.0 to decrease or increase the size, respectively</label>
-    </param>
-    <param name="gapgroup" size="4" type="text" value="1.0">
-      <label>Space between groups. Enter a number &lt;1.0 or &gt;1.0 to decrease or increase the size, respectively</label>
-    </param>
-    <param name="postext" size="4" type="text" value="1.0">
-      <label>Space between text and ticks, blocks, and ranges. Enter a number &lt;1.0 or &gt;1.0 to decrease or increase the size, respectively</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/lindna.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_marscan.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-<tool id="EMBOSS: marscan49" name="marscan" version="5.0.0">
-  <description>Finds MAR/SAR sites in nucleic sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>marscan -sequence $input1 -outfile $out_file1 -rformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="gff">GFF</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="regions">Regions</option>
-      <option value="seqtable">SeqTable</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="table">Table</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="gff" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="1.fasta"/>
-      <param name="out_format1" value="excel"/>
-      <output name="out_file1" file="emboss_marscan_out.tabular"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/marscan.html
-  </help>
-</tool>
\ No newline at end of file
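
Like most tools in this set, marscan pairs an output-format select with a ``<code file="emboss_format_corrector.py" />`` hook; note that the test above expects a ``.tabular`` file when ``excel`` is chosen. The hook's job is to retype the output dataset to match the selected format. A hypothetical sketch of such a pre-job hook, assuming Galaxy's old-style ``exec_before_job`` entry point (the signature, the ``out_format1`` key, and the rename table are illustrative, not the repository's actual file)::

    def exec_before_job(app, inp_data, out_data, param_dict, tool=None):
        """Retype each output dataset to the user's chosen format."""
        chosen = param_dict.get("out_format1", "fasta")
        # Some EMBOSS format names differ from Galaxy datatype names.
        renames = {"excel": "tabular", "text": "txt"}
        for dataset in out_data.values():
            dataset.extension = renames.get(chosen, chosen)
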
--- a/tools/emboss_5/emboss_maskfeat.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-<tool id="EMBOSS: maskfeat50" name="maskfeat" version="5.0.0">
-  <description>Mask off features of a sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>maskfeat -sequence $input1 -outseq $out_file1 -type "$type" -tolower $tolower -maskchar "$maskchar" -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="type" size="50" type="text" value="repeat*">
-      <label>Feature to mask</label>
-    </param>
-    <param name="tolower" type="select">
-      <label>Mask features by converting to lowercase</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="maskchar" size="1" type="text" value="N">
-      <label>Character to mask with</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="type" value="repeat*"/>
-      <param name="tolower" value="no"/>
-      <param name="maskchar" value="N"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_maskfeat_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/maskfeat.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_maskseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-<tool id="EMBOSS: maskseq51" name="maskseq" version="5.0.0">
-  <description>Mask off regions of a sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>maskseq -sequence $input1 -outseq $out_file1 -regions "$regions" -tolower $tolower -maskchar "$maskchar" -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="regions" size="50" type="text" value="">
-      <label>Regions to mask (Example: 1-99)</label>
-    </param>
-    <param name="tolower" type="select">
-      <label>Mask by converting to lowercase</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="maskchar" size="1" type="text" value="N">
-      <label>Character to use when masking</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="regions" value="1-3"/>
-      <param name="tolower" value="no"/>
-      <param name="maskchar" value="N"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_maskseq_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/maskseq.html
-  </help>
-</tool>
\ No newline at end of file
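
The ``Regions to mask`` field takes 1-based inclusive spans, comma-separated, e.g. ``1-3,7-8``. A small self-contained sketch of the masking maskseq performs on such spans (function name and error handling are illustrative)::

    def mask_regions(seq, regions, maskchar="N", tolower=False):
        """Mask 1-based inclusive spans like '1-99,150-200' in a sequence string."""
        out = list(seq)
        for span in regions.split(","):
            start, end = (int(x) for x in span.strip().split("-"))
            for i in range(start - 1, min(end, len(out))):
                out[i] = out[i].lower() if tolower else maskchar
        return "".join(out)

    print(mask_regions("ACGTACGTACGT", "1-3,7-8"))  # NNNTACNNACGT
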
--- a/tools/emboss_5/emboss_matcher.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-<tool id="EMBOSS: matcher52" name="matcher" version="5.0.0">
-  <description>Finds the best local alignments between two sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>matcher -asequence $input1 -bsequence $input2 -outfile $out_file1 -alternatives $alternatives -gapopen $gapopen -gapextend $gapextend -aformat3 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence 1</label>
-    </param>
-    <param format="data" name="input2" type="data">
-      <label>Sequence 2</label>
-    </param>
-    <param name="alternatives" size="4" type="text" value="1">
-      <label>Number of alternative matches</label>
-    </param>
-    <param name="gapopen" size="4" type="text" value="16">
-      <label>Gap penalty</label>
-    </param>
-    <param name="gapextend" size="4" type="text" value="4">
-      <label>Gap length (extension) penalty</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Alignment File Format</label>
-      <option value="markx0">Markx0 (p)</option>
-      <option value="simple">Simple (m)</option>
-      <option value="fasta">FASTA (m)</option>
-      <option value="msf">MSF (m)</option>
-      <option value="srs">SRS (m)</option>
-      <option value="pair">Pair (p)</option>
-      <option value="markx1">Markx1 (p)</option>
-      <option value="markx2">Markx2 (p)</option>
-      <option value="markx3">Markx3 (p)</option>
-      <option value="markx10">Markx10 (p)</option>
-      <option value="srspair">SRS pair (p)</option>
-      <option value="score">Score (p)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="markx0" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="input2" value="1.fasta"/>
-      <param name="alternatives" value="1"/>
-      <param name="gapopen" value="16"/>
-      <param name="gapextend" value="4"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_matcher_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/matcher.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_megamerger.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-<tool id="EMBOSS: megamerger53" name="megamerger" version="5.0.0">
-  <description>Merge two large overlapping nucleic acid sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>megamerger -asequence $input1 -bsequence $input2 -outseq $out_file1 -outfile $out_file2 -wordsize $wordsize -prefer $prefer -osformat3 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence 1</label>
-    </param>
-    <param format="data" name="input2" type="data">
-      <label>Sequence 2</label>
-    </param>
-    <param name="wordsize" size="4" type="text" value="20">
-      <label>Word size</label>
-    </param>
-    <param name="prefer" type="select">
-      <label>Prefer first sequence when mismatches occur</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-    <data format="txt" name="out_file2" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/megamerger.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_merger.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-<tool id="EMBOSS: merger54" name="merger" version="5.0.0">
-  <description>Merge two overlapping nucleic acid sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>merger -asequence $input1 -bsequence $input2 -outseq $out_file1 -outfile $out_file2 -gapopen $gapopen -gapextend $gapextend -osformat4 $out_format1 -aformat3 $out_format2 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence 1</label>
-    </param>
-    <param format="data" name="input2" type="data">
-      <label>Sequence 2</label>
-    </param>
-    <param name="gapopen" size="4" type="text" value="50.0">
-      <label>Gap opening penalty</label>
-    </param>
-    <param name="gapextend" size="4" type="text" value="5.0">
-      <label>Gap extension penalty</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-    <param name="out_format2" type="select">
-      <label>Output Alignment File Format</label>
-      <option value="simple">Simple (m)</option>
-      <option value="fasta">FASTA (m)</option>
-      <option value="msf">MSF (m)</option>
-      <option value="srs">SRS (m)</option>
-      <option value="pair">Pair (p)</option>
-      <option value="markx0">Markx0 (p)</option>
-      <option value="markx1">Markx1 (p)</option>
-      <option value="markx2">Markx2 (p)</option>
-      <option value="markx3">Markx3 (p)</option>
-      <option value="markx10">Markx10 (p)</option>
-      <option value="srspair">SRS pair (p)</option>
-      <option value="score">Score (p)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-    <data format="simple" name="out_file2" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/merger.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_msbar.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-<tool id="EMBOSS: msbar55" name="msbar" version="5.0.0">
-  <description>Mutate sequence beyond all recognition</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>msbar -sequence $input1 -outseq $out_file1 -count $count -point $point -block $block -codon $codon -inframe $inframe -minimum $minimum -maximum $maximum -osformat2 $out_format1
-  -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequence 1</label>
-    </param>
-    <param name="count" size="4" type="text" value="1">
-      <label>Number of times to perform the mutation operations</label>
-    </param>
-    <param name="point" type="select">
-      <label>Types of point mutations to perform</label>
-      <option value="0">None</option>
-      <option value="1">Any of the following</option>
-      <option value="2">Insertions</option>
-      <option value="3">Deletions</option>
-      <option value="4">Changes</option>
-      <option value="5">Duplications</option>
-      <option value="6">Moves</option>
-    </param>
-    <param name="block" type="select">
-      <label>Types of block mutations to perform</label>
-      <option value="0">None</option>
-      <option value="1">Any of the following</option>
-      <option value="2">Insertions</option>
-      <option value="3">Deletions</option>
-      <option value="4">Changes</option>
-      <option value="5">Duplications</option>
-      <option value="6">Moves</option>
-    </param>
-    <param name="codon" type="select">
-      <label>Types of codon mutations to perform. These are only done if the sequence is nucleic</label>
-      <option value="0">None</option>
-      <option value="1">Any of the following</option>
-      <option value="2">Insertions</option>
-      <option value="3">Deletions</option>
-      <option value="4">Changes</option>
-      <option value="5">Duplications</option>
-      <option value="6">Moves</option>
-    </param>
-    <param name="inframe" type="select">
-      <label>Do 'codon' and 'block' operations in frame</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="minimum" size="4" type="text" value="1">
-      <label>Minimum size for a block mutation</label>
-    </param>
-    <param name="maximum" size="4" type="text" value="10">
-      <label>Maximum size for a block mutation</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="count" value="1"/>
-      <param name="point" value="0"/>
-      <param name="block" value="0"/>
-      <param name="codon" value="0"/>
-      <param name="inframe" value="no"/>
-      <param name="minimum" value="1"/>
-      <param name="maximum" value="10"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_msbar_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset must contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/msbar.html
-  </help>
-</tool>
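
For a concrete sense of the point operations listed above, here is a toy standalone version of three of them in Python; msbar itself also offers duplications and moves, plus block and codon modes (everything below is illustrative, not the EMBOSS implementation, and assumes a non-empty uppercase sequence)::

    import random

    def point_mutate(seq, op, alphabet="ACGT"):
        """Apply one random point mutation: 'insert', 'delete', or 'change'."""
        i = random.randrange(len(seq))
        if op == "insert":
            return seq[:i] + random.choice(alphabet) + seq[i:]
        if op == "delete":
            return seq[:i] + seq[i + 1:]
        if op == "change":
            # Draw from the alphabet minus the current base so the site really changes.
            return seq[:i] + random.choice(alphabet.replace(seq[i], "")) + seq[i + 1:]
        raise ValueError(op)
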
--- a/tools/emboss_5/emboss_multiple_outputfile_wrapper.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-#! /usr/bin/perl -w
-use strict;
-
-# Join all arguments into a single shell command and run it, capturing
-# the EMBOSS program's standard output.
-my $cmd_string = join (" ",@ARGV);
-my $results = `$cmd_string`;
-my @files = split("\n",$results);
-foreach my $thisLine (@files)
-{
-	# EMBOSS announces each extra file it writes as "Created <name>";
-	# pull out the trailing filename and flag it for Galaxy.
-	if ($thisLine =~ /Created /)
-	{
-		$thisLine =~ /[\w.]+$/;
-		$thisLine =$&;
-		print "outfile: $thisLine\n";
-	}
-	else
-	{
-		# Pass all other program output through unchanged.
-		print $thisLine,"\n";
-	}
-}
--- a/tools/emboss_5/emboss_needle.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,126 +0,0 @@
-<tool id="EMBOSS: needle56" name="needle" version="5.0.0">
-  <description>Needleman-Wunsch global alignment</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>needle -asequence $input1 -bsequence $input2 -outfile $out_file1 -gapopen $gapopen -gapextend $gapextend -brief $brief -aformat3 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequence 1</label>
-    </param>
-    <param format="fasta" name="input2" type="data">
-      <label>Sequence 2</label>
-    </param>
-    <param name="gapopen" size="4" type="text" value="10.0">
-      <label>Gap open penalty</label>
-    </param>
-    <param name="gapextend" size="4" type="text" value="0.5">
-      <label>Gap extension penalty</label>
-    </param>
-    <param name="brief" type="select">
-      <label>Brief identity and similarity</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Alignment File Format</label>
-      <option value="srspair">SRS pair (p)</option>
-      <option value="simple">Simple (m)</option>
-      <option value="fasta">FASTA (m)</option>
-      <option value="msf">MSF (m)</option>
-      <option value="srs">SRS (m)</option>
-      <option value="pair">Pair (p)</option>
-      <option value="markx0">Markx0 (p)</option>
-      <option value="markx1">Markx1 (p)</option>
-      <option value="markx2">Markx2 (p)</option>
-      <option value="markx3">Markx3 (p)</option>
-      <option value="markx10">Markx10 (p)</option>
-      <option value="score">Score (p)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="needle" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="input2" value="1.fasta"/>
-      <param name="gapopen" value="10"/>
-      <param name="gapextend" value="0.5"/>
-      <param name="brief" value="yes"/>
-      <param name="out_format1" value="score"/>
-      <output name="out_file1" file="emboss_needle_out.score"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-needle reads any two sequences of the same type (DNA or protein).
-
------
-
-**Syntax**
-
-This tool uses the Needleman-Wunsch global alignment algorithm to find the optimum alignment (including gaps) of two sequences when considering their entire length. 
-
-- **Optimal alignment:** Dynamic programming methods ensure the optimal global alignment by exploring all possible alignments and choosing the best.
-
-- **The Needleman-Wunsch algorithm** is a member of the class of algorithms that can calculate the best score and alignment in the order of mn steps (where 'n' and 'm' are the lengths of the two sequences).
-
-- **Gap open penalty:** [10.0 for any sequence] The gap open penalty is the score taken away when a gap is created. The best value depends on the choice of comparison matrix. The default value assumes you are using the EBLOSUM62 matrix for protein sequences, and the EDNAFULL matrix for nucleotide sequences. (Floating point number from 1.0 to 100.0)
-
-- **Gap extension penalty:** [0.5 for any sequence] The gap extension penalty is added to the standard gap penalty for each base or residue in the gap. This is how long gaps are penalized. Usually you will expect a few long gaps rather than many short gaps, so the gap extension penalty should be lower than the gap penalty. An exception is where one or both sequences are single reads with possible sequencing errors, in which case you would expect many single base gaps. You can get this result by setting the gap open penalty to zero (or very low) and using the gap extension penalty to control gap scoring. (Floating point number from 0.0 to 10.0)
-
-You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/needle.html
-
------
-
-**Example**
-
-- Input File::
-
-    >hg18_dna range=chrX:151073054-151073136 5'pad=0 3'pad=0 revComp=FALSE strand=? repeatMasking=none
-    TTTATGTCTATAATCCTTACCAAAAGTTACCTTGGAATAAGAAGAAGTCA
-    GTAAAAAGAAGGCTGTTGTTCCGTGAAATACTG
-
-- If the above file is used as both Sequence 1 and Sequence 2, with Gap open penalty 10.0, Gap extension penalty 0.5, Brief identity and similarity set to Yes, and Output Alignment File Format set to SRS pair, the output file is::
-
-    ########################################
-    # Program: needle
-    # Rundate: Mon Apr 02 2007 14:23:16
-    # Align_format: srspair
-    # Report_file: ./database/files/dataset_7.dat
-    ########################################
-     
-    #=======================================
-    #
-    # Aligned_sequences: 2
-    # 1: hg18_dna
-    # 2: hg18_dna
-    # Matrix: EDNAFULL
-    # Gap_penalty: 10.0
-    # Extend_penalty: 0.5
-    #
-    # Length: 83
-    # Identity:      83/83 (100.0%)
-    # Similarity:    83/83 (100.0%)
-    # Gaps:           0/83 ( 0.0%)
-    # Score: 415.0
-    #
-    #=======================================
-
-    hg18_dna           1 TTTATGTCTATAATCCTTACCAAAAGTTACCTTGGAATAAGAAGAAGTCA     50
-                       ||||||||||||||||||||||||||||||||||||||||||||||||||
-    hg18_dna           1 TTTATGTCTATAATCCTTACCAAAAGTTACCTTGGAATAAGAAGAAGTCA     50
-        
-    hg18_dna          51 GTAAAAAGAAGGCTGTTGTTCCGTGAAATACTG     83
-                       |||||||||||||||||||||||||||||||||
-    hg18_dna          51 GTAAAAAGAAGGCTGTTGTTCCGTGAAATACTG     83
-        
-    #---------------------------------------
-    #---------------------------------------
-
-  </help>
-</tool>
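
To make the 'order of mn steps' claim concrete: the algorithm fills an (n+1) x (m+1) score matrix in which each cell depends only on its three neighbours. A minimal scoring sketch with a single linear gap penalty (needle itself uses the affine gap open/extend pair described above together with a substitution matrix; the match/mismatch values below are the EDNAFULL diagonal and off-diagonal, the gap value is illustrative)::

    def nw_score(a, b, match=5, mismatch=-4, gap=-10):
        """Needleman-Wunsch global alignment score, linear gap penalty, O(n*m) time."""
        prev = [j * gap for j in range(len(b) + 1)]
        for i, x in enumerate(a, 1):
            cur = [i * gap]
            for j, y in enumerate(b, 1):
                diag = prev[j - 1] + (match if x == y else mismatch)
                cur.append(max(diag, prev[j] + gap, cur[j - 1] + gap))
            prev = cur
        return prev[-1]

Note how the example output above checks out: a gap-free self-alignment of 83 bases at 5 per match scores 83 * 5 = 415.0.
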
--- a/tools/emboss_5/emboss_newcpgreport.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-<tool id="EMBOSS: newcpgreport57" name="newcpgreport" version="5.0.0">
-  <description>Report CpG rich areas</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>newcpgreport -sequence $input1 -window $window -shift $shift -minlen $minlen -minpc $minpc -outfile $out_file1 -minoe $minoe -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="window" size="4" type="text" value="100">
-      <label>Window Size</label>
-    </param>
-    <param name="shift" size="4" type="text" value="1">
-      <label>Step size (shift)</label>
-    </param>
-    <param name="minlen" size="4" type="text" value="200">
-      <label>Minimum length</label>
-    </param>
-    <param name="minoe" size="4" type="text" value="0.6">
-      <label>Minimum average observed to expected ratio</label>
-    </param>
-    <param name="minpc" size="4" type="text" value="50.0">
-      <label>Minimum average percentage of G plus C</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="newcpgreport" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="window" value="100"/>
-      <param name="shift" value="1"/>
-      <param name="minlen" value="200"/>
-      <param name="minoe" value="0.6"/>
-      <param name="minpc" value="50.0"/>
-      <output name="out_file1" file="emboss_newcpgreport_out.newcpgreport"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/newcpgreport.html
-  </help>
-</tool>
\ No newline at end of file
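
The ``minoe`` and ``minpc`` thresholds are the classic CpG-island criteria: the observed CpG count versus the count expected from the window's C and G composition, and the window's G+C percentage. A standalone sketch of the per-window statistics (function name illustrative)::

    def cpg_stats(window):
        """Return (observed/expected CpG ratio, percent G+C) for one window."""
        w = window.upper()
        c, g = w.count("C"), w.count("G")
        expected = (c * g) / len(w)
        oe = w.count("CG") / expected if expected else 0.0
        return oe, 100.0 * (c + g) / len(w)

With the defaults above, a stretch qualifies roughly when its windows keep the ratio at or above 0.6 and G+C at or above 50% for at least 200 bases.
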
--- a/tools/emboss_5/emboss_newcpgseek.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-<tool id="EMBOSS: newcpgseek58" name="newcpgseek" version="5.0.0">
-  <description>Reports CpG rich regions</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>newcpgseek -sequence $input1 -outfile $out_file1 -score $score -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="score" size="4" type="text" value="17">
-      <label>CpG score</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="newcpgseek" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="score" value="17"/>
-      <output name="out_file1" file="emboss_newcpgseek_out.newcpgseek"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-The input dataset must contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/newcpgseek.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_newseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-<tool id="EMBOSS: newseq59" name="newseq" version="5.0.0">
-  <description>Type in a short new sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>newseq -outseq $out_file1 -name "$seqname" -description "$description" -type $type -sequence "$sequence" -osformat5 $out_format1 -auto</command>
-  <inputs>
-    <param name="seqname" size="10" type="text" value="">
-      <label>Name of the sequence</label>
-    </param>
-    <param name="description" size="10" type="text" value="">
-      <label>Description of the sequence</label>
-    </param>
-    <param name="type" type="select">
-      <label>Type of sequence</label>
-      <option value="N">Nucleic</option>
-      <option value="P">Protein</option>
-    </param>
-    <param name="sequence" size="50" type="text" value="">
-      <label>The sequence itself</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="seqname" value="cytoc"/>
-      <param name="description" value="fragment of cytochrome c"/>
-      <param name="type" value="N"/>
-      <param name="sequence" value="KKKEERADLIAY"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_newseq_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/newseq.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_noreturn.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-<tool id="EMBOSS: noreturn60" name="noreturn" version="5.0.0">
-  <description>Removes carriage return from ASCII files</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>noreturn -infile $input1 -outfile $out_file1 -system $system -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="system" type="select">
-      <label>Target operating system for end-of-line format</label>
-      <option value="unix">Unix/Linux systems</option>
-      <option value="pc">Windows/DOS</option>
-      <option value="mac">Apple Macintosh</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="noreturn" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="system" value="unix"/>
-      <output name="out_file1" file="emboss_noreturn_out.noreturn"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/noreturn.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_notseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-<tool id="EMBOSS: notseq61" name="notseq" version="5.0.0">
-  <description>Exclude a set of sequences and write out the remaining ones</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>notseq -sequence $input1 -outseq $out_file1 -exclude "$exclude" -osformat3 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="exclude" size="50" type="text" value="">
-      <label>Exclude list</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="exclude" value="AA"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_notseq_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset must contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/notseq.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_nthseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-<tool id="EMBOSS: nthseq62" name="nthseq" version="5.0.0">
-  <description>Writes one sequence from a multiple set of sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>nthseq -sequence $input1 -outseq $out_file1 -number $number -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="number" size="4" type="text" value="1">
-      <label>Number of the sequence to output</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="number" value="1"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_nthseq_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset must contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/nthseq.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_octanol.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-<tool id="EMBOSS: octanol63" name="octanol" version="5.0.0">
-  <!-- graphics output with filename, no functional tests added -->  
-  <description>Displays protein hydropathy</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl octanol -sequence $input1 -graph png -goutfile $out_file1 -width $width -octanolplot $octanolplot -interfaceplot $interfaceplot
-  -differenceplot $differenceplot -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="width" size="4" type="text" value="19">
-      <label>Window size</label>
-    </param>
-    <param name="octanolplot" type="select">
-      <label>Display the octanol plot</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="interfaceplot" type="select">
-      <label>Display the interface plot</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="differenceplot" type="select">
-      <label>Display the difference plot</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/octanol.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_oddcomp.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-<tool id="EMBOSS: oddcomp64" name="oddcomp" version="5.0.0">
-  <!-- output contains file location info, commented out functional tests -->
-  <description>Find protein sequence regions with a biased composition</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>oddcomp -sequence $input1 -infile $input2 -outfile $out_file1 -window $window -ignorebz $ignorebz -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param format="data" name="input2" type="data">
-      <label>This is a file in the format of the output produced by 'compseq' that is used to set the minimum frequencies of words in this analysis</label>
-    </param>
-    <param name="window" size="4" type="text" value="30">
-      <label>This is the size of the window in which to count. Thus if you want to count frequencies in a 40 aa stretch you should enter 40 here</label>
-    </param>
-    <param name="ignorebz" type="select">
-      <label>The amino acid code B represents Asparagine or Aspartic acid and the code Z represents Glutamine or Glutamic acid. These are not commonly used codes and you may wish not to count words
-      containing them, just noting them in the count of 'Other' words</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="oddcomp" name="out_file1" />
-  </outputs>
-<!--    <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="input2" value="emboss_compseq_out.compseq"/>
-      <param name="window" value="30"/>
-      <param name="ignorebz" value="yes"/>
-      <output name="out_file1" file="emboss_oddcomp_out.oddcomp"/>
-    </test>
-  </tests> -->
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/oddcomp.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_palindrome.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-<tool id="EMBOSS: palindrome65" name="palindrome" version="5.0.0">
-  <description>Looks for inverted repeats in a nucleotide sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>palindrome -sequence $input1 -outfile $out_file1 -minpallen $minpallen -maxpallen $maxpallen -gaplimit $gaplimit -nummismatches $nummismatches -overlap $overlap -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="minpallen" size="4" type="text" value="10">
-      <label>Minimum length of palindrome</label>
-    </param>
-    <param name="maxpallen" size="4" type="text" value="100">
-      <label>Maximum length of palindrome</label>
-    </param>
-    <param name="gaplimit" size="4" type="text" value="100">
-      <label>Maximum gap between repeated regions</label>
-    </param>
-    <param name="nummismatches" size="4" type="text" value="0">
-      <label>Number of mismatches allowed</label>
-    </param>
-    <param name="overlap" type="select">
-      <label>Report overlapping matches</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="palindrome" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="minpallen" value="10"/>
-      <param name="maxpallen" value="100"/>
-      <param name="gaplimit" value="100"/>
-      <param name="nummismatches" value="0"/>
-      <param name="overlap" value="yes"/>
-      <output name="out_file1" file="emboss_palindrome_out.palindrome"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-The input dataset must contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/palindrome.html
-  </help>
-</tool>
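
Here an inverted repeat is a stem: a region followed, possibly after a gap, by its own reverse complement. A naive standalone scan conveying the idea (exact-match only and quadratic, unlike the real tool, which also allows mismatches and extends matches maximally; names are illustrative)::

    COMP = str.maketrans("ACGT", "TGCA")

    def revcomp(s):
        return s.translate(COMP)[::-1]

    def inverted_repeats(seq, minlen=10, gaplimit=100):
        """Yield (stem_start, partner_start) where a minlen stem re-occurs
        reverse-complemented within gaplimit bases downstream."""
        seq = seq.upper()
        for i in range(len(seq) - 2 * minlen + 1):
            target = revcomp(seq[i:i + minlen])
            hit = seq.find(target, i + minlen, i + 2 * minlen + gaplimit)
            if hit != -1:
                yield i, hit
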
--- a/tools/emboss_5/emboss_pasteseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-<tool id="EMBOSS: pasteseq66" name="pasteseq" version="5.0.0">
-  <description>Insert one sequence into another</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>pasteseq -asequence $input2 -bsequence $input1 -outseq $out_file1 -pos $pos -osformat3 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Main sequence</label>
-    </param>
-    <param format="fasta" name="input2" type="data">
-      <label>Sequence to insert</label>
-    </param>
-    <param name="pos" size="4" type="text" value="0">
-      <label>Position to insert after</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="1.fasta"/>
-      <param name="input2" value="2.fasta"/>
-      <param name="pos" value="0"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_pasteseq_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input datasets need to contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/pasteseq.html
-  </help>
-</tool>
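
The one subtle parameter above is -pos, the position to insert after (0 places the insert before the first residue). A two-liner showing that semantics, as a sketch rather than the EMBOSS implementation:

    # Illustrative only: pasteseq's insert-after-position semantics.
    def paste_seq(main, insert, pos):
        return main[:pos] + insert + main[pos:]

    print(paste_seq("AAAA", "ggg", 0))  # gggAAAA
    print(paste_seq("AAAA", "ggg", 2))  # AAgggAA
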
--- a/tools/emboss_5/emboss_patmatdb.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-<tool id="EMBOSS: patmatdb67" name="patmatdb" version="5.0.0">
-  <description>Search a protein sequence with a motif</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>patmatdb -sequence $input1 -outfile $out_file1 -motif "$motif" -rformat3 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Main sequence</label>
-    </param>
-    <param name="motif" size="4" type="text" value="">
-      <label>Motif to search for</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="dbmotif">DbMotif</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="regions">Regions</option>
-      <option value="seqtable">SeqTable</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="table">Table</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="dbmotif" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="motif" value="aa"/>
-      <param name="out_format1" value="excel"/>
-      <output name="out_file1" file="emboss_patmatdb_out.tabular"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/patmatdb.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_pepcoil.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-<tool id="EMBOSS: pepcoil68" name="pepcoil" version="5.0.0">
-  <description>Predicts coiled coil regions</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>pepcoil -sequence $input1 -outfile $out_file1 -window $window -coil $coil -frame $frame -other $other -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="window" size="4" type="text" value="28">
-      <label>Window size</label>
-    </param>
-    <param name="coil" type="select">
-      <label>Report coiled coil regions</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="frame" type="select">
-      <label>Show coil frameshifts</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="other" type="select">
-      <label>Report non coiled coil regions</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="pepcoil" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="window" value="28"/>
-      <param name="coil" value="yes"/>
-      <param name="frame" value="yes"/>
-      <param name="other" value="yes"/>
-      <output name="out_file1" file="emboss_pepcoil_out.pepcoil"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/pepcoil.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_pepinfo.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-<tool id="EMBOSS: pepinfo69" name="pepinfo" version="5.0.0">
-  <!-- puts file info in output files -->
-  <description>Plots simple amino acid properties in parallel</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl pepinfo -sequence $input1 -outfile $out_file1 -goutfile $out_file2 -graph png -hwindow $hwindow $plot_type -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="hwindow" size="4" type="text" value="9">
-      <label>Window size for hydropathy averaging</label>
-    </param>
-    <param name="plot_type" type="select">
-      <label>Choose a plot type</label>
-      <option value="-generalplot yes -hydropathyplot no">Histogram of general properties</option>
-      <option value="-generalplot no -hydropathyplot yes">Graphs of hydropathy</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="pepinfo" name="out_file1" />
-    <data format="png" name="out_file2" />
-  </outputs>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/pepinfo.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_pepnet.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-<tool id="EMBOSS: pepnet70" name="pepnet" version="5.0.0">
-  <!-- graphical output file with path information -->
-  <description>Displays proteins as a helical net</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>pepnet -sequence $input1 -graph png -goutfile $out_file1 -squares $squares -diamonds $diamonds -octags $octags -amphipathic $amphipathic -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="squares" size="10" type="text" value="ILVM">
-      <label>Residues to mark with squares</label>
-    </param>
-    <param name="diamonds" size="10" type="text" value="DENQST">
-      <label>Residues to mark with diamonds</label>
-    </param>
-    <param name="octags" size="10" type="text" value="HKR ">
-      <label>Residues to mark with octagons</label>
-    </param>
-    <param name="amphipathic" type="select">
-      <label>If this is true then the residues ACFGILMVWY are marked as squares and all other residues are unmarked. This overrides any other markup that you may have specified</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/pepnet.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_pepstats.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-<tool id="EMBOSS: pepstats71" name="pepstats" version="5.0.0">
-  <description>Protein statistics</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>pepstats -sequence $input1 -outfile $out_file1 -termini $termini -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="termini" type="select">
-      <label>Include charge at N and C terminus</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="pepstats" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="termini" value="yes"/>
-      <output name="out_file1" file="emboss_pepstats_out.pepstats"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/pepstats.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_pepwheel.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-<tool id="EMBOSS: pepwheel72" name="pepwheel" version="5.0.0">
-  <!-- produces png file -->
-  <description>Shows protein sequences as helices</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl pepwheel -sequence $input1 -graph png -goutfile $out_file1 -squares $squares -diamonds $diamonds -octags $octags -amphipathic
-  $amphipathic -steps $steps -turns $turns -wheel $wheel -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="steps" size="10" type="text" value="18">
-      <label>Steps: the number of residues plotted per turn is this value divided by the 'turns' value</label>
-    </param>
-    <param name="turns" size="10" type="text" value="5">
-      <label>Turns: the number of residues plotted per turn is the 'steps' value divided by this value</label>
-    </param>
-    <param name="squares" size="10" type="text" value="ILVM">
-      <label>Residues to mark with squares</label>
-    </param>
-    <param name="diamonds" size="10" type="text" value="DENQST">
-      <label>Residues to mark with diamonds</label>
-    </param>
-    <param name="octags" size="10" type="text" value="HKR">
-      <label>Residues to mark with octagons</label>
-    </param>
-    <param name="wheel" type="select">
-      <label>Plot the wheel</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="amphipathic" type="select">
-      <label>If this is true then the residues ACFGILMVWY are marked as squares and all other residues are unmarked. This overrides any other markup that you may have specified</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/pepwheel.html
-  </help>
-</tool>
\ No newline at end of file
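
As the two labels above state, the steps and turns values jointly fix the wheel geometry: with the defaults of 18 and 5, 18/5 = 3.6 residues are plotted per turn, i.e. successive residues sit 100 degrees apart, the classical alpha-helix figures:

    # Illustrative only: the geometry implied by pepwheel's defaults.
    steps, turns = 18, 5
    print(steps / turns)           # 3.6 residues per turn
    print(360.0 * turns / steps)   # 100.0 degrees between successive residues
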
--- a/tools/emboss_5/emboss_pepwindow.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-<tool id="EMBOSS: pepwindow73" name="pepwindow" version="5.0.0">
-  <!-- produces png file -->
-  <description>Displays protein hydropathy</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl pepwindow -sequence $input1 -graph png -goutfile $out_file1 -length $length -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="length" size="10" type="text" value="7">
-      <label>Window size</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/pepwindow.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_pepwindowall.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-<tool id="EMBOSS: pepwindowall74" name="pepwindowall" version="5.0.0">
-  <!-- produces png file -->
-  <description>Displays protein hydropathy of a set of sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl pepwindowall -sequence $input1 -graph png -goutfile $out_file1 -length $length -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="length" size="10" type="text" value="7">
-      <label>Window size</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/pepwindowall.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_plotcon.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-<tool id="EMBOSS: plotcon75" name="plotcon" version="5.0.0">
-  <!-- produces png file -->
-  <description>Plot quality of conservation of a sequence alignment</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl plotcon -sequences $input1 -graph png -goutfile $out_file1 -winsize $winsize -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="winsize" size="10" type="text" value="4">
-      <label>Number of columns to average alignment quality over</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/plotcon.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_plotorf.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-<tool id="EMBOSS: plotorf76" name="plotorf" version="5.0.0">
-  <!-- produces png file output -->
-  <description>Plot potential open reading frames</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl plotorf -sequence $input1 -graph png -goutfile $out_file1 -start $start -stop $stop -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="start" size="15" type="text" value="ATG">
-      <label>Start codons</label>
-    </param>
-    <param name="stop" size="15" type="text" value="TAA">
-      <label>Stop codons</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <!--  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="start" value="ATG"/>
-      <param name="stop" value="TAA"/>
-      <output name="out_file1" file="emboss_plotorf_out.png"/>
-    </test>
-  </tests> -->
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/plotorf.html
-  </help>
-</tool>
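
plotorf visualises open reading frames between the chosen start and stop codons. A naive sketch of the underlying scan, one strand and a single start/stop codon to match the -start/-stop defaults (the EMBOSS tool plots all six frames):

    # Illustrative only: the ORF scan, not the plot.
    def orfs(seq, start="ATG", stop="TAA"):
        seq = seq.upper()
        found = []
        for i in range(len(seq) - 2):
            if seq[i:i + 3] != start:
                continue
            for j in range(i + 3, len(seq) - 2, 3):  # stay in frame
                if seq[j:j + 3] == stop:
                    found.append((i, j + 3))         # span includes the stop codon
                    break
        return found

    print(orfs("CCATGAAATAACC"))  # [(2, 11)]
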
--- a/tools/emboss_5/emboss_polydot.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-<tool id="EMBOSS: polydot77" name="polydot" version="5.0.0">
-  <!-- produces png file, not added functional tests -->
-  <description>Displays all-against-all dotplots of a set of sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl polydot -sequence $input1 -graph png -goutfile $output2 -outfeat $output1 -wordsize $wordsize -boxit $boxit -dumpfeat yes -gap
-  $gap -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="wordsize" size="10" type="text" value="6">
-      <label>Word size</label>
-    </param>
-    <param name="boxit" type="select">
-      <label>Draw a box around each dotplot</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="gap" size="10" type="text" value="10">
-      <label>Size of gap</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="polydot" name="output1" />
-    <data format="png" name="output2" />
-  </outputs>
-<!--    <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="wordsize" value="6"/>
-      <param name="boxit" value="yes"/>
-      <param name="gap" value="10"/>
-      <output name="output1" file="emboss_polydot_out.png"/>
-    </test>
-  </tests>-->
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/polydot.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_preg.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-<tool id="EMBOSS: preg78" name="preg" version="5.0.0">
-  <description>Regular expression search of a protein sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>preg -sequence $input1 -outfile $out_file1 -pattern "$pattern" -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence 1</label>
-    </param>
-    <param name="pattern" size="50" type="text" value="(ACD)">
-      <label>Regular expression pattern</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="preg" name="out_file1" />
-  </outputs>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/preg.html
-  </help>
-</tool>
\ No newline at end of file
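
preg is conceptually a regular-expression scan over a protein sequence. The same idea in Python, noting that the re dialect differs in detail from EMBOSS's pattern syntax:

    # Illustrative only; positions printed 1-based, the convention EMBOSS uses.
    import re

    protein = "MKTACDLLACDF"
    for m in re.finditer("(ACD)", protein):
        print(m.start() + 1, m.end(), m.group())  # 4 6 ACD / 9 11 ACD
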
--- a/tools/emboss_5/emboss_prettyplot.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,113 +0,0 @@
-<tool id="EMBOSS: prettyplot79" name="prettyplot" version="5.0.0">
-  <!-- produces png output with file name -->
-  <description>Displays aligned sequences, with colouring and boxing</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>prettyplot -sequences $input1 -graph png -goutfile $out_file1 -residuesperline $residuesperline -resbreak $resbreak -ccolours $ccolours -cidentity $cidentity -csimilarity $csimilarity
-  -cother $cother -docolour $docolour -gtitle $title -pair $pair -identity $identity -box $box -boxcol $boxcol -boxcolval $boxcolval -name $name -maxnamelen $maxnamelen -number $number -listoptions
-  $listoptions -consensus $consensus -collision $collision -alternative $alternative -showscore $showscore -portrait $portrait -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="residuesperline" size="10" type="text" value="50">
-      <label>The number of residues to be displayed on each line</label>
-    </param>
-    <param name="resbreak" size="10" type="text" value="50">
-      <label>Residues before a space</label>
-    </param>
-    <param name="ccolours" type="select">
-      <label>Colour residues by their consensus value</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="cidentity" size="10" type="text" value="RED">
-      <label>Colour to display identical residues</label>
-    </param>
-    <param name="csimilarity" size="10" type="text" value="GREEN">
-      <label>Colour to display similar residues</label>
-    </param>
-    <param name="cother" size="10" type="text" value="BLACK">
-      <label>Colour to display other residues</label>
-    </param>
-    <param name="docolour" type="select">
-      <label>Colour residues by property table (oily, amide, etc.)</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="title" type="select">
-      <label>Display the title</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="pair" size="10" type="text" value="1.5,1.0,0.5">
-      <label>Values to represent identical, similar, and related residues</label>
-    </param>
-    <param name="identity" size="10" type="text" value="0">
-      <label>Only match those which are identical in all sequences</label>
-    </param>
-    <param name="box" type="select">
-      <label>Display prettyboxes</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="boxcol" type="select">
-      <label>Colour the background in the boxes</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="boxcolval" size="10" type="text" value="GREY">
-      <label>Colour to be used for background</label>
-    </param>
-    <param name="name" type="select">
-      <label>Display the sequence names</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="maxnamelen" size="10" type="text" value="10">
-      <label>Margin size for the sequence name</label>
-    </param>
-    <param name="number" type="select">
-      <label>Display the residue number</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="listoptions" type="select">
-      <label>Display the date and options used</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="consensus" type="select">
-      <label>Display the consensus</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="collision" type="select">
-      <label>Allow collisions in calculating consensus</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="alternative" type="select">
-      <label>Use alternative collisions routine</label>
-      <option value="0">Normal collision check</option>
-      <option value="1">Checks identical scores with the max score found. So if any other residue matches the identical score then a collision has occurred</option>
-      <option value="2">If another residue has a greater than or equal to matching score and these do not match then a collision has occurred</option>
-      <option value="3">Checks all those not in the current consensus.If any of these give a top score for matching or identical scores then a collision has occured</option>
-    </param>
-    <param name="showscore" size="10" type="text" value="-1">
-      <label>Print residue scores</label>
-    </param>
-    <param name="portrait" type="select">
-      <label>Set page to Portrait</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/prettyplot.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_prettyseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-<tool id="EMBOSS: prettyseq80" name="prettyseq" version="5.0.0">
-  <description>Output sequence with translated ranges</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>prettyseq -sequence $input1 -outfile $out_file1 -ruler $ruler -plabel $plabel -nlabel $nlabel -width $width -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="ruler" type="select">
-      <label>Add a ruler</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="plabel" type="select">
-      <label>Number translations</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="nlabel" type="select">
-      <label>Number DNA sequence</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="width" size="4" type="text" value="60">
-      <label>Width of screen</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="prettyseq" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="ruler" value="yes"/>
-      <param name="plabel" value="yes"/>
-      <param name="nlabel" value="yes"/>
-      <param name="width" value="60"/>
-      <output name="out_file1" file="emboss_prettyseq_out.prettyseq"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/prettyseq.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_primersearch.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-<tool id="EMBOSS: primersearch81" name="primersearch" version="5.0.0">
-  <description>Searches DNA sequences for matches with primer pairs</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>primersearch -seqall $input1 -infile $input2 -outfile $out_file1 -mismatchpercent $mismatchpercent -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Main sequences</label>
-    </param>
-    <param format="data" name="input2" type="data">
-      <label>Primer file</label>
-    </param>
-    <param name="mismatchpercent" size="4" type="text" value="0">
-      <label>Allowed percent mismatch</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="primersearch" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="input2" value="emboss_primersearch.fasta"/>
-      <param name="mismatchpercent" value="0"/>
-      <output name="out_file1" file="emboss_primersearch_out.primersearch"/>
-    </test>
-  </tests>
-  <help>
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/primersearch.html
-  </help>
-</tool>
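
-mismatchpercent is a per-primer budget rather than a global one. How a percentage translates into a mismatch count for a single primer, with the rounding rule an assumption of this sketch (EMBOSS's exact rule may differ):

    # Illustrative only.
    primer = "ACGTACGTACGTACGTACGT"              # a 20-mer
    mismatchpercent = 10
    print(len(primer) * mismatchpercent // 100)  # 2 mismatches tolerated
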
--- a/tools/emboss_5/emboss_revseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,77 +0,0 @@
-<tool id="EMBOSS: revseq82" name="revseq" version="5.0.0">
-  <description>Reverse and complement a sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>revseq -sequence $input1 -outseq $out_file1 -reverse $reverse -complement $complement -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="reverse" type="select">
-      <label>Reverse the sequence</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="complement" type="select">
-      <label>Complement the sequence</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="reverse" value="yes"/>
-      <param name="complement" value="yes"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_revseq_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/revseq.html
-  </help>
-</tool>
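
revseq exposes reversing and complementing as independent switches, so all four combinations are possible. The two operations spelled out:

    # Illustrative only: the semantics of -reverse / -complement.
    COMP = str.maketrans("ACGTacgt", "TGCAtgca")

    def revseq(seq, reverse=True, complement=True):
        if complement:
            seq = seq.translate(COMP)
        if reverse:
            seq = seq[::-1]
        return seq

    print(revseq("ATGCCC"))                    # GGGCAT (reverse complement)
    print(revseq("ATGCCC", complement=False))  # CCCGTA (reverse only)
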
--- a/tools/emboss_5/emboss_seqmatchall.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-<tool id="EMBOSS: seqmatchall83" name="seqmatchall" version="5.0.0">
-  <description>All-against-all comparison of a set of sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>seqmatchall -sequence $input1 -outfile $out_file1 -wordsize $wordsize -aformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="wordsize" size="4" type="text" value="4">
-      <label>Word size</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Alignment File Format</label>
-      <option value="match">Match (m)</option>
-      <option value="simple">Simple (m)</option>
-      <option value="fasta">FASTA (m)</option>
-      <option value="msf">MSF (m)</option>
-      <option value="srs">SRS (m)</option>
-      <option value="pair">Pair (p)</option>
-      <option value="markx0">Markx0 (p)</option>
-      <option value="markx1">Markx1 (p)</option>
-      <option value="markx2">Markx2 (p)</option>
-      <option value="markx3">Markx3 (p)</option>
-      <option value="markx10">Markx10 (p)</option>
-      <option value="srspair">SRS pair (p)</option>
-      <option value="score">Score (p)</option>
-      <option value="seqmatchall">Seqmatchall Output File</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="seqmatchall" name="out_file1" />.
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="wordsize" value="2"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_seqmatchall_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/seqmatchall.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_seqret.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,70 +0,0 @@
-<tool id="EMBOSS: seqret84" name="seqret" version="5.0.0">
-  <description>Reads and writes sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>seqret -sequence $input1 -outseq $out_file1 -feature $feature -firstonly $firstonly -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="feature" type="select">
-      <label>Use feature information</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="firstonly" type="select">
-      <label>Read one sequence and stop</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="feature" value="no"/>
-      <param name="firstonly" value="no"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_seqret_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/seqret.html
-  </help>
-</tool>
\ No newline at end of file
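
seqret is, at heart, a format converter. For readers more at home in Python, Biopython's SeqIO offers the same read/convert/write idea; Biopython and the file names here are assumptions of this sketch, since the wrapper above calls the EMBOSS binary itself:

    # Illustrative only.
    from Bio import SeqIO

    count = SeqIO.convert("input.gb", "genbank", "output.fasta", "fasta")
    print(f"converted {count} records")
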
--- a/tools/emboss_5/emboss_showfeat.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
-<tool id="EMBOSS: showfeat85" name="showfeat" version="5.0.0">
-  <!-- tool gives memory errors -->
-  <description>Show features of a sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>showfeat -sequence $input1 -outfile $out_file1 -matchsource "$matchsource" -matchtype "$matchtype" -matchtag "$matchtag" -matchvalue "$matchvalue" -sort $sort -annotation "$annotation" -id
-  $id -description "$description" -scale "$scale" -width "$width" -collapse $collapse -forward $forward -reverse $reverse -unknown $unknown -strand $strand -source $source -position $position -type
-  $type -tags $tags -values $values -stricttags $stricttags -html $html_out1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="html_out1" type="select">
-      <label>Format output as an HTML table</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="matchsource" size="50" type="text" value="*">
-      <label>Feature source to match</label>
-    </param>
-    <param name="matchtype" size="50" type="text" value="*">
-      <label>Feature type to match</label>
-    </param>
-    <param name="matchtag" size="50" type="text" value="*">
-      <label>Feature tags to match</label>
-    </param>
-    <param name="matchvalue" size="50" type="text" value="*">
-      <label>Tag values to match</label>
-    </param>
-    <param name="sort" type="select">
-      <label>Sort by</label>
-      <option value="start">Start position</option>
-      <option value="source">Source</option>
-      <option value="type">Type</option>
-      <option value="nosort">No sorting done</option>
-      <option value="join">Join coding regions together</option>
-    </param>
-    <param name="annotation" size="50" type="text" value="">
-      <label>Regions to annotate by marking</label>
-    </param>
-    <param name="id" type="select">
-      <label>Display the ID name of the sequence</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="description" type="select">
-      <label>Display the description of the sequence</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="scale" type="select">
-      <label>Display the scale line</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="width" size="50" type="text" value="60">
-      <label>Screen width</label>
-    </param>
-    <param name="collapse" type="select">
-      <label>Collapse features</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="forward" type="select">
-      <label>Display forward sense features</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="reverse" type="select">
-      <label>Display reverse sense features</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="unknown" type="select">
-      <label>Display unknown sense features</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="strand" type="select">
-      <label>Display the strand of the features</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="source" type="select">
-      <label>Display the source of the features</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="position" type="select">
-      <label>Display the start and end position of the features</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="type" type="select">
-      <label>Display the type of the features</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="tags" type="select">
-      <label>Display the tags and values of the features</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="values" type="select">
-      <label>Display the tag values of the features</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="stricttags" type="select">
-      <label>Display only those tag/value pairs in a feature that match the specified tag and value</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="showfeat" name="out_file1" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/showfeat.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_shuffleseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-<tool id="EMBOSS: shuffleseq87" name="shuffleseq" version="5.0.0">
-  <!-- produces random outputs each time -->
-  <description>Shuffles a set of sequences maintaining composition</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>shuffleseq -sequence $input1 -outseq $out_file1 -shuffle "$shuffle" -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="shuffle" size="4" type="text" value="1">
-      <label>Number of shuffles</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/shuffleseq.html
-  </help>
-</tool>
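
"Maintaining composition" means the output is a permutation of the input characters, nothing more. A sketch of that invariant:

    # Illustrative only: a permutation cannot change the character counts.
    import random

    def shuffle_seq(seq):
        chars = list(seq)
        random.shuffle(chars)
        return "".join(chars)

    s = "AAACCGT"
    t = shuffle_seq(s)
    assert sorted(s) == sorted(t)  # identical composition
    print(t)
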
--- a/tools/emboss_5/emboss_sigcleave.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-<tool id="EMBOSS: sigcleave88" name="sigcleave" version="5.0.0">
-  <description>Reports protein signal cleavage sites</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>sigcleave -sequence $input1 -outfile $out_file1 -minweight "$minweight" -prokaryote $prokaryote -rformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="minweight" size="4" type="text" value="3.5">
-      <label>Minimum scoring weight value for the predicted cleavage site</label>
-    </param>
-    <param name="prokaryote" type="select">
-      <label>Specifies that the sequence is prokaryotic and changes the default scoring data file</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="motif">Motif</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="regions">Regions</option>
-      <option value="seqtable">SeqTable</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="table">Table</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="motif" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="minweight" value="3.5"/>
-      <param name="prokaryote" value="no"/>
-      <param name="out_format1" value="excel"/>
-      <output name="out_file1" file="emboss_sigcleave_out.tabular"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/sigcleave.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_single_outputfile_wrapper.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-#! /usr/bin/perl -w
-use strict;
-use File::Copy;
-use File::Spec;   # provides splitpath()
-
-# Run the EMBOSS command passed on our command line and capture its stdout.
-my $cmd_string = join (" ", @ARGV);
-my $results = `$cmd_string`;
-my @files = split("\n", $results);
-# Argument 7 is the Galaxy-assigned output path.
-my $fileNameOut = $ARGV[6];
-my ($drive, $outputDir, $file) = File::Spec->splitpath( $fileNameOut );
-
-foreach my $thisLine (@files)
-{
-	if ($thisLine =~ /Created /)
-	{
-		# Keep only the trailing filename from the "Created <file>" message.
-		$thisLine =~ /[\w.]+$/;
-		$thisLine = $&;
-		# There is only one file to move, so we can quit after finding it;
-		# exit 0 so the caller does not read success as failure.
-		move($drive.$outputDir.$thisLine, $fileNameOut);
-		exit(0);
-	}
-	else
-	{
-		print $thisLine, "\n";
-	}
-}
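
The script's contract, in short: run the EMBOSS command it was handed, scan stdout for a "Created <file>" line, and move that file onto the Galaxy-assigned output path (argument 7). A rough Python rendering of the same contract; only the "Created " convention comes from the script above, the rest is a sketch:

    # Illustrative only.
    import shutil, subprocess, sys
    from pathlib import Path

    cmd = sys.argv[1:]
    out_path = Path(cmd[6])                 # the slot Perl reads as $ARGV[6]
    result = subprocess.run(cmd, capture_output=True, text=True)
    for line in result.stdout.splitlines():
        if line.startswith("Created "):
            created = out_path.parent / line.split()[-1]
            shutil.move(created, out_path)  # only one file to move
            break
        print(line)
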
--- a/tools/emboss_5/emboss_sirna.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,118 +0,0 @@
-<tool id="EMBOSS: sirna89" name="sirna" version="5.0.0">
-  <description>Finds siRNA duplexes in mRNA</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>sirna -sequence $input1 -outfile $ofile1 -outseq $ofile2 -poliii $poliii -aa $aa -tt $tt -polybase $polybase -context $context -rformat2 $out_format1 -osformat3 $out_format2
-  -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="poliii" type="select">
-      <label>Select only the 21 base probes that start with a purine (Pol III expression vectors)</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="aa" type="select">
-      <label>Select only those 23 base regions that start with AA</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="tt" type="select">
-      <label>Select only those 23 base regions that end with TT</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="polybase" type="select">
-      <label>Also report 23 base regions that contain a run of 4 or more of the same base, rather than only repeat-free regions</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="context" type="select">
-      <label>Displays the whole 23 bases of the region with the first two bases in brackets</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="table">Table</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="regions">Regions</option>
-      <option value="seqtable">SeqTable</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-    <param name="out_format2" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="table" name="ofile1" />
-    <data format="fasta" name="ofile2" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="poliii" value="no"/>
-      <param name="aa" value="no"/>
-      <param name="tt" value="no"/>
-      <param name="polybase" value="yes"/>
-      <param name="context" value="no"/>
-      <param name="mismatchpercent" value="0"/>
-      <param name="out_format1" value="gff"/>
-      <param name="out_format2" value="fasta"/>
-      <output name="ofile2" file="emboss_sirna_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to contain sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/sirna.html
-  </help>
-</tool>
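
The -aa, -tt and -polybase switches above are plain sequence filters on each candidate 23-mer. Their logic spelled out, as a reading of the labels rather than of the EMBOSS source:

    # Illustrative only.
    import re

    def passes(region, aa=False, tt=False, polybase=True):
        ok = True
        if aa:
            ok = ok and region.startswith("AA")
        if tt:
            ok = ok and region.endswith("TT")
        if not polybase:                    # exclude runs of 4+ of one base
            ok = ok and not re.search(r"(.)\1{3}", region)
        return ok

    print(passes("AACGATCGATCGATCGATCGATT", aa=True, tt=True))  # True
    print(passes("AAAAGATCGATCGATCGATCGTT", polybase=False))    # False
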
--- a/tools/emboss_5/emboss_sixpack.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,162 +0,0 @@
-<tool id="EMBOSS: sixpack90" name="sixpack" version="5.0.0">
-  <!-- tool adds file description and timestamp to output data -->
-  <description>Display a DNA sequence with 6-frame translation and ORFs</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>sixpack -sequence $input1 -outfile $ofile1 -outseq $ofile2 -table $table -firstorf $firstorf -lastorf $lastorf -mstart $mstart -reverse $reverse -orfminsize $orfminsize -uppercase
-  "$uppercase" -number $number -width "$width" -length "$length" -margin "$margin" -name $disp_name -description $description -offset "$offset" -html $html_out1 -osformat $out_format2 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="table" type="select">
-      <label>Code to use</label>
-      <option value="0">Standard</option>
-      <option value="1">Standard (with alternative initiation codons)</option>
-      <option value="2">Vertebrate Mitochondrial</option>
-      <option value="3">Yeast Mitochondrial</option>
-      <option value="4">Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma</option>
-      <option value="5">Invertebrate Mitochondrial</option>
-      <option value="6">Ciliate Macronuclear and Dasycladacean</option>
-      <option value="9">Echinoderm Mitochondrial</option>
-      <option value="10">Euplotid Nuclear</option>
-      <option value="11">Bacterial</option>
-      <option value="12">Alternative Yeast Nuclear</option>
-      <option value="13">Ascidian Mitochondrial</option>
-      <option value="14">Flatworm Mitochondrial</option>
-      <option value="15">Blepharisma Macronuclear</option>
-      <option value="16">Chlorophycean Mitochondrial</option>
-      <option value="21">Trematode Mitochondrial</option>
-      <option value="22">Scenedesmus obliquus</option>
-      <option value="23">Thraustochytrium Mitochondrial</option>
-    </param>
-    <param name="firstorf" type="select">
-      <label>Count the beginning of a sequence as a possible ORF</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="lastorf" type="select">
-      <label>Count the end of a sequence as a possible ORF</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="mstart" type="select">
-      <label>Displays only ORFs starting with an M</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="reverse" type="select">
-      <label>Display the translation of the DNA sequence in the 3 reverse frames</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="orfminsize" size="4" type="text" value="1">
-      <label>Minimum size of Open Reading Frames (ORFs) to display in the translations</label>
-    </param>
-    <param name="uppercase" size="50" type="text" value="">
-      <label>Regions to put in uppercase</label>
-    </param>
-    <param name="number" type="select">
-      <label>Number the sequence at the beginning and the end of each line</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="width" size="4" type="text" value="60">
-      <label>Number of nucleotides displayed on each line</label>
-    </param>
-    <param name="length" size="4" type="text" value="0">
-      <label>Line length of page</label>
-    </param>
-    <param name="margin" size="4" type="text" value="10">
-      <label>Margin around sequence for numbering</label>
-    </param>
-    <param name="disp_name" type="select">
-      <label>Display the ID name of the sequence</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="description" type="select">
-      <label>Display the description of the sequence</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="offset" size="4" type="text" value="1">
-      <label>Number from which you want the DNA sequence to be numbered</label>
-    </param>
-    <param name="html_out1" type="select">
-      <label>Format output as an HTML table</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="out_format2" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="sixpack" name="ofile1" />
-    <data format="fasta" name="ofile2" />
-  </outputs>
-<!--    <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="table" value="0"/>
-      <param name="firstorf" value="no"/>
-      <param name="lastorf" value="no"/>
-      <param name="mstart" value="no"/>
-      <param name="reverse" value="no"/>
-      <param name="orfminsize" value="1"/>
-      <param name="uppercase" value=""/>
-      <param name="number" value="no"/>
-      <param name="width" value="60"/>
-      <param name="length" value="0"/>
-      <param name="margin" value="10"/>
-      <param name="disp_name" value="no"/>
-      <param name="description" value="no"/>
-      <param name="offset" value="1"/>
-      <param name="html_out1" value="no"/>
-      <param name="out_format2" value="fasta"/>
-      <output name="ofile2" file="emboss_sixpack_out.fasta"/>
-    </test>
-  </tests> -->
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/sixpack.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_skipseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-<tool id="EMBOSS: skipseq91" name="skipseq" version="5.0.0">
-  <description>Reads and writes sequences, skipping first few</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>skipseq -sequence $input1 -outseq $out_file1 -skip "$skip" -feature $feature -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="skip" size="4" type="text" value="0">
-      <label>Number of sequences to skip at start</label>
-    </param>
-    <param name="feature" type="select">
-      <label>Use feature information</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/skipseq.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_splitter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-<tool id="EMBOSS: splitter92" name="splitter" version="5.0.0">
-  <description>Split a sequence into (overlapping) smaller sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>splitter -sequence $input1 -outseq $out_file1 -size "$size" -overlap "$overlap" -addoverlap $addoverlap -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="size" size="10" type="text" value="10000">
-      <label>Size to split at</label>
-    </param>
-    <param name="overlap" size="4" type="text" value="0">
-      <label>Overlap between split sequences</label>
-    </param>
-    <param name="addoverlap" type="select">
-      <label>Add overlap to size</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="size" value="10000"/>
-      <param name="overlap" value="0"/>
-      <param name="addoverlap" value="no"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_splitter_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/splitter.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_supermatcher.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-<tool id="EMBOSS: supermatcher95" name="supermatcher" version="5.0.0">
-  <!-- puts file information in output report -->
-  <description>Match large sequences against one or more other sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>supermatcher -asequence $input1 -bsequence $input2 -gapopen "$gapopen" -gapextend "$gapextend" -width "$width" -wordlen "$wordlen" -outfile $ofile1 -errorfile $ofile2 -aformat3
-  $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Large sequences</label>
-    </param>
-    <param format="data" name="input2" type="data">
-      <label>Sequences to match</label>
-    </param>
-    <param name="gapopen" size="4" type="text" value="10.0">
-      <label>Gap opening penalty</label>
-    </param>
-    <param name="gapextend" size="4" type="text" value="0.5">
-      <label>Gap extension penalty</label>
-    </param>
-    <param name="width" size="4" type="text" value="16">
-      <label>Alignment width</label>
-    </param>
-    <param name="wordlen" size="4" type="text" value="6">
-      <label>Word length for initial matching</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Alignment File Format</label>
-      <option value="simple">Simple (m)</option>
-      <option value="fasta">FASTA (m)</option>
-      <option value="msf">MSF (m)</option>
-      <option value="srs">SRS (m)</option>
-      <option value="pair">Pair (p)</option>
-      <option value="markx0">Markx0 (p)</option>
-      <option value="markx1">Markx1 (p)</option>
-      <option value="markx2">Markx2 (p)</option>
-      <option value="markx3">Markx3 (p)</option>
-      <option value="markx10">Markx10 (p)</option>
-      <option value="srspair">SRS pair (p)</option>
-      <option value="score">Score (p)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="simple" name="ofile1" />
-    <data format="supermatcher" name="ofile2" />
-  </outputs>
-<!--    <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="input2" value="1.fasta"/>
-      <param name="gapopen" value="10.0"/>
-      <param name="gapextend" value="0.5"/>
-      <param name="width" value="16"/>
-      <param name="wordlen" value="6"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="ofile1" file="emboss_supermatcher_out.fasta"/>
-    </test>
-  </tests> -->
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/supermatcher.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_syco.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,197 +0,0 @@
-<tool id="EMBOSS: syco96" name="syco" version="5.0.0">
-  <!-- graphics output -->
-  <description>Synonymous codon usage Gribskov statistic plot</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl syco -sequence $input1 -graph png -goutfile $ofile1 -outfile $ofile2 -cfile $cfile -window "$window" -uncommon $uncommon -minimum "$minimum"
-  -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="cfile" type="select">
-      <label>Codon Usage File</label>
-      <option value="Ehum.cut">Ehum.cut</option>
-      <option value="Eyeastcai.cut">Eyeastcai.cut</option>
-      <option value="Eacc.cut">Eacc.cut</option>
-      <option value="Eadenovirus5.cut">Eadenovirus5.cut</option>
-      <option value="Eadenovirus7.cut">Eadenovirus7.cut</option>
-      <option value="Eaidlav.cut">Eaidlav.cut</option>
-      <option value="Eanasp.cut">Eanasp.cut</option>
-      <option value="Eani.cut">Eani.cut</option>
-      <option value="Eani_h.cut">Eani_h.cut</option>
-      <option value="Eanidmit.cut">Eanidmit.cut</option>
-      <option value="Easn.cut">Easn.cut</option>
-      <option value="Eath.cut">Eath.cut</option>
-      <option value="Eatu.cut">Eatu.cut</option>
-      <option value="Eavi.cut">Eavi.cut</option>
-      <option value="Ebja.cut">Ebja.cut</option>
-      <option value="Ebly.cut">Ebly.cut</option>
-      <option value="Ebme.cut">Ebme.cut</option>
-      <option value="Ebmo.cut">Ebmo.cut</option>
-      <option value="Ebna.cut">Ebna.cut</option>
-      <option value="Ebov.cut">Ebov.cut</option>
-      <option value="Ebovsp.cut">Ebovsp.cut</option>
-      <option value="Ebst.cut">Ebst.cut</option>
-      <option value="Ebsu.cut">Ebsu.cut</option>
-      <option value="Ebsu_h.cut">Ebsu_h.cut</option>
-      <option value="Ecac.cut">Ecac.cut</option>
-      <option value="Ecal.cut">Ecal.cut</option>
-      <option value="Eccr.cut">Eccr.cut</option>
-      <option value="Ecel.cut">Ecel.cut</option>
-      <option value="Echi.cut">Echi.cut</option>
-      <option value="Echicken.cut">Echicken.cut</option>
-      <option value="Echisp.cut">Echisp.cut</option>
-      <option value="Echk.cut">Echk.cut</option>
-      <option value="Echmp.cut">Echmp.cut</option>
-      <option value="Echnt.cut">Echnt.cut</option>
-      <option value="Echos.cut">Echos.cut</option>
-      <option value="Echzm.cut">Echzm.cut</option>
-      <option value="Echzmrubp.cut">Echzmrubp.cut</option>
-      <option value="Ecpx.cut">Ecpx.cut</option>
-      <option value="Ecre.cut">Ecre.cut</option>
-      <option value="Ecrisp.cut">Ecrisp.cut</option>
-      <option value="Ectr.cut">Ectr.cut</option>
-      <option value="Edayhoff.cut">Edayhoff.cut</option>
-      <option value="Eddi.cut">Eddi.cut</option>
-      <option value="Eddi_h.cut">Eddi_h.cut</option>
-      <option value="Edog.cut">Edog.cut</option>
-      <option value="Edro.cut">Edro.cut</option>
-      <option value="Edro_h.cut">Edro_h.cut</option>
-      <option value="Edrosophila.cut">Edrosophila.cut</option>
-      <option value="Eeca.cut">Eeca.cut</option>
-      <option value="Eeco.cut">Eeco.cut</option>
-      <option value="Eeco_h.cut">Eeco_h.cut</option>
-      <option value="Eecoli.cut">Eecoli.cut</option>
-      <option value="Ef1.cut">Ef1.cut</option>
-      <option value="Efish.cut">Efish.cut</option>
-      <option value="Efmdvpolyp.cut">Efmdvpolyp.cut</option>
-      <option value="Eham.cut">Eham.cut</option>
-      <option value="Ehha.cut">Ehha.cut</option>
-      <option value="Ehin.cut">Ehin.cut</option>
-      <option value="Ehma.cut">Ehma.cut</option>
-      <option value="Ehuman.cut">Ehuman.cut</option>
-      <option value="Ekla.cut">Ekla.cut</option>
-      <option value="Ekpn.cut">Ekpn.cut</option>
-      <option value="Ella.cut">Ella.cut</option>
-      <option value="Emac.cut">Emac.cut</option>
-      <option value="Emaize.cut">Emaize.cut</option>
-      <option value="Emam_h.cut">Emam_h.cut</option>
-      <option value="Emixlg.cut">Emixlg.cut</option>
-      <option value="Emouse.cut">Emouse.cut</option>
-      <option value="Emsa.cut">Emsa.cut</option>
-      <option value="Emse.cut">Emse.cut</option>
-      <option value="Emta.cut">Emta.cut</option>
-      <option value="Emtu.cut">Emtu.cut</option>
-      <option value="Emus.cut">Emus.cut</option>
-      <option value="Emussp.cut">Emussp.cut</option>
-      <option value="Emva.cut">Emva.cut</option>
-      <option value="Emze.cut">Emze.cut</option>
-      <option value="Emzecp.cut">Emzecp.cut</option>
-      <option value="Encr.cut">Encr.cut</option>
-      <option value="Eneu.cut">Eneu.cut</option>
-      <option value="Engo.cut">Engo.cut</option>
-      <option value="Eoncsp.cut">Eoncsp.cut</option>
-      <option value="Epae.cut">Epae.cut</option>
-      <option value="Epea.cut">Epea.cut</option>
-      <option value="Epet.cut">Epet.cut</option>
-      <option value="Epfa.cut">Epfa.cut</option>
-      <option value="Ephix174.cut">Ephix174.cut</option>
-      <option value="Ephv.cut">Ephv.cut</option>
-      <option value="Ephy.cut">Ephy.cut</option>
-      <option value="Epig.cut">Epig.cut</option>
-      <option value="Epolyomaa2.cut">Epolyomaa2.cut</option>
-      <option value="Epombe.cut">Epombe.cut</option>
-      <option value="Epombecai.cut">Epombecai.cut</option>
-      <option value="Epot.cut">Epot.cut</option>
-      <option value="Eppu.cut">Eppu.cut</option>
-      <option value="Epse.cut">Epse.cut</option>
-      <option value="Epsy.cut">Epsy.cut</option>
-      <option value="Epvu.cut">Epvu.cut</option>
-      <option value="Erab.cut">Erab.cut</option>
-      <option value="Erabbit.cut">Erabbit.cut</option>
-      <option value="Erabsp.cut">Erabsp.cut</option>
-      <option value="Erat.cut">Erat.cut</option>
-      <option value="Eratsp.cut">Eratsp.cut</option>
-      <option value="Erca.cut">Erca.cut</option>
-      <option value="Erhm.cut">Erhm.cut</option>
-      <option value="Eric.cut">Eric.cut</option>
-      <option value="Erle.cut">Erle.cut</option>
-      <option value="Erme.cut">Erme.cut</option>
-      <option value="Ersp.cut">Ersp.cut</option>
-      <option value="Esalsp.cut">Esalsp.cut</option>
-      <option value="Esau.cut">Esau.cut</option>
-      <option value="Esco.cut">Esco.cut</option>
-      <option value="Esgi.cut">Esgi.cut</option>
-      <option value="Eshp.cut">Eshp.cut</option>
-      <option value="Eshpsp.cut">Eshpsp.cut</option>
-      <option value="Esli.cut">Esli.cut</option>
-      <option value="Eslm.cut">Eslm.cut</option>
-      <option value="Esma.cut">Esma.cut</option>
-      <option value="Esmi.cut">Esmi.cut</option>
-      <option value="Esmu.cut">Esmu.cut</option>
-      <option value="Esoy.cut">Esoy.cut</option>
-      <option value="Espi.cut">Espi.cut</option>
-      <option value="Espn.cut">Espn.cut</option>
-      <option value="Espo.cut">Espo.cut</option>
-      <option value="Espo_h.cut">Espo_h.cut</option>
-      <option value="Espu.cut">Espu.cut</option>
-      <option value="Esta.cut">Esta.cut</option>
-      <option value="Esty.cut">Esty.cut</option>
-      <option value="Esus.cut">Esus.cut</option>
-      <option value="Esv40.cut">Esv40.cut</option>
-      <option value="Esyhsp.cut">Esyhsp.cut</option>
-      <option value="Esynsp.cut">Esynsp.cut</option>
-      <option value="Etbr.cut">Etbr.cut</option>
-      <option value="Etcr.cut">Etcr.cut</option>
-      <option value="Eter.cut">Eter.cut</option>
-      <option value="Etetsp.cut">Etetsp.cut</option>
-      <option value="Etob.cut">Etob.cut</option>
-      <option value="Etobcp.cut">Etobcp.cut</option>
-      <option value="Etom.cut">Etom.cut</option>
-      <option value="Etrb.cut">Etrb.cut</option>
-      <option value="Evco.cut">Evco.cut</option>
-      <option value="Ewht.cut">Ewht.cut</option>
-      <option value="Exel.cut">Exel.cut</option>
-      <option value="Exenopus.cut">Exenopus.cut</option>
-      <option value="Eyeast.cut">Eyeast.cut</option>
-      <option value="Eyen.cut">Eyen.cut</option>
-      <option value="Eysc.cut">Eysc.cut</option>
-      <option value="Eysc_h.cut">Eysc_h.cut</option>
-      <option value="Eyscmt.cut">Eyscmt.cut</option>
-      <option value="Eysp.cut">Eysp.cut</option>
-      <option value="Ezebrafish.cut">Ezebrafish.cut</option>
-      <option value="Ezma.cut">Ezma.cut</option>
-    </param>
-    <param name="window" size="4" type="text" value="30">
-      <label>Averaging window</label>
-    </param>
-    <param name="uncommon" type="select">
-      <label>Show common codon usage</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="minimum" size="4" type="text" value="0.15">
-      <label>Minimum value for a common codon</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="ofile1" />
-    <data format="syco" name="ofile2" />
-  </outputs>
- <!--   <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="cfile" value="Ehum.cut"/>
-      <param name="window" value="30"/>
-      <param name="uncommon" value="no"/>
-      <param name="minimum" value="0.15"/>
-      <output name="ofile2" file="emboss_syco_out.syco"/>
-    </test>
-  </tests> -->
-  <help>
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/syco.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_tcode.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-<tool id="EMBOSS: tcode97" name="tcode" version="5.0.0">
-  <description>Fickett TESTCODE statistic to identify protein-coding DNA</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>tcode -sequence $input1 -outfile $out_file1 -window "$window" -step "$step" -rformat $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="window" size="5" type="text" value="200">
-      <label>Window size</label>
-    </param>
-    <param name="step" size="5" type="text" value="3">
-      <label>Step size</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="table">Table</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="regions">Regions</option>
-      <option value="seqtable">SeqTable</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="table" name="out_file1" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/tcode.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_textsearch.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-<tool id="EMBOSS: textsearch98" name="textsearch" version="5.0.0">
-  <description>Search sequence documentation. Slow, use SRS and Entrez!</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>textsearch -sequence $input1 -outfile $out_file1 -pattern "$pattern" -casesensitive -heading $heading -usa $usa -accession $accession -name $search_name -description $description -html
-  $html_out1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="pattern" size="50" type="text" value="">
-      <label>Pattern to search for</label>
-    </param>
-    <param name="casesensitive" type="select">
-      <label>Do a case-sensitive search</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="heading" type="select">
-      <label>Display column headings</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="usa" type="select">
-      <label>Display the USA of the sequence</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="accession" type="select">
-      <label>Display accession column</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="search_name" type="select">
-      <label>Display name column</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="description" type="select">
-      <label>Display description column</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="html_out1" type="select">
-      <label>Format output as an HTML table</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="textsearch" name="out_file1" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/textsearch.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_tmap.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-<tool id="EMBOSS: tmap99" name="tmap" version="5.0.0">
-  <description>Displays membrane spanning regions</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl tmap -sequences $input1 -outfile $out_file1 -goutfile $out_file2 -graph png -rformat $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="seqtable ">SeqTable</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="regions">Regions</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="table">Table</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="seqtable" name="out_file1" />
-    <data format="png" name="out_file2" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/tmap.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_tranalign.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,84 +0,0 @@
-<tool id="EMBOSS: tranalign100" name="tranalign" version="5.0.0">
-  <description>Align nucleic coding regions given the aligned proteins</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>tranalign -asequence $input1 -bsequence $input2 -outseq $out_file1 -table $table -osformat3 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Nucleic Sequences</label>
-    </param>
-    <param format="data" name="input2" type="data">
-      <label>Protein Sequences</label>
-    </param>
-    <param name="table" type="select">
-      <label>Code to use</label>
-      <option value="0">Standard</option>
-      <option value="1">Standard (with alternative initiation codons)</option>
-      <option value="2">Vertebrate Mitochondrial</option>
-      <option value="3">Yeast Mitochondrial</option>
-      <option value="4">Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma</option>
-      <option value="5">Invertebrate Mitochondrial</option>
-      <option value="6">Ciliate Macronuclear and Dasycladacean</option>
-      <option value="9">Echinoderm Mitochondrial</option>
-      <option value="10">Euplotid Nuclear</option>
-      <option value="11">Bacterial</option>
-      <option value="12">Alternative Yeast Nuclear</option>
-      <option value="13">Ascidian Mitochondrial</option>
-      <option value="14">Flatworm Mitochondrial</option>
-      <option value="15">Blepharisma Macronuclear</option>
-      <option value="16">Chlorophycean Mitochondrial</option>
-      <option value="21">Trematode Mitochondrial</option>
-      <option value="22">Scenedesmus obliquus</option>
-      <option value="23">Thraustochytrium Mitochondrial</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="3.fasta"/>
-      <param name="input2" value="2.pep"/>
-      <param name="table" value="0"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_tranalign_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/tranalign.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_transeq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,122 +0,0 @@
-<tool id="EMBOSS: transeq101" name="transeq" version="5.0.0">
-  <description>Translate nucleic acid sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>transeq -sequence $input1 -outseq $out_file1 -frame $frame -table $table -regions "$regions" -trim $trim -clean $clean -alternative $alternative -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="frame" type="select">
-      <label>Frame(s) to translate</label>
-      <option value="1">Frame 1</option>
-      <option value="2">Frame 2</option>
-      <option value="3">Frame 3</option>
-      <option value="F">Forward three frames</option>
-      <option value="-1">Frame -1</option>
-      <option value="-2">Frame -2</option>
-      <option value="-3">Frame -3</option>
-      <option value="R">Reverse three frames</option>
-      <option value="6">All six frames</option>
-    </param>
-    <param name="table" type="select">
-      <label>Code to use</label>
-      <option value="0">Standard</option>
-      <option value="1">Standard (with alternative initiation codons)</option>
-      <option value="2">Vertebrate Mitochondrial</option>
-      <option value="3">Yeast Mitochondrial</option>
-      <option value="4">Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma</option>
-      <option value="5">Invertebrate Mitochondrial</option>
-      <option value="6">Ciliate Macronuclear and Dasycladacean</option>
-      <option value="9">Echinoderm Mitochondrial</option>
-      <option value="10">Euplotid Nuclear</option>
-      <option value="11">Bacterial</option>
-      <option value="12">Alternative Yeast Nuclear</option>
-      <option value="13">Ascidian Mitochondrial</option>
-      <option value="14">Flatworm Mitochondrial</option>
-      <option value="15">Blepharisma Macronuclear</option>
-      <option value="16">Chlorophycean Mitochondrial</option>
-      <option value="21">Trematode Mitochondrial</option>
-      <option value="22">Scenedesmus obliquus</option>
-      <option value="23">Thraustochytrium Mitochondrial</option>
-    </param>
-    <param name="regions" size="10" type="text" value="">
-      <label>Regions to translate</label>
-    </param>
-    <param name="trim" type="select">
-      <label>Remove all 'X' and '*' characters from the right end of the translation</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="clean" type="select">
-      <label>Change all STOP codon positions from the '*' character to 'X'</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="alternative" type="select">
-      <label>Define frame '-1' as using the set of codons starting with the last codon of the sequence</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="frame" value="1"/>
-      <param name="table" value="0"/>
-      <param name="regions" value=""/>
-      <param name="trim" value="no"/>
-      <param name="clean" value="no"/>
-      <param name="alternative" value="no"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_transeq_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/transeq.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_trimest.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-<tool id="EMBOSS: trimest102" name="trimest" version="5.0.0">
-  <description>Trim poly-A tails off EST sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>trimest -sequence $input1 -outseq $out_file1 -minlength "$minlength" -mismatches "$mismatches" -reverse $reverse -tolower $tolower -fiveprime $fiveprime -osformat2 $out_format1
-  -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="minlength" size="4" type="text" value="4">
-      <label>Minimum length that a poly-A (or poly-T) tail must have before it is removed</label>
-    </param>
-    <param name="mismatches" size="4" type="text" value="1">
-      <label>Maximum number of mismatched non-A bases allowed in a poly-A tail</label>
-    </param>
-    <param name="reverse" type="select">
-      <label>Change the sequence to the forward sense when it is written out</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="tolower" type="select">
-      <label>Mask poly-A by converting to lowercase</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="fiveprime" type="select">
-      <label>Inspect 5' end of the sequence for poly-T tails</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="minlength" value="4"/>
-      <param name="mismatches" value="1"/>
-      <param name="reverse" value="yes"/>
-      <param name="tolower" value="no"/>
-      <param name="fiveprime" value="yes"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_trimest_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/trimest.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_trimseq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,97 +0,0 @@
-<tool id="EMBOSS: trimseq103" name="trimseq" version="5.0.0">
-  <description>Trim ambiguous bits off the ends of sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>trimseq -sequence $input1 -outseq $out_file1 -window "$window" -percent "$percent" -strict $strict -star $star -left $left -right $right -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="window" size="4" type="text" value="1">
-      <label>Window size</label>
-    </param>
-    <param name="percent" size="5" type="text" value="100.0">
-      <label>Threshold of the percentage ambiguity</label>
-    </param>
-    <param name="strict" type="select">
-      <label>Trim all ambiguity codes</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="star" type="select">
-      <label>In protein sequences, trim off not only X's, but also the *'s</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="left" type="select">
-      <label>Trim at the start</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="right" type="select">
-      <label>Trim at the end</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="window" value="1"/>
-      <param name="percent" value="100.0"/>
-      <param name="strict" value="no"/>
-      <param name="star" value="no"/>
-      <param name="left" value="yes"/>
-      <param name="right" value="yes"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_trimseq_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark 
-
-The input dataset needs to be sequences. 
-
------ 
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/trimseq.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_twofeat.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,130 +0,0 @@
-<tool id="EMBOSS: twofeat104" name="twofeat" version="5.0.0">
-  <description>Finds neighbouring pairs of features in sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>twofeat -sequence $input1 -outfile $out_file1 -atype "$atype" -btype "$btype" -minrange "$minrange" -maxrange "$maxrange" -asource "$asource" -asense $asense -aminscore "$aminscore"
-  -amaxscore "$amaxscore" -atag "$atag" -avalue "$avalue" -bsource "$bsource" -bsense "$bsense" -bminscore "$bminscore" -bmaxscore "$bmaxscore" -btag "$btag" -bvalue "$bvalue" -overlap $overlap
-  -rangetype $rangetype -sense $sense -order $order -twoout $twoout -typeout "$typeout" -rformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="atype" size="50" type="text" value="*">
-      <label>Feature type to allow (feature 1)</label>
-    </param>
-    <param name="btype" size="50" type="text" value="*">
-      <label>Feature type you wish to allow. Feature 2</label>
-    </param>
-    <param name="minrange" size="5" type="text" value="0">
-      <label>Minimum range</label>
-    </param>
-    <param name="maxrange" size="5" type="text" value="0">
-      <label>Maximum range</label>
-    </param>
-    <param name="asource" size="50" type="text" value="*">
-      <label>Feature source 1</label>
-    </param>
-    <param name="asense" type="select">
-      <label>Feature sense 1</label>
-      <option value="0">Any sense</option>
-      <option value="+">Forward sense</option>
-      <option value="-">Reverse sense</option>
-    </param>
-    <param name="aminscore" size="5" type="text" value="0.0">
-      <label>Feature 1 minimum score</label>
-    </param>
-    <param name="amaxscore" size="5" type="text" value="0.0">
-      <label>Feature 1 maximum score</label>
-    </param>
-    <param name="atag" size="50" type="text" value="*">
-      <label>Feature 1 tag</label>
-    </param>
-    <param name="avalue" size="50" type="text" value="*">
-      <label>Tag 1 value</label>
-    </param>
-    <param name="bsource" size="50" type="text" value="*">
-      <label>Feature 2 source</label>
-    </param>
-    <param name="bsense" type="select">
-      <label>Feature 2 sense</label>
-      <option value="0">Any sense</option>
-      <option value="+">Forward sense</option>
-      <option value="-">Reverse sense</option>
-    </param>
-    <param name="bminscore" size="5" type="text" value="0.0">
-      <label>Feature 2 minimum score</label>
-    </param>
-    <param name="bmaxscore" size="5" type="text" value="0.0">
-      <label>Feature 2 maximum score</label>
-    </param>
-    <param name="btag" size="50" type="text" value="*">
-      <label>Feature 2 tag</label>
-    </param>
-    <param name="bvalue" size="50" type="text" value="*">
-      <label>Feature 2 tag value</label>
-    </param>
-    <param name="overlap" type="select">
-      <label>Overlaps allowed</label>
-      <option value="A">Any</option>
-      <option value="O">Overlap required</option>
-      <option value="NO">No overlaps are allowed</option>
-      <option value="NW">Overlap required but not within</option>
-      <option value="AW">A must be all within B</option>
-      <option value="BW">B must be all within A</option>
-    </param>
-    <param name="rangetype" type="select">
-      <label>How to determine range</label>
-      <option value="N">From nearest ends</option>
-      <option value="L">From left ends</option>
-      <option value="R">From right ends</option>
-      <option value="F">From furthest ends</option>
-    </param>
-    <param name="sense" type="select">
-      <label>Required sense</label>
-      <option value="A">Any sense</option>
-      <option value="S">Same sense</option>
-      <option value="O">Opposite sense</option>
-    </param>
-    <param name="order" type="select">
-      <label>Required order of the two features</label>
-      <option value="A">Any</option>
-      <option value="AB">Feature A then feature B</option>
-      <option value="BA">Feature B then feature A</option>
-    </param>
-    <param name="twoout" type="select">
-      <label>Write out the two features themselves</label>
-      <option value="no">No</option>
-      <option value="yes">Yes</option>
-    </param>
-    <param name="typeout" size="50" type="text" value="misc_feature">
-      <label>New feature type</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Report File Format</label>
-      <option value="table">Table</option>
-      <option value="embl">EMBL</option>
-      <option value="genbank">GENBANK</option>
-      <option value="gff">GFF</option>
-      <option value="pir">PIR</option>
-      <option value="swiss">SwissProt</option>
-      <option value="dbmotif">DbMotif</option>
-      <option value="diffseq">Diffseq</option>
-      <option value="excel">Excel (tab delimited)</option>
-      <option value="feattable">FeatTable</option>
-      <option value="motif">Motif</option>
-      <option value="regions">Regions</option>
-      <option value="seqtable">SeqTable</option>
-      <option value="simple">SRS Simple</option>
-      <option value="srs">SRS</option>
-      <option value="tagseq">TagSeq</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="table" name="out_file1" />
-  </outputs>
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/twofeat.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_union.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-<tool id="EMBOSS: union105" name="union" version="5.0.0">
-  <description>Reads sequence fragments and builds one sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>union -sequence $input1 -outseq $out_file1 -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_union_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark 
-
-The input dataset must contain sequences. 
-
------ 
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/union.html
-  </help>
-</tool>
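
A note for readers of this removal: most of the wrappers above and below share the
``<code file="emboss_format_corrector.py" />`` hook, which Galaxy runs before the
job so the output dataset's datatype matches the user-selected format. A minimal
sketch of such a hook, assuming the classic ``exec_before_job`` signature (the
parameter and key names here are illustrative, not copied from the deleted file)::

    # Sketch of a Galaxy "code file" hook in the style of emboss_format_corrector.py
    def exec_before_job(app, inp_data, out_data, param_dict=None):
        """Re-type the output dataset to the format chosen on the tool form."""
        param_dict = param_dict or {}
        chosen = param_dict.get('out_format1')   # e.g. 'fasta', 'gff', 'embl'
        data = out_data.get('out_file1')
        # Only re-type to extensions Galaxy actually knows about.
        if data is not None and chosen in app.datatypes_registry.datatypes_by_extension:
            data.extension = chosen
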
--- a/tools/emboss_5/emboss_vectorstrip.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-<tool id="EMBOSS: vectorstrip106" name="vectorstrip" version="5.0.0">
-  <description>Strips out DNA between a pair of vector sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>vectorstrip -sequence $input1 -vectorsfile $input2 -outseq $ofile1 -outfile $ofile2 -vectorfile yes -mismatch "$mismatch" -besthits $besthits -linkera "$linkera" -linkerb
-  "$linkerb" -osformat4 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequences</label>
-    </param>
-    <param format="data" name="input2" type="data">
-      <label>Vector file</label>
-    </param>
-    <param name="mismatch" size="4" type="text" value="10">
-      <label>Max allowed percent mismatch</label>
-    </param>
-    <param name="besthits" type="select">
-      <label>Show only the best hits (minimize mismatches)</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="linkera" size="50" type="text" value="">
-      <label>The 5' sequence</label>
-    </param>
-    <param name="linkerb" size="50" type="text" value="">
-      <label>The 3' sequence</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="ofile1" />
-    <data format="vectorstrip" name="ofile2" />
-  </outputs>
-  <!--  <tests>
-    <test>
-      <param name="input1" value="1.fasta"/>
-      <param name="input2" value="2.fasta"/>
-      <param name="mismatch" value="10"/>
-      <param name="besthits" value="yes"/>
-      <param name="linkera" value=""/>
-      <param name="linkerb" value=""/>
-      <param name="out_format1" value="fasta"/>
-      <output name="ofile1" file="emboss_vectorstrip_out.fasta"/>
-    </test>
-  </tests> -->
-  <code file="emboss_format_corrector.py" />
-  <help>
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/vectorstrip.html
-  </help>
-</tool>
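
As a rough Python approximation of what vectorstrip computes (the EMBOSS
implementation differs; ``best_hit`` and the example sequences are invented for
illustration): locate the best placement of each linker allowing the given
percentage of mismatches, then keep the sequence between the hits::

    def best_hit(seq, probe, max_mismatch_pct=10.0):
        """Return (start, mismatches) of the best placement of probe, or None."""
        limit = len(probe) * max_mismatch_pct / 100.0
        best = None
        for i in range(len(seq) - len(probe) + 1):
            mm = sum(1 for a, b in zip(seq[i:i + len(probe)], probe) if a != b)
            if mm <= limit and (best is None or mm < best[1]):
                best = (i, mm)
        return best

    five_p, three_p = "AAGGCCTT", "GGATCC"
    seq = "AAGGCCTTACGTACGTACGTGGATCC"
    five, three = best_hit(seq, five_p), best_hit(seq, three_p)
    if five and three:
        print(seq[five[0] + len(five_p):three[0]])   # the insert between linkers
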
--- a/tools/emboss_5/emboss_water.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-<tool id="EMBOSS: water107" name="water" version="5.0.0">
-  <description>Smith-Waterman local alignment</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>water -asequence $input1 -bsequence $input2 -outfile $out_file1 -gapopen "$gapopen" -gapextend "$gapextend" -brief $brief -aformat3 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequence 1</label>
-    </param>
-    <param format="fasta" name="input2" type="data">
-      <label>Sequence 2</label>
-    </param>
-    <param name="gapopen" size="6" type="text" value="10.0">
-      <label>Gap open penalty</label>
-    </param>
-    <param name="gapextend" size="6" type="text" value="0.5">
-      <label>Gap extension penalty</label>
-    </param>
-    <param name="brief" type="select">
-      <label>Brief identity and similarity</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Alignment File Format</label>
-      <option value="srs">SRS (m)</option>
-      <option value="simple">Simple (m)</option>
-      <option value="fasta">FASTA (m)</option>
-      <option value="msf">MSF (m)</option>
-      <option value="pair">Pair (p)</option>
-      <option value="markx0">Markx0 (p)</option>
-      <option value="markx1">Markx1 (p)</option>
-      <option value="markx2">Markx2 (p)</option>
-      <option value="markx3">Markx3 (p)</option>
-      <option value="markx10">Markx10 (p)</option>
-      <option value="srspair">SRS pair (p)</option>
-      <option value="score">Score (p)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="srs" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="input2" value="1.fasta"/>
-      <param name="gapopen" value="10.0"/>
-      <param name="gapextend" value="0.5"/>
-      <param name="brief" value="no"/>
-      <param name="out_format1" value="score"/>
-      <output name="out_file1" file="emboss_water_out.score"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark 
-
-The input datasets must contain sequences. 
-
------ 
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/water.html
-  </help>
-</tool>
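
The ``-gapopen``/``-gapextend`` pair above are affine gap penalties: opening a
gap costs the first value, each additional gapped base the second. A compact
sketch of Smith-Waterman scoring with affine gaps (Gotoh's recurrences; the
match/mismatch scores are illustrative, EMBOSS uses a scoring matrix, and
conventions for charging the first gap base vary slightly)::

    def smith_waterman(a, b, match=5, mismatch=-4, gap_open=10.0, gap_extend=0.5):
        """Best local alignment score of a vs b with affine gap penalties."""
        n, m = len(a), len(b)
        NEG = float("-inf")
        H = [[0.0] * (m + 1) for _ in range(n + 1)]  # best score ending at (i, j)
        E = [[NEG] * (m + 1) for _ in range(n + 1)]  # ... ending in a gap in a
        F = [[NEG] * (m + 1) for _ in range(n + 1)]  # ... ending in a gap in b
        best = 0.0
        for i in range(1, n + 1):
            for j in range(1, m + 1):
                E[i][j] = max(E[i][j - 1] - gap_extend, H[i][j - 1] - gap_open)
                F[i][j] = max(F[i - 1][j] - gap_extend, H[i - 1][j] - gap_open)
                s = match if a[i - 1] == b[j - 1] else mismatch
                H[i][j] = max(0.0, H[i - 1][j - 1] + s, E[i][j], F[i][j])
                best = max(best, H[i][j])
        return best

    print(smith_waterman("ACACACTA", "AGCACACA"))
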
--- a/tools/emboss_5/emboss_wobble.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-<tool id="EMBOSS: wobble108" name="wobble" version="5.0.0">
-  <description>Wobble base plot</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl wobble -sequence $input1 -graph png -goutfile $ofile1 -outfile $ofile2 -window "$window" -bases "$bases" -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="window" size="5" type="text" value="30">
-      <label>Window size, in codons</label>
-    </param>
-    <param name="bases" size="6" type="text" value="GC">
-      <label>Bases used</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="ofile1" />
-    <data format="wobble" name="ofile2" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="window" value="30"/>
-      <param name="bases" value="GC"/>
-      <output name="ofile2" file="emboss_wobble_out.wobble"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark 
-
-The input dataset must contain sequences. 
-
------ 
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/wobble.html
-  </help>
-</tool>
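
In essence, wobble slides a window of codons along the sequence and tracks the
fraction of the chosen bases at each codon position. A one-frame sketch (the
EMBOSS tool plots all three frames and renders a PNG; this only computes the
third-position track)::

    def wobble_track(seq, window=30, bases="GC"):
        """Fraction of chosen bases at third codon positions, per codon window."""
        thirds = seq[2::3]              # one wobble base per codon
        if not thirds:
            return []
        window = min(window, len(thirds))
        return [sum(thirds[i:i + window].count(b) for b in bases) / float(window)
                for i in range(len(thirds) - window + 1)]

    print(wobble_track("ATGGCGGCCGCAGCG" * 10, window=10))
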
--- a/tools/emboss_5/emboss_wordcount.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-<tool id="EMBOSS: wordcount109" name="wordcount" version="5.0.0">
-  <description>Counts words of a specified size in a DNA sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>wordcount -sequence $input1 -outfile $out_file1 -wordsize "$wordsize" -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequence</label>
-    </param>
-    <param name="wordsize" size="5" type="text" value="4">
-      <label>Word size</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="wordcount" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="wordsize" value="4"/>
-      <output name="out_file1" file="emboss_wordcount_out.wordcount"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark 
-
-The input dataset must contain sequences. 
-
------ 
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/wordcount.html
-  </help>
-</tool>
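
The wordcount logic is simply overlapping k-mer counting; a faithful little
sketch, with output columns mirroring the word/count table the tool writes::

    from collections import Counter

    def wordcount(seq, wordsize=4):
        """Count every overlapping word of the given size in a DNA sequence."""
        return Counter(seq[i:i + wordsize] for i in range(len(seq) - wordsize + 1))

    for word, n in wordcount("ACGTACGTGA").most_common():
        print("%s\t%d" % (word, n))
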
--- a/tools/emboss_5/emboss_wordmatch.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,74 +0,0 @@
-<tool id="EMBOSS: wordmatch110" name="wordmatch" version="5.0.0">
-  <description>Finds all exact matches of a given size between 2 sequences</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>wordmatch -asequence $input1 -bsequence $input2 -outfile $out_file1 -aoutfeat $out_file2 -boutfeat $out_file3 -wordsize "$wordsize" -aformat3 $out_format1 -offormat4 $out_format2
-  -offormat5 $out_format3 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>Sequence 1</label>
-    </param>
-    <param format="fasta" name="input2" type="data">
-      <label>Sequence 2</label>
-    </param>
-    <param name="wordsize" size="5" type="text" value="4">
-      <label>Word size</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Alignment File Format</label>
-      <option value="match">Match (m)</option>
-      <option value="simple">Simple (m)</option>
-      <option value="fasta">FASTA (m)</option>
-      <option value="msf">MSF (m)</option>
-      <option value="srs">SRS (m)</option>
-      <option value="pair">Pair (p)</option>
-      <option value="markx0">Markx0 (p)</option>
-      <option value="markx1">Markx1 (p)</option>
-      <option value="markx2">Markx2 (p)</option>
-      <option value="markx3">Markx3 (p)</option>
-      <option value="markx10">Markx10 (p)</option>
-      <option value="srspair">SRS pair (p)</option>
-      <option value="score">Score (p)</option>
-    </param>
-    <param name="out_format2" type="select">
-      <label>Output Feature 1 File Format</label>
-      <option value="gff">GFF</option>
-      <option value="embl">EMBL</option>
-      <option value="swiss">SwissProt</option>
-    </param>
-    <param name="out_format3" type="select">
-      <label>Output Feature 2 File Format</label>
-      <option value="gff">GFF</option>
-      <option value="embl">EMBL</option>
-      <option value="swiss">SwissProt</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="match" name="out_file1" />
-    <data format="gff" name="out_file2" />
-    <data format="gff" name="out_file3" />
-  </outputs>
-  <!--   <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="input2" value="1.fasta"/>
-      <param name="wordsize" value="4"/>
-      <param name="out_format1" value="fasta"/>
-      <param name="out_format2" value="gff"/>
-      <param name="out_format3" value="gff"/>
-      <output name="ofile2" file="emboss_wordmatch_out.embl"/>
-    </test> 
-  </tests> test takes a long time to run-->
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark 
-
-The input datasets must contain sequences. 
-
------ 
-
-    You can view the original documentation here_.
-    
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/wordmatch.html
-  </help>
-</tool>
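
At its core wordmatch indexes one sequence's words and streams the other
sequence against that index; the real tool additionally merges word hits into
maximal exact matches, which this sketch skips::

    def wordmatch(a, b, wordsize=4):
        """Yield (pos_in_a, pos_in_b, word) for every exact shared word."""
        index = {}
        for i in range(len(a) - wordsize + 1):
            index.setdefault(a[i:i + wordsize], []).append(i)
        for j in range(len(b) - wordsize + 1):
            for i in index.get(b[j:j + wordsize], ()):
                yield i, j, b[j:j + wordsize]

    for i, j, w in wordmatch("GATTACA", "TTACAGG"):
        print("%d\t%d\t%s" % (i, j, w))   # e.g. 2  0  TTAC
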
--- a/tools/encode/gencode_partition.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-<tool id="gencode_partition1" name="Gencode Partition">
-  <description>an interval file</description>
-  <command interpreter="python">split_by_partitions.py ${GALAXY_DATA_INDEX_DIR} $input1 $out_file1 ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} ${input1.metadata.strandCol}</command>
-  <inputs>
-    <param name="input1" type="data" format="interval" label="File to Partition"/>
-  </inputs>
-  <outputs>
-    <data name="out_file1" format="bed"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="encode_1.bed"/>
-      <output name="out_file1" file="gencode_partition_out.bed"/>
-    </test>
-  </tests>
-  <help>
-For detailed information about partitioning, click here_.
-
-.. _here: http://genome.imim.es/gencode/wiki/index.php/Collecting_Feature_Sets_from_All_Analysis_Groups
-
-Datasets are partitioned according to the protocol below:
-
-A partition scheme has been defined, similar to what has previously been done with TARs/TRANSFRAGs, such that any feature can be classified as falling into one of the following 7 categories:
-  1. **Coding** -- coding exons defined from the GENCODE experimentally verified coding set (coding in any transcript)
-  2. **5UTR** -- 5' UTR exons defined from the GENCODE experimentally verified coding set (5' UTR in some transcript but never coding in any other)
-  3. **3UTR** -- 3' UTR exons defined from the GENCODE experimentally verified coding set (3' UTR in some transcript but never coding in any other)
-  4. **Intronic Proximal** -- intronic and no more than 5kb away from an exon.
-  5. **Intergenic Proximal** -- between genes and no more than 5kb away from an exon.
-  6. **Intronic Distal** -- intronic and greater than 5kb away from an exon.
-  7. **Intergenic Distal** -- between genes and greater than 5kb away from an exon.
-
------
-
-.. class:: infomark
-
-**Note:** Features overlapping more than one partition will take the identity of the lower-numbered partition. 
-
-  </help>
-</tool>
\ No newline at end of file
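
The "lower-numbered partition wins" rule from the note above amounts to a
priority list; a toy sketch of that tie-break (labels invented to match the
seven categories)::

    # Ordered so that earlier (lower-numbered) partitions win ties.
    PARTITIONS = ["coding", "5UTR", "3UTR", "intronic_proximal",
                  "intergenic_proximal", "intronic_distal", "intergenic_distal"]

    def classify(overlapping):
        """Pick one label for a feature given every partition it overlaps."""
        hits = [p for p in PARTITIONS if p in overlapping]
        return hits[0] if hits else "no_overlap"

    print(classify({"3UTR", "intronic_proximal"}))   # -> 3UTR
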
--- a/tools/encode/random_intervals.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-<tool id="random_intervals1" name="Random Intervals">
-<description>create a random set of intervals</description>
-  <command interpreter="python">random_intervals_no_bits.py $regions $input2 $input1 $out_file1 ${input2.metadata.chromCol} ${input2.metadata.startCol} ${input2.metadata.endCol} ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} ${input1.metadata.strandCol} $use_mask $strand_overlaps ${GALAXY_DATA_INDEX_DIR}</command>
-  <inputs>
-    <param name="input1" type="data" format="interval" label="File to Mimick">
-      <validator type="unspecified_build" message="Unspecified build, this tool works with data from genome builds hg16 or hg17. Click the pencil icon in your history item to set the genome build."/>
-    </param>
-    <param name="input2" type="data" format="interval" label="Intervals to Mask"/>
-    <param name="use_mask" type="select" label="Use mask">
-      <option value="no_mask">No</option>
-      <option value="use_mask">Yes</option>
-    </param>
-    <param name="strand_overlaps" type="select" label="Allow overlaps">
-      <option value="all">Any</option>
-      <option value="strand">Across Strands</option>
-      <option value="none">None</option>
-    </param>
-    <param name="regions" type="select" label="Regions to use">
-      <options from_file="regions.loc">
-        <column name="name" index="2"/>
-        <column name="value" index="1"/>
-        <column name="dbkey" index="0"/>
-        <filter type="data_meta" ref="input1" key="dbkey" column="0" />
-        <validator type="no_options" message="This tool currently only works with ENCODE data from genome builds hg16 or hg17."/>
-      </options>
-    </param> 
-  </inputs>
-  <outputs>
-    <data name="out_file1" format="input"/>
-  </outputs>
-  <help>
-
-.. class:: warningmark
-
-This tool currently only works with ENCODE data from genome builds hg16 or hg17.
-
------
-
-.. class:: infomark
-
-**Note:** If you do not wish to mask a set of intervals, change the Use Mask option to No; that setting overrides any selected mask file.
-
------
-
-**Syntax**
-
-This tool will attempt to create a random set of intervals that mimic those found within your source file.  You may also specify a set of intervals to mask.
-
-**Allow overlaps** options
-  * **Across Strands** - random regions are allowed to overlap only if they are on different strands.
-  * **Any** - all overlaps are allowed.
-  * **None** - no overlapping regions are allowed.
-
-**Regions to use** options
-  * Bounding region of interest based on the dataset build.
-
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/encode/random_intervals_no_bits.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,253 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-#%prog bounding_region_file mask_intervals_file intervals_to_mimic_file out_file mask_chr mask_start mask_end interval_chr interval_start interval_end interval_strand use_mask allow_strand_overlaps
-import sys, random
-from copy import deepcopy
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import bx.intervals.io
-import bx.intervals.intersection
-import psyco_full
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-max_iters = 5
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-#Try to add a random region
-def add_random_region( mimic_region, bound, exist_regions, plus_mask, minus_mask, overlaps ):
-    region_length, region_strand = mimic_region
-    plus_count = plus_mask.count_range()
-    minus_count = minus_mask.count_range()
-    gaps = []
-
-    if region_strand == "-":
-        gaps = minus_mask.get_gaps( region_length )
-    else:
-        gaps = plus_mask.get_gaps( region_length )
-    
-    while True:
-        try:
-            gap_length, gap_start, gap_end = gaps.pop( random.randint( 0, len( gaps ) - 1 ) )
-        except:
-            break
-        try:
-            start = random.randint( bound.start + gap_start, bound.start + gap_end - region_length - 1 )
-        except ValueError, ve:
-            stop_err( "Exception thrown generating random start value: %s" %str( ve ) )
-
-        end = start + region_length
-        try_plus_mask = plus_mask.copy()
-        try_minus_mask = minus_mask.copy()
-        
-        if region_strand == "-":
-            try_minus_mask.set_range( start - bound.start, end - bound.start )
-        else:
-            try_plus_mask.set_range( start - bound.start, end - bound.start )
-        
-        rand_region = bx.intervals.io.GenomicInterval( None, [bound.chrom, start, end, region_strand], 0, 1, 2, 3, "+", fix_strand=True )
-        
-        if try_plus_mask.count_range() == plus_count + region_length or try_minus_mask.count_range() == minus_count + region_length:
-            if overlaps in ["strand", "all"]: #overlaps allowed across strands
-                exist_regions.append( rand_region )
-                if overlaps == "strand":
-                    return exist_regions, True, try_plus_mask, try_minus_mask
-                else: #overlaps allowed everywhere
-                    return exist_regions, True, plus_mask, minus_mask
-            else: #no overlapping anywhere
-                exist_regions.append( rand_region )
-                if region_strand == "-":
-                    return exist_regions, True, try_minus_mask.copy(), try_minus_mask
-                else: 
-                    return exist_regions, True, try_plus_mask, try_plus_mask.copy()
-    return exist_regions, False, plus_mask, minus_mask
-
-def main():
-    includes_strand = False
-    region_uid = sys.argv[1]
-    mask_fname = sys.argv[2]
-    intervals_fname = sys.argv[3]
-    out_fname = sys.argv[4]
-    try:
-        mask_chr = int( sys.argv[5] ) - 1
-    except:
-        stop_err( "'%s' is an invalid chrom column for 'Intervals to Mask' dataset, click the pencil icon in the history item to edit column settings." % str( sys.argv[5] ) )
-    try:
-        mask_start = int( sys.argv[6] ) - 1
-    except:
-        stop_err( "'%s' is an invalid start column for 'Intervals to Mask' dataset, click the pencil icon in the history item to edit column settings." % str( sys.argv[6] ) )
-    try:
-        mask_end = int( sys.argv[7] ) - 1
-    except:
-        stop_err( "'%s' is an invalid end column for 'Intervals to Mask' dataset, click the pencil icon in the history item to edit column settings." % str( sys.argv[7] ) )
-    try:
-        interval_chr = int( sys.argv[8] ) - 1
-    except:
-        stop_err( "'%s' is an invalid chrom column for 'File to Mimick' dataset, click the pencil icon in the history item to edit column settings." % str( sys.argv[8] ) )
-    try:
-        interval_start = int( sys.argv[9] ) - 1
-    except:
-        stop_err( "'%s' is an invalid start column for 'File to Mimick' dataset, click the pencil icon in the history item to edit column settings." % str( sys.argv[9] ) )
-    try:
-        interval_end = int( sys.argv[10] ) - 1
-    except:
-        stop_err( "'%s' is an invalid end column for 'File to Mimick' dataset, click the pencil icon in the history item to edit column settings." % str( sys.argv[10] ) )
-    try:
-        interval_strand = int( sys.argv[11] ) - 1
-        includes_strand = True
-    except:
-        interval_strand = -1
-    if includes_strand:
-        use_mask = sys.argv[12]
-        overlaps = sys.argv[13]
-    else:
-        use_mask = sys.argv[11]
-        overlaps = sys.argv[12]
-    available_regions = {}
-    loc_file = "%s/regions.loc" % sys.argv[-1]
-    
-    for i, line in enumerate( file( loc_file ) ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ):
-            fields = line.split( '\t' )
-            #read each line, if not enough fields, go to next line
-            try:
-                build = fields[0]
-                uid = fields[1]
-                description = fields[2]
-                filepath = fields[3]
-                available_regions[uid] = filepath
-            except:
-                continue
-
-    if region_uid not in available_regions:
-        stop_err( "Region '%s' is invalid." % region_uid )
-    region_fname = available_regions[region_uid].strip()
-
-    #set up bounding regions to hold random intervals
-    bounds = []
-    for bound in bx.intervals.io.NiceReaderWrapper( open( region_fname, 'r' ), chrom_col=0, start_col=1, end_col=2, fix_strand=True, return_header=False, return_comments=False ):
-        bounds.append( bound )
-    #set up length and number of regions to mimic
-    regions = [ [] for i in range( len( bounds ) ) ]
-
-    for region in bx.intervals.io.NiceReaderWrapper( open( intervals_fname, 'r' ), chrom_col=interval_chr, start_col=interval_start, end_col=interval_end, strand_col=interval_strand, fix_strand=True, return_header=False, return_comments=False ):
-        #loop through bounds, find first proper bounds then add
-        #if an interval crosses bounds, it will be added to the first bound
-        for i in range( len( bounds ) ):
-            if bounds[i].chrom != region.chrom:
-                continue
-            intersecter = bx.intervals.intersection.Intersecter()
-            intersecter.add_interval( bounds[i] )
-            if len( intersecter.find( region.start, region.end ) ) > 0:
-                regions[i].append( ( region.end - region.start, region.strand ) ) #add region to proper bound and go to next region
-                break
-    for region in regions:
-        region.sort()
-        region.reverse()
-    
-    #read mask file
-    mask = []
-    if use_mask != "no_mask":
-        for region in bx.intervals.io.NiceReaderWrapper( open( mask_fname, 'r' ), chrom_col=mask_chr, start_col=mask_start, end_col=mask_end, fix_strand=True, return_header=False, return_comments=False ):
-            mask.append( region )
-
-    try:
-        out_file = open ( out_fname, "w" )
-    except:
-        stop_err( "Error opening output file '%s'." % out_fname )
-
-    i = 0
-    i_iters = 0
-    region_count = 0
-    best_regions = []
-    num_fail = 0
-    while i < len( bounds ):
-        i_iters += 1
-        #order regions to mimic
-        regions_to_mimic = regions[i][0:]
-        if len( regions_to_mimic ) < 1: #if no regions to mimic, skip
-            i += 1
-            i_iters = 0
-            continue 
-        #set up region mask
-        plus_mask = Region( bounds[i].end - bounds[i].start )
-        for region in mask:
-            if region.chrom != bounds[i].chrom: continue
-            mask_start = region.start - bounds[i].start
-            mask_end = region.end - bounds[i].start
-            if mask_start >= 0 and mask_end > 0:
-                plus_mask.set_range( mask_start, mask_end )
-        minus_mask = plus_mask.copy()
-        random_regions = []
-        num_added = 0
-        for j in range( len( regions[i] ) ):
-            random_regions, added, plus_mask, minus_mask = add_random_region( regions_to_mimic[j], bounds[i], random_regions, plus_mask, minus_mask, overlaps )
-            if added: 
-                num_added += 1
-        if num_added == len( regions_to_mimic ) or i_iters >= max_iters:
-            if len( best_regions ) > len( random_regions ):
-                random_regions = best_regions[:] #slice copy; Python 2 lists have no .copy()
-            num_fail += ( len( regions_to_mimic ) - len( random_regions ) )
-            i_iters = 0
-            best_regions = []
-            for region in random_regions:
-                print >>out_file, "%s\t%d\t%d\t%s\t%s\t%s" % ( region.chrom, region.start, region.end, "region_" + str( region_count ), "0", region.strand )
-                region_count += 1
-        else:
-            i -= 1
-            if len( best_regions ) < len( random_regions ):
-                best_regions = random_regions[:]
-        i+=1
-    
-    out_file.close()
-    if num_fail:
-        print "After %i iterations, %i regions could not be added." % (max_iters, num_fail)
-        if use_mask == "use_mask":
-            print "The mask you have provided may be too restrictive."
-
-class Region( list ):
-    """
-    A list for on/off regions
-    """
-    def __init__( self, size=0 ):
-        for i in range( size ):
-            self.append( False )
-    def copy( self ):
-        return deepcopy( self )
-    def set_range( self, start=0, end=None ):
-        if start < 0:
-            start = 0
-        if ( not end and end != 0 ) or end > len( self ):
-            end = len( self )
-        for i in range( start, end ):
-            self[i]=True
-    def count_range( self, start=0, end=None ):
-        if start < 0:
-            start = 0
-        if ( not end and end != 0 ) or end > len( self ):
-            end = len( self )
-        return self[start:end].count( True )
-    def get_gaps( self, min_size = 0 ):
-        gaps = []
-        start = end = 0
-        while True:
-            try: 
-                start = self[end:].index( False ) + end
-            except: 
-                break
-            try: 
-                end = self[start:].index( True ) + start
-            except:
-                end = len( self )
-            if end > start and end - start >= min_size:
-                gaps.append( ( end - start, start, end ) )
-        gaps.sort()
-        gaps.reverse()
-        return gaps
-
-if __name__ == "__main__": main()
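
For anyone reading the deleted script above: the Region class is just a boolean
list with range helpers. A quick illustration of its behaviour (assumes the
Region class defined above)::

    r = Region(10)                    # ten positions, all unmasked
    r.set_range(2, 5)                 # mask positions 2..4
    assert r.count_range() == 3       # three masked positions
    # get_gaps returns (length, start, end) tuples, largest first
    assert r.get_gaps(min_size=3) == [(5, 5, 10)]
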
--- a/tools/encode/split_by_partitions.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,125 +0,0 @@
-#!/usr/bin/env python
-#Original script from /home/james/work/encode/feature_partitions/split_by_partitions.py
-
-#Usage: python(2.4) split_by_partitions.py partition_index in_file out_file chrCol startCol endCol strandCol
-
-from __future__ import division
-
-import sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.bitset import *
-from bx.bitset_builders import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def main():
-    GALAXY_DATA_INDEX_DIR = sys.argv[1]
-    partition_index = '%s/encode_feature_partitions/partition_list.txt' % GALAXY_DATA_INDEX_DIR
-    partition_offset = "%s/encode_feature_partitions/" % GALAXY_DATA_INDEX_DIR
-    
-    warnings = []
-    
-    # Load up the partitions
-    partitions = list()
-    try: 
-        for line in open( partition_index ):
-            name, score, filename = line.split()
-            partitions.append( ( name, score, binned_bitsets_from_file( open( partition_offset+filename ) ) ) )
-    except:
-        stop_err( "Error loading partitioning dataset." )
-    
-    try:
-        in_file = open( sys.argv[2] )
-    except:
-        stop_err( "Bad input data." )
-        
-    try:
-        out_file = open( sys.argv[3], "w" )
-    except:
-        stop_err( "Bad output file." )
-    
-    try:
-        chrCol = int( sys.argv[4] ) - 1
-    except:
-        stop_err( "Bad chr column: %s" % ( str( sys.argv[4] ) ) )
-    try:
-        startCol = int( sys.argv[5] ) - 1
-    except:
-        stop_err( "Bad start column: %s" % ( str( sys.argv[5] ) ) )
-    try:
-        endCol = int( sys.argv[6] ) - 1
-    except:
-        stop_err( "Bad end column: %s" % ( str( sys.argv[6] ) ) )
-    try:
-        strandCol = int( sys.argv[7] )-1
-    except:
-        strandCol = -1
-    
-    line_count = 0
-    skipped_lines = 0
-    first_invalid_line = None
-    invalid_line = ''
-    try:
-        for line in in_file:
-            line_count += 1
-            line = line.rstrip( '\r\n' )
-            if line and not line.startswith( '#' ):
-                fields = line.split( '\t' )
-                try:
-                    chr, start, end = fields[chrCol], int( fields[startCol] ), int( fields[endCol] )
-                except:
-                    skipped_lines += 1
-                    if first_invalid_line is None:
-                        first_invalid_line = line_count
-                        invalid_line = line
-                    continue
-                label = "input_line_" + str( line_count ) #if input file type was known to be bed, then could guess at label column
-                
-                if strandCol < 0:
-                    strand = "+"
-                else:
-                    try:
-                        strand = fields[strandCol]
-                    except:
-                        strand = "+"
-                
-                # Find which partition it overlaps
-                overlap = 0
-                for name, score, bb in partitions:
-                    # Is there at least 1bp overlap?
-                    if chr in bb:
-                        overlap = bb[chr].count_range( start, end-start )
-                        if overlap > 0:
-                            break
-                else:
-                    # No overlap with any partition? Warn, since the partitions
-                    # tile the ENCODE regions completely: this indicates an
-                    # interval that does not even overlap an ENCODE region.
-                    warning = "warning: Interval (%s, %d, %d) does not overlap any partition" % ( chr, start, end ) + ", line[" + str( line_count ) + "]. "
-                    warnings.append( warning )
-                    name = "no_overlap"
-                    score = 0
-                # Annotate with the name of the partition
-                frac_overlap = overlap / ( end-start )
-                # BED6 plus?
-                print >>out_file, "%s\t%d\t%d\t%s\t%s\t%s\t%s\t%0.4f" % ( chr, start, end, label, score, strand, name, frac_overlap )
-    except:
-        out_file.close()
-        in_file.close()
-        stop_err( "Unknown error while processing line # %d: %s" % ( line_count, line ) )
-    out_file.close()
-    in_file.close()
-
-    if warnings:
-        warn_msg = "This tool is useful on ENCODE regions only, %d warnings, 1st is: " % len( warnings )
-        warn_msg += warnings[0]
-        print warn_msg
-    if skipped_lines:
-        print "Skipped %d invalid lines starting at line # %d: %s" % ( skipped_lines, first_invalid_line, invalid_line )
-
-if __name__ == "__main__": main()
--- a/tools/evolution/add_scores.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-<tool id="hgv_add_scores" name="phyloP" version="1.0.0">
-  <description>interspecies conservation scores</description>
-
-  <command>
-    add_scores $input1 ${input1.metadata.dbkey} ${input1.metadata.chromCol} ${input1.metadata.startCol} ${GALAXY_DATA_INDEX_DIR}/add_scores.loc $out_file1
-  </command>
-
-  <inputs>
-    <param format="interval" name="input1" type="data" label="Dataset">
-      <validator type="unspecified_build"/>
-      <validator type="dataset_metadata_in_file" filename="add_scores.loc" metadata_name="dbkey" metadata_column="0" message="Data is currently not available for the specified build."/>
-    </param>
-  </inputs>
-
-  <outputs>
-    <data format="input" name="out_file1" />
-  </outputs>
-
-  <requirements>
-    <requirement type="package">add_scores</requirement>
-  </requirements>
-
-  <tests>
-    <test>
-      <param name="input1" value="add_scores_input1.interval" ftype="interval" dbkey="hg18" />
-      <output name="output" file="add_scores_output1.interval" />
-    </test>
-    <test>
-      <param name="input1" value="add_scores_input2.bed" ftype="interval" dbkey="hg18" />
-      <output name="output" file="add_scores_output2.interval" />
-    </test>
-  </tests>
-
-  <help>
-.. class:: warningmark
-
-This currently works only for build hg18.
-
------
-
-**Dataset formats**
-
-The input can be any interval_ format dataset.  The output is also in interval format.
-(`Dataset missing?`_)
-
-.. _interval: ./static/formatHelp.html#interval
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-This tool adds a column that measures interspecies conservation at each SNP 
-position, using conservation scores for primates pre-computed by the 
-phyloP program.  PhyloP performs an exact P-value computation under a 
-continuous Markov substitution model. 
-
-The chromosome and start position
-are used to look up the scores, so if a larger interval is in the input,
-only the score for the first nucleotide is returned.
-
------
-
-**Example**
-
-- input file, with SNPs::
-
-    chr22  16440426  14440427  C/T
-    chr22  15494851  14494852  A/G
-    chr22  14494911  14494912  A/T
-    chr22  14550435  14550436  A/G
-    chr22  14611956  14611957  G/T
-    chr22  14612076  14612077  A/G
-    chr22  14668537  14668538  C
-    chr22  14668703  14668704  A/T
-    chr22  14668775  14668776  G
-    chr22  14680074  14680075  A/T
-    etc.
-
-- output file, showing conservation scores for primates::
-
-    chr22  16440426  14440427  C/T  0.509
-    chr22  15494851  14494852  A/G  0.427
-    chr22  14494911  14494912  A/T  NA
-    chr22  14550435  14550436  A/G  NA
-    chr22  14611956  14611957  G/T  -2.142
-    chr22  14612076  14612077  A/G  0.369
-    chr22  14668537  14668538  C    0.419
-    chr22  14668703  14668704  A/T  -1.462
-    chr22  14668775  14668776  G    0.470
-    chr22  14680074  14680075  A/T  0.303
-    etc.
-
-  "NA" means that the phyloP score was not available.
-
------
-
-**Reference**
-
-Siepel A, Pollard KS, Haussler D. (2006)
-New methods for detecting lineage-specific selection.
-In Proceedings of the 10th International Conference on Research in Computational
-Molecular Biology (RECOMB 2006), pp. 190-205.
-
-  </help>
-</tool>
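
Conceptually, the add_scores binary performs a keyed join on (chromosome,
start): it reads the per-build score table pointed at by add_scores.loc and
appends one column, with "NA" when no score exists. A toy version of that join
(file names and layout invented for illustration)::

    scores = {}                                    # (chrom, start) -> phyloP
    for line in open("phyloP_chr22.tab"):          # hypothetical score table
        chrom, start, score = line.split()
        scores[(chrom, int(start))] = score

    with open("snps.interval") as snps, open("out.interval", "w") as out:
        for line in snps:
            f = line.rstrip("\n").split("\t")
            key = (f[0], int(f[1]))                # chrom + start locate the score
            out.write("\t".join(f + [scores.get(key, "NA")]) + "\n")
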
--- a/tools/evolution/codingSnps.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,528 +0,0 @@
-#!/usr/bin/perl -w 
-use strict;
-
-#########################################################################
-#	codingSnps.pl
-#	This takes a BED file whose name field holds '/'-separated nucleotides
-#	and a gene BED file with CDS start and stop.
-#	It then checks for changes in coding regions, reporting
-#	those that cause a frameshift or substitution in the amino acid.
-#########################################################################
-
-my $seqFlag = "2bit"; #flag to set sequence type 2bit|nib
-if (!@ARGV or scalar @ARGV < 3) {
-   print "Usage: codingSnps.pl snps.bed genes.bed (/dir/*$seqFlag|Galaxy build= loc=) [chr=# start=# end=# snp=# keepColumns=1] > codingSnps.txt\n";
-   exit;
-}
-my $uniq = 0; #flag for whether want uniq positions
-my $syn = 0;  #flag for if we want synonymous changes rather than non-syn
-my $keep = 0; #keep old columns and append new ones
-my $snpFile = shift @ARGV;
-my $geneFile = shift @ARGV;
-my $nibDir = shift @ARGV;  #2bit or nib, depending on flag above
-if ($nibDir eq 'Galaxy') { getGalaxyInfo(); }
-my $col0 = 0; #bed like columns in default positions
-my $col1 = 1;
-my $col2 = 2;
-my $col3 = 3;
-#column positions 1 based coming in (for Galaxy)
-foreach (@ARGV) {
-   if (/chr=(\d+)/) { $col0 = $1 -1; }
-   elsif (/start=(\d+)/) { $col1 = $1 -1; }
-   elsif (/end=(\d+)/) { $col2 = $1 -1; }
-   elsif (/snp=(\d+)/) { $col3 = $1 -1; }
-   elsif (/keepColumns=1/) { $keep = 1; }
-}
-if ($col0 < 0 || $col1 < 0 || $col2 < 0 || $col3 < 0) {
-   print STDERR "ERROR column numbers are given with origin 1\n";
-   exit 1;
-}
-my @genes; #bed lines for genes, sorted by chrom and start
-my %chrSt; #index in array where each chrom starts
-my %codon; #hash of codon amino acid conversions
-my $ends = 0; #ends vs sizes in bed 11 position, starts relative to chrom
-my $ignoreN = 1; #skip N
-
-my %amb = (
-"R" => "A/G",
-"Y" => "C/T",
-"S" => "C/G",
-"W" => "A/T",
-"K" => "G/T",
-"M" => "A/C",
-"B" => "C/G/T",
-"D" => "A/G/T",
-"H" => "A/C/T",
-"V" => "A/C/G",
-"N" => "A/C/G/T"
-);
-fill_codon();
-open(FH, "cat $geneFile | sort -k1,1 -k2,2n |") 
-   or die "Couldn't open and sort $geneFile, $!\n";
-my $i = 0;
-while(<FH>) {
-   chomp;
-   if (/refGene.cdsEnd|ccdsGene.exonEnds/) { $ends = 1; next; }
-   push(@genes, "$_");
-   my @f = split(/\t/);
-   if (!exists $chrSt{$f[0]}) { $chrSt{$f[0]} = $i; }
-   $i++;
-}
-close FH or die "Couldn't close $geneFile, $!\n";
-
-if ($ends) { print STDERR "WARNING using block ends rather than sizes\n"; }
-
-#open snps sorted as well
-my $s1 = $col0 + 1; #sort order is origin 1
-my $s2 = $col1 + 1; 
-open(FH, "cat $snpFile | sort -k$s1,$s1 -k$s2,${s2}n |")
-   or die "Couldn't open and sort $snpFile, $!\n";
-$i = 0;
-my @g; #one gene's fields, should be used repeatedly
-my %done;
-while(<FH>) {
-   chomp;
-   if (/^\s*#/) { next; } #comment
-   my @s = split(/\t/); #SNP fields
-   if (!@s or !$s[$col0]) { die "ERROR missing SNP data, $_\n"; }
-   my $size = $#s;
-   if ($col0 > $size || $col1 > $size || $col2 > $size || $col3 > $size) {
-      print STDERR "ERROR file has fewer columns than requested, requested columns (0 based) $col0 $col1 $col2 $col3, file has $size\n";
-      exit 1;
-   }
-   if ($s[$col1] =~ /\D/) { 
-      print STDERR "ERROR the start point must be an integer not $s[$col1]\n";
-      exit 1;
-   }
-   if ($s[$col2] =~ /\D/) {
-      print STDERR "ERROR the start point must be an integer not $s[$col2]\n";
-      exit 1;
-   }
-   if ($s[$col3] eq 'N' && $ignoreN) { next; }
-   if (exists $amb{$s[$col3]}) { $s[$col3] = $amb{$s[$col3]}; }
-   if (!@g && exists $chrSt{$s[$col0]}) { #need to fetch first gene row
-      $i = $chrSt{$s[$col0]};
-      @g = split(/\t/, $genes[$i]);
-      if (scalar @g < 12) {  
-         print STDERR "ERROR the gene file must be the whole genes in BED format\n";
-         exit 1;
-      }
-   }elsif (!@g) { 
-      next; #no gene for this chrom
-   }elsif ($s[$col0] ne $g[0] && exists $chrSt{$s[$col0]}) { #new chrom 
-      $i = $chrSt{$s[$col0]};
-      @g = split(/\t/, $genes[$i]);
-   }elsif ($s[$col0] ne $g[0]) {
-      next; #no gene for this chrom
-   }elsif ($s[$col1] < $g[1] && $i == $chrSt{$s[$col0]}) {
-      next; #before any genes
-   }elsif ($s[$col1] > $g[2] && ($i == $#genes or $genes[$i+1] !~ $s[$col0])) {
-      next; #after all genes on chr
-   }else {
-      while ($s[$col1] > $g[2] && $i < $#genes) {
-         $i++;
-         @g = split(/\t/, $genes[$i]);
-         if ($s[$col0] ne $g[0]) { last; } #end of gene
-      }
-      if ($s[$col0] ne $g[0] or $s[$col1] < $g[1] or $s[$col1] > $g[2]) {
-         next; #no overlap with genes
-      }
-   }
-
-   processSnp(\@s, \@g);
-   if ($uniq && exists $done{"$s[$col0] $s[$col1] $s[$col2]"}) { next; }
-
-   my $k = $i + 1; #check for more genes without losing data of first
-   if ($k <= $#genes) {
-      my @g2 = split(/\t/, $genes[$k]);
-      while (@g2 && $k <= $#genes) {
-         @g2 = split(/\t/, $genes[$k]);
-         if ($s[$col0] ne $g2[0]) {
-            undef @g2;
-            last; #not same chrom
-         }else {
-            while ($s[$col1] > $g2[2] && $k <= $#genes) {
-               $k++;
-               @g2 = split(/\t/, $genes[$k]);
-               if ($s[$col0] ne $g2[0]) { last; } #end of chrom
-            }
-            if ($s[$col0] ne $g2[0] or $s[$col1] < $g2[1] or $s[$col1] > $g2[2]) {
-               undef @g2;
-               last; #no overlap with more genes
-            }
-            processSnp(\@s, \@g2);
-            if ($uniq && exists $done{"$s[$col0] $s[$col1] $s[$col2]"}) { last; }
-         }      
-         $k++;
-      }
-   }
-}
-close FH or die "Couldn't close $snpFile, $!\n";
-
-exit;
-
-########################################################################
-sub processSnp {
-   my $sref = shift;
-   my $gref = shift;
-   #overlaps gene, but maybe not coding seq
-   #inside cds
-   if ($sref->[$col1] + 1 < $gref->[6] or $sref->[$col2] > $gref->[7]) {
-      return; #outside of coding 
-   }
-   #now check exon
-   my $i = 0;
-   my @st = split(/,/, $gref->[11]);
-   my @size = split(/,/, $gref->[10]);
-   if (scalar @st ne $gref->[9]) { return; } #cant do this gene #die "bad gene $gref->[3]\n"; }
-   my @pos;
-   my $in = 0;
-   for($i = 0; $i < $gref->[9]; $i++) {
-      my $sta = $gref->[1] + $st[$i] + 1; #1 based position
-      my $end = $sta + $size[$i] - 1; #
-      if ($ends) { $end = $size[$i]; $sta = $st[$i] + 1; } #ends instead of sizes
-      if ($end < $gref->[6]) { next; } #utr only
-      if ($sta > $gref->[7]) { next; } #utr only
-      #shorten to coding only
-      if ($sta < $gref->[6]) { $sta = $gref->[6] + 1; }
-      if ($end > $gref->[7]) { $end = $gref->[7]; }
-      if ($sref->[$col1] + 1 >= $sta && $sref->[$col2] <= $end) { $in = 1; }
-      elsif ($sref->[$col1] == $sref->[$col2] && $sref->[$col2] <= $end && $sref->[$col2] >= $sta) { $in = 1; }
-      push(@pos, ($sta .. $end)); #add exon worth of positions
-   }
-   #@pos has coding positions for whole gene (chr coors), 
-   #and $in has whether we need to continue
-   if (!$in) { return; } #not in coding exon
-   if ((scalar @pos) % 3 != 0) { return; } #partial gene? not even codons
-   if ($sref->[$col3] =~ /^-+\/[ACTG]+$/ or $sref->[$col3] =~ /^[ACTG]+\/-+$/ or
-       $sref->[$col3] =~ /^-+$/) { #indel or del
-      my $copy = $sref->[$col3];
-      my $c = ($copy =~ tr/-//);
-      if ($c % 3 == 0) { return; } #not frameshift 
-      #handle bed4 or any interval file
-      if (!$keep) {
-         print "$sref->[$col0]\t$sref->[$col1]\t$sref->[$col2]\t$sref->[$col3]";
-         print "\t$gref->[3]\tframeshift\n";
-      }else {
-         my @s = @{$sref};
-         print join("\t", @s), "\t$gref->[3]\tframeshift\n";
-      }
-      $done{"$sref->[$col0] $sref->[$col1] $sref->[$col2]"}++;
-      return;
-   }elsif ($sref->[$col1] == $sref->[$col2]) { #insertion
-      my $copy = $sref->[$col3];
-      my $c = ($copy =~ tr/ACTG//); #count the inserted nucleotides
-      if ($c % 3 == 0) { return; } #not frameshift
-      #handle bed4 or any interval file
-      if (!$keep) {
-         print "$sref->[$col0]\t$sref->[$col1]\t$sref->[$col2]\t$sref->[$col3]";
-         print "\t$gref->[3]\tframeshift\n";
-      }else {
-         my @s = @{$sref};
-         print join("\t", @s), "\t$gref->[3]\tframeshift\n";
-      }
-      $done{"$sref->[$col0] $sref->[$col1] $sref->[$col2]"}++;
-      return;
-   }elsif ($sref->[$col3] =~ /-/) { #indel and sub?
-      return; #skip
-   }
-   #check for amino acid substitutions
-   my $s = $sref->[$col1] + 1;
-   my $e = $sref->[$col2];
-   my $len = $sref->[$col2] - $sref->[$col1];
-   if ($gref->[5] eq '-') { 
-      @pos = reverse(@pos); 
-      my $t = $s;
-      $s = $e;
-      $e = $t;
-   }
-   $i = 0;
-   my $found = 0;
-   foreach (@pos) {
-      if ($s == $_) {
-         $found = 1;
-         last;
-      }
-      $i++;
-   }
-   if ($found) {
-      my $fs = $i; #keep original start index
-      #have index where substitution starts
-      my $cp = $i % 3; 
-      $i -= $cp; #i is now first position in codon
-      my $cdNum = int($i / 3) + 1;
-      my $ls = $i;
-      if (!defined $ls) { die "ERROR not defined ls for $fs $sref->[$col2]\n"; }
-      if (!@pos) { die "ERROR not defined array pos\n"; }
-      if (!defined $pos[$ls]) { die "ERROR not defined pos at $ls\n"; }
-      if (!defined $e) { die "ERROR not defined e for $pos[0] $pos[1] $pos[2]\n"; }
-      while ($ls <= $#pos && $pos[$ls] ne $e) { 
-         $ls++; 
-      }
-      my $i2 = $ls + (2 - ($ls % 3));
-      if ($i2 > $#pos) { return; } #not a full codon, partial gene?
-
-      if ($i2 - $i < 2) { die "not a full codon positions $i to $i2 for $sref->[3]\n"; }
-      my $oldnts = getnts($sref->[$col0], @pos[$i..$i2]);
-      if (!$oldnts) { die "Failed to get sequence for $sref->[$col0] $pos[$i] .. $pos[$i2]\n"; }
-      my @vars = split(/\//, $sref->[$col3]);
-      if ($gref->[5] eq '-') { #complement oldnts and revcomp vars
-         $oldnts = compl($oldnts);
-         if (!$oldnts) { return; } #skip this one
-         $oldnts = join('', (reverse(split(/ */, $oldnts))));
-         foreach (@vars) {
-            $_ = reverse(split(/ */)); #needed for indels
-            $_ = compl($_);
-         }
-      }
-      my $r = $fs - $i; #difference in old indexes gives new index
-      my @newnts;
-      my $changed = '';
-      foreach my $v (@vars) {
-         if (!$v or length($v) != 1) { return; } #only simple changes
-         my @new = split(/ */, $oldnts);
-         $changed = splice(@new, $r, $len, split(/ */, $v));
-         #should only change single nt
-         push(@newnts, join("", @new));
-      }
-      #now compute amino acids
-      my $oldaa = getaa($oldnts);
-      my @newaa;
-      my $change = 0; #flag for if there is a change
-      foreach my $v (@newnts) {
-         my $t = getaa($v);
-         if ($t ne $oldaa) { $change = 1; }
-         push(@newaa, $t);
-      }
-      if (!$change && $syn) { 
-          if (!$keep) {
-             print "$sref->[$col0]\t$sref->[$col1]\t$sref->[$col2]\t$sref->[$col3]";
-             print "\t$gref->[3]\t$oldaa:", join("/", @newaa), "\n";
-          }else {
-             my @s = @{$sref};
-             print join("\t", @s), 
-                   "\t$gref->[3]\t$oldaa:", join("/", @newaa), "\n";
-          }
-          return;
-      }elsif ($syn) { return; } #only want synonymous changes
-      if (!$change) { return; } #no change in amino acids
-      if (!$keep) {
-         print "$sref->[$col0]\t$sref->[$col1]\t$sref->[$col2]\t$sref->[$col3]";
-         if ($gref->[5] eq '-') { $changed = compl($changed); } #use plus for ref
-         if (!$changed) { return; } #skip this one
-         print "\t$gref->[3]\t$oldaa:", join("/", @newaa), "\t$cdNum\t$changed\n";
-      }else {
-         my @s = @{$sref};
-         print join("\t", @s);
-         if ($gref->[5] eq '-') { $changed = compl($changed); } #use plus for ref
-         if (!$changed) { return; } #skip this one
-         print "\t$gref->[3]\t$oldaa:", join("/", @newaa), "\t$cdNum\t$changed\n";
-      }
-      $done{"$sref->[$col0] $sref->[$col1] $sref->[$col2]"}++;
-   }
-}
-
-sub getnts {
-   my $chr = shift;
-   my @pos = @_; #list of positions not necessarily in order
-   #list may be reversed or have gaps(introns), at least 3 bps
-   my $seq = '';
-   if (scalar @pos < 3) { die "too small region for $chr $pos[0]\n"; }
-   if ($pos[0] < $pos[1]) { #not reversed
-      my $s = $pos[0];
-      for(my $i = 1; $i <= $#pos; $i++) {
-         if ($pos[$i] == $pos[$i-1] + 1) { next; }
-         if ($seqFlag eq '2bit') { 
-            $seq .= fetchSeq2bit($chr, $s, $pos[$i-1]);
-         }else {
-            $seq .= fetchSeqNib($chr, $s, $pos[$i-1]);
-         }
-         $s = $pos[$i];
-      }
-      if (length $seq != scalar @pos) { #still need to fetch seq
-         if ($seqFlag eq '2bit') {
-            $seq .= fetchSeq2bit($chr, $s, $pos[$#pos]);
-         }else {
-            $seq .= fetchSeqNib($chr, $s, $pos[$#pos]);
-         }
-      }
-   }else { #reversed
-      my $s = $pos[$#pos];
-      for(my $i = $#pos -1; $i >= 0; $i--) {
-         if ($pos[$i] == $pos[$i+1] + 1) { next; }
-         if ($seqFlag eq '2bit') {
-            $seq .= fetchSeq2bit($chr, $s, $pos[$i+1]);
-         }else {
-            $seq .= fetchSeqNib($chr, $s, $pos[$i+1]);
-         }
-         $s = $pos[$i];
-      }
-      if (length $seq != scalar @pos) { #still need to fetch seq
-         if ($seqFlag eq '2bit') {
-            $seq .= fetchSeq2bit($chr, $s, $pos[0]);
-         }else {
-            $seq .= fetchSeqNib($chr, $s, $pos[0]);
-         }
-      }
-   }
-   return $seq;
-}
-
-sub fetchSeq2bit {
-   my $chr = shift;
-   my $st = shift;
-   my $end = shift;
-   my $strand = '+';
-   $st--; #change to UCSC numbering
-   open (BIT, "twoBitToFa -seq=$chr -start=$st -end=$end $nibDir stdout |") or
-      die "Couldn't run twoBitToFa, $!\n";
-   my $seq = '';
-   while (<BIT>) {
-      chomp;
-      if (/^>/) { next; } #header
-      $seq .= uc($_);
-   }
-   close BIT or die "Couldn't finish twoBitToFa on $chr $st $end, $!\n";
-   return $seq;
-}
-
-sub fetchSeqNib {
-   my $chr = shift;
-   my $st = shift;
-   my $end = shift;
-   my $strand = '+';
-   $st--; #change to UCSC numbering
-   open (NIB, "nibFrag -upper $nibDir/${chr}.nib $st $end $strand stdout |") or die "Couldn't run nibFrag, $!\n";
-   my $seq = '';
-   while (<NIB>) {
-      chomp;
-      if (/^>/) { next; } #header
-      $seq .= $_;
-   }
-   close NIB or die "Couldn't finish nibFrag on $chr $st $end, $!\n";
-   return $seq;
-}
-
-sub compl {
-   my $nts = shift;
-   my $comp = '';
-   if (!$nts) { die "ERROR called compl with nts undefined"; }
-   foreach my $n (split(/ */, $nts)) {
-      if ($n eq 'A') { $comp .= 'T'; }
-      elsif ($n eq 'T') { $comp .= 'A'; }
-      elsif ($n eq 'C') { $comp .= 'G'; }
-      elsif ($n eq 'G') { $comp .= 'C'; }
-      elsif ($n eq 'N') { $comp .= 'N'; }
-      elsif ($n eq '-') { $comp .= '-'; } #deletion
-      else { $comp = undef; }
-   }
-   return $comp;
-}
-
-sub getaa {
-   my $nts = shift;  #in multiples of 3
-   my $aa = '';
-   my @n = split(/ */, $nts);
-   while (@n) {
-      my @t = splice(@n, 0, 3);
-      my $n = uc(join("", @t));
-      if (!exists $codon{$n}) { $aa .= 'N'; next; }
-      $aa .= $codon{$n};
-   }
-   return $aa;
-}
-
-sub fill_codon {
-$codon{GCA} = 'Ala';
-$codon{GCC} = 'Ala';
-$codon{GCG} = 'Ala';
-$codon{GCT} = 'Ala';
-$codon{CGG} = 'Arg';
-$codon{CGT} = 'Arg';
-$codon{CGC} = 'Arg';
-$codon{AGA} = 'Arg';
-$codon{AGG} = 'Arg';
-$codon{CGA} = 'Arg';
-$codon{AAC} = 'Asn';
-$codon{AAT} = 'Asn';
-$codon{GAC} = 'Asp';
-$codon{GAT} = 'Asp';
-$codon{TGC} = 'Cys';
-$codon{TGT} = 'Cys';
-$codon{CAG} = 'Gln';
-$codon{CAA} = 'Gln';
-$codon{GAA} = 'Glu';
-$codon{GAG} = 'Glu';
-$codon{GGG} = 'Gly';
-$codon{GGA} = 'Gly';
-$codon{GGC} = 'Gly';
-$codon{GGT} = 'Gly';
-$codon{CAC} = 'His';
-$codon{CAT} = 'His';
-$codon{ATA} = 'Ile';
-$codon{ATT} = 'Ile';
-$codon{ATC} = 'Ile';
-$codon{CTA} = 'Leu';
-$codon{CTC} = 'Leu';
-$codon{CTG} = 'Leu';
-$codon{CTT} = 'Leu';
-$codon{TTG} = 'Leu';
-$codon{TTA} = 'Leu';
-$codon{AAA} = 'Lys';
-$codon{AAG} = 'Lys';
-$codon{ATG} = 'Met';
-$codon{TTC} = 'Phe';
-$codon{TTT} = 'Phe';
-$codon{CCT} = 'Pro';
-$codon{CCA} = 'Pro';
-$codon{CCC} = 'Pro';
-$codon{CCG} = 'Pro';
-$codon{TCA} = 'Ser';
-$codon{AGC} = 'Ser';
-$codon{AGT} = 'Ser';
-$codon{TCC} = 'Ser';
-$codon{TCT} = 'Ser';
-$codon{TCG} = 'Ser';
-$codon{TGA} = 'Stop';
-$codon{TAG} = 'Stop';
-$codon{TAA} = 'Stop';
-$codon{ACT} = 'Thr';
-$codon{ACA} = 'Thr';
-$codon{ACC} = 'Thr';
-$codon{ACG} = 'Thr';
-$codon{TGG} = 'Trp';
-$codon{TAT} = 'Tyr';
-$codon{TAC} = 'Tyr';
-$codon{GTC} = 'Val';
-$codon{GTA} = 'Val';
-$codon{GTG} = 'Val';
-$codon{GTT} = 'Val';
-}
-
-sub getGalaxyInfo {
-   my $build;
-   my $locFile;
-   foreach (@ARGV) {
-      if (/build=(.*)/) { $build = $1; }
-      elsif (/loc=(.*)/) { $locFile = $1; }
-   }
-   if (!$build or !$locFile) {
-      print STDERR "ERROR missing build or locfile for Galaxy input\n";
-      exit 1;
-   }
-   # read $locFile to get $nibDir (ignoring comments)
-   open(LF, "< $locFile") || die "open($locFile): $!\n";
-   while(<LF>) {
-      s/#.*$//;
-      s/(?:^\s+|\s+$)//g;
-      next if (/^$/);
-   
-      my @t = split(/\t/);
-      if ($t[0] eq $build) { $nibDir = $t[1]; }
-   }
-   close(LF);
-   if ($nibDir eq 'Galaxy') {
-      print STDERR "Failed to find sequence directory in locfile $locFile\n";
-   }
-   $nibDir .= "/$build.2bit";  #we want full path and filename
-}
-
--- a/tools/evolution/codingSnps.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,139 +0,0 @@
-<tool id="hgv_codingSnps" name="aaChanges" version="1.0.0">
-  <description>amino-acid changes caused by a set of SNPs</description>
-
-  <command interpreter="perl">
-    codingSnps.pl $input1 $input2 Galaxy build=${input1.metadata.dbkey} loc=${GALAXY_DATA_INDEX_DIR}/codingSnps.loc chr=${input1.metadata.chromCol} start=${input1.metadata.startCol} end=${input1.metadata.endCol} snp=$col1 > $out_file1
-  </command>
-
-  <inputs>
-    <param format="interval" name="input1" type="data" label="SNP dataset">
-      <validator type="dataset_metadata_in_file" filename="codingSnps.loc" metadata_name="dbkey" metadata_column="0" message="Sequences are not currently available for the specified build." split="\t" />
-    </param>
-    <param name="col1" type="data_column" data_ref="input1" label="Column with SNPs" />
-    <param format="interval" name="input2" type="data" label="Gene dataset">
-      <validator type="dataset_metadata_in_file" filename="codingSnps.loc" metadata_name="dbkey" metadata_column="0" message="Sequences are not currently available for the specified build." split="\t" />
-    </param>
-  </inputs>
-
-  <outputs>
-    <data format="tabular" name="out_file1" />
-  </outputs>
-
-  <code file="codingSnps_filter.py"></code>
-
-  <requirements>
-    <requirement type="binary">cat</requirement>
-    <requirement type="binary">sort</requirement>
-    <requirement type="package">ucsc_tools</requirement>
-  </requirements>
-
-  <tests>
-    <test>
-      <param name="input1" ftype="interval" value="codingSnps_input1.interval" dbkey="hg18" />
-      <param name="col1" value="6" />
-      <param name="input2" ftype="interval" value="codingSnps_inputGenes1.bed" dbkey="hg18" />
-      <output name="output" file="codingSnps_output1.interval" />
-    </test>
-    <test>
-      <param name="input1" ftype="interval" value="codingSnps_input2.interval" dbkey="hg18" />
-      <param name="input2" ftype="interval" value="codingSnps_inputGenes2.bed" dbkey="hg18" />
-      <param name="col1" value="4" />
-      <output name="output" file="codingSnps_output2.interval" />
-    </test>
-  </tests>
-
-  <help>
-.. class:: infomark
-
-The build must be defined for the input files and must be the same for both files.
-Use the pencil icon to add the build to the files if necessary.
-
------
-
-**Dataset formats**
-
-The SNP dataset is in interval_ format, with a column of SNPs as described below.
-The gene dataset is in BED_ format with 12 columns.  The output dataset is also interval.
-(`Dataset missing?`_)
-
-.. _interval: ./static/formatHelp.html#interval
-.. _BED: ./static/formatHelp.html#bed
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-This tool identifies which SNPs create amino-acid changes in the specified
-coding regions.  The first input file contains the SNPs and must be an interval file.
-It needs the chromosome, start, and end position as well as the SNP.  The
-SNP can be given using ambiguous-nucleotide symbols or a list of two to four
-alleles separated by '/'.  Any other columns in the first input file will not be
-used but will be kept for the output.  The second input file contains the genes
-to be used for defining the coding regions.  This file must be a BED file with
-the first 12 columns standard BED columns.  The output is the same as the
-first input file with several columns added: the name field from the line of
-the gene input file used, the amino acids, the codon number, and the
-reference nucleotide at the changed position.
-The amino acids are listed with the reference amino acid first, then a colon,
-and then the amino acids for the alleles.  If a SNP is not in a coding region
-or is synonymous, it is not included in the output file.
-
------
-
-**Example**
-
-- first input file, with SNPs::
-
-    chr22  15660821  15660822  A/G
-    chr22  15825725  15825726  G/T
-    chr22  15827035  15827036  G
-    chr22  15827135  15827136  C/G
-    chr22  15830928  15830929  A/G
-    chr22  15830951  15830952  G
-    chr22  15830955  15830956  C/T
-    chr22  15848885  15848886  C/T
-    chr22  15849048  15849049  A/C
-    chr22  15919711  15919712  A/G
-    etc.
-
-  or, indicating polymorphisms using ambiguous-nucleotide symbols::
-
-    chr22  15660821  15660822  R
-    chr22  15825725  15825726  K
-    chr22  15827035  15827036  G
-    chr22  15827135  15827136  S
-    chr22  15830928  15830929  R
-    chr22  15830951  15830952  G
-    chr22  15830955  15830956  Y
-    chr22  15848885  15848886  Y
-    chr22  15849048  15849049  M
-    chr22  15919711  15919712  R
-    etc.
-
-- second input file, with UCSC annotations for human genes::
-
-    chr22  15688363  15690225  uc010gqr.1  0  +  15688363  15688363  0  2   587,794,  0,1068,
-    chr22  15822826  15869112  uc002zlw.1  0  -  15823622  15869004  0  10  940,105,97,91,265,86,251,208,304,282,  0,1788,2829,3241,4163,6361,8006,26023,29936,46004,
-    chr22  15826991  15869112  uc010gqs.1  0  -  15829218  15869004  0  5   1380,86,157,304,282,  0,2196,21858,25771,41839,
-    chr22  15897459  15919682  uc002zlx.1  0  +  15897459  15897459  0  4   775,128,103,1720,  0,8303,10754,20503,
-    chr22  15945848  15971389  uc002zly.1  0  +  15945981  15970710  0  13  271,25,147,113,127,48,164,84,85,12,102,42,2193,  0,12103,12838,13816,15396,17037,17180,18535,19767,20632,20894,22768,23348,
-    etc.
-
-- output file, showing non-synonymous substitutions in coding regions::
-
-    chr22  15825725  15825726  G/T  uc002zlw.1  Gln:Pro/Gln   469  T
-    chr22  15827035  15827036  G    uc002zlw.1  Glu:Asp       414  C
-    chr22  15827135  15827136  C/G  uc002zlw.1  Gly:Gly/Ala   381  C
-    chr22  15830928  15830929  A/G  uc002zlw.1  Ala:Ser/Pro   281  C
-    chr22  15830951  15830952  G    uc002zlw.1  Leu:Pro       273  A
-    chr22  15830955  15830956  C/T  uc002zlw.1  Ser:Gly/Ser   272  T
-    chr22  15848885  15848886  C/T  uc002zlw.1  Ser:Trp/Stop  217  G
-    chr22  15848885  15848886  C/T  uc010gqs.1  Ser:Trp/Stop  200  G
-    chr22  15849048  15849049  A/C  uc002zlw.1  Gly:Stop/Gly  163  C
-    etc.
-
-  </help>
-</tool>
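
The ambiguous-nucleotide notation described in the help above follows the
standard IUPAC two-allele codes; a small reference sketch (Python, names
illustrative)::

    # standard IUPAC ambiguity codes covering the two-allele SNP symbols
    # used in the example input above
    IUPAC_SNP = {'R': 'A/G', 'Y': 'C/T', 'S': 'C/G',
                 'W': 'A/T', 'K': 'G/T', 'M': 'A/C'}

    def expand_snp(symbol):
        # unambiguous bases (A, C, G, T) pass through unchanged
        return IUPAC_SNP.get(symbol.upper(), symbol.upper())

    # expand_snp('R') -> 'A/G'; expand_snp('g') -> 'G'
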
--- a/tools/evolution/codingSnps_filter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-#!/usr/bin/env python
-
-# runs after the job (and after the default post-filter)
-import os
-from galaxy import eggs
-from galaxy import jobs
-from galaxy.tools.parameters import DataToolParameter
-# Older py compatibility
-try:
-    set()
-except:
-    from sets import Set as set
-
-def validate_input( trans, error_map, param_values, page_param_map ):
-    dbkeys = set()
-    data_param_names = set()
-    data_params = 0
-    for name, param in page_param_map.iteritems():
-        if isinstance( param, DataToolParameter ):
-            # for each dataset parameter
-            if param_values.get(name, None) != None:
-                dbkeys.add( param_values[name].dbkey )
-                data_params += 1
-                # check meta data
-                try:
-                    param = param_values[name]
-                    startCol = int( param.metadata.startCol )
-                    endCol = int( param.metadata.endCol )
-                    chromCol = int( param.metadata.chromCol )
-                    if param.metadata.strandCol is not None:
-                        strandCol = int ( param.metadata.strandCol )
-                    else:
-                        strandCol = 0
-                except:
-                    error_msg = "The attributes of this dataset are not properly set. " + \
-                    "Click the pencil icon in the history item to set the chrom, start, end and strand columns."
-                    error_map[name] = error_msg
-            data_param_names.add( name )
-    if len( dbkeys ) > 1:
-        for name in data_param_names:
-            error_map[name] = "All datasets must belong to the same genomic build, " \
-                "this dataset is linked to build '%s'" % param_values[name].dbkey
-    if data_params != len(data_param_names):
-        for name in data_param_names:
-            error_map[name] = "A dataset of the appropriate type is required"
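
The dbkey check above reduces to one rule: every selected dataset must share
a single genome build. A standalone sketch of that rule (hypothetical helper,
plain Python, no Galaxy imports)::

    def check_same_build(dbkeys_by_param):
        # dbkeys_by_param: dict mapping parameter name -> dataset dbkey
        errors = {}
        if len(set(dbkeys_by_param.values())) > 1:
            for name, dbkey in dbkeys_by_param.items():
                errors[name] = ("All datasets must belong to the same genomic "
                                "build, this dataset is linked to build '%s'" % dbkey)
        return errors

    # check_same_build({'input1': 'hg18', 'input2': 'hg19'}) flags both params
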
--- a/tools/evolution/mutate_snp_codon.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-#!/usr/bin/env python
-"""
-Script to mutate SNP codons.
-Dan Blankenberg
-"""
-
-import sys, string
-
-def strandify( fields, column ):
-    strand = '+'
-    if column >= 0 and column < len( fields ):
-        strand = fields[ column ]
-        if strand not in [ '+', '-' ]:
-            strand = '+'
-    return strand
-
-def main():
-    # parse command line
-    input_file = sys.argv[1]
-    out = open( sys.argv[2], 'wb+' )
-    codon_chrom_col = int( sys.argv[3] ) - 1
-    codon_start_col = int( sys.argv[4] ) - 1
-    codon_end_col = int( sys.argv[5] ) - 1
-    codon_strand_col = int( sys.argv[6] ) - 1
-    codon_seq_col = int( sys.argv[7] ) - 1
-    
-    snp_chrom_col = int( sys.argv[8] ) - 1
-    snp_start_col = int( sys.argv[9] ) - 1
-    snp_end_col = int( sys.argv[10] ) - 1
-    snp_strand_col = int( sys.argv[11] ) - 1
-    snp_observed_col = int( sys.argv[12] ) - 1
-    
-    max_field_index = max( codon_chrom_col, codon_start_col, codon_end_col, codon_strand_col, codon_seq_col, snp_chrom_col, snp_start_col, snp_end_col, snp_strand_col, snp_observed_col )
-    
-    DNA_COMP = string.maketrans( "ACGTacgt", "TGCAtgca" )
-    skipped_lines = 0
-    errors = {}
-    for name, message in [ ('max_field_index','not enough fields'), ( 'codon_len', 'codon length must be 3' ), ( 'codon_seq', 'codon sequence must have length 3' ), ( 'snp_len', 'SNP length must be 1' ), ( 'snp_observed', 'SNP observed values must have length 1' ), ( 'empty_comment', 'empty or comment'), ( 'no_overlap', 'codon and SNP do not overlap' ) ]:
-        errors[ name ] = { 'count':0, 'message':message }
-    line_count = 0
-    for line_count, line in enumerate( open( input_file ) ):
-        line = line.rstrip( '\n\r' )
-        if line and not line.startswith( '#' ):
-            fields = line.split( '\t' )
-            if max_field_index >= len( fields ):
-                skipped_lines += 1
-                errors[ 'max_field_index' ]['count'] += 1
-                continue
-            
-            #read codon info
-            codon_chrom = fields[codon_chrom_col]
-            codon_start = int( fields[codon_start_col] )
-            codon_end = int( fields[codon_end_col] )
-            if codon_end - codon_start != 3:
-                #codons must be length 3
-                skipped_lines += 1
-                errors[ 'codon_len' ]['count'] += 1
-                continue
-            codon_strand = strandify( fields, codon_strand_col )
-            codon_seq = fields[codon_seq_col].upper()
-            if len( codon_seq ) != 3:
-                #codon sequence must have length 3
-                skipped_lines += 1
-                errors[ 'codon_seq' ]['count'] += 1
-                continue
-            
-            #read snp info
-            snp_chrom = fields[snp_chrom_col]
-            snp_start = int( fields[snp_start_col] )
-            snp_end = int( fields[snp_end_col] )
-            if snp_end - snp_start != 1:
-                #snps must be length 1
-                skipped_lines += 1
-                errors[ 'snp_len' ]['count'] += 1
-                continue
-            snp_strand = strandify( fields, snp_strand_col )
-            snp_observed = fields[snp_observed_col].split( '/' )
-            snp_observed = [ observed for observed in snp_observed if len( observed ) == 1 ]
-            if not snp_observed:
-                #sequence replacements must be length 1
-                skipped_lines += 1
-                errors[ 'snp_observed' ]['count'] += 1
-                continue
-            
-            #Determine index of replacement for observed values into codon
-            offset = snp_start - codon_start
-            #On negative-strand codons, positions are reversed relative to interval positions; i.e. interval position 0 == codon position 2
-            if codon_strand == '-':
-                offset = 2 - offset
-            if offset < 0 or offset > 2: #assert offset >= 0 and offset <= 2, ValueError( 'Impossible offset determined: %s' % offset )
-                #codon and snp do not overlap
-                skipped_lines += 1
-                errors[ 'no_overlap' ]['count'] += 1
-                continue
-            
-            for observed in snp_observed:
-                if codon_strand != snp_strand:
-                    #if our SNP is on a different strand than our codon, take complement of provided observed SNP base
-                    observed = observed.translate( DNA_COMP )
-                snp_codon = [ char for char in codon_seq ]
-                snp_codon[offset] = observed.upper()
-                snp_codon = ''.join( snp_codon )
-                
-                if codon_seq != snp_codon: #only output when we actually have a different codon
-                    out.write( "%s\t%s\n" % ( line, snp_codon )  )
-        else:
-            skipped_lines += 1
-            errors[ 'empty_comment' ]['count'] += 1
-    if skipped_lines:
-        print "Skipped %i (%4.2f%%) of %i lines; reasons: %s" % ( skipped_lines, ( float( skipped_lines )/float( line_count ) ) * 100, line_count, ', '.join( [ "%s (%i)" % ( error['message'], error['count'] ) for error in errors.itervalues() if error['count'] ] ) )
-    
-if __name__ == "__main__": main()
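
A worked numeric check of the strand-aware offset logic above (Python 2,
matching the script; the coordinate and sequence values are illustrative)::

    import string
    DNA_COMP = string.maketrans("ACGTacgt", "TGCAtgca")

    codon_start, codon_strand, codon_seq = 100, '-', 'GAT'
    snp_start, snp_strand, observed = 100, '+', 'C'

    offset = snp_start - codon_start             # 0 in interval coordinates
    if codon_strand == '-':
        offset = 2 - offset                      # interval position 0 is codon position 2
    if codon_strand != snp_strand:
        observed = observed.translate(DNA_COMP)  # opposite strands: 'C' -> 'G'

    snp_codon = list(codon_seq)
    snp_codon[offset] = observed.upper()
    print ''.join(snp_codon)                     # GAG: differs from GAT, so it is output
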
--- a/tools/evolution/mutate_snp_codon.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-<tool id="mutate_snp_codon_1" name="Mutate Codons" version="1.0.0">
-  <description>with SNPs</description>
-  <command interpreter="python">mutate_snp_codon.py $input1 $output1 ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} ${input1.metadata.strandCol} $codon_seq_col $snp_chrom_col $snp_start_col $snp_end_col $snp_strand_col $snp_observed_col</command>
-  <inputs>
-    <param name="input1" type="data" format="interval" label="Interval file with joined SNPs" optional="False" help="The interval metadata for this file should be set for the codon positions."/>
-    <param name="codon_seq_col" label="Codon Sequence column" type="data_column" data_ref="input1" />
-    <param name="snp_chrom_col" label="SNP chromosome column" type="data_column" data_ref="input1" />
-    <param name="snp_start_col" label="SNP start column" type="data_column" data_ref="input1" />
-    <param name="snp_end_col" label="SNP end column" type="data_column" data_ref="input1" />
-    <param name="snp_strand_col" label="SNP strand column" type="data_column" data_ref="input1" />
-    <param name="snp_observed_col" label="SNP observed column" type="data_column" data_ref="input1" />
-  </inputs>
-  <outputs>
-    <data name="output1" format="interval" metadata_source="input1"/>
-  </outputs>
-   <tests>
-     <test>
-       <param name="input1" value="mutate_snp_codon_in.interval"/>
-       <param name="codon_seq_col" value="8"/>
-       <param name="snp_chrom_col" value="17"/>
-       <param name="snp_start_col" value="18"/>
-       <param name="snp_end_col" value="19"/>
-       <param name="snp_strand_col" value="22"/>
-       <param name="snp_observed_col" value="25"/>
-       <output name="output1" file="mutate_snp_codon_out.interval" />
-     </test>
-   </tests>
-  <help>
-This tool takes an interval file as input.  This input should contain a set of codon locations and corresponding DNA sequences (such as from the *Extract Genomic DNA* tool) joined to SNP locations with observed values (such as *all fields from selected table* from the snp130 table of hg18 at the UCSC Table Browser).  This interval file should have the metadata (chromosome, start, end, strand) set for the columns containing the locations of the codons.  The user needs to specify the columns containing the codon sequence as well as the genomic positions and observed values for the SNP data (values should be separated by '/'); SNP positions and sequence substitutions must have a length of exactly 1.  Only genomic intervals that yield a different sequence string are output.  All sequence characters are converted to uppercase during processing.
-  
-  For example, using these settings:
-  
-  * **metadata** **chromosome**, **start**, **end** and **strand** set to **1**, **2**, **3** and **6**, respectively
-  * **Codon Sequence column** set to **c8**
-  * **SNP chromosome column** set to **c17**
-  * **SNP start column** set to **c18**
-  * **SNP end column** set to **c19**
-  * **SNP strand column** set to **c22**
-  * **SNP observed column** set to **c25**
-  
-  with the following input::
-  
-    chr1	58995	58998	NM_001005484	0	+	GAA	GAA	Glu	GAA	1177632	28.96	0	2787607	0.422452662804	585	chr1	58996	58997	rs1638318	0	+	A	A	A/G	genomic	single	by-submitter	0	0	unknown	exact	3
-    chr1	59289	59292	NM_001005484	0	+	TTT	TTT	Phe	TTT	714298	17.57	0	1538990	0.464134269878	585	chr1	59290	59291	rs71245814	0	+	T	T	G/T	genomic	single	unknown	0	0	unknown	exact	3
-    chr1	59313	59316	NM_001005484	0	+	AAG	AAG	Lys	AAG	1295568	31.86	0	2289189	0.565950648898	585	chr1	59315	59316	rs2854682	0	-	G	G	C/T	genomic	single	by-submitter	0	0	unknown	exact	3
-    chr1	59373	59376	NM_001005484	0	+	ACA	ACA	Thr	ACA	614523	15.11	0	2162384	0.284187729839	585	chr1	59373	59374	rs2691305	0	-	A	A	C/T	genomic	single	unknown	0	0	unknown	exact	3
-    chr1	59412	59415	NM_001005484	0	+	GCG	GCG	Ala	GCG	299495	7.37	0	2820741	0.106176001271	585	chr1	59414	59415	rs2531266	0	+	G	G	C/G	genomic	single	by-submitter	0	0	unknown	exact	3
-    chr1	59412	59415	NM_001005484	0	+	GCG	GCG	Ala	GCG	299495	7.37	0	2820741	0.106176001271	585	chr1	59414	59415	rs55874132	0	+	G	G	C/G	genomic	single	unknown	0	0	coding-synon	exact	1
-  
-  
-  will produce::
-  
-    chr1	58995	58998	NM_001005484	0	+	GAA	GAA	Glu	GAA	1177632	28.96	0	2787607	0.422452662804	585	chr1	58996	58997	rs1638318	0	+	A	A	A/G	genomic	single	by-submitter	0	0	unknown	exact	3	GGA
-    chr1	59289	59292	NM_001005484	0	+	TTT	TTT	Phe	TTT	714298	17.57	0	1538990	0.464134269878	585	chr1	59290	59291	rs71245814	0	+	T	T	G/T	genomic	single	unknown	0	0	unknown	exact	3	TGT
-    chr1	59313	59316	NM_001005484	0	+	AAG	AAG	Lys	AAG	1295568	31.86	0	2289189	0.565950648898	585	chr1	59315	59316	rs2854682	0	-	G	G	C/T	genomic	single	by-submitter	0	0	unknown	exact	3	AAA
-    chr1	59373	59376	NM_001005484	0	+	ACA	ACA	Thr	ACA	614523	15.11	0	2162384	0.284187729839	585	chr1	59373	59374	rs2691305	0	-	A	A	C/T	genomic	single	unknown	0	0	unknown	exact	3	GCA
-    chr1	59412	59415	NM_001005484	0	+	GCG	GCG	Ala	GCG	299495	7.37	0	2820741	0.106176001271	585	chr1	59414	59415	rs2531266	0	+	G	G	C/G	genomic	single	by-submitter	0	0	unknown	exact	3	GCC
-    chr1	59412	59415	NM_001005484	0	+	GCG	GCG	Ala	GCG	299495	7.37	0	2820741	0.106176001271	585	chr1	59414	59415	rs55874132	0	+	G	G	C/G	genomic	single	unknown	0	0	coding-synon	exact	1	GCC
-  </help>
-</tool>
--- a/tools/extract/extract_genomic_dna.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,283 +0,0 @@
-#!/usr/bin/env python
-"""
-usage: %prog $input $out_file1
-    -1, --cols=N,N,N,N: Columns for chrom, start, end, strand in input file
-    -d, --dbkey=N: Genome build of input file
-    -o, --output_format=N: the data type of the output file
-    -g, --GALAXY_DATA_INDEX_DIR=N: the directory containing alignseq.loc
-    -I, --interpret_features: if true, complete features are interpreted when input is GFF 
-    -F, --fasta=<genomic_sequences>: genomic sequences to use for extraction
-    -G, --gff: input and output file; when the input is interval, coordinates are treated as GFF format (1-based, closed) rather than the 'traditional' 0-based, half-open format.
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import sys, string, os, re, tempfile, subprocess
-from bx.cookbook import doc_optparse
-from bx.intervals.io import Header, Comment
-import bx.seq.nib
-import bx.seq.twobit
-from galaxy.tools.util.galaxyops import *
-from galaxy.datatypes.util import gff_util
-
-assert sys.version_info[:2] >= ( 2, 4 )
-    
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def reverse_complement( s ):
-    complement_dna = {"A":"T", "T":"A", "C":"G", "G":"C", "a":"t", "t":"a", "c":"g", "g":"c", "N":"N", "n":"n" }
-    reversed_s = []
-    for i in s:
-        reversed_s.append( complement_dna[i] )
-    reversed_s.reverse()
-    return "".join( reversed_s )
-
-def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ):
-    seq_file = "%s/alignseq.loc" % GALAXY_DATA_INDEX_DIR
-    seq_path = ''
-    for line in open( seq_file ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( "#" ) and line.startswith( 'seq' ):
-            fields = line.split( '\t' )
-            if len( fields ) < 3:
-                continue
-            if fields[1] == dbkey:
-                seq_path = fields[2].strip()
-                break
-    return seq_path
-        
-def __main__():
-    #
-    # Parse options, args.
-    #
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        chrom_col, start_col, end_col, strand_col = parse_cols_arg( options.cols )
-        dbkey = options.dbkey
-        output_format = options.output_format
-        gff_format = options.gff
-        interpret_features = options.interpret_features
-        GALAXY_DATA_INDEX_DIR = options.GALAXY_DATA_INDEX_DIR
-        fasta_file = options.fasta
-        input_filename, output_filename = args
-    except:
-        doc_optparse.exception()
-
-    includes_strand_col = strand_col >= 0
-    strand = None
-    nibs = {}
-    twobits = {}
-        
-    #
-    # Set path to sequence data.
-    #
-    if fasta_file:
-        # Need to create 2bit file from fasta file.
-        try:
-            seq_path = tempfile.NamedTemporaryFile( dir="." ).name
-            cmd = "faToTwoBit %s %s" % ( fasta_file, seq_path )
-        
-            tmp_name = tempfile.NamedTemporaryFile( dir="." ).name
-            tmp_stderr = open( tmp_name, 'wb' )
-            proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
-            returncode = proc.wait()
-            tmp_stderr.close()
-
-            # Get stderr, allowing for case where it's very large.
-            tmp_stderr = open( tmp_name, 'rb' )
-            stderr = ''
-            buffsize = 1048576
-            try:
-                while True:
-                    stderr += tmp_stderr.read( buffsize )
-                    if not stderr or len( stderr ) % buffsize != 0:
-                        break
-            except OverflowError:
-                pass
-            tmp_stderr.close()
-
-            # Error checking.
-            if returncode != 0:
-                raise Exception, stderr
-        except Exception, e:
-            stop_err( 'Error running faToTwoBit. ' + str( e ) )
-    else:
-        seq_path = check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR )
-        if not os.path.exists( seq_path ):
-            # If this occurs, we need to fix the metadata validator.
-            stop_err( "No sequences are available for '%s', request them by reporting this error." % dbkey )
-    
-    #
-    # Fetch sequences.
-    #
-    
-    # Get feature's line(s).
-    def get_lines( feature ):
-        if isinstance( feature, gff_util.GFFFeature ):
-            return feature.lines()
-        else:
-            return [ feature.rstrip( '\r\n' ) ]
-    
-    skipped_lines = 0
-    first_invalid_line = 0
-    invalid_lines = []
-    fout = open( output_filename, "w" )
-    warnings = []
-    warning = ''
-    twobitfile = None
-    file_iterator = open( input_filename )
-    if gff_format and interpret_features:
-        file_iterator = gff_util.GFFReaderWrapper( file_iterator, fix_strand=False )
-    line_count = 1
-    for feature in file_iterator:
-        # Ignore comments, headers.
-        if isinstance( feature, ( Header, Comment ) ):
-            line_count += 1
-            continue
-            
-        if gff_format and interpret_features:
-            # Processing features.
-            gff_util.convert_gff_coords_to_bed( feature )
-            chrom = feature.chrom
-            start = feature.start
-            end = feature.end
-            strand = feature.strand
-        else:
-            # Processing lines, either interval or GFF format.
-            line = feature.rstrip( '\r\n' )
-            if line and not line.startswith( "#" ):
-                fields = line.split( '\t' )
-                try:
-                    chrom = fields[chrom_col]
-                    start = int( fields[start_col] )
-                    end = int( fields[end_col] )
-                    if gff_format:
-                        start, end = gff_util.convert_gff_coords_to_bed( [start, end] )
-                    if includes_strand_col:
-                        strand = fields[strand_col]
-                except:
-                    warning = "Invalid chrom, start or end column values. "
-                    warnings.append( warning )
-                    if not invalid_lines:
-                        invalid_lines = get_lines( feature )
-                        first_invalid_line = line_count
-                    skipped_lines += len( get_lines( feature ) )
-                    continue
-                if start > end:
-                    warning = "Invalid interval, start '%d' > end '%d'.  " % ( start, end )
-                    warnings.append( warning )
-                    if not invalid_lines:
-                        invalid_lines = get_lines( feature )
-                        first_invalid_line = line_count
-                    skipped_lines += len( get_lines( feature ) )
-                    continue
-
-                if strand not in ['+', '-']:
-                    strand = '+'
-                sequence = ''
-            else:
-                continue
-
-        # Open sequence file and get sequence for feature/interval. 
-        if seq_path and os.path.exists( "%s/%s.nib" % ( seq_path, chrom ) ):
-            # TODO: improve support for GFF-nib interaction.
-            if chrom in nibs:
-                nib = nibs[chrom]
-            else:
-                nibs[chrom] = nib = bx.seq.nib.NibFile( file( "%s/%s.nib" % ( seq_path, chrom ) ) )
-            try:
-                sequence = nib.get( start, end-start )
-            except Exception, e:
-                warning = "Unable to fetch the sequence from '%d' to '%d' for build '%s'. " %( start, end, dbkey )
-                warnings.append( warning )
-                if not invalid_lines:
-                    invalid_lines = get_lines( feature )
-                    first_invalid_line = line_count
-                skipped_lines += len( get_lines( feature ) )
-                continue
-        elif seq_path and os.path.isfile( seq_path ):
-            if not(twobitfile):
-                twobitfile = bx.seq.twobit.TwoBitFile( file( seq_path ) )
-            try:
-                if options.gff and interpret_features:
-                    # Create sequence from intervals within a feature.
-                    sequence = ''
-                    for interval in feature.intervals:
-                        sequence += twobitfile[interval.chrom][interval.start:interval.end]
-                else:
-                    sequence = twobitfile[chrom][start:end]
-            except:
-                warning = "Unable to fetch the sequence from '%d' to '%d' for chrom '%s'. " %( start, end, chrom )
-                warnings.append( warning )
-                if not invalid_lines:
-                    invalid_lines = get_lines( feature )
-                    first_invalid_line = line_count
-                skipped_lines += len( get_lines( feature ) )
-                continue
-        else:
-            warning = "Chromosome by name '%s' was not found for build '%s'. " % ( chrom, dbkey )
-            warnings.append( warning )
-            if not invalid_lines:
-                invalid_lines = get_lines( feature )
-                first_invalid_line = line_count
-            skipped_lines += len( get_lines( feature ) )
-            continue
-        if sequence == '':
-            warning = "Chrom: '%s', start: '%s', end: '%s' is either invalid or not present in build '%s'. " \
-                        % ( chrom, start, end, dbkey )
-            warnings.append( warning )
-            if not invalid_lines:
-                invalid_lines = get_lines( feature )
-                first_invalid_line = line_count
-            skipped_lines += len( get_lines( feature ) )
-            continue
-        if includes_strand_col and strand == "-":
-            sequence = reverse_complement( sequence )
-
-        if output_format == "fasta" :
-            l = len( sequence )        
-            c = 0
-            if gff_format:
-                start, end = gff_util.convert_bed_coords_to_gff( [ start, end ] )
-            fields = [dbkey, str( chrom ), str( start ), str( end ), strand]
-            meta_data = "_".join( fields )
-            fout.write( ">%s\n" % meta_data )
-            while c < l:
-                b = min( c + 50, l )
-                fout.write( "%s\n" % str( sequence[c:b] ) )
-                c = b
-        else: # output_format == "interval"
-            if gff_format and interpret_features:
-                # TODO: need better GFF Reader to capture all information needed
-                # to produce this line.
-                meta_data = "\t".join( 
-                                [feature.chrom, "galaxy_extract_genomic_dna", "interval", \
-                                 str( feature.start ), str( feature.end ), feature.score, feature.strand,
-                                 ".", gff_util.gff_attributes_to_str( feature.attributes, "GTF" ) ] )
-            else:
-                meta_data = "\t".join( fields )
-            if gff_format:
-                format_str = "%s seq \"%s\";\n"
-            else:
-                format_str = "%s\t%s\n"
-            fout.write( format_str % ( meta_data, str( sequence ) ) )
-            
-        # Update line count.
-        if isinstance( feature, gff_util.GFFFeature ):
-            line_count += len( feature.intervals )
-        else:
-            line_count += 1
-
-    fout.close()
-
-    if warnings:
-        warn_msg = "%d warnings, 1st is: " % len( warnings )
-        warn_msg += warnings[0]
-        print warn_msg
-    if skipped_lines:
-        # Error message includes up to the first 10 skipped lines.
-        print 'Skipped %d invalid lines, 1st is #%d, "%s"' % ( skipped_lines, first_invalid_line, '\n'.join( invalid_lines[:10] ) )
-
-if __name__ == "__main__": __main__()
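
The GFF handling above hinges on one coordinate conversion: GFF/GTF positions
are 1-based and closed, while interval/BED positions are 0-based and
half-open. A minimal sketch of the conversion (hypothetical helper names)::

    def gff_to_bed(start, end):
        # GFF [1, 10] and BED [0, 10) both describe the first ten bases
        return start - 1, end

    def bed_to_gff(start, end):
        # inverse conversion, applied before writing FASTA headers for GFF input
        return start + 1, end

    # gff_to_bed(1, 10) -> (0, 10); bed_to_gff(0, 10) -> (1, 10)
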
--- a/tools/extract/extract_genomic_dna.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,174 +0,0 @@
-<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="2.2.2">
-  <description>using coordinates from assembled/unassembled genomes</description>
-  <command interpreter="python">
-      extract_genomic_dna.py $input $out_file1 -o $out_format -d $dbkey 
-      
-      #if str( $interpret_features ) == "yes":
-        -I
-      #end if
-      
-      ## Columns to use in input file.
-      #if isinstance( $input.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
-        -1 1,4,5,7 --gff
-      #else:
-        -1 ${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol}
-      #end if
-            
-      #if $seq_source.index_source == "cached":
-        ## Genomic data from cache.
-        -g ${GALAXY_DATA_INDEX_DIR}
-      #else:
-        ## Genomic data from history.
-        -F $seq_source.ref_file
-      #end if
-  </command>
-  <inputs>
-      <param format="interval,gff" name="input" type="data" label="Fetch sequences for intervals in"/>
-      <param name="interpret_features" type="select" label="Interpret features when possible" help="Only meaningful for GFF, GTF datasets.">
-          <option value="yes">Yes</option>
-          <option value="no">No</option>
-      </param>
-      <conditional name="seq_source">
-          <param name="index_source" type="select" label="Source for Genomic Data">
-              <option value="cached">Locally cached</option>
-              <option value="history">History</option>
-          </param>
-          <when value="cached">
-          </when>
-          <when value="history">
-              <param name="ref_file" type="data" format="fasta" label="Using reference file" />
-          </when>
-      </conditional>
-	  <param name="out_format" type="select" label="Output data type">
-    	  <option value="fasta">FASTA</option>
-    	  <option value="interval">Interval</option>
-	  </param>
-  </inputs>
-  <outputs>
-      <data format="input" name="out_file1" metadata_source="input">
-          <change_format>
-              <when input="out_format" value="fasta" format="fasta" />
-          </change_format>
-      </data>
-  </outputs>
-  <requirements>
-      <requirement type="binary">faToTwoBit</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
-      <param name="interpret_features" value="yes"/>
-      <param name="index_source" value="cached"/>
-      <param name="out_format" value="fasta"/>   
-      <output name="out_file1" file="extract_genomic_dna_out1.fasta" />
-    </test>
-    <test>
-      <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
-      <param name="interpret_features" value="yes"/>
-      <param name="index_source" value="cached"/>
-      <param name="out_format" value="fasta"/>
-      <output name="out_file1" file="extract_genomic_dna_out2.fasta" />
-    </test>
-    <test>
-      <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
-      <param name="interpret_features" value="yes"/>
-      <param name="index_source" value="cached"/>
-      <param name="out_format" value="interval"/>
-      <output name="out_file1" file="extract_genomic_dna_out3.interval" />
-    </test>
-    <!-- Test GFF file support. -->
-    <test>
-      <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
-      <param name="interpret_features" value="no"/>
-      <param name="index_source" value="cached"/>
-      <param name="out_format" value="interval"/>
-      <output name="out_file1" file="extract_genomic_dna_out4.gff" />
-    </test>
-    <test>
-      <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
-      <param name="interpret_features" value="no"/>
-      <param name="out_format" value="fasta"/>
-      <param name="index_source" value="cached"/>
-      <output name="out_file1" file="extract_genomic_dna_out5.fasta" />
-    </test>
-    <!-- Test custom sequences support and GFF feature interpretation. -->
-    <test>
-      <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
-      <param name="interpret_features" value="no"/>
-      <param name="index_source" value="history"/>
-      <param name="ref_file" value="tophat_in1.fasta"/>
-      <param name="out_format" value="fasta"/>
-      <output name="out_file1" file="extract_genomic_dna_out6.fasta" />
-    </test>
-    <test>
-      <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
-      <param name="interpret_features" value="yes"/>
-      <param name="index_source" value="history"/>
-      <param name="ref_file" value="tophat_in1.fasta"/>
-      <param name="out_format" value="fasta"/>
-      <output name="out_file1" file="extract_genomic_dna_out7.fasta" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-This tool requires interval or GFF data (special tabular formats).  If your data is not TAB delimited, first use *Text Manipulation-&gt;Convert*.
-
-.. class:: warningmark
-
-Make sure that the genome build is specified for the dataset from which you are extracting sequences (click the pencil icon in the history item if it is not specified). 
-
-.. class:: warningmark
-
-All of the following will cause a line from the input dataset to be skipped and a warning generated.  The number of warnings and skipped lines is documented in the resulting history item.
- - Any lines that do not contain at least 3 columns: a chromosome and numerical start and end coordinates.
- - Sequences that fall outside the range of a line's start and end coordinates.
- - Chromosome, start, or end coordinates that are invalid for the specified build.
- - Any lines whose data columns are not separated by a **TAB** character (other whitespace characters are invalid).
-
-.. class:: infomark
-
- **Extract genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** was previously handled by two separate tools.
-
------
-
-**What it does**
-
-This tool uses coordinate, strand, and build information to fetch genomic DNA in FASTA or interval format.
-
-If strand is not defined, the default value is "+".
-
------
-
-**Example**
-
-If the input dataset is::
-
-    chr7  127475281  127475310  NM_000230  0  +
-    chr7  127485994  127486166  NM_000230  0  +
-    chr7  127486011  127486166  D49487     0  +
-
-Extracting sequences with **FASTA** output data type returns::
-
-    &gt;hg17_chr7_127475281_127475310_+
-    GTAGGAATCGCAGCGCCAGCGGTTGCAAG
-    &gt;hg17_chr7_127485994_127486166_+
-    GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG
-    GATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATC
-    CAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAG
-    GATCAATGACATTTCACACACG
-    &gt;hg17_chr7_127486011_127486166_+
-    TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG
-    CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA
-    CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC
-    ACACG
-
-Extracting sequences with **Interval** output data type returns::
-
-    chr7    127475281       127475310       NM_000230       0       +       GTAGGAATCGCAGCGCCAGCGGTTGCAAG
-    chr7    127485994       127486166       NM_000230       0       +       GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
-    chr7    127486011       127486166       D49487  0       +       TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
-
-</help>
-</tool>
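
The FASTA headers in the example above encode dbkey, chromosome, start, end,
and strand joined by underscores; a sketch of parsing them back (note the
assumption that the chromosome name itself contains no underscore, which
fails for names like chr6_random)::

    def parse_header(header):
        dbkey, chrom, start, end, strand = header.lstrip('>').split('_')
        return dbkey, chrom, int(start), int(end), strand

    # parse_header('>hg17_chr7_127475281_127475310_+')
    # -> ('hg17', 'chr7', 127475281, 127475310, '+')
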
--- a/tools/extract/liftOver_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,83 +0,0 @@
-#!/usr/bin/env python
-#Guruprasad Ananda
-"""
-Converts coordinates from one build/assembly to another using liftOver binary and mapping files downloaded from UCSC.
-"""
-
-import os, string, subprocess, sys
-import tempfile
-import re
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-def safe_bed_file(infile):
-    """Make a BED file with track and browser lines ready for liftOver.
-
-    liftOver will fail with track or browser lines. We can make it happy
-    by converting these to comments. See:
-
-    https://lists.soe.ucsc.edu/pipermail/genome/2007-May/013561.html
-    """
-    fix_pat = re.compile("^(track|browser)")
-    (fd, fname) = tempfile.mkstemp()
-    in_handle = open(infile)
-    out_handle = open(fname, "w")
-    for line in in_handle:
-        if fix_pat.match(line):
-            line = "#" + line
-        out_handle.write(line)
-    in_handle.close()
-    out_handle.close()
-    return fname
-    
-if len( sys.argv ) < 9:
-    stop_err( "USAGE: prog input out_file1 out_file2 input_dbkey mapfile_path infile_type minMatch multiple <minChainT> <minChainQ> <minSizeQ>" )
-
-infile = sys.argv[1]
-outfile1 = sys.argv[2]
-outfile2 = sys.argv[3]
-in_dbkey = sys.argv[4]
-mapfilepath = sys.argv[5]
-infile_type = sys.argv[6]
-gff_option = ""
-if infile_type == "gff":
-    gff_option = "-gff "
-minMatch = sys.argv[7]
-multiple = int(sys.argv[8])
-multiple_option = ""
-if multiple:
-    minChainT = sys.argv[9]
-    minChainQ = sys.argv[10]
-    minSizeQ = sys.argv[11]
-    multiple_option = " -multiple -minChainT=%s -minChainQ=%s -minSizeQ=%s " %(minChainT,minChainQ,minSizeQ)
-
-try:
-    assert float(minMatch)
-except:
-    minMatch = 0.1
-#ensure dbkey is set
-if in_dbkey == "?": 
-    stop_err( "Input dataset genome build unspecified, click the pencil icon in the history item to specify it." )
-
-if not os.path.isfile( mapfilepath ):
-    stop_err( "%s mapping is not currently available."  % ( mapfilepath.split('/')[-1].split('.')[0] ) )
-
-safe_infile = safe_bed_file(infile)
-cmd_line = "liftOver " + gff_option + "-minMatch=" + str(minMatch) + multiple_option + " "  + safe_infile + " " + mapfilepath + " " + outfile1 + " " + outfile2 + "  > /dev/null"
-
-try:
-    # have to nest try-except in try-finally to handle 2.4
-    try:
-        proc = subprocess.Popen( args=cmd_line, shell=True, stderr=subprocess.PIPE )
-        returncode = proc.wait()
-        stderr = proc.stderr.read()
-        if returncode != 0:
-            raise Exception, stderr
-    except Exception, e:
-        raise Exception, 'Exception caught attempting conversion: ' + str( e )
-finally:
-    os.remove(safe_infile)
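
The safe_bed_file() preparation above is the key preprocessing step: liftOver
rejects UCSC track and browser lines, so they are turned into comments. The
rule in isolation (Python; the helper name is illustrative)::

    import re
    fix_pat = re.compile("^(track|browser)")

    def comment_ucsc_lines(lines):
        # prefix track/browser lines with '#' so liftOver treats them as comments
        return [("#" + l) if fix_pat.match(l) else l for l in lines]

    # comment_ucsc_lines(['track name=foo', 'chr1\t10\t20'])
    # -> ['#track name=foo', 'chr1\t10\t20']
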
--- a/tools/extract/liftOver_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,145 +0,0 @@
-<tool id="liftOver1" name="Convert genome coordinates" version="1.0.3">
-  <description> between assemblies and genomes</description>
-  <command interpreter="python">
-  liftOver_wrapper.py 
-  $input 
-  "$out_file1" 
-  "$out_file2" 
-  $dbkey 
-  $to_dbkey 
-  #if isinstance( $input.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__) or isinstance( $input.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gtf').__class__):
-        "gff"
-  #else:
-        "interval"
-  #end if
-  $minMatch ${multiple.choice} ${multiple.minChainT} ${multiple.minChainQ} ${multiple.minSizeQ}
-  </command>
-  <inputs>
-    <param format="interval,gff,gtf" name="input" type="data" label="Convert coordinates of">
-      <validator type="unspecified_build" />
-      <validator type="dataset_metadata_in_file" filename="liftOver.loc" metadata_name="dbkey" metadata_column="0" message="Liftover mappings are currently not available for the specified build." />
-    </param>
-    <param name="to_dbkey" type="select" label="To">
-      <options from_file="liftOver.loc">
-        <column name="name" index="1"/>
-        <column name="value" index="2"/>
-        <column name="dbkey" index="0"/>
-        <filter type="data_meta" ref="input" key="dbkey" column="0" />
-      </options>
-    </param> 
-    <param name="minMatch" size="10" type="float" value="0.95" label="Minimum ratio of bases that must remap" help="Recommended values: same species = 0.95, different species = 0.10" />
-    <conditional name="multiple">
-	    <param name="choice" type="select" label="Allow multiple output regions?" help="Recommended values: same species = No, different species = Yes">
-	    	<option value="0" selected="true">No</option>
-	    	<option value="1">Yes</option>
-		</param>
-		<when value="0">
-		    <param name="minSizeQ" type="hidden" value="0" />
-    		<param name="minChainQ" type="hidden" value="0" />
-    		<param name="minChainT" type="hidden" value="0" />
-    	</when>
-    	<when value="1">
-    	    <param name="minSizeQ" size="10" type="integer" value="0" label="Minimum matching region size in query" help="Recommended value: set to >= 300 bases for complete transcripts"/>
-    		<param name="minChainQ" size="10" type="integer" value="500" label="Minimum chain size in query"/>
-    		<param name="minChainT" size="10" type="integer" value="500" label="Minimum chain size in target"/>
-    	</when>
-	</conditional>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" label="${tool.name} on ${on_string} [ MAPPED COORDINATES ]">
-      <actions>
-        <action type="metadata" name="dbkey">
-          <option type="from_file" name="liftOver.loc" column="1" offset="0">
-            <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-            <filter type="param_value" ref="to_dbkey" column="2"/>
-          </option>
-        </action>
-      </actions>
-    </data>
-    <data format="input" name="out_file2" label="${tool.name} on ${on_string} [ UNMAPPED COORDINATES ]" />
-  </outputs>
-  <requirements>
-    <requirement type="package">ucsc_tools</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="input" value="5.bed" dbkey="hg18" ftype="bed" />
-      <param name="to_dbkey" value="panTro2" />
-      <param name="minMatch" value="0.95" />
-      <param name="choice" value="0" />
-      <output name="out_file1" file="5_liftover_mapped.bed"/>
-      <output name="out_file2" file="5_liftover_unmapped.bed"/>
-    </test>
-    <test>
-      <param name="input" value="5.bed" dbkey="hg18" ftype="bed" />
-      <param name="to_dbkey" value="panTro2" />
-      <param name="minMatch" value="0.10" />
-      <param name="choice" value="1" />
-      <param name="minSizeQ" value="0" />
-      <param name="minChainQ" value="500" />
-      <param name="minChainT" value="500" />
-      <output name="out_file1" file="5_mult_liftover_mapped.bed"/>
-      <output name="out_file2" file="5_mult_liftover_unmapped.bed"/>
-    </test>
-    <test>
-      <param name="input" value="cuffcompare_in1.gtf" dbkey="hg18" ftype="gtf" />
-      <param name="to_dbkey" value="panTro2" />
-      <param name="minMatch" value="0.95" />
-      <param name="choice" value="0" />
-      <output name="out_file1" file="cuffcompare_in1_liftover_mapped.bed"/>
-      <output name="out_file2" file="cuffcompare_in1_liftover_unmapped.bed"/>
-    </test>
-    <test>
-      <param name="input" value="cuffcompare_in1.gtf" dbkey="hg18" ftype="gtf" />
-      <param name="to_dbkey" value="panTro2" />
-      <param name="minMatch" value="0.10" />
-      <param name="choice" value="1" />
-      <param name="minSizeQ" value="0" />
-      <param name="minChainQ" value="500" />
-      <param name="minChainT" value="500" />
-      <output name="out_file1" file="cuffcompare_in1_mult_liftover_mapped.bed"/>
-      <output name="out_file2" file="cuffcompare_in1_mult_liftover_unmapped.bed"/>
-    </test>
-  </tests>
-  <help>
-.. class:: warningmark
-
-Make sure that the genome build of the input dataset is specified (click the pencil icon in the history item to set it if necessary).
-
-.. class:: warningmark
-
-This tool can work with interval, GFF, and GTF datasets. It requires the interval datasets to have the chromosome in column 1,
-the start coordinate in column 2, and the end coordinate in column 3. BED comments
-and track and browser lines will be ignored, but if other non-interval lines
-are present the tool will return empty output datasets.
-
------
-
-.. class:: infomark
-
-**What it does**
-
-This tool is based on the LiftOver utility and Chain track from `the UC Santa Cruz Genome Browser`__.
-
-It converts coordinates and annotations between assemblies and genomes. It produces 2 files, one containing all the mapped coordinates and the other containing the unmapped coordinates, if any. 
-
- .. __: http://genome.ucsc.edu/
-
------
-
-**Example**
-
-Converting the following hg16 intervals to hg18 intervals::
-
-    chrX  85170   112199  AK002185  0  +
-    chrX  110458  112199  AK097346  0  +
-    chrX  112203  121212  AK074528  0  -
-
-will produce the following hg18 intervals::
-
-    chrX  132991  160020  AK002185  0  +
-    chrX  158279  160020  AK097346  0  +
-    chrX  160024  169033  AK074528  0  -
-
-  </help>
-</tool>
--- a/tools/extract/phastOdds/get_scores_galaxy.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,147 +0,0 @@
-#!/usr/bin/env python
-
-"""
-usage: %prog data_file.h5 region_mapping.bed in_file out_file chrom_col start_col end_col [options]
-   -p, --perCol: standardize to lod per column
-"""
-
-from __future__ import division
-
-import sys
-from galaxy import eggs
-from numpy import *
-from tables import *
-
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-from bx import intervals
-
-# ignore warnings about NumArray flavor
-from warnings import filterwarnings
-from tables.exceptions import FlavorWarning
-filterwarnings("ignore", category=FlavorWarning)
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write(msg)
-    sys.exit()
-
-def main():
-    # Parse command line
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        h5_fname = args[0]
-        mapping_fname = args[1]
-        in_fname = args[2]
-        out_fname = args[3]
-        chrom_col, start_col, end_col = map( lambda x: int( x ) - 1, args[4:7] )
-        per_col = bool( options.perCol )
-    except Exception, e:
-        doc_optparse.exception()
-        
-    if h5_fname == 'None.h5':
-        stop_err( 'Invalid genome build, this tool currently only works with data from build hg17.  Click the pencil icon in your history item to correct the build if appropriate.' )
-        
-    # Open the h5 file
-    h5 = openFile( h5_fname, mode = "r" )
-    # Load intervals and names for the subregions
-    intersecters = {}
-    for i, line in enumerate( file( mapping_fname ) ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ):
-            chr, start, end, name = line.split()[0:4]
-            if not intersecters.has_key( chr ): 
-                intersecters[ chr ] = intervals.Intersecter()
-            intersecters[ chr ].add_interval( intervals.Interval( int( start ), int( end ), name ) )
-
-    # Find the subregion containing each input interval
-    skipped_lines = 0
-    first_invalid_line = 0
-    invalid_line = ''
-    out_file = open( out_fname, "w" )
-    warnings = []
-    warning = ''
-    for i, line in enumerate( file( in_fname ) ):
-        line = line.rstrip( '\r\n' )
-        if line.startswith( '#' ):
-            if i == 0:
-                out_file.write( "%s\tscore\n" % line )
-            else:
-                out_file.write( "%s\n" % line )
-            continue
-        fields = line.split( "\t" )
-        try:
-            chr = fields[ chrom_col ]
-            start = int( fields[ start_col ] )
-            end = int( fields[ end_col ] )
-        except:
-            warning = "Invalid value for chrom, start or end column."
-            warnings.append( warning )
-            skipped_lines += 1
-            if not invalid_line:
-                first_invalid_line = i + 1
-                invalid_line = line
-            continue
-        # Find matching interval
-        try:
-            matches = intersecters[ chr ].find( start, end )
-        except:
-            warning = "'%s' is not a valid chrom value for the region. " %chr
-            warnings.append( warning )
-            skipped_lines += 1
-            if not invalid_line:
-                first_invalid_line = i + 1
-                invalid_line = line
-            continue
-        if not len( matches ) == 1:
-            warning = "Interval must match exactly one target region. "
-            warnings.append( warning )
-            skipped_lines += 1
-            if not invalid_line:
-                first_invalid_line = i + 1
-                invalid_line = line
-            continue
-        region = matches[0]
-        if not ( start >= region.start and end <= region.end ):
-            warning = "Interval must fall entirely within region. "
-            warnings.append( warning )
-            skipped_lines += 1
-            if not invalid_line:
-                first_invalid_line = i + 1
-                invalid_line = line
-            continue
-        region_name = region.value
-        rel_start = start - region.start
-        rel_end = end - region.start
-        if not rel_start < rel_end:
-            warning = "Region %s is empty, relative start:%d, relative end:%d. " % ( region_name, rel_start, rel_end )
-            warnings.append( warning )
-            skipped_lines += 1
-            if not invalid_line:
-                first_invalid_line = i + 1
-                invalid_line = line
-            continue
-        s = h5.getNode( h5.root, "scores_" + region_name )
-        c = h5.getNode( h5.root, "counts_" + region_name )
-        score = s[rel_end-1]
-        count = c[rel_end-1]
-        if rel_start > 0:
-            score -= s[rel_start-1]
-            count -= c[rel_start-1]
-        if per_col: 
-            score /= count
-        fields.append( str( score ) )
-        out_file.write( "%s\n" % "\t".join( fields ) )
-    # Close the file handle
-    h5.close()
-    out_file.close()
-
-    if warnings:
-        warn_msg = "PhastOdds scores are only available for ENCODE regions. %d warnings, 1st is: " % len( warnings )
-        warn_msg += warnings[0]
-        print warn_msg
-    if skipped_lines:
-        print 'Skipped %d invalid lines, 1st is #%d, "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
-
-if __name__ == "__main__": main()
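
The score lookup above works because the per-region h5 arrays are cumulative
sums, so any interval's total is the difference of two array reads. The
arithmetic in isolation (plain lists stand in for the h5 nodes; the function
name is illustrative)::

    def interval_score(cum_scores, cum_counts, rel_start, rel_end, per_col=True):
        # cum_scores[i] / cum_counts[i] hold totals for columns 0..i
        score = cum_scores[rel_end - 1]
        count = cum_counts[rel_end - 1]
        if rel_start > 0:
            score -= cum_scores[rel_start - 1]
            count -= cum_counts[rel_start - 1]
        if per_col:
            return float(score) / count
        return score

    # interval_score([1, 3, 6, 10], [1, 2, 3, 4], 1, 3) -> (6-1)/(3-1) = 2.5
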
--- a/tools/extract/phastOdds/phastOdds_tool.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,67 +0,0 @@
-<tool id="phastOdds_for_intervals" name="Compute phastOdds score" version="1.0.0">
-  <description>for each interval</description>
-  <command interpreter="python">get_scores_galaxy.py $per_col ${score_file}.h5 ${score_file}.mapping.bed $input $output ${input.metadata.chromCol} ${input.metadata.startCol} ${input.metadata.endCol}</command>
-  <inputs>
-    <param format="interval" name="input" type="data" label="Interval file">
-      <validator type="unspecified_build" message="Unspecified build, this tool works with data from genome build hg17. Click the pencil icon in your history item to set the genome build."/>
-      <validator type="dataset_metadata_in_file" filename="phastOdds.loc" metadata_name="dbkey" metadata_column="0" message="Sequences are currently unavailable for the specified build." />
-    </param>
-    <param name="score_file" type="select" label="Available datasets">
-      <options from_file="phastOdds.loc">
-        <column name="name" index="1"/>
-        <column name="value" index="2"/>
-        <column name="dbkey" index="0"/>
-        <filter type="data_meta" ref="input" key="dbkey" column="0" />
-      </options>
-    </param>
-	<param name="per_col" type="boolean" label="Standardize" help="Standardizes the score to be per alignment column" checked="yes" truevalue="-p" falsevalue=""/>
-  </inputs>
-  <outputs>
-    <data format="interval" name="output" metadata_source="input"/>
-  </outputs>
-  <requirements>
-    <requirement type="python-module">numpy</requirement>
-    <requirement type="python-module">tables</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="input" value="4.bed" dbkey="hg17" ftype="bed"/>
-      <param name="score_file" value="/galaxy/data/phastOdds_precomputed/encode_SEP-2005_tba.v2_phastOdds" />
-      <param name="per_col" value="true" />
-      <output name="output" file="phastOdds_tool_out.interval" />
-    </test>
-  </tests>
-  <help>
-    
-.. class:: warningmark
-
-This tool currently only works with interval data from genome build hg17.
-
-.. class:: warningmark
-
-This tool assumes that the input dataset is in interval format and contains at least a chrom column, a start column and an end column.  These 3 columns can be dispersed throughout any number of other data columns. 
-
------
-
-**Syntax**
-
-Append a column to each line of an interval file containing the phastOdds score for that interval.
-
------
-
-**Example**
-
-If your original data has the following format:
-
-+-----+-----+---+
-|chrom|start|end|
-+-----+-----+---+
-
-and you choose to compute phastOdds scores, your output will look like this:
-
-+-----+-----+---+-----+
-|chrom|start|end|score|
-+-----+-----+---+-----+
-
-  </help>
-</tool>
--- a/tools/fasta_tools/fasta_compute_length.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-#!/usr/bin/env python
-"""
-Input: fasta, int
-Output: tabular
-Return titles with the lengths of the corresponding sequences
-"""
-
-import sys, os
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def __main__():
-    
-    infile = sys.argv[1]
-    out = open( sys.argv[2], 'w')
-    keep_first_char = int( sys.argv[3] )
-
-    fasta_title = ''
-    seq_len = 0
-
-    # number of char to keep in the title
-    if keep_first_char == 0:
-        keep_first_char = None
-    else:
-        keep_first_char += 1
-
-    first_entry = True
-
-    for line in open( infile ):
-        line = line.strip()
-        if not line or line.startswith( '#' ):
-            continue
-        if line[0] == '>':
-            if first_entry == False:
-                out.write( "%s\t%d\n" % ( fasta_title[ 1:keep_first_char ], seq_len ) )
-            else:
-                first_entry = False
-            fasta_title = line
-            seq_len = 0
-        else:
-            seq_len += len(line)
-
-    # last fasta-entry
-    out.write( "%s\t%d\n" % ( fasta_title[ 1:keep_first_char ], seq_len ) )
-    out.close()
-
-if __name__ == "__main__" : __main__()
\ No newline at end of file
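
The keep_first_char increment above compensates for the leading '>' still
being part of the stored title; a quick worked check (Python 2, title value
illustrative)::

    fasta_title = '>EYKX4VC02EQLO5 length=108 xy=1826_0455'
    keep_first_char = 14
    # position 0 of the title is '>', so the slice starts at 1 and the
    # end index is shifted by one as well
    print fasta_title[1:keep_first_char + 1]    # prints EYKX4VC02EQLO5
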
--- a/tools/fasta_tools/fasta_compute_length.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-<tool id="fasta_compute_length" name="Compute sequence length">
-	<description></description>
-	<command interpreter="python">fasta_compute_length.py $input $output $keep_first</command>
-	<inputs>
-		<param name="input" type="data" format="fasta" label="Compute length for these sequences"/>
-		<param name="keep_first" type="integer" size="5" value="0" label="How many title characters to keep?" help="'0' = keep the whole thing"/>
-	</inputs>
-	<outputs>
-		<data name="output" format="tabular"/>
-	</outputs>
-	<tests>
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="keep_first" value="0"/>
-			<output name="output" file="fasta_tool_compute_length_1.out" />
-		</test>
-		
-		<test>
-			<param name="input" value="extract_genomic_dna_out1.fasta" />
-			<param name="keep_first" value="0"/>
-			<output name="output" file="fasta_tool_compute_length_2.out" />
-		</test>
-		
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="keep_first" value="14"/>
-			<output name="output" file="fasta_tool_compute_length_3.out" />
-		</test>
-	</tests>
-	<help>
-
-**What it does**
-
-This tool counts the length of each FASTA sequence in the file. The output file has two columns per line (separated by a tab): the FASTA title and the length of the sequence. The option *How many title characters to keep?* lets you keep a specified number of characters from the beginning of each FASTA title.
-
------	
-
-**Example**
-
-Suppose you have the following FASTA formatted sequences from a Roche (454) FLX sequencing run::
-
-    &gt;EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_
-    TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG
-    TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG
-    &gt;EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_
-    AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAA
-
-Running this tool while setting **How many title characters to keep?** to **14** will produce this::
-	
-	EYKX4VC02EQLO5	108
-	EYKX4VC02D4GS2	60
-
-
-	</help>
-</tool>
\ No newline at end of file
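
A doctest-style sketch of the title-truncation arithmetic in
fasta_compute_length.py above: the stored title still includes the leading '>',
so the script adds 1 to ``keep_first_char`` before slicing (values here are
illustrative)::

    >>> title = ">EYKX4VC02EQLO5 length=108 xy=1826_0455"
    >>> keep_first = 14 + 1          # +1 compensates for the leading '>'
    >>> title[ 1:keep_first ]
    'EYKX4VC02EQLO5'
    >>> title[ 1:None ]              # keep_first == 0 is mapped to None: keep all
    'EYKX4VC02EQLO5 length=108 xy=1826_0455'
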
--- a/tools/fasta_tools/fasta_concatenate_by_species.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-"""
-Takes a Multiple Alignment FASTA file and concatenates 
-sequences for each species, resulting in one sequence 
-alignment per species.
-"""
-
-import sys, tempfile
-from galaxy import eggs
-from galaxy.tools.util.maf_utilities import iter_fasta_alignment
-from galaxy.util.odict import odict
-
-def __main__():
-    input_filename = sys.argv[1]
-    output_filename = sys.argv[2]
-    species = odict()
-    cur_size = 0
-    for components in iter_fasta_alignment( input_filename ):
-        species_not_written = species.keys()
-        for component in components:
-            if component.species not in species:
-                species[component.species] = tempfile.TemporaryFile()
-                species[component.species].write( "-" * cur_size )
-            species[component.species].write( component.text )
-            try:
-                species_not_written.remove( component.species )
-            except ValueError:
-                #this is a new species
-                pass
-        for spec in species_not_written:
-            species[spec].write( "-" * len( components[0].text ) )
-        cur_size += len( components[0].text )
-    out = open( output_filename, 'wb' )
-    for spec, f in species.iteritems():
-        f.seek( 0 )
-        out.write( ">%s\n%s\n" % ( spec, f.read() ) )
-    out.close()
-
-if __name__ == "__main__" : __main__()
--- a/tools/fasta_tools/fasta_concatenate_by_species.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-<tool id="fasta_concatenate0" name="Concatenate" version="0.0.0">
-  <description>FASTA alignment by species</description>
-  <command interpreter="python">fasta_concatenate_by_species.py $input1 $out_file1</command>
-  <inputs>
-    <param name="input1" type="data" format="fasta" label="FASTA alignment"/>
-  </inputs>
-  <outputs>
-    <data name="out_file1" format="fasta"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="cf_maf2fasta.dat" />
-      <output name="out_file1" file="fasta_concatenate_out.fasta" />
-    </test>
-  </tests>
-  <help>
-  
-**What it does**
-  
-This tool attempts to parse FASTA headers to determine the species for each sequence in a multiple FASTA alignment.
-It then linearly concatenates the sequences for each species in the file, creating one sequence per determined species.
-
--------
-
-**Example**
-
-Starting FASTA::
-  
-  >hg18.chr1(+):10016339-10016341|hg18_0
-  GT
-  >panTro2.chr1(+):10195380-10195382|panTro2_0
-  GT
-  >rheMac2.chr1(+):13119747-13119749|rheMac2_0
-  GT
-  >mm8.chr4(-):148269679-148269681|mm8_0
-  GT
-  >canFam2.chr5(+):66213635-66213637|canFam2_0
-  GT
-  
-  >hg18.chr1(-):100323677-100323679|hg18_1
-  GT
-  >panTro2.chr1(-):101678671-101678673|panTro2_1
-  GT
-  >rheMac2.chr1(-):103154011-103154013|rheMac2_1
-  GT
-  >mm8.chr3(+):116620616-116620618|mm8_1
-  GT
-  >canFam2.chr6(+):52954092-52954094|canFam2_1
-  GT
-  
-
-
-becomes::
-  
-  >hg18
-  GTGT
-  >panTro2
-  GTGT
-  >rheMac2
-  GTGT
-  >mm8
-  GTGT
-  >canFam2
-  GTGT
-
-
-.. class:: warningmark 
-
- This tool will only work properly on files with Galaxy-style FASTA headers.
-
-</help>
-</tool>
\ No newline at end of file
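
The gap-padding logic in fasta_concatenate_by_species.py can be restated with a
plain dict in place of the odict/tempfile machinery (species ordering, which the
odict preserves, is ignored here); a simplified sketch, assuming each alignment
block is a dict mapping species to its aligned text::

    def concatenate_blocks( blocks ):
        seqs = {}        # species -> concatenated alignment text
        cur_size = 0     # total alignment length consumed so far
        for block in blocks:
            block_len = len( list( block.values() )[0] )
            for species in set( seqs ) | set( block ):
                # a species absent from this block gets gaps; a species seen
                # for the first time gets gaps for all earlier blocks
                text = block.get( species, '-' * block_len )
                seqs[ species ] = seqs.get( species, '-' * cur_size ) + text
            cur_size += block_len
        return seqs

In the GT example above, a species missing from the second alignment block would
come out as ``GT--`` rather than ``GTGT``.
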
--- a/tools/fasta_tools/fasta_filter_by_length.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-#!/usr/bin/env python
-"""
-Input: fasta, minimal length, maximal length
-Output: fasta
-Return sequences whose lengths are within the range.
-"""
-
-import sys, os
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def __main__():
-    input_filename = sys.argv[1]
-    try:
-        min_length = int( sys.argv[2] )
-    except:
-        stop_err( "Minimal length of the return sequence requires a numerical value." )
-    try:
-        max_length = int( sys.argv[3] )
-    except:
-        stop_err( "Maximum length of the return sequence requires a numerical value." )
-    output_filename = sys.argv[4]
-    output_handle = open( output_filename, 'w' )
-    tmp_size = 0
-    tmp_buf = ''
-    at_least_one = 0
-    for line in file(input_filename):
-        if not line or line.startswith('#'):
-            continue
-        if line[0] == '>':
-            if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0):
-                output_handle.write(tmp_buf)
-                at_least_one = 1
-            tmp_buf = line
-            tmp_size = 0
-        else:
-            if max_length == 0 or tmp_size < max_length:
-                tmp_size += len(line.rstrip('\r\n'))
-                tmp_buf += line
-    # final flush of buffer
-    if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0):
-        output_handle.write(tmp_buf.rstrip('\r\n'))
-        at_least_one = 1
-    output_handle.close()
-    if at_least_one == 0:
-        print "There is no sequence that falls within your range."
-
-if __name__ == "__main__" : __main__()
--- a/tools/fasta_tools/fasta_filter_by_length.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,56 +0,0 @@
-<tool id="fasta_filter_by_length" name="Filter sequences by length" version="1.1">
-	<description></description>
-	<command interpreter="python">fasta_filter_by_length.py $input $min_length $max_length $output </command>
-	<inputs>
-		<param name="input" type="data" format="fasta" label="Fasta file"/>
-		<param name="min_length" type="integer" size="15" value="0" label="Minimal length" />
-		<param name="max_length" type="integer" size="15" value="0" label="Maximum length" help="Setting to '0' will return all sequences longer than the 'Minimal length'"/> 
-	</inputs>
-	<outputs>
-		<data name="output" format="fasta"/>
-	</outputs>
-	<tests>
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="min_length" value="10" />
-			<param name="max_length" value="0" />
-			<output name="output" file="fasta_tool_filter_length_1.out" />
-		</test>
-	</tests>
-	<help>
-
-.. class:: infomark
-
-**TIP**. To return all sequences longer than a certain length, set *Minimal length* to the desired value and leave *Maximum length* set to '0'.
-
------
-
-**What it does**
-	
-Outputs sequences whose lengths fall between *Minimal length* and *Maximum length*.
- 
------
-
-**Example**
-
-Suppose you have the following FASTA formatted sequences::
-
-	&gt;seq1
-	TCATTTAATGAC
-	&gt;seq2
-	ATGGC
-	&gt;seq3
-	TCACATGATGCCG
-	&gt;seq4
-	ATGGAAGC
-
-Setting the **Minimal length** to **10** and the **Maximum length** to **0** will return all sequences of at least 10 bp::
-
-	&gt;seq1
-	TCATTTAATGAC
-	&gt;seq3
-	TCACATGATGCCG
-
-
-	</help>
-</tool>
\ No newline at end of file
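
The keep/drop decision above hinges on the convention that a *Maximum length* of
0 means "no upper limit"; the predicate, separated from the tool's buffering
code as a sketch::

    def keeps( size, min_length, max_length ):
        if max_length == 0:                      # 0 disables the upper bound
            return min_length <= size
        return min_length <= size <= max_length

With *Minimal length* 10 and *Maximum length* 0, seq1 (12 bp) and seq3 (13 bp)
pass while seq2 (5 bp) and seq4 (8 bp) are dropped, matching the example output.
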
--- a/tools/fasta_tools/fasta_to_tabular.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-#!/usr/bin/env python
-# This code exists in 2 places: ~/datatypes/converters and ~/tools/fasta_tools
-"""
-Input: fasta (input file), tabular (output file), int (truncation of id), int (columns from description)
-Output: tabular
-format convert: fasta to tabular
-"""
-
-import sys, os
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def __main__():
-    if len(sys.argv) != 5:
-        stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)")
-    infile = sys.argv[1]
-    outfile = sys.argv[2]
-    keep_first = int( sys.argv[3] )
-    descr_split = int( sys.argv[4] )
-    fasta_title = fasta_seq = ''
-    if keep_first == 0:
-        keep_first = None
-    elif descr_split == 1:
-        #Added one for the ">" character
-        #(which is removed if using descr_split > 1)
-        keep_first += 1
-    if descr_split < 1:
-        stop_err("Bad description split value (should be 1 or more)")
-    out = open( outfile, 'w' )
-    for i, line in enumerate( open( infile ) ):
-        line = line.rstrip( '\r\n' )
-        if not line or line.startswith( '#' ):
-            continue
-        if line.startswith( '>' ):
-            #Don't want any existing tabs to trigger extra columns:
-            line = line.replace('\t', ' ')
-            if i > 0:
-                out.write('\n')
-            if descr_split == 1:
-                out.write(line[1:keep_first])
-            else:
-                words = line[1:].split(None, descr_split-1)
-                #apply any truncation to first word (the id)
-                words[0] = words[0][0:keep_first]
-                #pad with empty columns if required
-                words += [""]*(descr_split-len(words))
-                out.write("\t".join(words))
-            out.write('\t')
-        else:
-            out.write(line)
-    if i > 0:
-        out.write('\n')
-    out.close()
-
-if __name__ == "__main__" : __main__()
--- a/tools/fasta_tools/fasta_to_tabular.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,128 +0,0 @@
-<tool id="fasta2tab" name="FASTA-to-Tabular" version="1.1.0">
-	<description>converter</description>
-	<command interpreter="python">fasta_to_tabular.py $input $output $keep_first $descr_columns</command>
-	<inputs>
-		<param name="input" type="data" format="fasta" label="Convert these sequences"/>
-		<param name="descr_columns" type="integer" size="2" value="1" label="How many columns to divide title string into?" help="Typically 2 to take the ID (first word) and decription (rest) as two columns, or 1 to give a single column">
-			<validator type="in_range" min="1" />
-		</param>
-		<param name="keep_first" type="integer" size="5" value="0" label="How many title characters to keep?" help="Applies only to the first column taken from the title string ('0' = keep the whole thing), useful when your sequence identifiers are all the same length.">
-			<validator type="in_range" min="0" />
-		</param>
-	</inputs>
-	<outputs>
-		<data name="output" format="tabular"/>
-	</outputs>
-	<tests>
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="descr_columns" value="1"/>
-			<param name="keep_first" value="0"/>
-			<output name="output" file="fasta_to_tabular_out1.tabular" />
-		</test>
-		
-		<test>
-			<param name="input" value="4.fasta" />
-			<param name="descr_columns" value="1"/>
-			<param name="keep_first" value="0"/>
-			<output name="output" file="fasta_to_tabular_out2.tabular" />
-		</test>
-		
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="descr_columns" value="1"/>
-			<param name="keep_first" value="14"/>
-			<output name="output" file="fasta_to_tabular_out3.tabular" />
-		</test>
-
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="descr_columns" value="2"/>
-			<param name="keep_first" value="0"/>
-			<output name="output" file="fasta_to_tabular_out4.tabular" />
-		</test>
-
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="descr_columns" value="5"/>
-			<param name="keep_first" value="0"/>
-			<output name="output" file="fasta_to_tabular_out5.tabular" />
-		</test>
-
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="descr_columns" value="5"/>
-			<param name="keep_first" value="10"/>
-			<output name="output" file="fasta_to_tabular_out6.tabular" />
-		</test>
-
-	</tests>
-	<help>
-	
-**What it does**
-
-This tool converts FASTA formatted sequences to TAB-delimited format.
-
-Many tools consider the first word of the FASTA "&gt;" title line to be an identifier, and any remaining text to be a free form description.
-It is therefore useful to split this text into two columns in Galaxy (identifier and any description) by setting **How many columns to divide title string into?** to **2**.
-In some cases the description can be usefully broken up into more columns -- see the examples below.
-
-The option *How many title characters to keep?* lets you keep only a specified number of characters from the beginning of the first column.
-With the introduction of the **How many columns to divide title string into?** option this setting is of limited use, but it still allows you to truncate the identifier.
-
------	
-
-**Example**
-
-Suppose you have the following FASTA formatted sequences from a Roche (454) FLX sequencing run::
-
-    &gt;EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_
-    TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG
-    TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG
-    &gt;EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_
-    AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAA
-
-Running this tool with the default settings will produce this (2 column output):
-
-========================================================================== =======================================
-EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
-EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_  AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
-========================================================================== =======================================
-
-Having the full title line (the FASTA "&gt;" line text) as a column is not always ideal.
-
-The **How many title characters to keep?** option is useful if your identifiers are all the same length.
-In this example the identifier is 14 characters long, so setting **How many title characters to keep?** to **14** (and leaving **How many columns to divide title string into?** at its default, **1**) will produce this (2 column output):
-
-============== =======================================
-EYKX4VC02EQLO5 TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
-EYKX4VC02D4GS2 AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
-============== =======================================
-
-If, however, your FASTA file has identifiers of variable length, it is better to split the title into at least two columns.
-Running this tool with **How many columns to divide title string into?** set to **2** will produce this (3 column output):
-
-============== =========================================================== =======================================
-EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
-EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_  AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
-============== =========================================================== =======================================
-
-Running this tool with **How many columns to divide title string into?** set to **5** will produce this (5 column output):
-
-============== ========== ============ ======== ========================== =======================================
-EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
-EYKX4VC02D4GS2 length=60  xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
-============== ========== ============ ======== ========================== =======================================
-
-Running this tool with **How many columns to divide title string into?** set to **5** and **How many title characters to keep?** set to **10** will produce this (5 column output).
-Notice that only the first column is truncated to 10 characters -- and be careful not to trim your sequence names too much (generally they should remain unique):
-
-========== ========== ============ ======== ========================== =======================================
-EYKX4VC02E length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
-EYKX4VC02D length=60  xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
-========== ========== ============ ======== ========================== =======================================
-
-Note that the sequences have been truncated for display purposes in the above tables.
-
-	</help>
-</tool>
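
The column split in fasta_to_tabular.py is an ordinary whitespace split with a
maxsplit of ``descr_columns - 1``, padded with empty strings for short titles;
a doctest-style sketch using the 454 title from the help above::

    >>> title = "EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_"
    >>> words = title.split( None, 5 - 1 )        # descr_columns = 5
    >>> len( words ), words[0]
    (5, 'EYKX4VC02EQLO5')
    >>> words += [ "" ] * ( 5 - len( words ) )    # pad short titles (a no-op here)
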
--- a/tools/fasta_tools/tabular_to_fasta.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-#!/usr/bin/env python
-"""
-Input: tabular, title column(s), sequence column
-Output: fasta
-Convert tab-delimited data to FASTA formatted sequences.
-"""
-import sys, os
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def __main__():
-    infile = sys.argv[1]
-    title_col = sys.argv[2]
-    seq_col = sys.argv[3]
-    outfile = sys.argv[4]        
-
-    if title_col == None or title_col == 'None' or seq_col == None or seq_col == 'None':
-        stop_err( "Columns not specified." )
-    try:
-        seq_col = int( seq_col ) - 1
-    except:
-        stop_err( "Invalid Sequence Column: %s." %str( seq_col ) )
-
-    title_col_list = title_col.split( ',' )
-    out = open( outfile, 'w' )
-    skipped_lines = 0
-    first_invalid_line = 0
-    invalid_line = ""
-    i = 0
-    
-    for i, line in enumerate( open( infile ) ):
-        error = False
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ):
-            fields = line.split( '\t' )
-            fasta_title = []
-            for j in title_col_list:
-                try:
-                    j = int( j ) - 1
-                    fasta_title.append( fields[j] )
-                except:
-                    skipped_lines += 1
-                    if not invalid_line:
-                        first_invalid_line = i + 1
-                        invalid_line = line
-                    error = True
-                    break
-            if not error:
-                try:
-                    fasta_seq = fields[seq_col]
-                    if fasta_title[0].startswith( ">" ):
-                        fasta_title[0] = fasta_title[0][1:]
-                    print >> out, ">%s\n%s" % ( "_".join( fasta_title ), fasta_seq )
-                except:
-                    skipped_lines += 1
-                    if not invalid_line:
-                        first_invalid_line = i + 1
-                        invalid_line = line
-    out.close()    
-
-    if skipped_lines > 0:
-        print 'Data issue: skipped %d blank or invalid lines starting at #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
-
-if __name__ == "__main__" : __main__()
\ No newline at end of file
--- a/tools/fasta_tools/tabular_to_fasta.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-<tool id="tab2fasta" name="Tabular-to-FASTA" version="1.1.0">
-	<description>converts tabular file to FASTA format</description>
-	<command interpreter="python">tabular_to_fasta.py $input $title_col $seq_col $output </command>
-	<inputs>
-		<param name="input" type="data" format="tabular" label="Tab-delimited file"/>
-		<param name="title_col" type="data_column" data_ref="input" multiple="True" numerical="False" label="Title column(s)" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"/>
-		<param name="seq_col" type="data_column" data_ref="input" numerical="False" label="Sequence column" />
-	</inputs>
-	<outputs>
-		<data name="output" format="fasta"/>
-	</outputs>
-	<tests>
-		<test>
-			<param name="input" value="solexa.tabular" />
-			<param name="title_col" value="1,2,3,4" />
-			<param name="seq_col" value="5" />
-			<output name="output" file="tabular_to_fasta_out1.fasta" />
-		</test>
-	</tests>
-	<help>
-	
-**What it does**
-
-Converts tab delimited data into FASTA formatted sequences.
-
------------
-	
-**Example**
-
-Suppose this is a sequence file produced by an Illumina (Solexa) sequencer::
-
-	5	300	902	419	GACTCATGATTTCTTACCTATTAGTGGTTGAACATC
-	5	300	880	431	GTGATATGTATGTTGACGGCCATAAGGCTGCTTCTT
-	
-Selecting **c3** and **c4** as the **Title column(s)** and **c5** as the **Sequence column** will result in::
-
-	&gt;902_419
-	GACTCATGATTTCTTACCTATTAGTGGTTGAACATC
-	&gt;880_431
-	GTGATATGTATGTTGACGGCCATAAGGCTGCTTCTT
-	
-	</help>
-</tool>
\ No newline at end of file
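
The record construction in tabular_to_fasta.py reduces to joining the chosen
title columns with underscores; a doctest-style sketch using the first Solexa
row from the help above (column indices are zero-based internally)::

    >>> fields = [ '5', '300', '902', '419', 'GACTCATGATTTCTTACCTATTAGTGGTTGAACATC' ]
    >>> title_cols, seq_col = [ 2, 3 ], 4         # the user-facing c3, c4 and c5
    >>> ">%s\n%s" % ( "_".join( fields[ j ] for j in title_cols ), fields[ seq_col ] )
    '>902_419\nGACTCATGATTTCTTACCTATTAGTGGTTGAACATC'
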
--- a/tools/fastq/fastq_combiner.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-#Dan Blankenberg
-import sys, os, shutil
-from galaxy_utils.sequence.fastq import fastqWriter, fastqSequencingRead, fastqCombiner, fastqFakeFastaScoreReader
-from galaxy_utils.sequence.fasta import fastaReader, fastaNamedReader
-
-def main():
-    #Read command line arguments
-    fasta_filename = sys.argv[1]
-    fasta_type = sys.argv[2] or 'fasta' #should always be fasta or csfasta? what if txt?
-    qual_filename = sys.argv[3]
-    qual_type = sys.argv[4] or 'qualsanger' #qual454 qualsolid
-    output_filename = sys.argv[5]
-    force_quality_encoding = sys.argv[6]
-    if force_quality_encoding == 'None':
-        force_quality_encoding = None
-    
-    format = 'sanger'
-    if fasta_type == 'csfasta' or qual_type == 'qualsolid':
-        format = 'cssanger'
-    elif qual_type == 'qualsolexa':
-        format = 'solexa'
-    elif qual_type == 'qualillumina':
-        format = 'illumina'
-    
-    out = fastqWriter( open( output_filename, 'wb' ), format = format, force_quality_encoding = force_quality_encoding )
-    if qual_filename == 'None':
-        qual_input = fastqFakeFastaScoreReader( format, quality_encoding = force_quality_encoding )
-    else:
-        qual_input = fastaNamedReader( open( qual_filename, 'rb' )  )
-    
-    fastq_combiner = fastqCombiner( format )
-    i = None
-    skip_count = 0
-    for i, sequence in enumerate( fastaReader( open( fasta_filename, 'rb' ) ) ):
-        quality = qual_input.get( sequence )
-        if quality:
-            fastq_read = fastq_combiner.combine( sequence, quality )
-            out.write( fastq_read )
-        else:
-            skip_count += 1
-    out.close()
-    if i is None:
-        print "Your file contains no valid FASTA sequences."
-    else:
-        print qual_input.has_data()
-        print 'Combined %s of %s sequences with quality scores (%.2f%%).' % ( i - skip_count + 1, i + 1, float( i - skip_count + 1 ) / float( i + 1 ) * 100.0 )
-
-if __name__ == "__main__":
-    main()
--- a/tools/fastq/fastq_combiner.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,77 +0,0 @@
-<tool id="fastq_combiner" name="Combine FASTA and QUAL" version="1.0.1">
-  <description>into FASTQ</description>
-  <command interpreter="python">fastq_combiner.py '$fasta_file' '${fasta_file.extension}' '$qual_file' '${qual_file.extension}' '$output_file' '$force_quality_encoding'</command>
-  <inputs>
-    <param name="fasta_file" type="data" format="fasta,csfasta" label="FASTA File" />
-    <param name="qual_file" type="data" format="qual" label="Quality Score File" optional="True" />
-    <param name="force_quality_encoding" type="select" label="Force Quality Score encoding">
-      <option value="None">Use Source Encoding</option>
-      <option value="ascii" selected="True">ASCII</option>
-      <option value="decimal">Decimal</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data name="output_file" format="fastqsanger">
-      <change_format>
-        <when input_dataset="fasta_file" attribute="extension" value="csfasta" format="fastqcssanger" />
-        <when input_dataset="qual_file" attribute="extension" value="qualsolid" format="fastqcssanger" />
-        <when input_dataset="qual_file" attribute="extension" value="qualsolexa" format="fastqsolexa" />
-        <when input_dataset="qual_file" attribute="extension" value="qualillumina" format="fastqillumina" />
-      </change_format>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <param name="fasta_file" value="s2fq_phiX.csfasta" ftype="csfasta" />
-      <param name="qual_file" value="s2fq_phiX.qualsolid" ftype="qualsolid" />
-      <param name="force_quality_encoding" value="None" />
-      <output name="output_file" file="combine_phiX_out_1.fastqcssanger" />
-    </test>
-    <test>
-      <param name="fasta_file" value="s2fq_phiX.csfasta" ftype="csfasta" />
-      <param name="qual_file" value="s2fq_phiX.qualsolid" ftype="qualsolid" />
-      <param name="force_quality_encoding" value="ascii" />
-      <output name="output_file" file="combine_phiX_out_2.fastqcssanger" />
-    </test>
-    <test>
-      <param name="fasta_file" value="fastq_combiner_in_1.fasta" ftype="fasta" />
-      <param name="qual_file" value="fastq_combiner_in_1.qual454" ftype="qual454" />
-      <param name="force_quality_encoding" value="None" />
-      <output name="output_file" file="wrapping_as_sanger.fastqsanger" />
-    </test>
-    <test>
-      <param name="fasta_file" value="fastq_combiner_in_1.fasta" ftype="fasta" />
-      <param name="qual_file" value="fastq_combiner_in_1.qual454" ftype="qual454" />
-      <param name="force_quality_encoding" value="decimal" />
-      <output name="output_file" file="wrapping_as_sanger_decimal.fastqsanger" />
-    </test>
-    <test>
-      <param name="fasta_file" value="fastq_combiner_in_1.fasta" ftype="fasta" />
-      <param name="qual_file" />
-      <param name="force_quality_encoding" value="decimal" />
-      <output name="output_file" file="fastq_combiner_no_qual_decimal_out_1.fastqsanger" />
-    </test>
-    <test>
-      <param name="fasta_file" value="s2fq_phiX.csfasta" ftype="csfasta" />
-      <param name="qual_file" />
-      <param name="force_quality_encoding" value="ascii" />
-      <output name="output_file" file="fastq_combiner_no_qual_ascii_out_1.fastqcssanger" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-This tool joins a FASTA file to a Quality Score file, creating a single FASTQ block for each read.
-
-Specifying a set of quality scores is optional; when none is provided, the output will be fastqsanger (or fastqcssanger when a csfasta is provided), with every base assigned the maximum allowed quality score (93).
-
-Use this tool, for example, to convert 454-type output to FASTQ.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-  </help>
-</tool>
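
The output-format decision embedded in fastq_combiner.py is easier to see on its
own; a sketch of that mapping (not the galaxy_utils API)::

    def output_format( fasta_type, qual_type ):
        if fasta_type == 'csfasta' or qual_type == 'qualsolid':
            return 'cssanger'
        if qual_type == 'qualsolexa':
            return 'solexa'
        if qual_type == 'qualillumina':
            return 'illumina'
        return 'sanger'

When no QUAL file is supplied, the fake score reader stands in and every base
receives the maximum allowed quality value (93), as the help above states.
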
--- a/tools/fastq/fastq_filter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,34 +0,0 @@
-#Dan Blankenberg
-import sys, os, shutil
-from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
-
-def main():
-    #Read command line arguments
-    input_filename = sys.argv[1]
-    script_filename = sys.argv[2]
-    output_filename = sys.argv[3]
-    additional_files_path = sys.argv[4]
-    input_type = sys.argv[5] or 'sanger'
-    
-    #Save script file for debugging/verification info later
-    os.mkdir( additional_files_path )
-    shutil.copy( script_filename, os.path.join( additional_files_path, 'debug.txt' ) )
-    
-    out = fastqWriter( open( output_filename, 'wb' ), format = input_type )
-    
-    i = None
-    reads_kept = 0
-    for i, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ):
-        local = {'fastq_read':fastq_read, 'ret_val':False}
-        execfile( script_filename, {}, local )
-        if local['ret_val']:
-            out.write( fastq_read )
-            reads_kept += 1
-    out.close()
-    if i is None:
-        print "Your file contains no valid fastq reads."
-    else:
-        print 'Kept %s of %s reads (%.2f%%).' % ( reads_kept, i + 1, float( reads_kept ) / float( i + 1 ) * 100.0 )
-
-if __name__ == "__main__":
-    main()
--- a/tools/fastq/fastq_filter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,318 +0,0 @@
-<tool id="fastq_filter" name="Filter FASTQ" version="1.0.0">
-  <description>reads by quality score and length</description>
-  <command interpreter="python">fastq_filter.py $input_file $fastq_filter_file $output_file $output_file.files_path '${input_file.extension[len( 'fastq' ):]}'</command>
-  <inputs>
-    <page>
-      <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File" help="Requires groomed data: if your data does not appear here try using the FASTQ groomer."/>
-       <param name="min_size" label="Minimum Size" value="0" type="integer">
-        <validator type="in_range" message="Minimum size must be positive" min="0"/>
-      </param>
-      <param name="max_size" label="Maximum Size" value="0" type="integer" help="A maximum size less than 1 indicates no limit."/>
-      <param name="min_quality" label="Minimum Quality" value="0" type="float"/>
-      <param name="max_quality" label="Maximum Quality" value="0" type="float" help="A maximum quality less than 1 indicates no limit."/>
-      <param name="max_num_deviants" label="Maximum number of bases allowed outside of quality range" value="0" type="integer">
-        <validator type="in_range" message="Maximum number of deviate bases must be positive" min="0"/>
-      </param>
-     <param name="paired_end" label="This is paired end data" type="boolean" truevalue="paired_end" falsevalue="single_end" checked="False"/>
-      <repeat name="fastq_filters" title="Quality Filter on a Range of Bases" help="The above settings do not apply to these filters.">
-        <conditional name="offset_type">
-          <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
-            <option value="offsets_absolute" selected="true">Absolute Values</option>
-            <option value="offsets_percent">Percentage of Read Length</option>
-          </param>
-          <when value="offsets_absolute">
-            <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left">
-              <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
-              <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
-            </param>
-            <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right">
-              <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
-              <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
-            </param>
-          </when>
-          <when value="offsets_percent">
-            <param name="left_column_offset" label="Offset from 5' end" value="0" type="float">
-              <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
-            </param>
-            <param name="right_column_offset" label="Offset from 3' end" value="0" type="float">
-              <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
-            </param>
-          </when>
-        </conditional>
-        <param name="score_operation" type="select" label="Aggregate read score for specified range">
-          <option value="min" selected="True">min score</option>
-          <option value="max">max score</option>
-          <option value="sum">sum of scores</option>
-          <option value="mean">mean of scores</option>
-        </param>
-        <param name="score_comparison" type="select" label="Keep read when aggregate score is">
-          <option value="&gt;">&gt;</option>
-          <option value="&gt;=" selected="true">&gt;=</option>
-          <option value="==">==</option>
-          <option value="&lt;">&lt;</option>
-          <option value="&lt;=">&lt;=</option>
-          <sanitizer sanitize="False"/>
-        </param>
-        <param name="score" label="Quality Score" value="0" type="float" />
-      </repeat>
-    </page>
-  </inputs>
-  <configfiles>
-    <configfile name="fastq_filter_file">
-def fastq_read_pass_filter( fastq_read ):
-    def mean( score_list ):
-        return float( sum( score_list ) ) / float( len( score_list ) )
-    if len( fastq_read ) &lt; $min_size:
-        return False
-    if $max_size &gt; 0 and len( fastq_read ) &gt; $max_size:
-        return False
-    num_deviates = $max_num_deviants
-    qual_scores = fastq_read.get_decimal_quality_scores()
-    for qual_score in qual_scores:
-        if qual_score &lt; $min_quality or ( $max_quality &gt; 0 and qual_score &gt; $max_quality ):
-            if num_deviates == 0:
-                return False
-            else:
-                num_deviates -= 1
-#if $paired_end.value == 'single_end':
-    qual_scores_split = [ qual_scores ]
-#else:
-    qual_scores_split = [ qual_scores[ 0:int( len( qual_scores ) / 2 ) ], qual_scores[ int( len( qual_scores ) / 2 ): ] ]
-#end if
-#for $fastq_filter in $fastq_filters:
-    for split_scores in qual_scores_split:
-        left_column_offset = $fastq_filter[ 'offset_type' ][ 'left_column_offset' ]
-        right_column_offset = $fastq_filter[ 'offset_type' ][ 'right_column_offset' ]
-#if $fastq_filter[ 'offset_type' ]['base_offset_type'] == 'offsets_percent':
-        left_column_offset = int( round( float( left_column_offset ) / 100.0 * float( len( split_scores ) ) ) )
-        right_column_offset = int( round( float( right_column_offset ) / 100.0 * float( len( split_scores ) ) ) )
-#end if
-        if right_column_offset > 0:
-            split_scores = split_scores[ left_column_offset:-right_column_offset]
-        else:
-            split_scores = split_scores[ left_column_offset:]
-        if split_scores: ##if a read doesn't have enough columns, it passes by default 
-            if not ( ${fastq_filter[ 'score_operation' ]}( split_scores ) $fastq_filter[ 'score_comparison' ] $fastq_filter[ 'score' ]  ):
-                return False
-#end for
-    return True
-ret_val = fastq_read_pass_filter( fastq_read )
-</configfile>
-  </configfiles>
-  <outputs>
-    <data format="input" name="output_file" />
-  </outputs>
-  <tests>
-    <!-- Do nothing filter -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
-      <param name="min_size" value="0"/>
-      <param name="max_size" value="0"/>
-      <param name="min_quality" value="0"/>
-      <param name="max_quality" value="0"/>
-      <param name="max_num_deviants" value="0"/>
-      <param name="paired_end" value="single_end"/>
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="0"/>
-      <param name="right_column_offset" value="0"/>
-      <param name="score_operation" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="score" value="0"/>
-      <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/>
-    </test>
-    <!-- crippled input types prevent this test <test>
-      <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/>
-      <param name="min_size" value="0"/>
-      <param name="max_size" value="0"/>
-      <param name="min_quality" value="-5"/>
-      <param name="max_quality" value="0"/>
-      <param name="max_num_deviants" value="0"/>
-      <param name="paired_end" value="single_end"/>
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="0"/>
-      <param name="right_column_offset" value="0"/>
-      <param name="score_operation" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="score" value="-5"/>
-      <output name="out_file1" file="solexa_full_range_original_solexa.fastqsolexa"/>
-    </test> -->
-    <!-- No trim, so does not remove Adapter from cssanger -->
-    <test>
-      <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastqcssanger"/>
-      <param name="min_size" value="0"/>
-      <param name="max_size" value="0"/>
-      <param name="min_quality" value="0"/>
-      <param name="max_quality" value="0"/>
-      <param name="max_num_deviants" value="0"/>
-      <param name="paired_end" value="single_end"/>
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="0"/>
-      <param name="right_column_offset" value="0"/>
-      <param name="score_operation" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="score" value="0"/>
-      <output name="out_file1" file="sanger_full_range_as_cssanger.fastqcssanger"/>
-    </test>
-    <!-- Remove all Filter -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
-      <param name="min_size" value="0"/>
-      <param name="max_size" value="0"/>
-      <param name="min_quality" value="1"/>
-      <param name="max_quality" value="0"/>
-      <param name="max_num_deviants" value="0"/>
-      <param name="paired_end" value="single_end"/>
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="0"/>
-      <param name="right_column_offset" value="0"/>
-      <param name="score_operation" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="score" value="0"/>
-      <output name="out_file1" file="empty_file.dat"/>
-    </test>
-    <!-- crippled input types prevent this test <test>
-      <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/>
-      <param name="min_size" value="0"/>
-      <param name="max_size" value="0"/>
-      <param name="min_quality" value="-4"/>
-      <param name="max_quality" value="0"/>
-      <param name="max_num_deviants" value="0"/>
-      <param name="paired_end" value="single_end"/>
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="0"/>
-      <param name="right_column_offset" value="0"/>
-      <param name="score_operation" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="score" value="-5"/>
-      <output name="out_file1" file="empty_file.dat"/>
-    </test> -->
-    <!-- Keep all by allowing 1 deviant -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
-      <param name="min_size" value="0"/>
-      <param name="max_size" value="0"/>
-      <param name="min_quality" value="1"/>
-      <param name="max_quality" value="0"/>
-      <param name="max_num_deviants" value="1"/>
-      <param name="paired_end" value="single_end"/>
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="0"/>
-      <param name="right_column_offset" value="0"/>
-      <param name="score_operation" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="score" value="0"/>
-      <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/>
-    </test>
-    <!-- crippled input types prevent this test<test>
-      <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/>
-      <param name="min_size" value="0"/>
-      <param name="max_size" value="0"/>
-      <param name="min_quality" value="-5"/>
-      <param name="max_quality" value="61"/>
-      <param name="max_num_deviants" value="1"/>
-      <param name="paired_end" value="single_end"/>
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="0"/>
-      <param name="right_column_offset" value="0"/>
-      <param name="score_operation" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="score" value="-5"/>
-      <output name="out_file1" file="solexa_full_range_original_solexa.fastqsolexa"/>
-    </test> -->
-    <!-- Filter inner range -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
-      <param name="min_size" value="0"/>
-      <param name="max_size" value="0"/>
-      <param name="min_quality" value="0"/>
-      <param name="max_quality" value="0"/>
-      <param name="max_num_deviants" value="0"/>
-      <param name="paired_end" value="single_end"/>
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="1"/>
-      <param name="right_column_offset" value="1"/>
-      <param name="score_operation" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="score" value="1"/>
-      <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/>
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
-      <param name="min_size" value="0"/>
-      <param name="max_size" value="0"/>
-      <param name="min_quality" value="0"/>
-      <param name="max_quality" value="0"/>
-      <param name="max_num_deviants" value="0"/>
-      <param name="paired_end" value="single_end"/>
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="1"/>
-      <param name="right_column_offset" value="1"/>
-      <param name="score_operation" value="max"/>
-      <param name="score_comparison" value="&lt;="/>
-      <param name="score" value="92"/>
-      <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/>
-    </test>
-    <!-- percent based offsets -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
-      <param name="min_size" value="0"/>
-      <param name="max_size" value="0"/>
-      <param name="min_quality" value="0"/>
-      <param name="max_quality" value="0"/>
-      <param name="max_num_deviants" value="0"/>
-      <param name="paired_end" value="single_end"/>
-      <param name="base_offset_type" value="offsets_percent"/>
-      <param name="left_column_offset" value="1.075"/>
-      <param name="right_column_offset" value="1.075"/>
-      <param name="score_operation" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="score" value="1"/>
-      <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/>
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
-      <param name="min_size" value="0"/>
-      <param name="max_size" value="0"/>
-      <param name="min_quality" value="0"/>
-      <param name="max_quality" value="0"/>
-      <param name="max_num_deviants" value="0"/>
-      <param name="paired_end" value="single_end"/>
-      <param name="base_offset_type" value="offsets_percent"/>
-      <param name="left_column_offset" value="1"/>
-      <param name="right_column_offset" value="1"/>
-      <param name="score_operation" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="score" value="1"/>
-      <output name="out_file1" file="empty_file.dat"/>
-    </test>
-  </tests>
-<help>
-This tool allows you to build complex filters to be applied to each read in a FASTQ file.
-
-**Basic Options:**
-    * You can specify minimum and maximum read lengths.
-    * You can specify minimum and maximum per-base quality scores, and optionally the number of bases that are allowed to deviate from this range (default: 0 deviant bases).
-    * If your data is paired-end, select the proper checkbox; each read will then be internally split down the middle and the filters applied to each half using the specified offsets.
-
-**Advanced Options:**
-    * You can specify any number of advanced filters.
-    * 5' and 3' offsets are defined starting at zero and increasing from the respective end of the read. For example, a quality string of "ABCDEFG" with 5' and 3' offsets of 1 and 1 will yield "BCDEF".
-    * You can specify either absolute or percentage offset values. *Absolute Values* offsets are useful for fixed-length reads (e.g. Illumina or SOLiD data); *Percentage of Read Length* offsets are useful for variable-length reads (e.g. 454 data). When using the percent-based method, offsets are rounded to the nearest integer.
-    * You specify an aggregating action (min, max, sum, mean) to perform on the quality scores between the specified offsets; the aggregate is then compared against the given value using the chosen comparison operation.
-    * If a set of offsets is specified that causes the remaining quality score list to be of length zero, the read will **pass** the quality filter unless the size range filter is used to remove such reads.
-
------
-
-.. class:: warningmark
-
-Adapter bases in color space reads are excluded from filtering.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-
-</help>
-</tool>
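
Because the Cheetah ``<configfile>`` template above is hard to read with the
directives mixed in, here is a hand-written equivalent of a single
absolute-offset range filter (min-score aggregate, ``>=`` comparison); a sketch
assuming a plain list of decimal quality scores::

    def passes_range_filter( qual_scores, left_offset, right_offset, threshold ):
        if right_offset > 0:
            scores = qual_scores[ left_offset : -right_offset ]
        else:
            scores = qual_scores[ left_offset : ]
        if not scores:
            return True   # too-short reads pass by default, as documented above
        return min( scores ) >= threshold
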
--- a/tools/fastq/fastq_groomer.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-#Dan Blankenberg
-import sys
-from galaxy_utils.sequence.fastq import fastqReader, fastqVerboseErrorReader, fastqAggregator, fastqWriter
-
-def main():
-    input_filename = sys.argv[1]
-    input_type = sys.argv[2]
-    output_filename = sys.argv[3]
-    output_type = sys.argv[4]
-    force_quality_encoding = sys.argv[5]
-    summarize_input = sys.argv[6] == 'summarize_input'
-    if force_quality_encoding == 'None':
-        force_quality_encoding = None
-    
-    aggregator = fastqAggregator()
-    out = fastqWriter( open( output_filename, 'wb' ), format = output_type, force_quality_encoding = force_quality_encoding )
-    read_count = None
-    if summarize_input:
-        reader = fastqVerboseErrorReader
-    else:
-        reader = fastqReader
-    for read_count, fastq_read in enumerate( reader( open( input_filename ), format = input_type, apply_galaxy_conventions = True ) ):
-        if summarize_input:
-            aggregator.consume_read( fastq_read )
-        out.write( fastq_read )
-    out.close()
-    
-    if read_count is not None:
-        print "Groomed %i %s reads into %s reads." % ( read_count + 1, input_type, output_type )
-        if input_type != output_type and 'solexa' in [ input_type, output_type ]:
-            print "Converted between Solexa and PHRED scores."
-        if summarize_input:
-            print "Based upon quality and sequence, the input data is valid for: %s" % ( ", ".join( aggregator.get_valid_formats() )  or "None" )
-            ascii_range = aggregator.get_ascii_range()
-            decimal_range =  aggregator.get_decimal_range()
-            print "Input ASCII range: %s(%i) - %s(%i)" % ( repr( ascii_range[0] ), ord( ascii_range[0] ), repr( ascii_range[1] ), ord( ascii_range[1] ) ) #print using repr, since \x00 (null) causes info truncation in galaxy when printed
-            print "Input decimal range: %i - %i" % ( decimal_range[0], decimal_range[1] )        
-    else:
-        print "No valid FASTQ reads were provided."
-
-
-if __name__ == "__main__": main()
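
Grooming between Solexa and the PHRED-based Sanger/Illumina 1.3+ encodings is
more than an ASCII-offset shift (Sanger writes PHRED scores at offset 33,
Illumina 1.3+ at offset 64, Solexa its own scores at offset 64): the quality
scales themselves differ at low values. A sketch of the standard conversions
from Cock et al. 2009 (cited in the tool's tests below), not the galaxy_utils
implementation itself::

    import math

    def solexa_to_phred( q ):
        return 10.0 * math.log10( 10.0 ** ( q / 10.0 ) + 1.0 )

    def phred_to_solexa( q ):
        # only defined for PHRED q > 0; the two scales converge for large q
        return 10.0 * math.log10( 10.0 ** ( q / 10.0 ) - 1.0 )
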
--- a/tools/fastq/fastq_groomer.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,371 +0,0 @@
-<tool id="fastq_groomer" name="FASTQ Groomer" version="1.0.4">
-  <description>convert between various FASTQ quality formats</description>
-  <command interpreter="python">fastq_groomer.py '$input_file' '$input_type' '$output_file'
-#if str( $options_type['options_type_selector'] ) == 'basic':
-#if str( $input_type ) == 'cssanger':
-'cssanger'
-#else:
-'sanger'
-#end if
-'ascii' 'summarize_input'
-#else:
-'${options_type.output_type}' '${options_type.force_quality_encoding}' '${options_type.summarize_input}'
-#end if
-</command>
-  <inputs>
-    <param name="input_file" type="data" format="fastq" label="File to groom" />
-    <param name="input_type" type="select" label="Input FASTQ quality scores type">
-      <option value="solexa">Solexa</option>
-      <option value="illumina">Illumina 1.3+</option>
-      <option value="sanger" selected="True">Sanger</option>
-      <option value="cssanger">Color Space Sanger</option>
-    </param>
-    <conditional name="options_type">
-    <param name="options_type_selector" type="select" label="Advanced Options">
-      <option value="basic" selected="True">Hide Advanced Options</option>
-      <option value="advanced">Show Advanced Options</option>
-    </param>
-    <when value="basic">
-      <!-- no options -->
-    </when>
-    <when value="advanced">
-      <param name="output_type" type="select" label="Output FASTQ quality scores type" help="Galaxy tools are designed to work with the Sanger Quality score format.">
-        <option value="solexa">Solexa</option>
-        <option value="illumina">Illumina 1.3+</option>
-        <option value="sanger" selected="True">Sanger (recommended)</option>
-        <option value="cssanger">Color Space Sanger</option>
-      </param>
-      <param name="force_quality_encoding" type="select" label="Force Quality Score encoding">
-        <option value="None">Use Source Encoding</option>
-        <option value="ascii" selected="True">ASCII</option>
-        <option value="decimal">Decimal</option>
-      </param>
-      <param name="summarize_input" type="select" label="Summarize input data">
-        <option value="summarize_input" selected="True">Summarize Input</option>
-        <option value="dont_summarize_input">Do not Summarize Input (faster)</option>
-      </param>
-    </when>
-  </conditional>
-  </inputs>
-  <outputs>
-    <data name="output_file" format="fastqsanger">
-      <change_format>
-        <when input="input_type" value="cssanger" format="fastqcssanger" />
-        <when input="options_type.output_type" value="solexa" format="fastqsolexa" />
-        <when input="options_type.output_type" value="illumina" format="fastqillumina" />
-        <when input="options_type.output_type" value="sanger" format="fastqsanger" />
-        <when input="options_type.output_type" value="cssanger" format="fastqcssanger" />
-      </change_format>
-    </data>
-  </outputs>
-  <tests>
-    <!-- These tests include test files adapted from supplemental material in Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16. -->
-    <!-- Unfortunately, cannot test for expected failures -->
-    <!-- Test basic options -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" />
-      <param name="input_type" value="sanger" />
-      <param name="options_type_selector" value="basic" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" />
-      <param name="input_type" value="cssanger" />
-      <param name="options_type_selector" value="basic" />
-      <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" />
-    </test>
-    <test>
-      <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" />
-      <param name="input_type" value="illumina" />
-      <param name="options_type_selector" value="basic" />
-      <output name="output_file" file="illumina_full_range_as_sanger.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" />
-      <param name="input_type" value="solexa" />
-      <param name="options_type_selector" value="basic" />
-      <output name="output_file" file="solexa_full_range_as_sanger.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_as_illumina.fastqillumina" ftype="fastq" />
-      <param name="input_type" value="sanger" />
-      <param name="options_type_selector" value="basic" />
-      <output name="output_file" file="sanger_full_range_as_illumina.fastqillumina" />
-    </test>
-    <!-- Test grooming from illumina -->
-    <test>
-      <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" />
-      <param name="input_type" value="illumina" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="illumina" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="illumina_full_range_original_illumina.fastqillumina" />
-    </test>
-    <test>
-      <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" />
-      <param name="input_type" value="illumina" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="sanger" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="illumina_full_range_as_sanger.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" />
-      <param name="input_type" value="illumina" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="solexa" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="illumina_full_range_as_solexa.fastqsolexa" />
-    </test>
-    <test>
-      <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" />
-      <param name="input_type" value="illumina" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="cssanger" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="illumina_full_range_as_cssanger.fastqcssanger" />
-    </test>
-    <!-- Test grooming from sanger -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" />
-      <param name="input_type" value="sanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="sanger" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" />
-      <param name="input_type" value="sanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="illumina" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="sanger_full_range_as_illumina.fastqillumina" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" />
-      <param name="input_type" value="sanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="solexa" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="sanger_full_range_as_solexa.fastqsolexa" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" />
-      <param name="input_type" value="sanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="cssanger" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" />
-    </test>
-    <!-- Test grooming from solexa -->
-    <test>
-      <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" />
-      <param name="input_type" value="solexa" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="solexa" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="solexa_full_range_original_solexa.fastqsolexa" />
-    </test>
-    <test>
-      <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" />
-      <param name="input_type" value="solexa" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="illumina" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="solexa_full_range_as_illumina.fastqillumina" />
-    </test>
-    <test>
-      <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" />
-      <param name="input_type" value="solexa" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="sanger" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="solexa_full_range_as_sanger.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" />
-      <param name="input_type" value="solexa" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="cssanger" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="solexa_full_range_as_cssanger.fastqcssanger" />
-    </test>
-    <!-- Test grooming from cssanger -->
-    <test>
-      <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" />
-      <param name="input_type" value="cssanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="cssanger" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" />
-      <param name="input_type" value="cssanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="sanger" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" />
-      <param name="input_type" value="cssanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="illumina" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="sanger_full_range_as_illumina.fastqillumina" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" />
-      <param name="input_type" value="cssanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="solexa" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="sanger_full_range_as_solexa.fastqsolexa" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_as_cssanger_adapter_base_with_quality_score.fastqcssanger_fake_score" ftype="fastq" />
-      <param name="input_type" value="cssanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="cssanger" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" />
-    </test>
-    <!-- Test fastq with line wrapping -->
-    <test>
-      <param name="input_file" value="wrapping_original_sanger.fastqsanger" ftype="fastq" />
-      <param name="input_type" value="sanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="sanger" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="wrapping_as_sanger.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="wrapping_original_sanger.fastqsanger" ftype="fastq" />
-      <param name="input_type" value="sanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="illumina" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="wrapping_as_illumina.fastqillumina" />
-    </test>
-    <test>
-      <param name="input_file" value="wrapping_original_sanger.fastqsanger" ftype="fastq" />
-      <param name="input_type" value="sanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="solexa" />
-      <param name="force_quality_encoding" value="None" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="wrapping_as_solexa.fastqsolexa" />
-    </test>
-    <!-- Test forcing quality score encoding -->
-    <!-- Sanger, range 0 - 93 -->
-    <test>
-      <param name="input_file" value="sanger_full_range_as_decimal_sanger.fastqsanger" ftype="fastq" />
-      <param name="input_type" value="sanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="sanger" />
-      <param name="force_quality_encoding" value="ascii" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" />
-      <param name="input_type" value="sanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="sanger" />
-      <param name="force_quality_encoding" value="decimal" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="sanger_full_range_as_decimal_sanger.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_as_tab_decimal_sanger.fastqsanger" ftype="fastq" />
-      <param name="input_type" value="sanger" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="sanger" />
-      <param name="force_quality_encoding" value="ascii" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <!-- Solexa, range -5 - 62 -->
-    <test>
-      <param name="input_file" value="solexa_full_range_as_decimal_solexa.fastqsolexa" ftype="fastq" />
-      <param name="input_type" value="solexa" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="solexa" />
-      <param name="force_quality_encoding" value="ascii" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="solexa_full_range_original_solexa.fastqsolexa" />
-    </test>
-    <test>
-      <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" />
-      <param name="input_type" value="solexa" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="output_type" value="solexa" />
-      <param name="force_quality_encoding" value="decimal" />
-      <param name="summarize_input" value="summarize_input" />
-      <output name="output_file" file="solexa_full_range_as_decimal_solexa.fastqsolexa" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-This tool offers several conversion options relating to the FASTQ format.
-
-When using *Basic* options, the output will be *sanger* formatted or *cssanger* formatted (when the input is Color Space Sanger).
-
-When converting, if a quality score falls outside of the target score range, it will be coerced to the closest available value (i.e. the minimum or maximum). 
-
-When converting between Solexa and the other formats, quality scores are mapped between Solexa and PHRED scales using the equations found in `Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.`_
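-
-As an illustration of those equations (not the code this tool executes internally), the mapping can be written::
-
-    import math
-
-    def solexa_to_phred( q_solexa ):
-        #PHRED = 10 * log10( 10^( Solexa / 10 ) + 1 )
-        return 10.0 * math.log10( 10 ** ( q_solexa / 10.0 ) + 1 )
-
-    def phred_to_solexa( q_phred ):
-        #Solexa = 10 * log10( 10^( PHRED / 10 ) - 1 ); undefined at PHRED 0
-        return 10.0 * math.log10( 10 ** ( q_phred / 10.0 ) - 1 )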
-
-When converting between color space (csSanger) and base/sequence space (Sanger, Illumina, Solexa) formats, adapter bases are lost or gained; if gained, the base 'G' is used as the adapter. You cannot convert a color space read to base space if there is no adapter present in the color space sequence. Any masked or ambiguous nucleotides in base space will be converted to 'N's when determining color space encoding.
-
------
-
-**Quality Score Comparison**
-
-::
-
-    SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS
-    ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
-    ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-    !"#$%&amp;'()*+,-./0123456789:;&lt;=&gt;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
-    |                         |    |        |                              |                     |
-   33                        59   64       73                            104                   126
-  
-   S - Sanger       Phred+33,  93 values  (0, 93) (0 to 60 expected in raw reads)
-   I - Illumina 1.3 Phred+64,  62 values  (0, 62) (0 to 40 expected in raw reads)
-   X - Solexa       Solexa+64, 67 values (-5, 62) (-5 to 40 expected in raw reads)
-
-Diagram adapted from http://en.wikipedia.org/wiki/FASTQ_format
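-
-Each visible quality character encodes the score ``ord( character ) - offset``; for example::
-
-    ord( 'I' ) - 33   #= 40 in Sanger (Phred+33)
-    ord( 'h' ) - 64   #= 40 in Illumina 1.3+ (Phred+64)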
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-
-.. _Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.: http://www.ncbi.nlm.nih.gov/pubmed/20015970
-
-  </help>
-</tool>
--- a/tools/fastq/fastq_manipulation.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-#Dan Blankenberg
-import sys, os, shutil
-import imp
-from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
-
-def main():
-    #Read command line arguments
-    input_filename = sys.argv[1]
-    script_filename = sys.argv[2]
-    output_filename = sys.argv[3]
-    additional_files_path = sys.argv[4]
-    input_type = sys.argv[5] or 'sanger'
-    
-    #Save script file for debugging/verification info later
-    os.mkdir( additional_files_path )
-    shutil.copy( script_filename, os.path.join( additional_files_path, 'debug.txt' ) )
-    
-    fastq_manipulator = imp.load_module( 'fastq_manipulator', open( script_filename ), script_filename, ( '', 'r', imp.PY_SOURCE ) )
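-    #The loaded module must expose match_and_manipulate_read( read ), which
-    #returns the (possibly modified) read, or None to drop it. A minimal
-    #hand-written equivalent (hypothetical, for illustration) would be:
-    #    def match_and_manipulate_read( fastq_read ):
-    #        return fastq_read  #match everything, change nothing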
-    
-    out = fastqWriter( open( output_filename, 'wb' ), format = input_type )
-    
-    i = None
-    reads_manipulated = 0
-    for i, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ):
-        new_read = fastq_manipulator.match_and_manipulate_read( fastq_read )
-        if new_read:
-            out.write( new_read )
-        if new_read != fastq_read:
-            reads_manipulated += 1
-    out.close()
-    if i is None:
-        print "Your file contains no valid FASTQ reads."
-    else:
-        print 'Manipulated %s of %s reads (%.2f%%).' % ( reads_manipulated, i + 1, float( reads_manipulated ) / float( i + 1 ) * 100.0 )
-
-if __name__ == "__main__":
-    main()
--- a/tools/fastq/fastq_manipulation.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,429 +0,0 @@
-<tool id="fastq_manipulation" name="Manipulate FASTQ" version="1.0.1">
-  <options sanitize="False" /> <!-- This tool uses a file to rely all parameter information (actually a dynamically generated python module), we can safely not sanitize any parameters -->
-  <description>reads on various attributes</description>
-  <command interpreter="python">fastq_manipulation.py $input_file $fastq_manipulation_file $output_file $output_file.files_path '${input_file.extension[len( 'fastq' ):]}'</command>
-  <inputs>
-    <!-- This tool is purposely over-engineered (e.g. Single option conditionals) to allow easy enhancement with workflow/rerun compatibility -->
-    <page>
-      <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File" help="Requires groomed data: if your data does not appear here try using the FASTQ groomer."/>
-      <!-- Match Reads -->
-      <repeat name="match_blocks" title="Match Reads">
-        <conditional name="match_type">
-          <param name="match_type_selector" type="select" label="Match Reads by">
-            <option value="identifier">Name/Identifier</option>
-            <option value="sequence">Sequence Content</option>
-            <option value="quality">Quality Score Content</option>
-          </param>
-          <when value="identifier">
-            <conditional name="match">
-              <param name="match_selector" type="select" label="Identifier Match Type">
-                <option value="regex">Regular Expression</option>
-              </param>
-              <when value="regex">
-                <param type="text" name="match_by" label="Match by" value=".*" />
-              </when>
-            </conditional>
-          </when>
-          <when value="sequence">
-            <conditional name="match">
-              <param name="match_selector" type="select" label="Sequence Match Type">
-                <option value="regex">Regular Expression</option>
-              </param>
-              <when value="regex">
-                <param type="text" name="match_by" label="Match by" value=".*" />
-              </when>
-            </conditional>
-          </when>
-          <when value="quality">
-            <conditional name="match">
-              <param name="match_selector" type="select" label="Quality Match Type">
-                <option value="regex">Regular Expression</option>
-              </param>
-              <when value="regex">
-                <param type="text" name="match_by" label="Match by" value=".*" />
-              </when>
-            </conditional>
-          </when>
-        </conditional>
-      </repeat>
-      <!-- Manipulate Matched Reads -->
-      <repeat name="manipulate_blocks" title="Manipulate Reads">
-        <conditional name="manipulation_type">
-          <param name="manipulation_type_selector" type="select" label="Manipulate Reads on">
-            <option value="identifier">Name/Identifier</option>
-            <option value="sequence">Sequence Content</option>
-            <option value="quality">Quality Score Content</option>
-            <option value="miscellaneous">Miscellaneous Actions</option>
-          </param>
-          <when value="identifier">
-            <conditional name="manipulation">
-              <param name="manipulation_selector" type="select" label="Identifier Manipulation Type">
-                <option value="translate">String Translate</option>
-              </param>
-              <when value="translate">
-                <param name="from" type="text" label="From" value="" />
-                <param name="to" type="text" label="To" value="" />
-              </when>
-            </conditional>
-          </when>
-          <when value="sequence">
-            <conditional name="manipulation">
-              <param name="manipulation_selector" type="select" label="Sequence Manipulation Type">
-                <option value="rev_comp">Reverse Complement</option>
-                <option value="rev_no_comp">Reverse, No Complement</option>
-                <option value="no_rev_comp">Complement, No Reverse</option>
-                <option value="trim">Trim</option>
-                <option value="dna_to_rna">DNA to RNA</option>
-                <option value="rna_to_dna">RNA to DNA</option>
-                <option value="translate">String Translate</option>
-                <option value="change_adapter">Change Adapter Base</option>
-              </param>
-              <when value="rev_comp">
-                <!-- no extra settings -->
-              </when>
-              <when value="rev_no_comp">
-                <!-- no extra settings -->
-              </when>
-              <when value="no_rev_comp">
-                <!-- no extra settings -->
-              </when>
-              <when value="trim">
-                <conditional name="offset_type">
-                  <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
-                    <option value="offsets_absolute" selected="true">Absolute Values</option>
-                    <option value="offsets_percent">Percentage of Read Length</option>
-                  </param>
-                  <when value="offsets_absolute">
-                    <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left">
-                      <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
-                      <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
-                    </param>
-                    <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right">
-                      <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
-                      <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
-                    </param>
-                  </when>
-                  <when value="offsets_percent">
-                    <param name="left_column_offset" label="Offset from 5' end" value="0" type="float">
-                      <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
-                    </param>
-                    <param name="right_column_offset" label="Offset from 3' end" value="0" type="float">
-                      <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
-                    </param>
-                  </when>
-                </conditional>
-                <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" selected="False"/>
-              </when>
-              <when value="dna_to_rna">
-                <!-- no extra settings -->
-              </when>
-              <when value="rna_to_dna">
-                <!-- no extra settings -->
-              </when>
-              <when value="translate">
-                <param name="from" type="text" label="From" value="" />
-                <param name="to" type="text" label="To" value="" />
-              </when>
-              <when value="change_adapter">
-                <param name="new_adapter" label="New Adapter" type="text" value="G" help="An empty string will remove the adapter base" />
-              </when>
-            </conditional>
-          </when>
-          <when value="quality">
-            <conditional name="manipulation">
-              <param name="manipulation_selector" type="select" label="Quality Manipulation Type">
-                <option value="translate">String Translate</option>
-                <!-- <option value="modify_each_score">Apply Transformation to each Score</option> Not enabled yet-->
-              </param>
-              <when value="translate">
-                <param name="from" type="text" label="From" value="" />
-                <param name="to" type="text" label="To" value="" />
-              </when>
-              <when value="modify_each_score">
-                <param name="map_score" type="text" label="Modify Score by" value="$score + 1" />
-              </when>
-            </conditional>
-          </when>
-          <when value="miscellaneous">
-            <conditional name="manipulation">
-              <param name="manipulation_selector" type="select" label="Miscellaneous Manipulation Type">
-                <option value="remove">Remove Read</option>
-              </param>
-              <when value="remove">
-                <!-- no extra settings -->
-              </when>
-            </conditional>
-          </when>
-        </conditional>
-      </repeat>
-    </page>
-  </inputs>
-  <configfiles>
-    <configfile name="fastq_manipulation_file">##create an importable module
-#import binascii
-import re
-import binascii
-from string import maketrans
-##does read match
-def match_read( fastq_read ):
-    #for $match_block in $match_blocks:
-        #if $match_block['match_type']['match_type_selector'] == 'identifier':
-    search_target = fastq_read.identifier[1:] ##don't include @
-        #elif $match_block['match_type']['match_type_selector'] == 'sequence':
-    search_target = fastq_read.sequence
-        #elif $match_block['match_type']['match_type_selector'] == 'quality':
-    search_target = fastq_read.quality
-        #else:
-        #continue
-        #end if
-    if not re.search( binascii.unhexlify( "${ binascii.hexlify( str( match_block['match_type']['match']['match_by'] ) ) }" ), search_target  ):
-        return False
-    #end for
-    return True
-##modify matched reads
-def manipulate_read( fastq_read ):
-    new_read = fastq_read.clone()
-    #for $manipulate_block in $manipulate_blocks:
-        #if $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'identifier':
-            #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
-    new_read.identifier = "@%s" % new_read.identifier[1:].translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) )
-            #end if
-        #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'sequence':
-            #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
-    new_read.sequence = new_read.sequence.translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) )
-            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rev_comp':
-    new_read = new_read.reverse_complement()
-            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rev_no_comp':
-    new_read = new_read.reverse()
-            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'no_rev_comp':
-    new_read = new_read.complement()
-            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'trim':
-                #if $manipulate_block['manipulation_type']['manipulation']['offset_type']['base_offset_type'] == 'offsets_percent':
-    left_column_offset = int( round( float( ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] } ) / 100.0 * float( len( new_read ) ) ) )
-    right_column_offset = int( round( float( ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] } ) / 100.0 * float( len( new_read ) ) ) )
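-    ## e.g. a 47.87% offset on a 94-base read trims int( round( 0.4787 * 94 ) ) = 45 bases from that end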
-                #else
-    left_column_offset = ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] }
-    right_column_offset = ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] }
-                #end if
-    if right_column_offset > 0:
-        right_column_offset = -right_column_offset
-    else:
-        right_column_offset = None
-    new_read = new_read.slice( left_column_offset, right_column_offset )
-    if not ( ${str( manipulate_block['manipulation_type']['manipulation']['keep_zero_length'] ) == 'keep_zero_length'} or len( new_read ) ):
-        return None
-            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'dna_to_rna':
-    new_read = new_read.sequence_as_RNA()
-            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rna_to_dna':
-    new_read = new_read.sequence_as_DNA()
-            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'change_adapter':
-    if new_read.sequence_space == 'color':
-        new_read = new_read.change_adapter( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['new_adapter'] ) ) }" ) )
-            #end if
-        #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'quality':
-            #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
-    new_read.quality = new_read.quality.translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) )
-            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'map_score':
-    def score_method( score ):
-        raise Exception, "Unimplemented" ##This option is not yet available, need to abstract out e.g. column adding tool action: preventing users from using 'harmful' actions
-    new_read.quality_map( score_method )
-            #end if
-        #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'miscellaneous':
-            #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'remove':
-    return None
-            #end if
-        #else:
-        #continue
-        #end if
-    #end for
-    if new_read.description != "+":
-        new_read.description = "+%s" % new_read.identifier[1:] ##ensure description is still valid
-    return new_read
-def match_and_manipulate_read( fastq_read ):
-    new_read = fastq_read
-    if match_read( fastq_read ):
-        new_read = manipulate_read( fastq_read )
-    return new_read
-</configfile>
-  </configfiles>
-  <outputs>
-    <data format="input" name="output_file" />
-  </outputs>
-  <tests>
-    <!-- match all and do nothing -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="identifier" />
-      <param name="manipulation_selector" value="translate" />
-      <param name="from" value="" />
-      <param name="to" value="" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <!-- match None and do nothing -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value="STRINGDOESNOTEXIST" />
-      <param name="manipulation_type_selector" value="identifier" />
-      <param name="manipulation_selector" value="translate" />
-      <param name="from" value="" />
-      <param name="to" value="" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <!-- match all and remove -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="miscellaneous" />
-      <param name="manipulation_selector" value="remove" />
-      <output name="output_file" file="empty_file.dat" />
-    </test>
-    <!-- match None and remove -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value="STRINGDOESNOTEXIST" />
-      <param name="manipulation_type_selector" value="miscellaneous" />
-      <param name="manipulation_selector" value="remove" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <!-- match all and trim to 4 inner-most bases -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="trim" />
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="45"/>
-      <param name="right_column_offset" value="45"/>
-      <param name="keep_zero_length" value="true" />
-      <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="trim" />
-      <param name="base_offset_type" value="offsets_percent"/>
-      <param name="left_column_offset" value="47.87"/>
-      <param name="right_column_offset" value="47.87"/>
-      <param name="keep_zero_length" value="true" />
-      <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
-    </test>
-    <!-- match all and rev comp -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="rev_comp" />
-      <output name="output_file" file="sanger_full_range_rev_comp.fastqsanger" />
-    </test>
-    <!-- match all and rev comp, with ambiguous DNA -->
-    <test>
-      <param name="input_file" value="misc_dna_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="rev_comp" />
-      <output name="output_file" file="misc_dna_as_sanger_rev_comp_1.fastqsanger" />
-    </test>
-    <!-- match all and rev comp, with ambiguous RNA -->
-    <test>
-      <param name="input_file" value="misc_rna_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="rev_comp" />
-      <output name="output_file" file="misc_rna_as_sanger_rev_comp_1.fastqsanger" />
-    </test>
-    <!-- match first seq and rev comp -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value="FAKE0001" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="rev_comp" />
-      <output name="output_file" file="sanger_full_range_rev_comp_1_seq.fastqsanger" />
-    </test>
-    <!-- match first seq and rev comp: i.e. undo above -->
-    <test>
-      <param name="input_file" value="sanger_full_range_rev_comp_1_seq.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value="FAKE0001" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="rev_comp" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <!-- match all and DNA to RNA -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="dna_to_rna" />
-      <output name="output_file" file="sanger_full_range_as_rna.fastqsanger" />
-    </test>
-    <!-- match all and RNA to DNA -->
-    <test>
-      <param name="input_file" value="sanger_full_range_as_rna.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="rna_to_dna" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-  </tests>
-<help>
-This tool allows you to build complex manipulations to be applied to each matching read in a FASTQ file. A read must satisfy all match directives in order to be manipulated; reads that do not match are output unmodified. Every matching read has each of the specified manipulations performed on it, in the order specified.
-
-Regular expression matches are made using re.search; see http://docs.python.org/library/re.html for more information.
-  All matching is performed on a single-line string, regardless of whether, e.g., the sequence or quality score spans multiple lines in the original file.
-
-String translations are performed using string.translate; see http://docs.python.org/library/string.html#string.translate and http://docs.python.org/library/string.html#string.maketrans for more information.
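-
-For example, a translation with **From** set to "AC" and **To** set to "TG" behaves as follows (illustrative)::
-
-    from string import maketrans
-    'ACGT'.translate( maketrans( 'AC', 'TG' ) )   #gives 'TGGT'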
-
-.. class:: warningmark
-
-Only color space reads can have adapter bases substituted.
-
-
------
-
-**Example**
-
-Suppose you have a color space sanger formatted sequence (fastqcssanger) and you want to double-encode the color space into pseudo-nucleotide space (this is different from converting), to allow these reads to be used in tools which do not natively support color space (using specially designed indexes). This tool can handle this manipulation; however, it is generally not recommended, as results tend to be poorer than those produced by tools designed specifically for color space data.
-
-Steps:
-
-1. Click **Add new Match Reads** and leave the matching options set to the default (matching by sequence name/identifier using the regular expression ".*", thereby matching all reads).
-2. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "Change Adapter Base" and set **New Adapter** to "" (an empty text field). 
-3. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "String Translate" and set **From** to "0123." and **To** to "ACGTN".
-4. Click Execute. The new history item will contain double-encoded pseudo-nucleotide space reads, as illustrated below.
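-
-For illustration, a single (hypothetical) record::
-
-    @read1
-    T0123
-    +read1
-    IIII
-
-would be transformed into::
-
-    @read1
-    ACGT
-    +read1
-    IIII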
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-
-</help>
-</tool>
--- a/tools/fastq/fastq_masker_by_quality.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,83 +0,0 @@
-#Dan Blankenberg
-import string
-from optparse import OptionParser
-from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
-
-
-def get_score_comparer( operator ):
-    if operator == 'gt':
-        return compare_gt
-    elif operator == 'ge':
-        return compare_ge
-    elif operator == 'eq':
-        return compare_eq
-    elif operator == 'lt':
-        return compare_lt
-    elif operator == 'le':
-        return compare_le
-    elif operator == 'ne':
-        return compare_ne
-    raise Exception( 'Invalid operator provided: %s' % operator )
-
-def compare_gt( quality_score, threshold_value ):
-    return quality_score > threshold_value
-
-def compare_ge( quality_score, threshold_value ):
-    return quality_score >= threshold_value
-
-def compare_eq( quality_score, threshold_value ):
-    return quality_score == threshold_value
-
-def compare_ne( quality_score, threshold_value ):
-    return quality_score != threshold_value
-
-def compare_lt( quality_score, threshold_value ):
-    return quality_score < threshold_value
-
-def compare_le( quality_score, threshold_value ):
-    return quality_score <= threshold_value
-
-class BaseReplacer( object ):
-    def __init__( self, replace_character ):
-        self.replace_character = replace_character
-    def __call__( self, base_character ):
-        return self.replace_character
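-
-#BaseReplacer mirrors the callable interface of string.lower, so either can be
-#used as the per-base masker below; e.g. BaseReplacer( 'N' )( 'a' ) returns 'N'.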
-
-def main():
-    usage = "usage: %prog [options] input_file output_file"
-    parser = OptionParser( usage=usage )
-    parser.add_option( '-f', '--format', dest='format', type='choice', default='sanger', choices=( 'sanger', 'solexa', 'illumina' ), help='FASTQ variant type' )
-    parser.add_option( '-m', '--mask_character', dest='mask_character', default='N', help='Mask Character to use' )
-    parser.add_option( '-c', '--score_comparison', type="choice", dest='score_comparison', default='le', choices=('gt','ge','eq','lt', 'le', 'ne' ), help='Mask base when score is' )
-    parser.add_option( '-s', '--quality_score', type="float", dest='quality_score', default='0', help='Quality Score' )
-    parser.add_option( "-l", "--lowercase", action="store_true", dest="lowercase", default=False, help="Use lowercase masking")
-    ( options, args ) = parser.parse_args()
-    
-    if len ( args ) != 2:
-        parser.error( "Need to specify an input file and an output file" )
-    
-    score_comparer = get_score_comparer( options.score_comparison )
-    
-    if options.lowercase:
-        base_masker = string.lower
-    else:
-        base_masker = BaseReplacer( options.mask_character )
-    
-    out = fastqWriter( open( args[1], 'wb' ), format = options.format )
-    
-    num_reads = None
-    num_reads_excluded = 0
-    for num_reads, fastq_read in enumerate( fastqReader( open( args[0] ), format = options.format ) ):
-        sequence_list = list( fastq_read.sequence )
-        for i, quality_score in enumerate( fastq_read.get_decimal_quality_scores() ):
-            if score_comparer( quality_score, options.quality_score ):
-                sequence_list[ i ] = base_masker( sequence_list[ i ] )
-        fastq_read.sequence = "".join( sequence_list )
-        out.write( fastq_read )
-    
-    if num_reads is not None:
-        print "Processed %i %s reads." % ( num_reads + 1, options.format )
-    else:
-        print "No valid FASTQ reads were provided."
-
-if __name__ == "__main__": main()
--- a/tools/fastq/fastq_masker_by_quality.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-<tool id="fastq_masker_by_quality" name="FASTQ Masker" version="1.0.0">
-  <description>by quality score</description>
-  <command interpreter="python">fastq_masker_by_quality.py '$input_file' '$output_file' -f '${input_file.extension[len( 'fastq' ):]}' -s '${quality_score}' -c '${score_comparison}'
-      #if $mask_type.value == 'lowercase'
-      --lowercase
-      #else
-      -m '${mask_type}'
-      #end if
-  </command>
-  <inputs>
-    <param name="input_file" type="data" format="fastqsanger" label="File to mask" />
-    <param name="mask_type" type="select" label="Mask input with">
-      <option value="N">N's</option>
-      <option value="lowercase">Lowercase</option>
-    </param>
-    <param name="score_comparison" type="select" label="When score is">
-      <option value="le" selected="True">Less than or equal</option>
-      <option value="lt">Less than</option>
-      <option value="eq">Equal to</option>
-      <option value="ne">Not Equal to</option>
-      <option value="ge">Greater than</option>
-      <option value="gt">Greater than or equal</option>
-    </param>
-    <param name="quality_score" type="integer" value="0" label="Quality score"/>
-  </inputs>
-  <outputs>
-    <data name="output_file" format="fastqsanger" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="mask_type" value="N" />
-      <param name="score_comparison" value="le" />
-      <param name="quality_score" value="20" />
-      <output name="output_file" file="sanger_full_range_masked_N.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="mask_type" value="lowercase" />
-      <param name="score_comparison" value="le" />
-      <param name="quality_score" value="20" />
-      <output name="output_file" file="sanger_full_range_masked_lowercase.fastqsanger" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-This tool masks base characters in a FASTQ file according to a user-specified quality score value and comparison method.
-
-This tool is not available for use on color space (csSanger) formats.
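-
-For example, masking with N's when the score is less than or equal to 20, a (hypothetical) Sanger read::
-
-    @read1
-    ACGTACGT
-    +read1
-    !!IIII!!
-
-would become::
-
-    @read1
-    NNGTACNN
-    +read1
-    !!IIII!!
-
-since '!' encodes score 0 and 'I' encodes score 40.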
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/fastq/fastq_paired_end_deinterlacer.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-#Florent Angly
-import sys
-from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner
-
-def main():
-    input_filename   = sys.argv[1]
-    input_type       = sys.argv[2] or 'sanger'
-    mate1_filename   = sys.argv[3]
-    mate2_filename   = sys.argv[4]
-    single1_filename = sys.argv[5]
-    single2_filename = sys.argv[6]
-
-    type        = input_type
-    input       = fastqNamedReader( open( input_filename, 'rb' ), format = type  )
-    mate1_out   = fastqWriter( open( mate1_filename, 'wb' ), format = type )
-    mate2_out   = fastqWriter( open( mate2_filename, 'wb' ), format = type )
-    single1_out = fastqWriter( open( single1_filename, 'wb' ), format = type )
-    single2_out = fastqWriter( open( single2_filename, 'wb' ), format = type )
-    joiner      = fastqJoiner( type )
-
-    i = None
-    skip_count = 0
-    found = {}
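-    #identifiers recorded in 'found' were already written as the second member
-    #of a pair; skip them when the sequential reader reaches them again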
-    for i, mate1 in enumerate( fastqReader( open( input_filename, 'rb' ), format = type ) ):
-     
-        if mate1.identifier in found:
-            del found[mate1.identifier]
-            continue
-
-        mate2 = input.get( joiner.get_paired_identifier( mate1 ) )
-
-        if mate2:
-            # This is a mate pair
-            found[mate2.identifier] = None
-            if joiner.is_first_mate( mate1 ):
-                mate1_out.write( mate1 )
-                mate2_out.write( mate2 )
-            else:
-                mate1_out.write( mate2 )
-                mate2_out.write( mate1 )
-        else:
-            # This is a single
-            skip_count += 1
-            if joiner.is_first_mate( mate1 ):
-                single1_out.write( mate1 )
-            else:
-                single2_out.write( mate1 )
-
-    if i is None:
-        print "Your input file contained no valid FASTQ sequences."
-    else:
-        if skip_count:
-            print 'There were %i reads with no mate.' % skip_count
-        print 'De-interlaced %s pairs of sequences.' % ( (i - skip_count + 1)/2 )
-
-    input.close()
-    mate1_out.close()
-    mate2_out.close()
-    single1_out.close()
-    single2_out.close()
-
- 
-if __name__ == "__main__":
-    main()
--- a/tools/fastq/fastq_paired_end_deinterlacer.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,70 +0,0 @@
-<tool id="fastq_paired_end_deinterlacer" name="FASTQ de-interlacer" version="1.1">
-  <description>on paired end reads</description>
-  <command interpreter="python">fastq_paired_end_deinterlacer.py '$input_file' '${input_file.extension[len( 'fastq' ):]}' '$output1_pairs_file' '$output2_pairs_file' '$output1_singles_file' '$output2_singles_file'</command>
-  <inputs>
-    <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ reads" />
-  </inputs>
-  <outputs>
-    <data name="output1_pairs_file" format="input" label="FASTQ de-interlacer left mates from data ${input_file.hid}" />
-    <data name="output2_pairs_file" format="input" label="FASTQ de-interlacer right mates from data ${input_file.hid}"/>
-    <data name="output1_singles_file" format="input" label="FASTQ de-interlacer left singles from data ${input_file.hid}"/>
-    <data name="output2_singles_file" format="input" label="FASTQ de-interlacer right singles from data ${input_file.hid}"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input_file" value="paired_end_merged.fastqsanger" ftype="fastqsanger" />
-      <output name="output1_pairs_file" file="paired_end_1.fastqsanger" />
-      <output name="output2_pairs_file" file="paired_end_2.fastqsanger" />
-      <output name="output1_singles_file" file="paired_end_1_singles.fastqsanger" />
-      <output name="output2_singles_file" file="paired_end_2_singles.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="paired_end_merged_errors.fastqsanger" ftype="fastqsanger" />
-      <output name="output1_pairs_file" file="paired_end_1_cleaned.fastqsanger" />
-      <output name="output2_pairs_file" file="paired_end_2_cleaned.fastqsanger" />
-      <output name="output1_singles_file" file="paired_end_1_cleaned_singles.fastqsanger" />
-      <output name="output2_singles_file" file="paired_end_2_cleaned_singles.fastqsanger" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-De-interlaces a single fastq dataset representing a paired-end run into two fastq datasets, one containing only the first mates and the other only the second mates. Reads without a mate are saved in separate output files.
-
-Sequence identifiers for paired-end reads must follow the /1 and /2 convention.
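-
-A sketch of how the paired identifier is derived (illustrative; not the actual galaxy_utils implementation)::
-
-    def get_paired_identifier( identifier ):
-        #turn '@1539:931/1' into '@1539:931/2' and vice versa
-        if identifier.endswith( '/1' ):
-            return identifier[:-1] + '2'
-        elif identifier.endswith( '/2' ):
-            return identifier[:-1] + '1'
-        return identifier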
-
------
-
-**Input**
-
-A multiple-fastq file containing paired-end reads, for example::
-
-    @1539:931/1
-    ACTTCCCGCGCGTGAAGGCGCCGGCAAACGAGGCTCGGGAAGGGGCTCCCG
-    +1539:931/1
-    BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-    @1539:931/2
-    CGCCATTCCGAATCGTAGTTGTCGGCGTCTTCCAGTGCGGCAAGGCATCGT
-    +1539:931/2
-    WNUUZ\P^`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-
------
-
-**Output**
-
-Multi-fastq file with left-hand mate only::
-
-    @1539:931/1
-    ACTTCCCGCGCGTGAAGGCGCCGGCAAACGAGGCTCGGGAAGGGGCTCCCG
-    +1539:931/1
-    BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-
-Multi-fastq file with right-hand mate only::
-
-    @1539:931/2
-    CGCCATTCCGAATCGTAGTTGTCGGCGTCTTCCAGTGCGGCAAGGCATCGT
-    +1539:931/2
-    WNUUZ\P^`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-
-  </help>
-</tool>
--- a/tools/fastq/fastq_paired_end_interlacer.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-#Florent Angly
-import sys
-from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner
-
-def main():
-    mate1_filename   = sys.argv[1]
-    mate1_type       = sys.argv[2] or 'sanger'
-    mate2_filename   = sys.argv[3]
-    mate2_type       = sys.argv[4] or 'sanger'
-    outfile_pairs    = sys.argv[5]
-    outfile_singles = sys.argv[6]
-
-    if mate1_type != mate2_type:
-        print "WARNING: You are trying to interlace files of two different types: %s and %s." % ( mate1_type, mate2_type )
-        return
-
-    type = mate1_type
-    joiner = fastqJoiner( type )
-    out_pairs = fastqWriter( open( outfile_pairs, 'wb' ), format = type )
-    out_singles = fastqWriter( open( outfile_singles, 'wb' ), format = type )
-
-    # Pairs + singles present in mate1
-    nof_singles = 0
-    nof_pairs   = 0
-    mate2_input = fastqNamedReader( open( mate2_filename, 'rb' ), format = type )
-    i = None
-    for i, mate1 in enumerate( fastqReader( open( mate1_filename, 'rb' ), format = type ) ):
-        mate2 = mate2_input.get( joiner.get_paired_identifier( mate1 ) )
-        if mate2:
-            out_pairs.write( mate1 )
-            out_pairs.write( mate2 )
-            nof_pairs += 1
-        else:
-            out_singles.write( mate1 )
-            nof_singles += 1
-
-    # Singles present in mate2
-    mate1_input = fastqNamedReader( open( mate1_filename, 'rb' ), format = type )
-    j = None
-    for j, mate2 in enumerate( fastqReader( open( mate2_filename, 'rb' ), format = type ) ):
-        mate1 = mate1_input.get( joiner.get_paired_identifier( mate2 ) )
-        if not mate1:
-            out_singles.write( mate2 )
-            nof_singles += 1
-
-    if (i is None) and (j is None):
-        print "Your input files contained no valid FASTQ sequences."
-    else:
-        print 'There were %s single reads.' % ( nof_singles )
-        print 'Interlaced %s pairs of sequences.' % ( nof_pairs )
-
-    mate1_input.close()
-    mate2_input.close()
-    out_pairs.close()
-    out_singles.close()
-
- 
-if __name__ == "__main__":
-    main()
--- a/tools/fastq/fastq_paired_end_interlacer.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-<tool id="fastq_paired_end_interlacer" name="FASTQ interlacer" version="1.1">
-  <description>on paired end reads</description>
-  <command interpreter="python">fastq_paired_end_interlacer.py '$input1_file' '${input1_file.extension[len( 'fastq' ):]}' '$input2_file' '${input2_file.extension[len( 'fastq' ):]}' '$outfile_pairs' '$outfile_singles'</command>
-  <inputs>
-    <param name="input1_file" type="data" format="fastqsanger,fastqcssanger" label="Left-hand mates" />
-    <param name="input2_file" type="data" format="fastqsanger,fastqcssanger" label="Right-hand mates" />
-  </inputs>
-  <outputs>
-    <!-- $input1_file.name = filename  , e.g. paired_end_2_errors.fastqsanger -->
-    <!-- $input1_file.id   = ID        , e.g. 10 -->
-    <!-- $input1_file.hid  = history ID, e.g. 5  -->
-    <data name="outfile_pairs"   format="input" label="FASTQ interlacer pairs from data ${input1_file.hid} and data ${input2_file.hid}"/>
-    <data name="outfile_singles" format="input" label="FASTQ interlacer singles from data ${input1_file.hid} and data ${input2_file.hid}"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1_file" value="paired_end_1.fastqsanger" ftype="fastqsanger" />
-      <param name="input2_file" value="paired_end_2.fastqsanger" ftype="fastqsanger" />
-      <output name="outfile_pairs" file="paired_end_merged.fastqsanger" />
-      <output name="outfile_singles" file="paired_end_merged_singles.fastqsanger" />
-    </test>
-    <test>
-      <param name="input1_file" value="paired_end_1_errors.fastqsanger" ftype="fastqsanger" />
-      <param name="input2_file" value="paired_end_2_errors.fastqsanger" ftype="fastqsanger" />
-      <output name="outfile_pairs" file="paired_end_merged_cleaned.fastqsanger" />
-      <output name="outfile_singles" file="paired_end_merged_cleaned_singles.fastqsanger" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-This tool joins paired end FASTQ reads from two separate files, one with the left mates and one with the right mates, into a single file where left mates alternate with their right mates. The join is performed using sequence identifiers, allowing the two files to be ordered differently. If a sequence identifier does not appear in both files, the read is written to a separate singles file.
-
-Sequence identifiers with /1 and /2 appended override the left-hand and right-hand designation; i.e. if the reads end with /1 and /2, the read containing /1 will be used as the left-hand read and the read containing /2 will be used as the right-hand read. Sequences without this designation will follow the left-hand and right-hand settings set by the user.
-
------
-
-**Input**
-
-Left-hand mates, for example::
-
-    @1539:931/1
-    ACTTCCCGCGCGTGAAGGCGCCGGCAAACGAGGCTCGGGAAGGGGCTCCCG
-    +1539:931/1
-    BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-
-Right-hand mates, for example::
-
-    @1539:931/2
-    CGCCATTCCGAATCGTAGTTGTCGGCGTCTTCCAGTGCGGCAAGGCATCGT
-    +1539:931/2
-    WNUUZ\P^`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-
------
-
-**Output**
-
-A multiple-fastq file containing interlaced left and right paired reads::
-
-    @1539:931/1
-    ACTTCCCGCGCGTGAAGGCGCCGGCAAACGAGGCTCGGGAAGGGGCTCCCG
-    +1539:931/1
-    BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-    @1539:931/2
-    CGCCATTCCGAATCGTAGTTGTCGGCGTCTTCCAGTGCGGCAAGGCATCGT
-    +1539:931/2
-    WNUUZ\P^`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-
-A multiple-fastq file containing reads that have no mate is also produced.
-
-  </help>
-</tool>
--- a/tools/fastq/fastq_paired_end_joiner.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-#Dan Blankenberg
-import sys, os, shutil
-from galaxy_utils.sequence.fastq import fastqReader, fastqNamedReader, fastqWriter, fastqJoiner
-
-def main():
-    #Read command line arguments
-    input1_filename = sys.argv[1]
-    input1_type = sys.argv[2] or 'sanger'
-    input2_filename = sys.argv[3]
-    input2_type = sys.argv[4] or 'sanger'
-    output_filename = sys.argv[5]
-    
-    if input1_type != input2_type:
-        print "WARNING: You are trying to join files of two different types: %s and %s." % ( input1_type, input2_type )
-    
-    input2 = fastqNamedReader( open( input2_filename, 'rb' ), input2_type )
-    joiner = fastqJoiner( input1_type )
-    out = fastqWriter( open( output_filename, 'wb' ), format = input1_type )
-    
-    i = None
-    skip_count = 0
-    for i, fastq_read in enumerate( fastqReader( open( input1_filename, 'rb' ), format = input1_type ) ):
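-        #get_paired_identifier returns the mate's title (e.g. swapping a trailing /1 for /2), so the two files may list reads in different orders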
-        identifier = joiner.get_paired_identifier( fastq_read )
-        fastq_paired = input2.get( identifier )
-        if fastq_paired is None:
-            skip_count += 1
-        else:
-            out.write( joiner.join( fastq_read, fastq_paired ) )
-    out.close()
-    
-    if i is None:
-        print "Your file contains no valid FASTQ reads."
-    else:
-        print input2.has_data()
-        print 'Joined %s of %s read pairs (%.2f%%).' % ( i - skip_count + 1, i + 1, float( i - skip_count + 1 ) / float( i + 1 ) * 100.0 )
-
-if __name__ == "__main__":
-    main()
--- a/tools/fastq/fastq_paired_end_joiner.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-<tool id="fastq_paired_end_joiner" name="FASTQ joiner" version="1.0.0">
-  <description>on paired end reads</description>
-  <command interpreter="python">fastq_paired_end_joiner.py '$input1_file' '${input1_file.extension[len( 'fastq' ):]}' '$input2_file' '${input2_file.extension[len( 'fastq' ):]}' '$output_file'</command>
-  <inputs>
-    <param name="input1_file" type="data" format="fastqsanger,fastqcssanger" label="Left-hand Reads" />
-    <param name="input2_file" type="data" format="fastqsanger,fastqcssanger" label="Right-hand Reads" />
-  </inputs>
-  <outputs>
-    <data name="output_file" format="input" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1_file" value="split_pair_reads_1.fastqsanger" ftype="fastqsanger" />
-      <param name="input2_file" value="split_pair_reads_2.fastqsanger" ftype="fastqsanger" />
-      <output name="output_file" file="3.fastqsanger" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-This tool joins paired end FASTQ reads from two separate files into a single read in one file. The join is performed using sequence identifiers, so the two files may list their reads in different orders. Any sequence identifier that does not appear in both files is excluded from the output.
-
-Sequence identifiers with /1 and /2 appended override the left-hand and right-hand designation; i.e. if the reads end with /1 and /2, the read containing /1 will be used as the left-hand read and the read containing /2 will be used as the right-hand read. Sequences without this designation will follow the left-hand and right-hand settings set by the user.
-
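-Conceptually (a sketch, not the tool's actual code), once two mates have been
-matched by identifier the join is plain concatenation, as the output example
-below shows::
-
-    joined_sequence = left_sequence + right_sequence
-    joined_quality  = left_quality + right_quality
-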
------
-
-**Input formats**
-
-Left-hand Read::
-
-    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
-    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC
-    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
-    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
-
-Right-hand Read::
-
-    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
-    GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
-    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
-    hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
-
------
-
-**Output**
-
-A multiple-fastq file, for example::
-
-    @HWI-EAS91_1_30788AAXX:7:21:1542:1758
-    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
-    +HWI-EAS91_1_30788AAXX:7:21:1542:1758
-    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/fastq/fastq_paired_end_splitter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-#Dan Blankenberg
-import sys, os, shutil
-from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqSplitter
-
-def main():
-    #Read command line arguments
-    input_filename = sys.argv[1]
-    input_type = sys.argv[2] or 'sanger'
-    output1_filename = sys.argv[3]
-    output2_filename = sys.argv[4]
-    
-    splitter = fastqSplitter()
-    out1 = fastqWriter( open( output1_filename, 'wb' ), format = input_type )
-    out2 = fastqWriter( open( output2_filename, 'wb' ), format = input_type )
-    
-    i = None
-    skip_count = 0
-    for i, fastq_read in enumerate( fastqReader( open( input_filename, 'rb' ), format = input_type ) ):
-        read1, read2 = splitter.split( fastq_read )
-        if read1 and read2:
-            out1.write( read1 )
-            out2.write( read2 )
-        else:
-            skip_count += 1
-    out1.close()
-    out2.close()
-    if i is None:
-        print "Your file contains no valid FASTQ reads."
-    else:
-        print 'Split %s of %s reads (%.2f%%).' % ( i - skip_count + 1, i + 1, float( i - skip_count + 1 ) / float( i + 1 ) * 100.0 )
-
-if __name__ == "__main__":
-    main()
--- a/tools/fastq/fastq_paired_end_splitter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-<tool id="fastq_paired_end_splitter" name="FASTQ splitter" version="1.0.0">
-  <description>on joined paired end reads</description>
-  <command interpreter="python">fastq_paired_end_splitter.py '$input1_file' '${input1_file.extension[len( 'fastq' ):]}' '$output1_file' '$output2_file'</command>
-  <inputs>
-    <param name="input1_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ reads" />
-  </inputs>
-  <outputs>
-    <data name="output1_file" format="input" />
-    <data name="output2_file" format="input" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1_file" value="3.fastqsanger" ftype="fastqsanger" />
-      <output name="output1_file" file="split_pair_reads_1.fastqsanger" />
-      <output name="output2_file" file="split_pair_reads_2.fastqsanger" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-Splits a single FASTQ dataset representing a paired-end run into two datasets (one for each end). This tool works only for datasets where both ends have **the same** length.
-
-Sequence identifiers will have /1 or /2 appended for the split left-hand and right-hand reads, respectively.
-
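-Because both ends must have the same length, the split point is simply the
-middle of the joined read. Conceptually (a sketch, not the tool's actual
-code)::
-
-    half = len(sequence) // 2
-    left_sequence, right_sequence = sequence[:half], sequence[half:]
-    left_quality, right_quality = quality[:half], quality[half:]
-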
------
-
-**Input format**
-
-A multiple-fastq file, for example::
-
-    @HWI-EAS91_1_30788AAXX:7:21:1542:1758
-    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
-    +HWI-EAS91_1_30788AAXX:7:21:1542:1758
-    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
-
-
------
-
-**Outputs**
-
-Left-hand Read::
-
-    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
-    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC
-    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
-    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
-
-Right-hand Read::
-
-    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
-    GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
-    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
-    hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/fastq/fastq_stats.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-#Dan Blankenberg
-import sys
-from galaxy_utils.sequence.fastq import fastqReader, fastqAggregator
-
-VALID_NUCLEOTIDES = [ 'A', 'C', 'G', 'T', 'N' ]
-VALID_COLOR_SPACE = map( str, range( 7 ) ) + [ '.' ]
-SUMMARY_STAT_ORDER = ['read_count', 'min_score', 'max_score', 'sum_score', 'mean_score', 'q1', 'med_score', 'q3', 'iqr', 'left_whisker', 'right_whisker' ]
-
-def main():
-    input_filename = sys.argv[1]
-    output_filename = sys.argv[2]
-    input_type = sys.argv[3] or 'sanger'
-    
-    aggregator = fastqAggregator()
-    num_reads = None
-    fastq_read = None
-    for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ):
-        aggregator.consume_read( fastq_read )
-    out = open( output_filename, 'wb' )
-    valid_nucleotides = VALID_NUCLEOTIDES
-    if fastq_read:
-        if fastq_read.sequence_space == 'base':
-            out.write( '#column\tcount\tmin\tmax\tsum\tmean\tQ1\tmed\tQ3\tIQR\tlW\trW\toutliers\tA_Count\tC_Count\tG_Count\tT_Count\tN_Count\tother_bases\tother_base_count\n' )
-        else:
-            out.write( '#column\tcount\tmin\tmax\tsum\tmean\tQ1\tmed\tQ3\tIQR\tlW\trW\toutliers\t0_Count\t1_Count\t2_Count\t3_Count\t4_Count\t5_Count\t6_Count\t._Count\tother_bases\tother_base_count\n' )
-            valid_nucleotides = VALID_COLOR_SPACE
-    for i in range( aggregator.get_max_read_length() ):
-        column_stats = aggregator.get_summary_statistics_for_column( i )
-        out.write( '%i\t' % ( i + 1 ) )
-        out.write( '%s\t' * len( SUMMARY_STAT_ORDER ) % tuple( [ column_stats[ key ] for key in SUMMARY_STAT_ORDER ] ) )
-        out.write( '%s\t' % ','.join( map( str, column_stats['outliers'] ) ) )
-        base_counts = aggregator.get_base_counts_for_column( i )
-        for nuc in valid_nucleotides:
-            out.write( "%s\t" % base_counts.get( nuc, 0 ) )
-        extra_nucs = sorted( [ nuc for nuc in base_counts.keys() if nuc not in valid_nucleotides ] )
-        out.write( "%s\t%s\n" % ( ','.join( extra_nucs ), ','.join( str( base_counts[nuc] ) for nuc in extra_nucs ) ) )
-    out.close()
-    if num_reads is None:
-        print "No valid fastq reads could be processed."
-    else:
-        print "%i fastq reads were processed." % ( num_reads + 1 )
-        print "Based upon quality values and sequence characters, the input data is valid for: %s" % ( ", ".join( aggregator.get_valid_formats() ) or "None" )
-        ascii_range = aggregator.get_ascii_range()
-        decimal_range =  aggregator.get_decimal_range()
-        print "Input ASCII range: %s(%i) - %s(%i)" % ( repr( ascii_range[0] ), ord( ascii_range[0] ), repr( ascii_range[1] ), ord( ascii_range[1] ) ) #print using repr, since \x00 (null) causes info truncation in galaxy when printed
-        print "Input decimal range: %i - %i" % ( decimal_range[0], decimal_range[1] )
-
-if __name__ == "__main__": main()
--- a/tools/fastq/fastq_stats.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-<tool id="fastq_stats" name="FASTQ Summary Statistics" version="1.0.0">
-  <description>by column</description>
-  <command interpreter="python">fastq_stats.py '$input_file' '$output_file' '${input_file.extension[len( 'fastq' ):]}'</command>
-  <inputs>
-    <param name="input_file" type="data" format="fastqsanger,fastqillumina,fastqsolexa,fastqcssanger" label="FASTQ File"/>
-  </inputs>
-  <outputs>
-    <data name="output_file" format="tabular" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input_file" value="fastq_stats1.fastq" ftype="fastqsanger" />
-      <output name="output_file" file="fastq_stats_1_out.tabular" />
-    </test>
-  </tests>
-  <help>
-This tool creates summary statistics on a FASTQ file. 
-
-.. class:: infomark
-
-**TIP:** This statistics report can be used as input for the **Boxplot** and **Nucleotides Distribution** tools.
-
------
-
-**The output file will contain the following fields:**
-
-* column      = column number (1 to 36 for a 36-cycle Solexa read file)
-* count       = Number of bases found in this column.
-* min         = Lowest quality score value found in this column.
-* max         = Highest quality score value found in this column.
-* sum         = Sum of quality score values for this column.
-* mean        = Mean quality score value for this column.
-* Q1          = 1st quartile quality score.
-* med         = Median quality score.
-* Q3          = 3rd quartile quality score.
-* IQR         = Inter-Quartile range (Q3-Q1).
-* lW          = 'Left-Whisker' value (for boxplotting).
-* rW          = 'Right-Whisker' value (for boxplotting).
-* outliers    = Scores falling beyond the left and right whiskers (comma separated list).
-* A_Count     = Count of 'A' nucleotides found in this column.
-* C_Count     = Count of 'C' nucleotides found in this column.
-* G_Count     = Count of 'G' nucleotides found in this column.
-* T_Count     = Count of 'T' nucleotides found in this column.
-* N_Count     = Count of 'N' nucleotides found in this column.
-* Other_Nucs  = Comma separated list of other nucleotides found in this column.
-* Other_Count = Comma separated count of other nucleotides found in this column.
-
-For example::
-
-  #column   count   min max sum mean    Q1  med Q3  IQR lW  rW  outliers    A_Count C_Count G_Count T_Count N_Count other_bases other_base_count
-  1   14336356    2   33  450600675   31.4306281875   32.0    33.0    33.0    1.0 31  33  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30    4482314 2199633 4425957 3208745 19707       
-  2   14336356    2   34  441135033   30.7703737965   30.0    33.0    33.0    3.0 26  34  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25   4419184 2170537 4627987 3118567 81      
-  3   14336356    2   34  433659182   30.2489127642   29.0    32.0    33.0    4.0 23  34  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22    4310988 2941988 3437467 3645784 129     
-  4   14336356    2   34  433635331   30.2472490917   29.0    32.0    33.0    4.0 23  34  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22    4110637 3007028 3671749 3546839 103     
-  5   14336356    2   34  432498583   30.167957813    29.0    32.0    33.0    4.0 23  34  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22    4348275 2935903 3293025 3759029 124     
-
------
-
-.. class:: warningmark
-
-Adapter bases in color space reads are excluded from statistics.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/fastq/fastq_to_fasta.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-#Dan Blankenberg
-import sys
-from galaxy_utils.sequence.fastq import fastqReader
-from galaxy_utils.sequence.fasta import fastaWriter
-
-def main():
-    input_filename = sys.argv[1]
-    output_filename = sys.argv[2]
-    input_type = sys.argv[3] or 'sanger' #input type should ordinarily be unnecessary
-    
-    num_reads = None
-    fastq_read = None
-    out = fastaWriter( open( output_filename, 'wb' ) )
-    for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ):
-        out.write( fastq_read )
-    out.close()
-    if num_reads is None:
-        print "No valid FASTQ reads could be processed."
-    else:
-        print "%i FASTQ reads were converted to FASTA." % ( num_reads + 1 )
-    
-if __name__ == "__main__": main()
--- a/tools/fastq/fastq_to_fasta.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-<tool id="fastq_to_fasta_python" name="FASTQ to FASTA" version="1.0.0">
-  <description>converter</description>
-  <command interpreter="python">fastq_to_fasta.py '$input_file' '$output_file' '${input_file.extension[len( 'fastq' ):]}'</command>
-  <inputs>
-    <param name="input_file" type="data" format="fastq" label="FASTQ file to convert" />
-  </inputs>
-  <outputs>
-    <data name="output_file" format="fasta" />
-  </outputs>
-  <tests>
-    <!-- basic test -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <output name="output_file" file="fastq_to_fasta_python_1.out" />
-    </test>
-    <!-- color space test -->
-    <test>
-      <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastqcssanger" />
-      <output name="output_file" file="fastq_to_fasta_python_2.out" />
-    </test>
-    <!-- test of ignoring invalid score values: this input has ascii characters falling outside of illumina range, but they should not matter -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqillumina" />
-      <output name="output_file" file="fastq_to_fasta_python_1.out" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-This tool converts FASTQ sequencing reads to FASTA sequences.
-
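-For example, the FASTQ record::
-
-    @read1
-    ACGTACGT
-    +
-    IIIIIIII
-
-becomes the FASTA record::
-
-    >read1
-    ACGTACGT
-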
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/fastq/fastq_to_tabular.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-#Dan Blankenberg
-import sys
-from galaxy_utils.sequence.fastq import fastqReader
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def main():
-    if len(sys.argv) != 5:
-        stop_err("Wrong number of arguments. Expect: fastq tabular descr_split [type]")
-    input_filename = sys.argv[1]
-    output_filename = sys.argv[2]
-    descr_split = int( sys.argv[3] ) - 1
-    if descr_split < 0:
-        stop_err("Bad description split value (should be 1 or more)")
-    input_type = sys.argv[4] or 'sanger' #input type should ordinarily be unnecessary
-    
-    num_reads = None
-    fastq_read = None
-    out = open( output_filename, 'wb' )
-    if descr_split == 0:
-        #Don't divide the description into multiple columns
-        for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ):
-            out.write( "%s\t%s\t%s\n" % ( fastq_read.identifier[1:].replace( '\t', ' ' ), fastq_read.sequence.replace( '\t', ' ' ), fastq_read.quality.replace( '\t', ' ' ) ) )
-    else:
-        for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ):
-            words = fastq_read.identifier[1:].replace( '\t', ' ' ).split(None, descr_split)
-            #pad with empty columns if required
-            words += [""]*(descr_split+1-len(words))
-            out.write( "%s\t%s\t%s\n" % ("\t".join(words), fastq_read.sequence.replace( '\t', ' ' ), fastq_read.quality.replace( '\t', ' ' ) ) )
-    out.close()
-    if num_reads is None:
-        print "No valid FASTQ reads could be processed."
-    else:
-        print "%i FASTQ reads were converted to Tabular." % ( num_reads + 1 )
-    
-if __name__ == "__main__": main()
--- a/tools/fastq/fastq_to_tabular.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-<tool id="fastq_to_tabular" name="FASTQ to Tabular" version="1.1.0">
-  <description>converter</description>
-  <command interpreter="python">fastq_to_tabular.py '$input_file' '$output_file' $descr_columns '${input_file.extension[len( 'fastq' ):]}'</command>
-  <inputs>
-    <param name="input_file" type="data" format="fastqsanger,fastqcssanger,fastqillumina,fastqsolexa" label="FASTQ file to convert" />
-    <param name="descr_columns" type="integer" size="2" value="1" label="How many columns to divide title string into?" help="Typically 2 to take the ID (first word) and description (rest) as two columns, or 1 to give a single column">
-      <validator type="in_range" min="1" />
-    </param>
-  </inputs>
-  <outputs>
-    <data name="output_file" format="tabular" />
-  </outputs>
-  <tests>
-    <!-- basic test -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="descr_columns" value="1"/>
-      <output name="output_file" file="fastq_to_tabular_out_1.tabular" />
-    </test>
-    <!-- color space test -->
-    <test>
-      <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastqcssanger" />
-      <param name="descr_columns" value="1"/>
-      <output name="output_file" file="fastq_to_tabular_out_2.tabular" />
-    </test>
-    <!-- split title into columns -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="descr_columns" value="2"/>
-      <output name="output_file" file="fastq_to_tabular_out_3.tabular" />
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool converts FASTQ sequencing reads to a Tabular file.
-
-It is conventional to take the first word of the FASTQ "@" title line as the identifier, and to treat any remaining text as a free-form description.
-It is therefore often useful to split this text into two columns in Galaxy (identifier and any description) by setting **How many columns to divide title string into?** to **2**.
-In some cases the description can be usefully broken up into more columns -- see the examples below.
-
-Tab characters, if present in the source FASTQ title, will be converted to spaces.
-
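-Under the hood the title is split with Python's whitespace split and a maximum
-piece count, as in the accompanying script. For example (a sketch of the
-behaviour, not part of the tool)::
-
-    title = "FSRRS4401BE7HA [length=395] [gc=36.46]"
-    title.split(None, 1)  # 2 columns: ['FSRRS4401BE7HA', '[length=395] [gc=36.46]']
-    title.split(None, 2)  # 3 columns: ['FSRRS4401BE7HA', '[length=395]', '[gc=36.46]']
-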
------	
-
-**Example**
-
-Consider the following two 454 reads in Sanger FASTQ format (line-wrapped here for display, but note that not all tools accept line-wrapped FASTQ files)::
-
- @FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95]
- tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGAAGGAAGTGCATTCGACGTAT
- GCCCGTTTGTCGATATTTGtatttaaagtaatccgtcacaaatcagtgacataaatattatttagatttcgggagcaact
- ttatttattccacaagcaggtttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtca
- tttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaagaagatagtgaaattttaatta
- aaanaaatagccaaacgtaactaactaaaacggacccgtcgaggaactgccaacggacgacacagggagtagnnn
- +FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95]
- FFFDDDDDDDA666?688FFHGGIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIGHGFFFFF====DFFFFFFFFFFFFFF
- D???:3104/76=:5...4.3,,,366////4&lt;ABBAAA=CCFDDDDDDDD:666CDFFFF=&lt;ABA=;:333111&lt;===9
- 9;B889FFFFFFDDBDBDDD=8844231..,,,-,,,,,,,,1133..---17111,,,,,22555131121.--.,333
- 11,.,,3--,,.,,--,3511123..--!,,,,--,----9,,,,8=,,-,,,-,,,,---26:9:5-..1,,,,11//,
- ,,,!,,1917--,,,,-3.,--,,17,,,,---+11113.030000,,,044400036;96662.//;7&gt;&lt;;!!!
- @FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74]
- tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtt
- taatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn
- +FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74]
- FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=&lt;4444@@B=555:BBBBB@@?8:8&lt;?&lt;89898&lt;84442;==3,,,514,,
- ,11,,,.,,21777555513,..--1115758.//34488&gt;&lt;&lt;;;;;9944/!/4,,,57855!!
-
-By default this is converted into a 3-column tabular file, with the full FASTQ title used as column 1:
-
-=================================================================================================== ============== ==============
-FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] tcagTTAA...nnn FFFDDDDD...!!!
-FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] tcagCCAG...gnn FFFFFFFF...5!!
-=================================================================================================== ============== ==============
-
-If you specify that the title should be turned into 2 columns, you get 4 columns in total:
-
-============== ==================================================================================== ============== ==============
-FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] tcagTTAA...nnn FFFDDDDD...!!!
-FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] tcagCCAG...gnn FFFFFFFF...5!!
-============== ==================================================================================== ============== ==============
-
-Similarly, for this example treating the title string as 7 columns makes sense:
-
-============== ============ ========== =========== ============= ============== =================== ============== ==============
-FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] tcagTTAA...nnn FFFDDDDD...!!!
-FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] tcagCCAG...gnn FFFFFFFF...5!!
-============== ============ ========== =========== ============= ============== =================== ============== ==============
-
-Note that the sequences and quality strings in the above tables have been truncated for display purposes.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/fastq/fastq_trimmer.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-#Dan Blankenberg
-import sys
-from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
-
-def main():
-    input_filename = sys.argv[1]
-    output_filename = sys.argv[2]
-    left_offset = sys.argv[3]
-    right_offset = sys.argv[4]
-    percent_offsets = sys.argv[5] == 'offsets_percent'
-    input_type = sys.argv[6] or 'sanger'
-    keep_zero_length = sys.argv[7] == 'keep_zero_length'
-    
-    out = fastqWriter( open( output_filename, 'wb' ), format = input_type )
-    num_reads_excluded = 0
-    num_reads = None
-    for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ):
-        if percent_offsets:
-            left_column_offset = int( round( float( left_offset ) / 100.0 * float( len( fastq_read ) ) ) )
-            right_column_offset = int( round( float( right_offset ) / 100.0 * float( len( fastq_read ) ) ) )
-        else:
-            left_column_offset = int( left_offset )
-            right_column_offset = int( right_offset )
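-        #convert a positive 3' offset into a negative slice index; an offset of 0 becomes None so the slice keeps the read's 3' end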
-        if right_column_offset > 0:
-            right_column_offset = -right_column_offset
-        else:
-            right_column_offset = None
-        fastq_read = fastq_read.slice( left_column_offset, right_column_offset )
-        if keep_zero_length or len( fastq_read ):
-            out.write( fastq_read )
-        else:
-            num_reads_excluded += 1
-    out.close()
-    if num_reads is None:
-        print "No valid fastq reads could be processed."
-    else:
-        print "%i fastq reads were processed." % ( num_reads + 1 )
-    if num_reads_excluded:
-        print "%i reads of zero length were excluded from the output." % num_reads_excluded
-
-if __name__ == "__main__": main()
--- a/tools/fastq/fastq_trimmer.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,120 +0,0 @@
-<tool id="fastq_trimmer" name="FASTQ Trimmer" version="1.0.0">
-  <description>by column</description>
-  <command interpreter="python">fastq_trimmer.py '$input_file' '$output_file' '${offset_type['left_column_offset']}' '${offset_type['right_column_offset']}' '${offset_type['base_offset_type']}' '${input_file.extension[len( 'fastq' ):]}' '$keep_zero_length'</command>
-  <inputs>
-    <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File"/>
-    <conditional name="offset_type">
-      <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
-        <option value="offsets_absolute" selected="true">Absolute Values</option>
-        <option value="offsets_percent">Percentage of Read Length</option>
-      </param>
-      <when value="offsets_absolute">
-        <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left">
-          <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
-          <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
-        </param>
-        <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right">
-          <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
-          <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
-        </param>
-      </when>
-      <when value="offsets_percent">
-        <param name="left_column_offset" label="Offset from 5' end" value="0" type="float">
-          <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
-        </param>
-        <param name="right_column_offset" label="Offset from 3' end" value="0" type="float">
-          <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
-        </param>
-      </when>
-    </conditional>
-    <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" selected="False"/>
-  </inputs>
-  <outputs>
-    <data name="output_file" format="input" />
-  </outputs>
-  <tests>
-    <test>
-      <!-- Do nothing trim -->
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="0"/>
-      <param name="right_column_offset" value="0"/>
-      <param name="keep_zero_length" value="keep_zero_length" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <!-- Trim to empty File -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="30"/>
-      <param name="right_column_offset" value="64"/>
-      <param name="keep_zero_length" value="exclude_zero_length" />
-      <output name="output_file" file="empty_file.dat" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="base_offset_type" value="offsets_percent"/>
-      <param name="left_column_offset" value="50"/>
-      <param name="right_column_offset" value="50"/>
-      <param name="keep_zero_length" value="exclude_zero_length" />
-      <output name="output_file" file="empty_file.dat" />
-    </test>
-    <!-- Trim to 4 inner-most bases -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="45"/>
-      <param name="right_column_offset" value="45"/>
-      <param name="keep_zero_length" value="exclude_zero_length" />
-      <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="base_offset_type" value="offsets_percent"/>
-      <param name="left_column_offset" value="47.87"/>
-      <param name="right_column_offset" value="47.87"/>
-      <param name="keep_zero_length" value="exclude_zero_length" />
-      <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
-    </test>
-  </tests>
-  <help>
-This tool allows you to trim the ends of reads.
-
-You can specify either absolute or percent-based offsets. Offsets are calculated, starting at 0, from the respective end to be trimmed. When using the percent-based method, offsets are rounded to the nearest integer. 
-
-For example, if you have a read of length 36::
-  
-  @Some FASTQ Sanger Read
-  CAATATGTNCTCACTGATAAGTGGATATNAGCNCCA
-  +
-  =@@.@;B-%?8&gt;CBA@&gt;7@7BBCA4-48%&lt;;;%&lt;B@
-  
-And you set absolute offsets of 2 and 9::
-  
-  @Some FASTQ Sanger Read
-  ATATGTNCTCACTGATAAGTGGATA
-  +
-  @.@;B-%?8&gt;CBA@&gt;7@7BBCA4-4
-  
-Or you set percent offsets of 6% and 20% (corresponding to absolute offsets of 2 and 7 for a read length of 36)::
-  
-  @Some FASTQ Sanger Read
-  ATATGTNCTCACTGATAAGTGGATATN
-  +
-  @.@;B-%?8&gt;CBA@&gt;7@7BBCA4-48%
-  
------
-
-.. class:: warningmark
-
-Trimming a color space read will cause any adapter base to be lost.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/fastq/fastq_trimmer_by_quality.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,126 +0,0 @@
-#Dan Blankenberg
-from optparse import OptionParser
-from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
-
-def mean( score_list ):
-    return float( sum( score_list ) ) / float( len( score_list ) )
-
-ACTION_METHODS = { 'min':min, 'max':max, 'sum':sum, 'mean':mean }
-
-def compare( aggregated_value, operator, threshold_value ):
-    if operator == '>':
-        return aggregated_value > threshold_value
-    elif operator == '>=':
-        return aggregated_value >= threshold_value
-    elif operator == '==':
-        return aggregated_value == threshold_value
-    elif operator == '<':
-        return aggregated_value < threshold_value
-    elif operator == '<=':
-        return aggregated_value <= threshold_value
-    elif operator == '!=':
-        return aggregated_value != threshold_value
-
-def exclude( value_list, exclude_indexes ):
-    rval = []
-    for i, val in enumerate( value_list ):
-        if i not in exclude_indexes:
-            rval.append( val )
-    return rval
-
-def exclude_and_compare( aggregate_action, aggregate_list, operator, threshold_value, exclude_indexes = None ):
-    if not aggregate_list or compare( aggregate_action( aggregate_list ), operator, threshold_value ):
-        return True
-    if exclude_indexes:
-        for exclude_index in exclude_indexes:
-            excluded_list = exclude( aggregate_list, exclude_index )
-            if not excluded_list or compare( aggregate_action( excluded_list ), operator, threshold_value ):
-                return True
-    return False
-
-def main():
-    usage = "usage: %prog [options] input_file output_file"
-    parser = OptionParser( usage=usage )
-    parser.add_option( '-f', '--format', dest='format', type='choice', default='sanger', choices=( 'sanger', 'cssanger', 'solexa', 'illumina' ), help='FASTQ variant type' )
-    parser.add_option( '-s', '--window_size', type="int", dest='window_size', default='1', help='Window size' )
-    parser.add_option( '-t', '--window_step', type="int", dest='window_step', default='1', help='Window step' )
-    parser.add_option( '-e', '--trim_ends', type="choice", dest='trim_ends', default='53', choices=('5','3','53','35' ), help='Ends to Trim' )
-    parser.add_option( '-a', '--aggregation_action', type="choice", dest='aggregation_action', default='min', choices=('min','max','sum','mean' ), help='Aggregate action for window' )
-    parser.add_option( '-x', '--exclude_count', type="int", dest='exclude_count', default='0', help='Maximum number of bases to exclude from the window during aggregation' )
-    parser.add_option( '-c', '--score_comparison', type="choice", dest='score_comparison', default='>=', choices=('>','>=','==','<', '<=', '!=' ), help='Keep read when aggregate score is' )
-    parser.add_option( '-q', '--quality_score', type="float", dest='quality_score', default='0', help='Quality Score' )
-    parser.add_option( "-k", "--keep_zero_length", action="store_true", dest="keep_zero_length", default=False, help="Keep reads with zero length")
-    ( options, args ) = parser.parse_args()
-    
-    if len ( args ) != 2:
-        parser.error( "Need to specify an input file and an output file" )
-    
-    if options.window_size < 1:
-        parser.error( 'You must specify a strictly positive window size' )
-    
-    if options.window_step < 1:
-        parser.error( 'You must specify a strictly positive step size' )
-    
-    #determine an exhaustive list of window indexes that can be excluded from aggregation
-    exclude_window_indexes = []
-    last_exclude_indexes = []
-    for exclude_count in range( min( options.exclude_count, options.window_size ) ):
-        if last_exclude_indexes:
-            new_exclude_indexes = []
-            for exclude_list in last_exclude_indexes:
-                for window_index in range( options.window_size ):
-                    if window_index not in exclude_list:
-                        new_exclude = sorted( exclude_list + [ window_index ] )
-                        if new_exclude not in exclude_window_indexes + new_exclude_indexes:
-                            new_exclude_indexes.append( new_exclude )
-            exclude_window_indexes += new_exclude_indexes
-            last_exclude_indexes = new_exclude_indexes
-        else:
-            for window_index in range( options.window_size ):
-                last_exclude_indexes.append( [ window_index ] )
-            exclude_window_indexes = list( last_exclude_indexes )
-    
-    out = fastqWriter( open( args[1], 'wb' ), format = options.format )
-    action = ACTION_METHODS[ options.aggregation_action ]
-    
-    num_reads = None
-    num_reads_excluded = 0
-    for num_reads, fastq_read in enumerate( fastqReader( open( args[0] ), format = options.format ) ):
-        for trim_end in options.trim_ends:
-            quality_list = fastq_read.get_decimal_quality_scores()
-            if trim_end == '5':
-                lwindow_position = 0 #left position of window
-                while True:
-                    if lwindow_position >= len( quality_list ):
-                        fastq_read.sequence = ''
-                        fastq_read.quality = ''
-                        break
-                    if exclude_and_compare( action, quality_list[ lwindow_position:lwindow_position + options.window_size ], options.score_comparison, options.quality_score, exclude_window_indexes ):
-                        fastq_read = fastq_read.slice( lwindow_position, None )
-                        break
-                    lwindow_position += options.window_step
-            else:
-                rwindow_position = len( quality_list ) #right position of window
-                while True:
-                    lwindow_position = rwindow_position - options.window_size #left position of window
-                    if rwindow_position <= 0 or lwindow_position < 0:
-                        fastq_read.sequence = ''
-                        fastq_read.quality = ''
-                        break
-                    if exclude_and_compare( action, quality_list[ lwindow_position:rwindow_position ], options.score_comparison, options.quality_score, exclude_window_indexes ):
-                        fastq_read = fastq_read.slice( None, rwindow_position )
-                        break
-                    rwindow_position -= options.window_step
-        if options.keep_zero_length or len( fastq_read ):
-            out.write( fastq_read )
-        else:
-            num_reads_excluded += 1
-    out.close()
-    if num_reads is None:
-        print "No valid FASTQ reads could be processed."
-    else:
-        print "%i FASTQ reads were processed." % ( num_reads + 1 )
-    if num_reads_excluded:
-        print "%i reads of zero length were excluded from the output." % num_reads_excluded
-
-if __name__ == "__main__": main()
--- a/tools/fastq/fastq_trimmer_by_quality.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,145 +0,0 @@
-<tool id="fastq_quality_trimmer" name="FASTQ Quality Trimmer" version="1.0.0">
-  <description>by sliding window</description>
-  <command interpreter="python">fastq_trimmer_by_quality.py '$input_file' '$output_file' -f '${input_file.extension[len( 'fastq' ):]}' -s '$window_size' 
-    -t '$step_size' -e '$trim_ends' -a '$aggregation_action' -x '$exclude_count' -c '$score_comparison' -q '$quality_score' 
-    #if $keep_zero_length.value:
-        -k
-    #end if
-  </command>
-  <inputs>
-    <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File"/>
-    <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" selected="False"/>
-    <param name="trim_ends" type="select" label="Trim ends">
-      <option value="53" selected="True">5' and 3'</option>
-      <option value="5">5' only</option>
-      <option value="3">3' only</option>
-    </param>
-    <param name="window_size" type="integer" value="1" label="Window size"/>
-    <param name="step_size" type="integer" value="1" label="Step Size" />
-    <param name="exclude_count" label="Maximum number of bases to exclude from the window during aggregation" value="0" type="integer" />
-    <param name="aggregation_action" type="select" label="Aggregate action for window">
-      <option value="min" selected="True">min score</option>
-      <option value="max">max score</option>
-      <option value="sum">sum of scores</option>
-      <option value="mean">mean of scores</option>
-    </param>
-    <param name="score_comparison" type="select" label="Trim until aggregate score is">
-      <sanitizer>
-        <valid initial="none">
-            <add value="&lt;&gt;=!"/> <!-- only allow lt, gt, e, le, ge, ne for this parameter; will be single-quote escaped on commandline -->
-        </valid>
-      </sanitizer>
-      <option value="&gt;">&gt;</option>
-      <option value="&gt;=" selected="true">&gt;=</option>
-      <option value="==">==</option>
-      <option value="!=">!=</option>
-      <option value="&lt;">&lt;</option>
-      <option value="&lt;=">&lt;=</option>
-    </param>
-    <param name="quality_score" label="Quality Score" value="0" type="float" />
-  </inputs>
-  <outputs>
-    <data name="output_file" format="input" />
-  </outputs>
-  <tests>
-    <test>
-      <!-- Trim until window size 1 >= 20;both ends -->
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="keep_zero_length" value="exclude_zero_length" />
-      <param name="trim_ends" value="53"/>
-      <param name="window_size" value="1"/>
-      <param name="step_size" value="1"/>
-      <param name="exclude_count" value="0"/>
-      <param name="aggregation_action" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="quality_score" value="20"/>
-      <output name="output_file" file="sanger_full_range_quality_trimmed_out_1.fastqsanger" />
-    </test>
-    <test>
-      <!-- Trim until window size 1 >= 20; 5' end only -->
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="keep_zero_length" value="exclude_zero_length" />
-      <param name="trim_ends" value="5"/>
-      <param name="window_size" value="1"/>
-      <param name="step_size" value="1"/>
-      <param name="exclude_count" value="0"/>
-      <param name="aggregation_action" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="quality_score" value="20"/>
-      <output name="output_file" file="sanger_full_range_quality_trimmed_out_2.fastqsanger" />
-    </test>
-    <test>
-      <!-- Trim until window size 1 >= 20; 3' end only -->
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="keep_zero_length" value="exclude_zero_length" />
-      <param name="trim_ends" value="3"/>
-      <param name="window_size" value="1"/>
-      <param name="step_size" value="1"/>
-      <param name="exclude_count" value="0"/>
-      <param name="aggregation_action" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="quality_score" value="20"/>
-      <output name="output_file" file="sanger_full_range_quality_trimmed_out_3.fastqsanger" />
-    </test>
-    <test>
-      <!-- Trim until window size 2 >= 1;both ends, 1 deviant score -->
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="keep_zero_length" value="exclude_zero_length" />
-      <param name="trim_ends" value="53"/>
-      <param name="window_size" value="2"/>
-      <param name="step_size" value="1"/>
-      <param name="exclude_count" value="1"/>
-      <param name="aggregation_action" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="quality_score" value="1"/>
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <test>
-      <!-- Trim entire sequences; keep empty reads -->
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="keep_zero_length" value="true" />
-      <param name="trim_ends" value="53"/>
-      <param name="window_size" value="1"/>
-      <param name="step_size" value="1"/>
-      <param name="exclude_count" value="0"/>
-      <param name="aggregation_action" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="quality_score" value="999"/>
-      <output name="output_file" file="sanger_full_range_empty_reads.fastqsanger" />
-    </test>
-    <test>
-      <!-- Trim entire sequences; discard empty reads -->
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="keep_zero_length"/>
-      <param name="trim_ends" value="53"/>
-      <param name="window_size" value="1"/>
-      <param name="step_size" value="1"/>
-      <param name="exclude_count" value="0"/>
-      <param name="aggregation_action" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="quality_score" value="999"/>
-      <output name="output_file" file="empty_file.dat" />
-    </test>
-  </tests>
-  <help>
-This tool allows you to trim the ends of reads based upon the aggregate value of quality scores found within a sliding window; a sliding window of size 1 is equivalent to 'simple' trimming of the ends. 
-
-The user specifies the aggregating action (min, max, sum, mean) to perform on the quality score values found within the sliding window; the aggregate value is then compared against the user-defined comparison operator and threshold value.
-
-The user can also provide a maximum count of bases that may be excluded from the aggregation within the window. When set, this tool first checks the aggregation of the entire window, then after removing 1 value, then after removing 2 values, up to the declared number. Setting this value equal to or greater than the window size will cause no trimming to occur.
-
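-As a simplified sketch of the 5' case (adapted from the accompanying script,
-with the exclude-count machinery omitted), the window slides inward until the
-aggregate score passes the comparison::
-
-    position = 0
-    while position < len(quality_scores):
-        window = quality_scores[position:position + window_size]
-        if compare(aggregate(window), operator, threshold):
-            read = read[position:]  #keep the read from here onwards
-            break
-        position += step_size
-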
------
-
-.. class:: warningmark
-
-Trimming a color space read will cause any adapter base to be lost.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/fastq/tabular_to_fastq.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-#Dan Blankenberg
-import sys
-
-def main():
-    input_filename = sys.argv[1]
-    output_filename = sys.argv[2]
-    identifier_col = int( sys.argv[3] ) - 1
-    sequence_col = int( sys.argv[4] ) - 1
-    quality_col = int( sys.argv[5] ) - 1
-    
-    max_col = max( identifier_col, sequence_col, quality_col )
-    num_reads = None
-    fastq_read = None
-    skipped_lines = 0
-    out = open( output_filename, 'wb' )
-    for num_reads, line in enumerate( open( input_filename ) ):
-        fields = line.rstrip( '\n\r' ).split( '\t' )
-        if len( fields ) > max_col:
-            out.write( "@%s\n%s\n+\n%s\n" % ( fields[identifier_col], fields[sequence_col], fields[quality_col] ) )
-        else:
-            skipped_lines += 1
-    
-    out.close()
-    if num_reads is None:
-        print "Input was empty."
-    else:
-        print "%i tabular lines were written as FASTQ reads. Be sure to use the FASTQ Groomer tool on this output before further analysis." % ( num_reads + 1 - skipped_lines )
-    
-if __name__ == "__main__": main()
--- a/tools/fastq/tabular_to_fastq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-<tool id="tabular_to_fastq" name="Tabular to FASTQ" version="1.0.0">
-  <description>converter</description>
-  <command interpreter="python">tabular_to_fastq.py '$input_file' '$output_file' '$identifier' '$sequence' '$quality'</command>
-  <inputs>
-    <param name="input_file" type="data" format="tabular" label="Tabular file to convert" />
-    <param name="identifier" label="Identifier column" type="data_column" data_ref="input_file" />
-    <param name="sequence" label="Sequence column" type="data_column" data_ref="input_file" />
-    <param name="quality" label="Quality column" type="data_column" data_ref="input_file" />
-  </inputs>
-  <outputs>
-    <data name="output_file" format="fastq" />
-  </outputs>
-  <tests>
-    <!-- basic test -->
-    <test>
-      <param name="input_file" value="fastq_to_tabular_out_1.tabular" ftype="tabular" />
-      <param name="identifier" value="1" />
-      <param name="sequence" value="2" />
-      <param name="quality" value="3" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <!-- color space test -->
-    <test>
-      <param name="input_file" value="fastq_to_tabular_out_2.tabular" ftype="tabular" />
-      <param name="identifier" value="1" />
-      <param name="sequence" value="2" />
-      <param name="quality" value="3" />
-      <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-This tool attempts to convert a tabular file containing sequencing read data to a FASTQ-formatted file. The FASTQ Groomer tool should always be used on the output of this tool.
-
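-Each selected row is written as a standard four-line FASTQ record, exactly as
-the accompanying script emits it::
-
-    @identifier
-    SEQUENCE
-    +
-    QUALITY
-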
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/fastx_toolkit/fasta_clipping_histogram.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-<tool id="cshl_fasta_clipping_histogram" name="Length Distribution">
-	<description>chart</description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-	<command>fasta_clipping_histogram.pl $input $outfile</command>
-	
-	<inputs>
-		<param format="fasta" name="input" type="data" label="Library to analyze" />
-	</inputs>
-
-	<outputs>
-		<data format="png" name="outfile" metadata_source="input" />
-	</outputs>
-<help>
-
-**What it does**
-
-This tool creates a histogram image of the sequence length distribution in a given FASTA dataset file.
-
-**TIP:** Use this tool after clipping your library (with the **FASTX Clipper** tool) to visualize the clipping results.
-
------
-
-**Output Examples**
-
-In the following library, most sequences are 24-mers to 27-mers.
-This could indicate an abundance of endo-siRNAs (depending, of course, on what you set out to sequence in the first place).
-
-.. image:: ./static/fastx_icons/fasta_clipping_histogram_1.png
-
-
-In the following library, most sequences are 19-, 22- or 23-mers.
-This could indicate an abundance of miRNAs (depending, of course, on what you set out to sequence in the first place).
-
-.. image:: ./static/fastx_icons/fasta_clipping_histogram_2.png
-
-
------
-
-
-**Input Formats**
-
-This tool accepts short-read FASTA files. The reads don't have to be short, but each sequence does have to be on a single line, like so::
-
-   >sequence1
-   AGTAGTAGGTGATGTAGAGAGAGAGAGAGTAG
-   >sequence2
-   GTGTGTGTGGGAAGTTGACACAGTA
-   >sequence3
-   CCTTGAGATTAACGCTAATCAAGTAAAC
-
-
-If the sequences span multiple lines::
-
-   >sequence1
-   CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAG
-   TCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAG
-   aactggtctttacctTTAAGTTG
-
-Use the **FASTA Width Formatter** tool to re-format the FASTA into single-line sequences::
-
-   >sequence1
-   CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG
-
-
------
-
-
-
-**Multiplicity counts (a.k.a reads-count)**
-
-If the sequence identifier (the text after the '>') contains a dash and a number, the number is treated as a multiplicity count value (i.e. how many times that individual sequence was repeated in the original FASTA file, before collapsing).
-
-Example 1 - The following FASTA file *does not* have multiplicity counts::
-
-    >seq1
-    GGATCC
-    >seq2
-    GGTCATGGGTTTAAA
-    >seq3
-    GGGATATATCCCCACACACACACAC
-
-Each sequence counts as one, producing the following chart:
-
-.. image:: ./static/fastx_icons/fasta_clipping_histogram_3.png
-
-
-Example 2 - The following FASTA file *does* have multiplicity counts::
-
-    >seq1-2
-    GGATCC
-    >seq2-10
-    GGTCATGGGTTTAAA
-    >seq3-3
-    GGGATATATCCCCACACACACACAC
-
-The first sequence counts as 2, the second as 10, and the third as 3, producing the following chart:
-
-.. image:: ./static/fastx_icons/fasta_clipping_histogram_4.png
-
-Use the **FASTA Collapser** tool to create FASTA files with multiplicity counts.
-
-</help>
-</tool>
-<!-- FASTA-Clipping-Histogram is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
--- a/tools/fastx_toolkit/fasta_formatter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-<tool id="cshl_fasta_formatter" name="FASTA Width">
-	<description>formatter</description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-	<!--
-		Note:
-			fasta_formatter also has a tabular output mode (-t),
-			but Galaxy already contains such a tool, so no need
-			to offer the user a duplicated tool.
-
-			So this XML tool only changes the width (line-wrapping) of a
-			FASTA file.
-	-->
-	<command>zcat -f '$input' | fasta_formatter -w $width -o $output</command>
-	<inputs>
-		<param format="fasta" name="input" type="data" label="Library to re-format" />
-
-		<param name="width" type="integer" value="0" label="New width for nucleotide strings" help="Use 0 for single-line output." />
-	</inputs>
-
-	<tests>
-		<test>
-			<!-- Re-format a FASTA file into a single line -->
-			<param name="input" value="fasta_formatter1.fasta" /> 
-			<param name="width" value="0" />
-			<output name="output" file="fasta_formatter1.out" />
-		</test>
-		<test>
-			<!-- Re-format a FASTA file into multiple lines wrapping at 60 characters -->
-			<param name="input" value="fasta_formatter1.fasta" />
-			<param name="width" value="60" />
-			<output name="output" file="fasta_formatter2.out" />
-		</test>
-	</tests>
-
-	<outputs>
-		<data format="input" name="output" metadata_source="input" />
-	</outputs>
-
-<help>
-**What it does**
-
-This tool re-formats a FASTA file, changing the width of the nucleotide lines.
-
-**TIP:** Outputting a single line (with **width = 0**) can be useful for scripting (with **grep**, **awk**, and **perl**): every odd line is a sequence identifier, and every even line is a nucleotide line.
-
---------
-
-**Example**
-
-Input FASTA file (each nucleotides line is 50 characters long)::
-
-    >Scaffold3648
-    AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTC
-    CCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTG
-    TTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACA
-    ATTAAAGTCAATTTTAATGAACATGTAGTAAAAACT
-    >Scaffold9299
-    CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAG
-    TCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAG
-    aactggtctttacctTTAAGTTG
-
-
-Output FASTA file (with width=80)::
-
-    >Scaffold3648
-    AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCAGGGACCTACCTGTTTTTGTT
-    ATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACAATTAAAGTCA
-    ATTTTAATGAACATGTAGTAAAAACT
-    >Scaffold9299
-    CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTAC
-    GTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG
-
-Output FASTA file (with width=0 => single line)::
-
-    >Scaffold3648
-    AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACAATTAAAGTCAATTTTAATGAACATGTAGTAAAAACT
-    >Scaffold9299
-    CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG
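-
-The transformation itself is a plain re-wrap of the sequence lines. A rough Python equivalent of the behavior shown above (not the fasta_formatter binary; width 0 means single-line output)::
-
-    import sys
-
-    def flush(seq_parts, width):
-        """Print one accumulated sequence, wrapped to `width` (0 = one line)."""
-        seq = "".join(seq_parts)
-        if not seq:
-            return
-        if width <= 0:
-            print(seq)
-        else:
-            for i in range(0, len(seq), width):
-                print(seq[i:i + width])
-
-    width = int(sys.argv[1])
-    parts = []
-    for line in sys.stdin:
-        line = line.rstrip()
-        if line.startswith(">"):
-            flush(parts, width)
-            parts = []
-            print(line)
-        else:
-            parts.append(line)
-    flush(parts, width)
-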
-</help>
-</tool>
--- a/tools/fastx_toolkit/fasta_nucleotide_changer.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-<tool id="cshl_fasta_nucleotides_changer" name="RNA/DNA" >
-	<description>converter</description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-	<command>zcat -f '$input' | fasta_nucleotide_changer $mode -v -o $output</command>
-	<inputs>
-		<param format="fasta" name="input" type="data" label="Library to convert" />
-
-		<param name="mode" type="select" label="Convert">
-			<option value="-d">RNA to DNA (U to T)</option>
-			<option value="-r">DNA to RNA (T to U)</option>
-		</param>
-	</inputs>
-
-    <!-- 
-    Functional tests with param value starting with - fail.
-	<tests>
-		<test>
-			<param name="input" value="fasta_nuc_changer1.fasta" /> 
-			<param name="mode" value="-r" /> 
-			<output name="output" file="fasta_nuc_change1.out" />
-		</test>
-		<test>
-			<param name="input" value="fasta_nuc_changer2.fasta" /> 
-			<param name="mode" value="-d" /> 
-			<output name="output" file="fasta_nuc_change2.out" />
-		</test>
-	</tests>
-     -->
-  
-	<outputs>
-		<data format="input" name="output" metadata_source="input" />
-	</outputs>
-
-<help>
-**What it does**
-
-This tool converts RNA FASTA files to DNA (and vice-versa).
-
-In **RNA-to-DNA** mode, U's are changed into T's.
-
-In **DNA-to-RNA** mode, T's are changed into U's.
-
---------
-
-**Example**
-
-Input RNA FASTA file ( from Sanger's mirBase )::
-
-    >cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7
-    UGAGGUAGUAGGUUGUAUAGUU
-    >cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4
-    UCCCUGAGACCUCAAGUGUGA
-    >cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1
-    UGGAAUGUAAAGAAGUAUGUA
-
-Output DNA FASTA file (with RNA-to-DNA mode)::
-
-    >cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7
-    TGAGGTAGTAGGTTGTATAGTT
-    >cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4
-    TCCCTGAGACCTCAAGTGTGA
-    >cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1
-    TGGAATGTAAAGAAGTATGTA
-
-</help>
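-The conversion is a per-base substitution applied to sequence lines only (headers pass through unchanged). A minimal sketch of the same idea, assuming the RNA-to-DNA direction::
-
-    import sys
-
-    for line in sys.stdin:
-        line = line.rstrip("\n")
-        if line.startswith(">"):
-            print(line)
-        else:
-            # RNA-to-DNA: U becomes T (preserving case)
-            print(line.replace("U", "T").replace("u", "t"))
-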
-</tool>
--- a/tools/fastx_toolkit/fastq_quality_boxplot.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,56 +0,0 @@
-<tool id="cshl_fastq_quality_boxplot" name="Draw quality score boxplot">
-	<description></description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-	
-	<command>fastq_quality_boxplot_graph.sh -t '$input.name' -i $input -o $output</command>
-	
-	<inputs>
-		<param format="txt" name="input" type="data" label="Statistics report file"  help="output of 'FASTQ Statistics' tool" />
-	</inputs>
-
-	<outputs>
-		<data format="png" name="output" metadata_source="input" />
-	</outputs>
-<help>
-
-**What it does**
-
-Creates a boxplot graph for the quality scores in the library.
-
-.. class:: infomark
-
-**TIP:** Use the **FASTQ Statistics** tool to generate the report file needed for this tool.
-
------
-
-**Output Examples**
-
-* Black horizontal lines are medians
-* Rectangular red boxes show the Inter-quartile Range (IQR) (top value is Q3, bottom value is Q1)
-* Whiskers extend to at most 1.5*IQR beyond the box; values outside them are outliers (see the sketch below)
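-
-A sketch of how such per-cycle box values can be computed (using linearly interpolated quartiles; the exact quantile convention used by the plotting script may differ)::
-
-    def box_stats(values):
-        """Q1/median/Q3 plus whiskers clipped to 1.5*IQR beyond the box."""
-        s = sorted(values)
-
-        def quantile(p):
-            i = (len(s) - 1) * p
-            lo, hi = int(i), min(int(i) + 1, len(s) - 1)
-            return s[lo] + (s[hi] - s[lo]) * (i - lo)
-
-        q1, med, q3 = quantile(0.25), quantile(0.5), quantile(0.75)
-        iqr = q3 - q1
-        left = min(v for v in s if v >= q1 - 1.5 * iqr)
-        right = max(v for v in s if v <= q3 + 1.5 * iqr)
-        return q1, med, q3, left, right
-
-    print(box_stats([40, 40, 40, 38, 36, 30, 20, 10]))
-    # (27.5, 37.0, 40, 10, 40)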
-
-
-An excellent quality library (median quality is 40 for almost all 36 cycles):
-
-.. image:: ./static/fastx_icons/fastq_quality_boxplot_1.png
-
-
-A relatively good quality library (median quality degrades towards later cycles):
-
-.. image:: ./static/fastx_icons/fastq_quality_boxplot_2.png
-
-A low quality library (median drops quickly):
-
-.. image:: ./static/fastx_icons/fastq_quality_boxplot_3.png
-
-------
-
-This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
-
- .. __: http://hannonlab.cshl.edu/fastx_toolkit/
-
-
-
-</help>
-</tool>
-<!-- FASTQ-Quality-Boxplot is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
--- a/tools/fastx_toolkit/fastq_quality_converter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,97 +0,0 @@
-<tool id="cshl_fastq_quality_converter" name="Quality format converter">
-	<description>(ASCII-Numeric)</description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-	<command>zcat -f $input | fastq_quality_converter $QUAL_FORMAT -o $output -Q $offset</command>
-	<inputs>
-		<param format="fastq" name="input" type="data" label="Library to convert" />
-
-		<param name="QUAL_FORMAT" type="select" label="Desired output format">
-			<option value="-a">ASCII (letters) quality scores</option>
-			<option value="-n">Numeric quality scores</option>
-		</param>
-		
-        <param name="offset" type="select" label="FASTQ ASCII offset">
-            <option value="33">33</option>
-            <option selected="true" value="64">64</option>
-        </param>	
-    </inputs>
-
-	<tests>
-		<test>
-			<!-- ASCII to NUMERIC -->
-			<param name="input" value="fastq_qual_conv1.fastq" />
-			<param name="QUAL_FORMAT" value="Numeric quality scores" />
-			<param name="offset" value="64" />
-			<output name="output" file="fastq_qual_conv1.out" />
-		</test>
-		<test>
-			<!-- ASCII to ASCII (basically a no-op, but it should still produce a valid output) -->
-			<param name="input" value="fastq_qual_conv1.fastq" />
-			<param name="QUAL_FORMAT" value="ASCII (letters) quality scores" />
-			<param name="offset" value="64" />
-			<output name="output" file="fastq_qual_conv1a.out" />
-		</test>
-		<test>
-			<!-- NUMERIC to ASCII -->
-			<param name="input" value="fastq_qual_conv2.fastq" />
-			<param name="QUAL_FORMAT" value="ASCII (letters) quality scores" />
-			<param name="offset" value="64" />
-			<output name="output" file="fastq_qual_conv2.out" />
-		</test>
-		<test>
-			<!-- NUMERIC to NUMERIC (basically a no-op, but it should still produce a valid output) -->
-			<param name="input" value="fastq_qual_conv2.fastq" />
-			<param name="QUAL_FORMAT" value="Numeric quality scores" />
-			<param name="offset" value="64" />
-			<output name="output" file="fastq_qual_conv2n.out" />
-		</test>
-	</tests>
-
-	<outputs>
-		<data format="fastq" name="output" metadata_source="input" />
-	</outputs>
-<help>
-
-**What it does**
-
-Converts a Solexa FASTQ file between ASCII and numeric quality formats.
-
-.. class:: warningmark 
-
-Re-scaling (e.g. conversion from the Phred scale to the Solexa scale) is **not** performed.
-
-
------
-
-FASTQ with Numeric quality scores::
-
-    @CSHL__2_FC042AGWWWXX:8:1:120:202
-    ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC
-    +CSHL__2_FC042AGWWWXX:8:1:120:202
-    40 40 40 40 20 40 40 40 40 6 40 40 28 40 40 25 40 20 40 -1 30 40 14 27 40 8 1 3 7 -1 11 10 -1 21 10 8
-    @CSHL__2_FC042AGWWWXX:8:1:103:1185
-    ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC
-    +CSHL__2_FC042AGWWWXX:8:1:103:1185
-    40 40 40 40 40 35 33 31 40 40 40 32 30 22 40 -0 9 22 17 14 8 36 15 34 22 12 23 3 10 -0 8 2 4 25 30 2
-
-
-FASTQ with ASCII quality scores::
-
-    @CSHL__2_FC042AGWWWXX:8:1:120:202
-    ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC
-    +CSHL__2_FC042AGWWWXX:8:1:120:202
-    hhhhThhhhFhh\hhYhTh?^hN[hHACG?KJ?UJH
-    @CSHL__2_FC042AGWWWXX:8:1:103:1185
-    ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC
-    +CSHL__2_FC042AGWWWXX:8:1:103:1185
-    hhhhhca_hhh`^Vh@IVQNHdObVLWCJ@HBDY^B
-
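-The two representations encode the same per-base scores; the conversion is just `chr`/`ord` arithmetic around the chosen ASCII offset (64 above, 33 for Sanger-style files). A small sketch, not the fastx binary::
-
-    def ascii_to_numeric(qual, offset=64):
-        """Decode an ASCII quality string into integer scores."""
-        return [ord(c) - offset for c in qual]
-
-    def numeric_to_ascii(scores, offset=64):
-        """Encode integer scores back into an ASCII quality string."""
-        return "".join(chr(s + offset) for s in scores)
-
-    print(ascii_to_numeric("hhT", 64))         # [40, 40, 20]
-    print(numeric_to_ascii([40, 40, 20], 64))  # hhT
-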
-------
-
-This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
-
- .. __: http://hannonlab.cshl.edu/fastx_toolkit/
- 
-</help>
-</tool>
-<!-- FASTQ-Quality-Converter is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
--- a/tools/fastx_toolkit/fastq_quality_filter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-<tool id="cshl_fastq_quality_filter" name="Filter by quality">
-	<description></description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-
-	<command>zcat -f '$input' | fastq_quality_filter -q $quality -p $percent -v -o $output
-#if $input.ext == "fastqsanger":
--Q 33
-#end if
-	</command>
-
-	<inputs>
-		<param format="fastqsolexa,fastqsanger" name="input" type="data" label="Library to filter" />
-
-		<param name="quality" size="4" type="integer" value="20">
-			<label>Quality cut-off value</label>
-		</param>
-
-		<param name="percent" size="4" type="integer" value="90">
-			<label>Percent of bases in sequence that must have quality equal to / higher than cut-off value</label>
-		</param>
-	</inputs>
-
-	<tests>
-		<test>
-			<!-- Test1:  100% of bases with quality 33 or higher (pretty steep requirement...) -->
-			<param name="input" value="fastq_qual_filter1.fastq" ftype="fastqsolexa" />
-			<param name="quality" value="33"/>
-			<param name="percent" value="100"/>
-			<output name="output" file="fastq_qual_filter1a.out" />
-		</test>
-		<test>
-			<!-- Test2:  80% of bases with quality 20 or higher -->
-			<param name="input" value="fastq_qual_filter1.fastq" ftype="fastqsolexa"/>
-			<param name="quality" value="20"/>
-			<param name="percent" value="80"/>
-			<output name="output" file="fastq_qual_filter1b.out" />
-		</test>
-	</tests>
-
-	<outputs>
-		<data format="input" name="output" metadata_source="input" />
-	</outputs>
-
-	<help>
-**What it does**
-
-This tool filters reads based on quality scores.
-
-.. class:: infomark
-
-Using **percent = 100** requires every cycle of a read to meet the quality cut-off value for that read to be kept.
-
-.. class:: infomark
-
-Using **percent = 50** requires the median quality of the cycles (in each read) to be at least the quality cut-off value.
-
---------
-
-The quality score distribution (over all cycles) is calculated for each read. If fewer than the required percentage of cycles meet the quality cut-off value, the read is discarded.
-
-
-**Example**::
-
-    @CSHL_4_FC042AGOOII:1:2:214:584
-    GACAATAAAC
-    +CSHL_4_FC042AGOOII:1:2:214:584
-    30 30 30 30 30 30 30 30 20 10
-
-Using **percent = 50** and **cut-off = 30** - This read will not be discarded (at least 50% of the cycles have quality equal to / higher than 30).
-
-Using **percent = 90** and **cut-off = 30** - This read will be discarded (fewer than 90% of the cycles have quality equal to / higher than 30).
-
-Using **percent = 100** and **cut-off = 20** - This read will be discarded (not all cycles have quality equal to / higher than 20).
-
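-Put differently, the per-read rule is: count the cycles at or above the cut-off and compare that fraction to the requested percentage. A small sketch of the rule using the example read above (an illustration, not the fastq_quality_filter source)::
-
-    def keep_read(scores, cutoff, percent):
-        """Keep a read iff at least `percent`% of its bases meet the cut-off."""
-        good = sum(1 for s in scores if s >= cutoff)
-        return 100.0 * good / len(scores) >= percent
-
-    scores = [30, 30, 30, 30, 30, 30, 30, 30, 20, 10]
-    print(keep_read(scores, 30, 50))   # True  (80% of cycles >= 30)
-    print(keep_read(scores, 30, 90))   # False (only 80% >= 30)
-    print(keep_read(scores, 20, 100))  # False (one base is 10)
-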
-------
-
-This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
-
- .. __: http://hannonlab.cshl.edu/fastx_toolkit/	    
-	</help>
-</tool>
-<!-- FASTQ-Quality-Filter is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
--- a/tools/fastx_toolkit/fastq_to_fasta.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-<tool id="cshl_fastq_to_fasta" name="FASTQ to FASTA">
-	<description>converter</description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-	<command>gunzip -cf $input | fastq_to_fasta $SKIPN $RENAMESEQ -o $output -v 
-#if $input.ext == "fastqsanger":
--Q 33
-#end if
-	</command>
-
-	<inputs>
-		<param format="fastqsanger,fastqsolexa,fastqillumina" name="input" type="data" label="FASTQ Library to convert" />
-
-		<param name="SKIPN" type="select" label="Discard sequences with unknown (N) bases ">
-			<option value="">yes</option>
-			<option value="-n">no</option>
-		</param>
-
-		<param name="RENAMESEQ" type="select" label="Rename sequence names in output file (reduces file size)">
-			<option value="-r">yes</option>
-			<option value="">no</option>
-		</param>
-
-	</inputs>
-
-	<tests>
-		<test>
-			<!-- FASTQ-To-FASTA, keep N, don't rename -->
-			<param name="input" value="fastq_to_fasta1.fastq" ftype="fastqsolexa" />
-			<param name="SKIPN" value=""/>
-			<param name="RENAMESEQ" value=""/>
-			<output name="output" file="fastq_to_fasta1a.out" />
-		</test>
-		<test>
-			<!-- FASTQ-To-FASTA, discard N, rename -->
-			<param name="input" value="fastq_to_fasta1.fastq" ftype="fastqsolexa" />
-			<param name="SKIPN" value="no"/>
-			<param name="RENAMESEQ" value="yes"/>
-			<output name="output" file="fastq_to_fasta1b.out" />
-		</test>
-	</tests>
-
-	<outputs>
-		<data format="fasta" name="output" metadata_source="input" />
-	</outputs>
-
-<help>
-
-**What it does**
-
-This tool converts FASTQ data (Sanger, Solexa or Illumina variants) to FASTA format (scroll down for format description).
-
---------
-
-**Example**
-
-The following data in Solexa-FASTQ format::
-
-    @CSHL_4_FC042GAMMII_2_1_517_596
-    GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT
-    +CSHL_4_FC042GAMMII_2_1_517_596
-    40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40
-  
-Will be converted to FASTA (with 'rename sequence names' = NO)::
-
-    >CSHL_4_FC042GAMMII_2_1_517_596
-    GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT
-    
-Will be converted to FASTA (with 'rename sequence names' = YES)::
-
-    >1
-    GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT
-    
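-Structurally, the conversion keeps the identifier and sequence of each four-line FASTQ record and drops the two quality lines. A rough sketch of that record-level logic (with renaming and N-filtering as options)::
-
-    import sys
-
-    def fastq_to_fasta(handle, rename=False, discard_n=True):
-        count = 0
-        while True:
-            header = handle.readline().rstrip()
-            if not header:
-                break
-            seq = handle.readline().rstrip()
-            handle.readline()  # '+' separator line (dropped)
-            handle.readline()  # quality line (dropped)
-            if discard_n and "N" in seq.upper():
-                continue
-            count += 1
-            print(">%s" % (count if rename else header[1:]))
-            print(seq)
-
-    fastq_to_fasta(sys.stdin)
-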
-------
-
-This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
-
- .. __: http://hannonlab.cshl.edu/fastx_toolkit/    
-</help>
-</tool>
-<!-- FASTQ-to-FASTA is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
--- a/tools/fastx_toolkit/fastx_artifacts_filter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-<tool id="cshl_fastx_artifacts_filter" name="Remove sequencing artifacts">
-	<description></description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-	<command>zcat -f '$input' | fastx_artifacts_filter -v -o "$output"
-#if $input.ext == "fastqsanger":
--Q 33
-#end if
-	</command>
-
-	<inputs>
-		<param format="fasta,fastqsanger,fastqsolexa,fastqillumina" name="input" type="data" label="Library to filter" />
-
-	</inputs>
-
-	<tests>
-		<test>
-			<!-- Filter FASTA file -->
-			<param name="input" value="fastx_artifacts1.fasta" /> 
-			<output name="output" file="fastx_artifacts1.out" />
-		</test>
-		<test>
-			<!-- Filter FASTQ file -->
-			<param name="input" value="fastx_artifacts2.fastq" ftype="fastqsanger" />
-			<output name="output" file="fastx_artifacts2.out" />
-		</test>
-	</tests>
-
-	<outputs>
-		<data format="input" name="output" metadata_source="input" />
-	</outputs>
-<help>
-**What it does**
-
-This tool filters out sequencing artifacts (reads in which all but at most 3 bases are identical).
-
---------
-
-**The following are examples of sequences that will be filtered out**::
-
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAACACAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
-    AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAA
-    AAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAA
-    AAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAA
-    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAA
-    
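-Assuming the rule reads as "a read is an artifact when all but at most 3 of its bases are the same nucleotide", the check can be sketched as::
-
-    from collections import Counter
-
-    def is_artifact(seq):
-        """True when all but at most 3 bases are the same nucleotide."""
-        counts = Counter(seq.upper())
-        return len(seq) - max(counts.values()) <= 3
-
-    print(is_artifact("AAAAAAAAAACAAAAAAAAAA"))  # True  (one non-A base)
-    print(is_artifact("ACGTACGTACGTACGTACGT"))   # False
-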
-------
-
-This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
-
- .. __: http://hannonlab.cshl.edu/fastx_toolkit/
-</help>
-</tool>
-<!-- FASTX-Artifacts-filter is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
--- a/tools/fastx_toolkit/fastx_barcode_splitter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-<tool id="cshl_fastx_barcode_splitter" name="Barcode Splitter">
-	<description></description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-	<command interpreter="bash">fastx_barcode_splitter_galaxy_wrapper.sh $BARCODE $input "$input.name" "$output.files_path" --mismatches $mismatches --partial $partial $EOL > $output </command>
-
-	<inputs>
-		<param format="txt" name="BARCODE" type="data" label="Barcodes to use" />
-		<param format="fasta,fastqsanger,fastqsolexa,fastqillumina" name="input" type="data" label="Library to split" />
-
-		<param name="EOL" type="select" label="Barcodes found at">
-			<option value="--bol">Start of sequence (5' end)</option>
-			<option value="--eol">End of sequence (3' end)</option>
-		</param>
-
-		<param name="mismatches" type="integer" size="3" value="2" label="Number of allowed mismatches" />
-		
-		<param name="partial" type="integer" size="3" value="0" label="Number of allowed barcode nucleotide deletions" />
-	
-	</inputs>
-	
-	<tests>
-		<test>
-			<!-- Split a FASTQ file -->
-			<param name="BARCODE" value="fastx_barcode_splitter1.txt" />
-			<param name="input" value="fastx_barcode_splitter1.fastq" ftype="fastqsolexa" />
-			<param name="EOL" value="Start of sequence (5' end)" />
-			<param name="mismatches" value="2" />
-			<param name="partial" value="0" />
-			<output name="output" file="fastx_barcode_splitter1.out" />
-		</test>
-	</tests>
-
-	<outputs>
-		<data format="html" name="output" />
-	</outputs>
-<help>
-
-**What it does**
-
-This tool splits a Solexa library (FASTQ file) or a regular FASTA file into several files, using barcodes as the split criteria.
-
---------
-
-**Barcode file Format**
-
-Barcode files are simple text files.
-Each line should contain an identifier (descriptive name for the barcode), and the barcode itself (A/C/G/T), separated by a TAB character.
-Example::
-
-    #This line is a comment (starts with a 'number' sign)
-    BC1	GATCT
-    BC2	ATCGT
-    BC3	GTGAT
-    BC4 TGTCT
-    
-For each barcode, a new FASTQ file will be created (with the barcode's identifier as part of the file name).
-Sequences matching the barcode will be stored in the appropriate file.
-
-One additional FASTQ file will be created (the 'unmatched' file), where sequences not matching any barcode will be stored.
-
-The output of this tool is an HTML file, displaying the split counts and the file locations.
-
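-One plausible way to express the matching step (5'-end barcodes, mismatches only; the real fastx_barcode_splitter.pl additionally supports 3'-end matching and partial barcode overlaps)::
-
-    def hamming(a, b):
-        return sum(1 for x, y in zip(a, b) if x != y)
-
-    def assign_barcode(seq, barcodes, max_mismatches=2):
-        """Return the identifier of the best-matching 5' barcode, or 'unmatched'."""
-        best_id, best_mm = "unmatched", max_mismatches + 1
-        for bc_id, bc in barcodes.items():
-            mm = hamming(seq[:len(bc)], bc)
-            if mm < best_mm:
-                best_id, best_mm = bc_id, mm
-        return best_id
-
-    barcodes = {"BC1": "GATCT", "BC2": "ATCGT", "BC3": "GTGAT", "BC4": "TGTCT"}
-    print(assign_barcode("GATCTAAGGTTACA", barcodes))  # BC1
-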
-**Output Example**
-
-.. image:: ./static/fastx_icons/barcode_splitter_output_example.png
-
-</help>
-</tool>
-<!-- FASTX-barcode-splitter is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
--- a/tools/fastx_toolkit/fastx_barcode_splitter_galaxy_wrapper.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-#!/bin/bash
-
-#    FASTX-toolkit - FASTA/FASTQ preprocessing tools.
-#    Copyright (C) 2009  A. Gordon (gordon@cshl.edu)
-#
-#   This program is free software: you can redistribute it and/or modify
-#   it under the terms of the GNU Affero General Public License as
-#   published by the Free Software Foundation, either version 3 of the
-#   License, or (at your option) any later version.
-#
-#   This program is distributed in the hope that it will be useful,
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#   GNU Affero General Public License for more details.
-#
-#    You should have received a copy of the GNU Affero General Public License
-#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-#
-#This is a shell script wrapper for 'fastx_barcode_splitter.pl'
-#
-# 1. Output files are saved at the dataset's files_path directory.
-#    
-# 2. 'fastx_barcode_splitter.pl' outputs a textual table.
-#    This script turns it into pretty HTML with working URL
-#    (so lazy users can just click on the URLs and get their files)
-
-BARCODE_FILE="$1"
-FASTQ_FILE="$2"
-LIBNAME="$3"
-OUTPUT_PATH="$4"
-shift 4
-# The rest of the parameters are passed to the split program
-
-if [ "$OUTPUT_PATH" == "" ]; then
-	echo "Usage: $0 [BARCODE FILE] [FASTQ FILE] [LIBRARY_NAME] [OUTPUT_PATH]" >&2
-	exit 1
-fi
-
-#Sanitize library name, make sure we can create a file with this name
-LIBNAME=${LIBNAME//\.gz/}
-LIBNAME=${LIBNAME//\.txt/}
-LIBNAME=${LIBNAME//[^[:alnum:]]/_}
-
-if [ ! -r "$FASTQ_FILE" ]; then
-	echo "Error: Input file ($FASTQ_FILE) not found!" >&2
-	exit 1
-fi
-if [ ! -r "$BARCODE_FILE" ]; then
-	echo "Error: barcode file ($BARCODE_FILE) not found!" >&2
-	exit 1
-fi
-mkdir -p "$OUTPUT_PATH"
-if [ ! -d "$OUTPUT_PATH" ]; then
-	echo "Error: failed to create output path '$OUTPUT_PATH'" >&2
-	exit 1
-fi
-
-PUBLICURL=""
-BASEPATH="$OUTPUT_PATH/"
-#PREFIX="$BASEPATH"`date "+%Y-%m-%d_%H%M__"`"${LIBNAME}__"
-PREFIX="$BASEPATH""${LIBNAME}__"
-SUFFIX=".txt"
-
-RESULTS=`zcat -f "$FASTQ_FILE" | fastx_barcode_splitter.pl --bcfile "$BARCODE_FILE" --prefix "$PREFIX" --suffix "$SUFFIX" "$@"`
-if [ $? != 0 ]; then
-	echo "Error: barcode splitting failed" >&2
-	exit 1
-fi
-
-#
-# Convert the textual tab-separated table into simple HTML table,
-# with the local path replaces with a valid URL
-echo "<html><body><table border=1>"
-echo "$RESULTS" | sed -r "s|$BASEPATH(.*)|<a href=\"\\1\">\\1</a>|" | sed '
-i<tr><td>
-s|\t|</td><td>|g
-a<\/td><\/tr>
-'
-echo "<p>"
-echo "</table></body></html>"
--- a/tools/fastx_toolkit/fastx_clipper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,109 +0,0 @@
-<tool id="cshl_fastx_clipper" name="Clip" version="1.0.1" >
-  <description>adapter sequences</description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-  <command>
-    zcat -f $input | fastx_clipper -l $minlength -a $clip_source.clip_sequence -d $keepdelta -o $output -v $KEEP_N $DISCARD_OPTIONS
-#if $input.ext == "fastqsanger":
- -Q 33
-#end if
-  </command>
-  
-  <inputs>
-    <param format="fasta,fastqsanger,fastqsolexa,fastqillumina" name="input" type="data" label="Library to clip" />
-  
-    <param name="minlength" size="4" type="integer" value="15">
-      <label>Minimum sequence length (after clipping, sequences shorter than this length will be discarded)</label>
-    </param>
-
-	<conditional name="clip_source">
-		<param name="clip_source_list" type="select" label="Source">
-			<option value="prebuilt" selected="true">Standard (select from the list below)</option>
-			<option value="user">Enter custom sequence</option>
-		</param>
-
-		<when value="user">
-			<param name="clip_sequence" size="30" label="Enter custom clipping sequence" type="text" value="AATTGGCC" />
-		</when>
-
-		<when value="prebuilt">
-			<param name="clip_sequence" type="select" label="Choose Adapter">
-				<options from_file="fastx_clipper_sequences.txt">
-					<column name="name" index="1"/>
-					<column name="value" index="0"/>
-				</options>
-			</param> 
-		</when>
-	</conditional>
-
-	<param name="keepdelta" size="2" type="integer" value="0">
-		<label>Enter a non-zero value to keep the adapter sequence and x bases that follow it</label>
-		<help>Use this for hairpin barcoding. Keep at 0 unless you know what you're doing.</help>
-	</param>
-
-	<param name="KEEP_N" type="select" label="Discard sequences with unknown (N) bases">
-		<option value="">Yes</option>
-		<option value="-n">No</option>
-	</param>
-
-	<param name="DISCARD_OPTIONS" type="select" label="Output options">
-		<option value="-c">Output only clipped sequences (i.e. sequences which contained the adapter)</option>
-		<option value="-C">Output only non-clipped sequences (i.e. sequences which did not contain the adapter)</option>
-		<option value="">Output both clipped and non-clipped sequences</option>
-	</param>
-
-  </inputs>
-	<!--
-	#functional test with param value starting with - fails.	
-	<tests>
-		<test>
-			<param name="input" value="fastx_clipper1.fastq" ftype="fastqsolexa"/>
-			<param name="maxmismatches" value="2" />
-			<param name="minlength" value="15" />
-			<param name="clip_source_list" value="user" />
-			<param name="clip_sequence" value="CAATTGGTTAATCCCCCTATATA" />
-			<param name="keepdelta" value="0" />
-			<param name="KEEP_N" value="-n" />
-			<param name="DISCARD_OPTIONS" value="-c" />
-			<output name="output" file="fastx_clipper1a.out" />
-		</test>
-	</tests>
-	-->
-  <outputs>
-    <data format="input" name="output" metadata_source="input" />
-  </outputs>
-  
-<help>
-**What it does**
-
-This tool clips adapters from the 3'-end of the sequences in a FASTA/FASTQ file.
-
---------
-
-
-**Clipping Illustration:**
-
-.. image:: ./static/fastx_icons/fastx_clipper_illustration.png 
- 
- 
- 
- 
- 
- 
- 
-
-**Clipping Example:**
-
-.. image:: ./static/fastx_icons/fastx_clipper_example.png 
-
-
-    
-**In the above example:**
-
-* Sequence no. 1 was discarded since it wasn't clipped (i.e. it didn't contain the adapter sequence) (**Output options** parameter).
-* Sequence no. 5 was discarded --- its length (after clipping) was shorter than 15 nt (**Minimum Sequence Length** parameter).
-
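-A simplified sketch of the clipping decision (exact adapter match only; the real fastx_clipper tolerates mismatches and partial adapters)::
-
-    def clip_adapter(seq, adapter, min_length=15, clipped_only=True):
-        """Clip at the adapter's first occurrence; None means 'discard read'."""
-        pos = seq.find(adapter)
-        if pos == -1:
-            return None if clipped_only else seq
-        clipped = seq[:pos]
-        return clipped if len(clipped) >= min_length else None
-
-    print(clip_adapter("TGTCTGTAGCCTCATTCAATTGGCCAAA", "AATTGGCC"))
-    # TGTCTGTAGCCTCATTC
-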
-
-
-    
-</help>
-</tool>
--- a/tools/fastx_toolkit/fastx_collapser.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-<tool id="cshl_fastx_collapser" name="Collapse">
-	<description>sequences</description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-	<command>zcat -f '$input' | fastx_collapser -v -o '$output' 
-#if $input.ext == "fastqsanger":
--Q 33
-#end if
-	</command>
-
-	<inputs>
-		<param format="fasta,fastqsanger,fastqsolexa" name="input" type="data" label="Library to collapse" />
-	</inputs>
-
-    <!-- The order of sequences in the test output differs between 32-bit and 64-bit machines. 
-	<tests>
-		<test>
-			<param name="input" value="fasta_collapser1.fasta" />
-			<output name="output" file="fasta_collapser1.out" />
-		</test>
-	</tests>
-    -->
-	<outputs>
-		<data format="fasta" name="output" metadata_source="input" />
-	</outputs>
-  <help>
-
-**What it does**
-
-This tool collapses identical sequences in a FASTA file into a single sequence.
-
---------
-
-**Example**
-
-Example Input File (Sequence "ATAT" appears multiple times):: 
-
-    >CSHL_2_FC0042AGLLOO_1_1_605_414
-    TGCG
-    >CSHL_2_FC0042AGLLOO_1_1_537_759
-    ATAT
-    >CSHL_2_FC0042AGLLOO_1_1_774_520
-    TGGC
-    >CSHL_2_FC0042AGLLOO_1_1_742_502
-    ATAT
-    >CSHL_2_FC0042AGLLOO_1_1_781_514
-    TGAG
-    >CSHL_2_FC0042AGLLOO_1_1_757_487
-    TTCA
-    >CSHL_2_FC0042AGLLOO_1_1_903_769
-    ATAT
-    >CSHL_2_FC0042AGLLOO_1_1_724_499
-    ATAT
-
-Example Output file::
-
-    >1-1
-    TGCG
-    >2-4
-    ATAT
-    >3-1
-    TGGC
-    >4-1
-    TGAG
-    >5-1
-    TTCA
-    
-.. class:: infomark
-
-Original Sequence Names / Lane descriptions (e.g. "CSHL_2_FC0042AGLLOO_1_1_742_502") are discarded. 
-
-The output sequence name is composed of two numbers: the first is the sequence's number, the second is the multiplicity value.
-
-The following output::
-
-    >2-4
-    ATAT
-
-means that the sequence "ATAT" is the second sequence in the file, and it appeared 4 times in the input FASTA file.
-
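-A compact sketch of the collapsing step for single-line FASTA input (first-seen ordering here; as the tool's test comment notes, the real binary's output order can vary between platforms)::
-
-    import sys
-    from collections import OrderedDict
-
-    counts = OrderedDict()  # sequence -> multiplicity, in first-seen order
-    for line in sys.stdin:
-        line = line.rstrip()
-        if line and not line.startswith(">"):
-            counts[line] = counts.get(line, 0) + 1
-
-    for i, (seq, n) in enumerate(counts.items(), 1):
-        print(">%d-%d" % (i, n))
-        print(seq)
-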
-</help>
-</tool>
--- a/tools/fastx_toolkit/fastx_nucleotides_distribution.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-<tool id="cshl_fastx_nucleotides_distribution" name="Draw nucleotides distribution chart">
-	<description></description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-	<command>fastx_nucleotide_distribution_graph.sh -t '$input.name' -i $input -o $output</command>
-	
-	<inputs>
-		<param format="txt" name="input" type="data" label="Statistics Text File" help="output of 'FASTX Statistics' tool" />
-	</inputs>
-	
-	<outputs>
-		<data format="png" name="output" metadata_source="input" />
-	</outputs>
-<help>
-
-**What it does**
-
-Creates a stacked-histogram graph for the nucleotide distribution in the Solexa library.
-
-.. class:: infomark
-
-**TIP:** Use the **FASTQ Statistics** tool to generate the report file needed for this tool.
-
------
-
-**Output Examples**
-
-The following chart clearly shows the barcode used at the 5'-end of the library: **GATCT**
-
-.. image:: ./static/fastx_icons/fastq_nucleotides_distribution_1.png
- 
-In the following chart, one can almost 'read' the most abundant sequence by looking at the dominant values: **TGATA TCGTA TTGAT GACTG AA...**
-
-.. image:: ./static/fastx_icons/fastq_nucleotides_distribution_2.png
-
-The following chart shows a growing number of unknown (N) nucleotides towards later cycles (which might indicate a sequencing problem):
-
-.. image:: ./static/fastx_icons/fastq_nucleotides_distribution_3.png
-
-But most of the time, the chart will look rather random:
-
-.. image:: ./static/fastx_icons/fastq_nucleotides_distribution_4.png
-
-------
-
-This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
-
- .. __: http://hannonlab.cshl.edu/fastx_toolkit/
-
-</help>
-</tool>
-<!-- FASTQ-Nucleotides-Distribution is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
--- a/tools/fastx_toolkit/fastx_quality_statistics.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,70 +0,0 @@
-<tool id="cshl_fastx_quality_statistics" name="Compute quality statistics">
-	<description></description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-	<command>zcat -f $input | fastx_quality_stats -o $output -Q 33</command>
-
-	<inputs>
-		<param format="fastqsanger" name="input" type="data" label="Library to analyse" />
-	</inputs>
-
-	<tests>
-		<test>
-			<param name="input" value="fastq_stats1.fastq" ftype="fastqsanger"/>
-			<output name="output" file="fastq_stats1.out" />
-		</test>
-	</tests>
-
-	<outputs>
-		<data format="txt" name="output" metadata_source="input" />
-	</outputs>
-
-<help>
-
-**What it does**
-
-Creates a quality statistics report for the given Solexa/FASTQ library.
-
-.. class:: infomark
-
-**TIP:** This statistics report can be used as input for **Quality Score** and **Nucleotides Distribution** tools.
-
------
-
-**The output file will contain the following fields:**
-
-* column	= column number (1 to 36 for a 36-cycle Solexa read file)
-* count   = number of bases found in this column.
-* min     = Lowest quality score value found in this column.
-* max     = Highest quality score value found in this column.
-* sum     = Sum of quality score values for this column.
-* mean    = Mean quality score value for this column.
-* Q1	= 1st quartile quality score.
-* med	= Median quality score.
-* Q3	= 3rd quartile quality score.
-* IQR	= Inter-Quartile range (Q3-Q1).
-* lW	= 'Left-Whisker' value (for boxplotting).
-* rW	= 'Right-Whisker' value (for boxplotting).
-* A_Count	= Count of 'A' nucleotides found in this column.
-* C_Count	= Count of 'C' nucleotides found in this column.
-* G_Count	= Count of 'G' nucleotides found in this column.
-* T_Count	= Count of 'T' nucleotides found in this column.
-* N_Count = Count of 'N' nucleotides found in this column.  
-
-
-For example::
-
-     1  6362991 -4 40 250734117 39.41 40 40 40  0 40 40 1396976 1329101  678730 2958184   0
-     2  6362991 -5 40 250531036 39.37 40 40 40  0 40 40 1786786 1055766 1738025 1782414   0
-     3  6362991 -5 40 248722469 39.09 40 40 40  0 40 40 2296384  984875 1443989 1637743   0
-     4  6362991 -4 40 248214827 39.01 40 40 40  0 40 40 2536861 1167423 1248968 1409739   0
-    36  6362991 -5 40 117158566 18.41  7 15 30 23 -5 40 4074444 1402980   63287  822035 245
-    
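-A sketch of how the simpler columns (count/min/max/sum/mean) can be derived from per-read score lists; the quartile and whisker columns would follow the usual boxplot definitions::
-
-    def column_stats(reads):
-        """Print count/min/max/sum/mean per cycle for a list of score lists."""
-        ncols = max(len(r) for r in reads)
-        for col in range(ncols):
-            vals = [r[col] for r in reads if len(r) > col]
-            total = sum(vals)
-            print("%d\t%d\t%d\t%d\t%d\t%.2f" % (
-                col + 1, len(vals), min(vals), max(vals),
-                total, float(total) / len(vals)))
-
-    column_stats([[40, 40, 30], [40, 20, 10], [35, 30, 25]])
-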
-------
-
-This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
-
- .. __: http://hannonlab.cshl.edu/fastx_toolkit/
-
-</help>
- </tool>
-<!-- FASTQ-Statistics is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
--- a/tools/fastx_toolkit/fastx_renamer.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-<tool id="cshl_fastx_renamer" name="Rename sequences" version="0.0.11" >
-	<description></description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-	<command>zcat -f $input | fastx_renamer -n $TYPE -o $output -v 
-#if $input.ext == "fastqsanger":
--Q 33
-#end if
-	</command>
-
-	<inputs>
-		<param format="fastqsolexa,fasta,fastqsanger" name="input" type="data" label="FASTQ/A Library to rename" />
-
-		<param name="TYPE" type="select" label="Rename sequence identifiers to">
-			<option value="SEQ">Nucleotides sequence</option>
-			<option value="COUNT">Numeric Counter</option>
-		</param>
-	</inputs>
-
-	<outputs>
-		<data format="input" name="output" metadata_source="input" />
-	</outputs>
-
-<help>
-
-**What it does**
-
-This tool renames the sequence identifiers in a FASTQ/A file.
-
-.. class:: infomark
-
-Use this tool at the beginning of your workflow to preserve the original sequences (before trimming, clipping, barcode removal, etc.).
-
---------
-
-**Example**
-
-The following Solexa-FASTQ file::
-
-    @CSHL_4_FC042GAMMII_2_1_517_596
-    GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT
-    +CSHL_4_FC042GAMMII_2_1_517_596
-    40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40
-  
-Renamed to **nucleotides sequence**::
-
-    @GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT
-    GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT
-    +GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT
-    40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40
-
-Renamed to **numeric counter**::
-
-    @1
-    GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT
-    +1
-    40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40
-
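-A sketch of the renaming pass over four-line FASTQ records (the mode names mirror the tool's SEQ/COUNT options)::
-
-    import sys
-
-    def rename_fastq(handle, mode="SEQ"):
-        """Rewrite each record's identifier as its sequence or a counter."""
-        counter = 0
-        while True:
-            header = handle.readline()
-            if not header:
-                break
-            seq = handle.readline().rstrip()
-            handle.readline()  # old '+' line (replaced)
-            qual = handle.readline().rstrip()
-            counter += 1
-            new_id = seq if mode == "SEQ" else str(counter)
-            print("@" + new_id)
-            print(seq)
-            print("+" + new_id)
-            print(qual)
-
-    rename_fastq(sys.stdin, mode="COUNT")
-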
-------
-
-This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
-
- .. __: http://hannonlab.cshl.edu/fastx_toolkit/   
-</help>
-</tool>
-<!-- FASTQ-to-FASTA is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
--- a/tools/fastx_toolkit/fastx_reverse_complement.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-<tool id="cshl_fastx_reverse_complement" name="Reverse-Complement">
-	<description></description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-	<command>zcat -f '$input' | fastx_reverse_complement -v -o $output
-#if $input.ext == "fastqsanger":
--Q 33
-#end if
-	</command>
-	<inputs>
-		<param format="fasta,fastqsolexa,fastqsanger" name="input" type="data" label="Library to reverse-complement" />
-	</inputs>
-
-	<tests>
-		<test>
-			<!-- Reverse-complement a FASTA file -->
-			<param name="input" value="fastx_rev_comp1.fasta" /> 
-			<output name="output" file="fastx_reverse_complement1.out" />
-		</test>
-		<test>
-			<!-- Reverse-complement a FASTQ file -->
-			<param name="input" value="fastx_rev_comp2.fastq" ftype="fastqsolexa"/>
-			<output name="output" file="fastx_reverse_complement2.out" />
-		</test>
-	</tests>
-
-  
-	<outputs>
-		<data format="input" name="output" metadata_source="input" />
-	</outputs>
-
-<help>
-**What it does**
-
-This tool reverse-complements each sequence in a library.
-If the library is a FASTQ, the quality scores are also reversed.
-  
---------
-
-**Example**
-
-Input FASTQ file::
-
-    @CSHL_1_FC42AGWWWXX:8:1:3:740
-    TGTCTGTAGCCTCNTCCTTGTAATTCAAAGNNGGTA
-    +CSHL_1_FC42AGWWWXX:8:1:3:740
-    33 33 33 34 33 33 33 33 33 33 33 33 27 5 27 33 33 33 33 33 33 27 21 27 33 32 31 29 26 24 5 5 15 17 27 26
-
-
-Output FASTQ file::
-
-    @CSHL_1_FC42AGWWWXX:8:1:3:740
-    TACCNNCTTTGAATTACAAGGANGAGGCTACAGACA
-    +CSHL_1_FC42AGWWWXX:8:1:3:740
-    26 27 17 15 5 5 24 26 29 31 32 33 27 21 27 33 33 33 33 33 33 27 5 27 33 33 33 33 33 33 33 33 34 33 33 33
-
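-The operation pairs a base-wise complement with a reversal, and the quality list is reversed in step with the sequence. A minimal sketch::
-
-    def reverse_complement(seq, quals=None):
-        """Reverse-complement a sequence; reverse any matching quality list."""
-        comp = {"A": "T", "T": "A", "C": "G", "G": "C", "N": "N",
-                "a": "t", "t": "a", "c": "g", "g": "c", "n": "n"}
-        rc = "".join(comp[b] for b in reversed(seq))
-        return (rc, quals[::-1]) if quals is not None else rc
-
-    print(reverse_complement("TGTCN", [33, 33, 34, 27, 5]))
-    # ('NGACA', [5, 27, 34, 33, 33])
-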
-------
-
-This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
-
- .. __: http://hannonlab.cshl.edu/fastx_toolkit/
- 
-</help>
-</tool>
--- a/tools/fastx_toolkit/fastx_trimmer.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-<tool id="cshl_fastx_trimmer" name="Trim sequences">
-	<description></description>
-	<requirements><requirement type="package">fastx_toolkit</requirement></requirements>
-	<command>zcat -f '$input' | fastx_trimmer -v -f $first -l $last -o $output
-#if $input.ext == "fastqsanger":
--Q 33
-#end if
-	</command>
-
-	<inputs>
-		<param format="fasta,fastqsolexa,fastqsanger" name="input" type="data" label="Library to clip" />
-
-		<param name="first" size="4" type="integer" value="1">
-			<label>First base to keep</label>
-		</param>
-
-		<param name="last" size="4" type="integer" value="21">
-			<label>Last base to keep</label>
-		</param>
-	</inputs>
-
-	<tests>
-		<test>
-			<!-- Trim a FASTA file - remove first four bases (e.g. a barcode) -->
-			<param name="input" value="fastx_trimmer1.fasta" />
-			<param name="first" value="5"/>
-			<param name="last" value="36"/>
-			<output name="output" file="fastx_trimmer1.out" />
-		</test>
-		<test>
-			<!-- Trim a FASTQ file - remove last 9 bases (e.g. keep only miRNA length sequences) -->
-			<param name="input" value="fastx_trimmer2.fastq" ftype="fastqsolexa"/>
-			<param name="first" value="1"/>
-			<param name="last" value="27"/>
-			<output name="output" file="fastx_trimmer2.out" />
-		</test>
-	</tests>
-
-	<outputs>
-		<data format="input" name="output" metadata_source="input" />
-	</outputs>
-	<help>
-**What it does**
-
-This tool trims (cuts bases from) sequences in a FASTA/Q file.
-  
---------
-
-**Example**
-
-Input FASTA file (with 36 bases in each sequence)::
-
-    >1-1
-    TATGGTCAGAAACCATATGCAGAGCCTGTAGGCACC
-    >2-1
-    CAGCGAGGCTTTAATGCCATTTGGCTGTAGGCACCA
-    
-
-Trimming with First=1 and Last=21, we get a FASTA file with 21 bases in each sequence (starting from the first base)::
-
-    >1-1
-    TATGGTCAGAAACCATATGCA
-    >2-1
-    CAGCGAGGCTTTAATGCCATT
-
-Trimming with First=6 and Last=10 will generate a FASTA file with 5 bases (bases 6, 7, 8, 9 and 10) in each sequence::
-
-    >1-1
-    TCAGA
-    >2-1
-    AGGCT
-    
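-The trim itself is a 1-based, inclusive slice, as in this sketch::
-
-    def trim(seq, first, last):
-        """Keep bases first..last (1-based, inclusive)."""
-        return seq[first - 1:last]
-
-    print(trim("TATGGTCAGAAACCATATGCAGAGCCTGTAGGCACC", 6, 10))  # TCAGA
-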
-------
-
-This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
-
- .. __: http://hannonlab.cshl.edu/fastx_toolkit/
-    
-</help>
-</tool>
-<!-- FASTX-Trimmer is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
--- a/tools/filters/CreateInterval.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-#! /usr/bin/perl -w
-
-# Accepts chrom, start, end, name, and strand
-# If strand is void sets it to plus
-# CreateInterval.pl $chrom $start $end $name $strand $output
-
-my $strand = "+";
-
-die "Not enough arguments\n" unless @ARGV == 6;
-
-open OUT, ">$ARGV[5]" or die "Cannot open $ARGV[5]:$!\n";
-
-$strand = "-" if $ARGV[4] eq "minus";
-$ARGV[3] =~ s/\s+/_/g;
-$ARGV[3] =~ s/\t+/_/g;
-
-print OUT "$ARGV[0]\t$ARGV[1]\t$ARGV[2]\t$ARGV[3]\t0\t$strand\n";
-close OUT;
-
--- a/tools/filters/CreateInterval.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,56 +0,0 @@
-<tool id="createInterval" name="Create single interval">
-  <description>as a new dataset</description>
-  <command interpreter="perl">CreateInterval.pl $chrom $start $end "$name" $strand $out_file1</command>
-  <inputs>
-    <param name="chrom" size="20" type="text" value="chr7" label="Chromosome"/>
-    <param name="start" size="20" type="integer" value="100" label="Start position"/>
-    <param name="end"   size="20" type="integer" value="1000" label="End position"/>
-    <param name="name" size="20" type="text" value="myInterval" label="Name"/>
-    <param name="strand" type="select" label="Strand" help="If your interval is strandless set strand to plus" >
-      <option value="plus">plus</option>
-      <option value="minus">minus</option>
-    </param>    
-  </inputs>
-  <outputs>
-    <data format="bed" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="chrom" value="chr7"/>
-      <param name="start" value="100"/>
-      <param name="end" value="1000"/>
-      <param name="name" value="myinterval"/>
-      <param name="strand" value="plus"/>
-      <output name="out_file1" file="eq-createinterval.dat"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-**TIP**. Once your interval appears in history, you must tell Galaxy which genome it belongs to by clicking pencil icon or the "?" link in the history item.
-
------
-
-**What it does**
-
-This tool allows you to create a single genomic interval. The resulting history item will be in the BED format.
-
------
-
-**Example**
-
-Typing the following values in the form::
-
-    Chromosome: chrX
-    Start position: 151087187
-    End position: 151370486
-    Name: NM_000808
-    Strand: minus
-
-will create a single interval::
-
-    chrX  151087187  151370486  NM_000808  0  -
-
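-A Python sketch of the same line construction the Perl script performs (whitespace in the name becomes underscores, and the score column is fixed at 0)::
-
-    def create_interval(chrom, start, end, name, strand="plus"):
-        """Build the 6-column BED line."""
-        symbol = "-" if strand == "minus" else "+"
-        name = "_".join(str(name).split())
-        return "\t".join([chrom, str(start), str(end), name, "0", symbol])
-
-    print(create_interval("chrX", 151087187, 151370486, "NM_000808", "minus"))
-    # chrX	151087187	151370486	NM_000808	0	-
-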
-</help>
-</tool>
--- a/tools/filters/axt_to_concat_fasta.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-"""
-Adapted from bx/scripts/axt_to_concat_fasta.py
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-
-import sys
-import bx.align.axt
-
-def usage(s=None):
-	message = """
-axt_to_fasta species1 species2 < axt_file > fasta_file
-"""
-	if (s == None): sys.exit (message)
-	else:           sys.exit ("%s\n%s" % (s,message))
-
-
-def main():
-
-	# check the command line
-	species1 = sys.argv[1]
-	species2 = sys.argv[2]
-
-	# convert the alignment blocks
-
-	reader = bx.align.axt.Reader(sys.stdin,support_ids=True,\
-	                             species1=species1,species2=species2)
-	sp1text = list()
-	sp2text = list()
-	for a in reader:
-		sp1text.append(a.components[0].text)
-		sp2text.append(a.components[1].text)
-	sp1seq = "".join(sp1text)
-	sp2seq = "".join(sp2text)
-	print_component_as_fasta(sp1seq,species1)
-	print_component_as_fasta(sp2seq,species2)
-		
-
-
-# $$$ this should be moved to a bx.align.fasta module
-
-def print_component_as_fasta(text,src):
-	header = ">" + src
-	print header
-	print text
-
-
-if __name__ == "__main__": main()
-
--- a/tools/filters/axt_to_concat_fasta.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-<tool id="axt_to_concat_fasta" name="AXT to concatenated FASTA">
-  <description>Converts an AXT formatted file to a concatenated FASTA alignment</description>
-  <command interpreter="python">axt_to_concat_fasta.py $dbkey_1 $dbkey_2 &lt; $axt_input &gt; $out_file1</command>
-  <inputs>
-    <param format="axt" name="axt_input" type="data" label="AXT file"/>
-    <param name="dbkey_1" type="genomebuild" label="Genome"/>
-    <param name="dbkey_2" type="genomebuild" label="Genome"/>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="axt_input" value="1.axt" ftype="axt" />
-      <param name="dbkey_1" value='hg17' />
-      <param name="dbkey_2" value="panTro1" />
-      <output name="out_file1" file="axt_to_concat_fasta.dat" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-**IMPORTANT**: AXT formatted alignments will be phased out from Galaxy in the coming weeks. They will be replaced with pairwise MAF alignments, which are already available. To try pairwise MAF alignments, use the "Extract Pairwise MAF blocks" tool in the *Fetch Sequences and Alignments* section.
-
---------
-
-**Syntax**
-
-This tool converts an AXT formatted file to the FASTA format, concatenating all alignment blocks of the same build into a single sequence.
-
-- **AXT format** The alignments are produced from Blastz, an alignment tool available from Webb Miller's lab at Penn State University. The lav format Blastz output, which does not include the sequence, was converted to AXT format with lavToAxt. Each alignment block in an AXT file contains three lines: a summary line and 2 sequence lines. Blocks are separated from one another by blank lines. 
-
-- **FASTA format** a text-based format for representing both nucleic acid and protein sequences, in which bases or amino acids are represented using a single-letter code.
-
-  - The format begins with a one-line header that starts with a ">" symbol. The first word on this line is the name of the sequence; the rest of the line is a description of the sequence.
-  - The remaining lines contain the sequence itself.
-  - Blank lines in a FASTA file are ignored, and so are spaces or other gap symbols (dashes, underscores, periods) in a sequence.
-  - FASTA files containing multiple sequences follow the same layout, with one sequence listed right after another. This format is accepted by many multiple sequence alignment programs.
-
------
-
-**Example**
-
-- AXT format::
-
-    0 chr19 3001012 3001075 chr11 70568380 70568443 - 3500
-    TCAGCTCATAAATCACCTCCTGCCACAAGCCTGGCCTGGTCCCAGGAGAGTGTCCAGGCTCAGA
-    TCTGTTCATAAACCACCTGCCATGACAAGCCTGGCCTGTTCCCAAGACAATGTCCAGGCTCAGA
-
-    1 chr19 3008279 3008357 chr11 70573976 70574054 - 3900
-    CACAATCTTCACATTGAGATCCTGAGTTGCTGATCAGAATGGAAGGCTGAGCTAAGATGAGCGACGAGGCAATGTCACA
-    CACAGTCTTCACATTGAGGTACCAAGTTGTGGATCAGAATGGAAAGCTAGGCTATGATGAGGGACAGTGCGCTGTCACA
-
-- Convert the above file to concatenated FASTA format::
-
-    &gt;hg16
-    TCAGCTCATAAATCACCTCCTGCCACAAGCCTGGCCTGGTCCCAGGAGAGTGTCCAGGCTCAGACACAATCTTCACATTGAGATCCTGAGTTGCTGATCAGAATGGAAGGCTGAGCTAAGATGAGCGACGAGGCAATGTCACA
-    &gt;mm5
-    TCTGTTCATAAACCACCTGCCATGACAAGCCTGGCCTGTTCCCAAGACAATGTCCAGGCTCAGACACAGTCTTCACATTGAGGTACCAAGTTGTGGATCAGAATGGAAAGCTAGGCTATGATGAGGGACAGTGCGCTGTCACA
-
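-A sketch of reading AXT blocks by hand (assuming well-formed three-line blocks; the actual converter uses bx-python's bx.align.axt.Reader)::
-
-    def axt_blocks(handle):
-        """Yield (summary_fields, primary_seq, secondary_seq) per AXT block."""
-        block = []
-        for line in handle:
-            line = line.strip()
-            if not line:
-                continue  # blank lines separate blocks
-            block.append(line)
-            if len(block) == 3:
-                summary, primary, secondary = block
-                yield summary.split(), primary, secondary
-                block = []
-
-    with open("alignments.axt") as handle:  # hypothetical input file name
-        for summary, primary, secondary in axt_blocks(handle):
-            print(summary[1], primary, secondary)
-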
-  </help>
-</tool>
--- a/tools/filters/axt_to_fasta.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-#!/usr/bin/env python
-"""
-Adapted from bx/scripts/axt_to_fasta.py
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-
-import sys
-import bx.align.axt
-
-def usage(s=None):
-	message = """
-axt_to_fasta species1 species2 < axt_file > fasta_file
-"""
-	if (s == None): sys.exit (message)
-	else:           sys.exit ("%s\n%s" % (s,message))
-
-
-def main():
-
-	# check the command line
-	species1 = sys.argv[1]
-	species2 = sys.argv[2]
-
-	# convert the alignment blocks
-
-	reader = bx.align.axt.Reader(sys.stdin,support_ids=True,\
-	                             species1=species1,species2=species2)
-
-	for a in reader:
-		if ("id" in a.attributes): id = a.attributes["id"]
-		else:                      id = None
-		print_component_as_fasta(a.components[0],id)
-		print_component_as_fasta(a.components[1],id)
-		print
-
-
-# $$$ this should be moved to a bx.align.fasta module
-
-def print_component_as_fasta(c,id=None):
-	header = ">%s_%s_%s" % (c.src,c.start,c.start+c.size)
-	if (id != None): header += " " + id
-	print header
-	print c.text
-
-
-if __name__ == "__main__": main()
-
--- a/tools/filters/axt_to_fasta.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-<tool id="axt_to_fasta" name="AXT to FASTA">
-  <description>Converts an AXT formatted file to FASTA format</description>
-  <command interpreter="python">axt_to_fasta.py $dbkey_1 $dbkey_2 &lt; $axt_input &gt; $out_file1</command>
-  <inputs>
-    <param format="axt" name="axt_input" type="data" label="AXT file"/>
-    <param name="dbkey_1" type="genomebuild" label="Genome"/>
-    <param name="dbkey_2" type="genomebuild" label="Genome"/>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="axt_input" value="1.axt" ftype="axt" />
-      <param name="dbkey_1" value="hg17" />
-      <param name="dbkey_2" value="panTro1" />
-      <output name="out_file1" file="axt_to_fasta.dat" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-**IMPORTANT**: AXT formatted alignments will be phased out from Galaxy in the coming weeks. They will be replaced with pairwise MAF alignments, which are already available. To try pairwise MAF alignments, use the "Extract Pairwise MAF blocks" tool in the *Fetch Sequences and Alignments* section.
-
---------
-
-
-**Syntax**
-
-This tool converts an AXT formatted file to the FASTA format.
-
-- **AXT format** The alignments are produced from Blastz, an alignment tool available from Webb Miller's lab at Penn State University. The lav format Blastz output, which does not include the sequence, was converted to AXT format with lavToAxt. Each alignment block in an AXT file contains three lines: a summary line and 2 sequence lines. Blocks are separated from one another by blank lines.
-
-- **FASTA format** a text-based format for representing both nucleic acid and protein sequences, in which bases or amino acids are represented using a single-letter code.
-
-  - The format begins with a one-line header that starts with a ">" symbol. The first word on this line is the name of the sequence; the rest of the line is a description of the sequence.
-  - The remaining lines contain the sequence itself.
-  - Blank lines in a FASTA file are ignored, and so are spaces or other gap symbols (dashes, underscores, periods) in a sequence.
-  - FASTA files containing multiple sequences follow the same layout, with one sequence listed right after another. This format is accepted by many multiple sequence alignment programs.
-
------
-
-**Example**
-
-- AXT format::
-
-    0 chr19 3001012 3001075 chr11 70568380 70568443 - 3500
-    TCAGCTCATAAATCACCTCCTGCCACAAGCCTGGCCTGGTCCCAGGAGAGTGTCCAGGCTCAGA
-    TCTGTTCATAAACCACCTGCCATGACAAGCCTGGCCTGTTCCCAAGACAATGTCCAGGCTCAGA
-
-    1 chr19 3008279 3008357 chr11 70573976 70574054 - 3900
-    CACAATCTTCACATTGAGATCCTGAGTTGCTGATCAGAATGGAAGGCTGAGCTAAGATGAGCGACGAGGCAATGTCACA
-    CACAGTCTTCACATTGAGGTACCAAGTTGTGGATCAGAATGGAAAGCTAGGCTATGATGAGGGACAGTGCGCTGTCACA
-
-- Convert the above file to FASTA format::
-
-    &gt;hg16.chr19(+):3001012-3001075|hg16_0
-    TCAGCTCATAAATCACCTCCTGCCACAAGCCTGGCCTGGTCCCAGGAGAGTGTCCAGGCTCAGA
-    &gt;mm5.chr11(-):70568380-70568443|mm5_0
-    TCTGTTCATAAACCACCTGCCATGACAAGCCTGGCCTGTTCCCAAGACAATGTCCAGGCTCAGA
-
-    &gt;hg16.chr19(+):3008279-3008357|hg16_1
-    CACAATCTTCACATTGAGATCCTGAGTTGCTGATCAGAATGGAAGGCTGAGCTAAGATGAGCGACGAGGCAATGTCACA
-    &gt;mm5.chr11(-):70573976-70574054|mm5_1
-    CACAGTCTTCACATTGAGGTACCAAGTTGTGGATCAGAATGGAAAGCTAGGCTATGATGAGGGACAGTGCGCTGTCACA
-
-  </help>
-</tool>
--- a/tools/filters/axt_to_lav.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,176 +0,0 @@
-#!/usr/bin/env python
-"""
-Application to convert AXT file to LAV file
--------------------------------------------
-
-:Author: Bob Harris (rsharris@bx.psu.edu)
-:Version: $Revision: $
-
-The application reads an AXT file from standard input and writes a LAV file to
-standard out;  some statistics are written to standard error.
-"""
-
-import sys, copy
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import bx.align.axt
-import bx.align.lav
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def usage(s=None):
-    message = """
-axt_to_lav primary_spec secondary_spec [--silent] < axt_file > lav_file
-  Each spec is of the form seq_file[:species_name]:lengths_file.
-
-  seq_file should be a format string for the file names for the individual
-  sequences, with %s to be replaced by the alignment's src field.  For example,
-  "hg18/%s.nib" would prescribe files named "hg18/chr1.nib", "hg18/chr2.nib",
-  etc.
-
-  species_name is optional.  If present, it is prepended to the alignment's src
-  field.
-
-  Lengths files provide the length of each chromosome (lav format needs this
-  information but axt file does not contain it).  The format is a series of
-  lines of the form
-    <chromosome name> <length>
-  The chromosome field in each axt block must match some <chromosome name> in
-  the lengths file.
-"""
-    if (s == None): sys.exit (message)
-    else:           sys.exit ("%s\n%s" % (s,message))
-
-
-def main():
-    global debug
-
-    # parse the command line
-
-    primary   = None
-    secondary = None
-    silent    = False
-
-    # pick off options
-
-    args = sys.argv[1:]
-    seq_file2 = open(args.pop(-1),'w')
-    seq_file1 = open(args.pop(-1),'w')
-    lav_out = args.pop(-1)
-    axt_in = args.pop(-1)
-    while (len(args) > 0):
-        arg = args.pop(0)
-        val = None
-        fields = arg.split("=",1)
-        if (len(fields) == 2):
-            arg = fields[0]
-            val = fields[1]
-            if (val == ""):
-                usage("missing a value in %s=" % arg)
-
-        if (arg == "--silent") and (val == None):
-            silent = True
-        elif (primary == None) and (val == None):
-            primary = arg
-        elif (secondary == None) and (val == None):
-            secondary = arg
-        else:
-            usage("unknown argument: %s" % arg)
-
-    if (primary == None):
-        usage("missing primary file name and length")
-
-    if (secondary == None):
-        usage("missing secondary file name and length")
-
-    try:
-        (primaryFile,primary,primaryLengths) = parse_spec(primary)
-    except:
-        usage("bad primary spec (must be seq_file[:species_name]:lengths_file")
-
-    try:
-        (secondaryFile,secondary,secondaryLengths) = parse_spec(secondary)
-    except:
-        usage("bad secondary spec (must be seq_file[:species_name]:lengths_file")
-
-    # read the lengths
-
-    speciesToLengths = {}
-    speciesToLengths[primary]   = read_lengths (primaryLengths)
-    speciesToLengths[secondary] = read_lengths (secondaryLengths)
-
-    # read the alignments
-
-    out = bx.align.lav.Writer(open(lav_out,'w'), \
-            attributes = { "name_format_1" : primaryFile,
-                           "name_format_2" : secondaryFile })
-
-    axtsRead = 0
-    axtsWritten = 0
-    for axtBlock in bx.align.axt.Reader(open(axt_in), \
-            species_to_lengths = speciesToLengths,
-            species1           = primary,
-            species2           = secondary,
-            support_ids        = True):
-        axtsRead += 1
-        out.write (axtBlock)
-        primary_c = axtBlock.get_component_by_src_start(primary)
-        secondary_c = axtBlock.get_component_by_src_start(secondary)
-        
-        print >>seq_file1, ">%s_%s_%s_%s" % (primary_c.src,secondary_c.strand,primary_c.start,primary_c.start+primary_c.size)
-        print >>seq_file1,primary_c.text
-        print >>seq_file1
-        
-        print >>seq_file2, ">%s_%s_%s_%s" % (secondary_c.src,secondary_c.strand,secondary_c.start,secondary_c.start+secondary_c.size)
-        print >>seq_file2,secondary_c.text
-        print >>seq_file2
-        axtsWritten += 1
-
-    out.close()
-    seq_file1.close()
-    seq_file2.close()
-
-    if (not silent):
-        sys.stdout.write ("%d blocks read, %d written\n" % (axtsRead,axtsWritten))
-
-def parse_spec(spec): # returns (seq_file,species_name,lengths_file)
-    fields = spec.split(":")
-    if   (len(fields) == 2): return (fields[0],"",fields[1])
-    elif (len(fields) == 3): return (fields[0],fields[1],fields[2])
-    else:                    raise ValueError
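-
-# A hypothetical illustration of the spec grammar accepted by parse_spec
-# (paths and species names below are made up):
-#   parse_spec("hg18/%s.nib:hg18:hg18.len") -> ("hg18/%s.nib", "hg18", "hg18.len")
-#   parse_spec("hg18/%s.nib:hg18.len")      -> ("hg18/%s.nib", "", "hg18.len")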
-
-def read_lengths (fileName):
-
-    chromToLength = {}
-
-    f = file (fileName, "r")
-
-    for lineNumber,line in enumerate(f):
-        line = line.strip()
-        if (line == ""): continue
-        if (line.startswith("#")): continue
-
-        fields = line.split ()
-        if (len(fields) != 2):
-            raise ValueError("bad lengths line (%s:%d): %s" % (fileName,lineNumber,line))
-
-        chrom = fields[0]
-        try:
-            length = int(fields[1])
-        except ValueError:
-            raise ValueError("bad lengths line (%s:%d): %s" % (fileName,lineNumber,line))
-
-        if (chrom in chromToLength):
-            raise ValueError("%s appears more than once (%s:%d)" \
-                % (chrom,fileName,lineNumber))
-
-        chromToLength[chrom] = length
-
-    f.close ()
-
-    return chromToLength
-
-
-if __name__ == "__main__": main()
-
--- a/tools/filters/axt_to_lav.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-<tool id="axt_to_lav_1" name="AXT to LAV">
-  <description>Converts an AXT formatted file to LAV format</description>
-  <command interpreter="python">axt_to_lav.py /galaxy/data/$dbkey_1/seq/%s.nib:$dbkey_1:${GALAXY_DATA_INDEX_DIR}/shared/ucsc/chrom/${dbkey_1}.len /galaxy/data/$dbkey_2/seq/%s.nib:$dbkey_2:${GALAXY_DATA_INDEX_DIR}/shared/ucsc/chrom/${dbkey_2}.len $align_input $lav_file $seq_file1 $seq_file2</command>
-  <inputs>
-    <param name="align_input" type="data" format="axt" label="Alignment File" optional="False"/>
-    <param name="dbkey_1" type="genomebuild" label="Genome"/>
-    <param name="dbkey_2" type="genomebuild" label="Genome"/>
-  </inputs>
-  <outputs>
-    <data name="lav_file" format="lav"/>
-    <data name="seq_file1" format="fasta" parent="lav_file"/>
-    <data name="seq_file2" format="fasta" parent="lav_file"/>
-  </outputs>
-  <help>
-
-.. class:: warningmark
-
-**IMPORTANT**: AXT formatted alignments will be phased out of Galaxy in the coming weeks. They will be replaced with pairwise MAF alignments, which are already available. To try pairwise MAF alignments, use the "Extract Pairwise MAF blocks" tool in the *Fetch Sequences and Alignments* section.
-
---------
-
-
-**Syntax**
-
-This tool converts an AXT formatted file to the LAV format.
-
-- **AXT format** The alignments are produced by Blastz, an alignment tool available from Webb Miller's lab at Penn State University. Blastz's native LAV output, which does not include the sequences, is converted to AXT format with lavToAxt. Each alignment block in an AXT file contains three lines: a summary line and 2 sequence lines. Blocks are separated from one another by blank lines.
-
-- **LAV format** LAV is an alignment format developed by Webb Miller's group. It is the primary output format for BLASTZ.
-
-- **FASTA format** a text-based format for representing both nucleic acid and protein sequences, in which nucleotides or amino acids are represented using single-letter codes.
-
-  - This format begins with a one-line header. It starts with a ">" symbol. The first word on this line is the name of the sequence. The rest of the line is a description of the sequence.
-  - The remaining lines contain the sequence itself.
-  - Blank lines in a FASTA file are ignored, and so are spaces or other gap symbols (dashes, underscores, periods) in a sequence.
-  - FASTA files containing multiple sequences simply list one sequence right after another. This format is accepted by many multiple sequence alignment programs.
-
------
-
-**Example**
-
-- AXT format::
-
-    0 chr19 3001012 3001075 chr11 70568380 70568443 - 3500
-    TCAGCTCATAAATCACCTCCTGCCACAAGCCTGGCCTGGTCCCAGGAGAGTGTCCAGGCTCAGA
-    TCTGTTCATAAACCACCTGCCATGACAAGCCTGGCCTGTTCCCAAGACAATGTCCAGGCTCAGA
-
-    1 chr19 3008279 3008357 chr11 70573976 70574054 - 3900
-    CACAATCTTCACATTGAGATCCTGAGTTGCTGATCAGAATGGAAGGCTGAGCTAAGATGAGCGACGAGGCAATGTCACA
-    CACAGTCTTCACATTGAGGTACCAAGTTGTGGATCAGAATGGAAAGCTAGGCTATGATGAGGGACAGTGCGCTGTCACA
-
-- Convert the above file to LAV format::
-
-    #:lav
-    s {
-      &quot;/galaxy/data/hg16/seq/chr19.nib&quot; 1 63811651 0 1
-      &quot;/galaxy/data/mm5/seq/chr11.nib-&quot; 1 121648857 0 1
-    }
-    h {
-      &quot;> hg16.chr19&quot;
-      &quot;> mm5.chr11 (reverse complement)&quot;
-    }
-    a {
-      s 3500
-      b 3001012 70568380
-      e 3001075 70568443
-      l 3001012 70568380 3001075 70568443 81
-    }
-    a {
-      s 3900
-      b 3008279 70573976
-      e 3008357 70574054
-      l 3008279 70573976 3008357 70574054 78
-    }
-    #:eof
-
-- With two files in the FASTA format::
-
-    &gt;hg16.chr19_-_3001011_3001075
-    TCAGCTCATAAATCACCTCCTGCCACAAGCCTGGCCTGGTCCCAGGAGAGTGTCCAGGCTCAGA
-    
-    &gt;hg16.chr19_-_3008278_3008357
-    CACAATCTTCACATTGAGATCCTGAGTTGCTGATCAGAATGGAAGGCTGAGCTAAGATGAGCGACGAGGCAATGTCACA
-    
- **and**::
-    
-    &gt;mm5.chr11_-_70568379_70568443
-    TCTGTTCATAAACCACCTGCCATGACAAGCCTGGCCTGTTCCCAAGACAATGTCCAGGCTCAGA
-    
-    &gt;mm5.chr11_-_70573975_70574054
-    CACAGTCTTCACATTGAGGTACCAAGTTGTGGATCAGAATGGAAAGCTAGGCTATGATGAGGGACAGTGCGCTGTCACA
-  </help>
-  <code file="axt_to_lav_code.py"/>
-</tool>
--- a/tools/filters/axt_to_lav_code.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-
-def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
-    for name,data in out_data.items():
-        if name == "seq_file2":
-            data.dbkey = param_dict['dbkey_2']
-            app.model.context.add( data )
-            app.model.context.flush()
-            break
\ No newline at end of file
--- a/tools/filters/bed2gff.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-<tool id="bed2gff1" name="BED-to-GFF" version="2.0.0">
-  <description>converter</description>
-  <command interpreter="python">bed_to_gff_converter.py $input $out_file1</command>
-  <inputs>
-    <param format="bed" name="input" type="data" label="Convert this query"/>
-  </inputs>
-  <outputs>
-    <data format="gff" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="9.bed"/>
-      <output name="out_file1" file="bed2gff_out.gff"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool converts data from BED format to GFF format (scroll down for format description).
-
---------
-
-**Example**
-
-The following data in BED format::
-
-	chr28	346187	388197	BC114771	0	+	346187	388197	0	9	144,81,115,63,155,96,134,105,112,	0,24095,26190,31006,32131,33534,36994,41793,41898,
-
-Will be converted to GFF (**note** that the start coordinate is incremented by 1)::
-
-	##gff-version 2
-	##bed_to_gff_converter.py
-
-	chr28	bed2gff	mRNA	346188	388197	0	+	.	mRNA BC114771;
-	chr28	bed2gff	exon	346188	346331	0	+	.	exon BC114771;
-	chr28	bed2gff	exon	370283	370363	0	+	.	exon BC114771;
-	chr28	bed2gff	exon	372378	372492	0	+	.	exon BC114771;
-	chr28	bed2gff	exon	377194	377256	0	+	.	exon BC114771;
-	chr28	bed2gff	exon	378319	378473	0	+	.	exon BC114771;
-	chr28	bed2gff	exon	379722	379817	0	+	.	exon BC114771;
-	chr28	bed2gff	exon	383182	383315	0	+	.	exon BC114771;
-	chr28	bed2gff	exon	387981	388085	0	+	.	exon BC114771;
-	chr28	bed2gff	exon	388086	388197	0	+	.	exon BC114771;
-
-
-------
-
-.. class:: infomark
-
-**About formats**
-
-**BED format** Browser Extensible Data format was designed at UCSC for displaying data tracks in the Genome Browser. It has three required fields and several additional optional ones:
-
-The first three BED fields (required) are::
-
-    1. chrom - The name of the chromosome (e.g. chr1, chrY_random).
-    2. chromStart - The starting position in the chromosome. (The first base in a chromosome is numbered 0.)
-    3. chromEnd - The ending position in the chromosome, plus 1 (i.e., a half-open interval).
-
-The additional BED fields (optional) are::
-
-    4. name - The name of the BED line.
-    5. score - A score between 0 and 1000.
-    6. strand - Defines the strand - either '+' or '-'.
-    7. thickStart - The starting position where the feature is drawn thickly at the Genome Browser.
-    8. thickEnd - The ending position where the feature is drawn thickly at the Genome Browser.
-    9. reserved - This should always be set to zero.
-   10. blockCount - The number of blocks (exons) in the BED line.
-   11. blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount.
-   12. blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount.
-   13. expCount - The number of experiments.
-   14. expIds - A comma-separated list of experiment ids. The number of items in this list should correspond to expCount.
-   15. expScores - A comma-separated list of experiment scores. All of the expScores should be relative to expIds. The number of items in this list should correspond to expCount.
-
-**GFF format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF lines have nine tab-separated fields::
-
-    1. seqname - Must be a chromosome or scaffold.
-    2. source - The program that generated this feature.
-    3. feature - The name of this type of feature. Some examples of standard feature types are "CDS", "start_codon", "stop_codon", and "exon".
-    4. start - The starting position of the feature in the sequence. The first base is numbered 1.
-    5. end - The ending position of the feature (inclusive).
-    6. score - A score between 0 and 1000. If there is no score value, enter ".".
-    7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
-    8. frame - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
-    9. group - All lines with the same group are linked together into a single item.
-
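-For reference, the coordinate shift applied by the converter can be written as a
-minimal Python sketch (values are taken from the example above; this is
-illustrative, not the converter itself)::
-
-    chrom_start, chrom_end = 346187, 388197          # BED: 0-based, half-open
-    gff_start, gff_end = chrom_start + 1, chrom_end  # GFF: 1-based, inclusive
-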
-</help>
-</tool>
--- a/tools/filters/bed_to_bigbed.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-<tool id="bed_to_bigBed" name="BED-to-bigBed" version="1.0.0">
-  <description>converter</description>
-  <command>bedToBigBed $input1 $chromInfo $out_file1 
-    #if $settings.settingsType == "full":
-      -blockSize=${settings.blockSize} -itemsPerSlot=${settings.itemsPerSlot} ${settings.unc}
-    #end if
-    2&gt;&amp;1 || echo "Error running bedToBigBed." >&amp;2
-  </command>
-  <requirements>
-    <requirement type="package">ucsc_tools</requirement>
-  </requirements>
-  <inputs>
-    <param format="bed" name="input1" type="data" label="Convert">
-      <validator type="unspecified_build" />
-    </param>
-    <conditional name="settings">
-      <param name="settingsType" type="select" label="Converter settings to use" help="Default settings should usually be used.">
-        <option value="preset">Default</option>
-        <option value="full">Full parameter list</option>
-      </param>
-      <when value="preset" />
-      <when value="full">
-        <param name="blockSize" size="4" type="integer" value="256" label="Items to bundle in r-tree" help="Default is 256 (blockSize)" />
-        <param name="itemsPerSlot" size="4" type="integer" value="512" label="Data points bundled at lowest level" help="Default is 512 (itemsPerSlot)" />
-        <param name="unc" type="boolean" truevalue="-unc" falsevalue="" checked="False" label="Do not use compression" help="(unc)"/>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="bigbed" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="7.bed" dbkey="hg17" />
-      <param name="settingsType" value="full" />
-      <param name="blockSize" value="256" />
-      <param name="itemsPerSlot" value="512" />
-      <param name="unc" value="False" />
-      <output name="out_file1" file="7.bigbed"/>
-    </test>
-    <test>
-      <param name="input1" value="7.bed" dbkey="hg17" />
-      <param name="settingsType" value="preset" />
-      <output name="out_file1" file="7.bigbed"/>
-    </test>
-  </tests>
-  <help>
-
-This tool converts a **sorted** BED file into a bigBed file.
-
-Currently, the bedFields option for specifying the number of non-standard fields
-is not supported, because it requires an AutoSQL file, a format that Galaxy does
-not currently support.
-
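-If your BED dataset is not already sorted by chromosome and then by start
-position, a minimal Python sketch along these lines (file names are
-hypothetical) produces a suitable ordering::
-
-    rows = [line.rstrip("\n").split("\t") for line in open("unsorted.bed") if line.strip()]
-    rows.sort(key=lambda f: (f[0], int(f[1])))
-    out = open("sorted.bed", "w")
-    for f in rows:
-        out.write("\t".join(f) + "\n")
-    out.close()
-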
-</help>
-</tool>
--- a/tools/filters/bed_to_gff_converter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-#!/usr/bin/env python
-# This code exists in 2 places: ~/datatypes/converters and ~/tools/filters
-import sys
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def __main__():
-    input_name = sys.argv[1]
-    output_name = sys.argv[2]
-    skipped_lines = 0
-    first_skipped_line = 0
-    out = open( output_name, 'w' )
-    out.write( "##gff-version 2\n" )
-    out.write( "##bed_to_gff_converter.py\n\n" )
-    i = 0
-    for i, line in enumerate( file( input_name ) ):
-        complete_bed = False
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ) and not line.startswith( 'track' ) and not line.startswith( 'browser' ):
-            try:
-                elems = line.split( '\t' )
-                if len( elems ) == 12:
-                    complete_bed = True
-                chrom = elems[0]
-                if complete_bed:
-                    feature = "mRNA"
-                else:
-                    try:
-                        feature = elems[3]
-                    except:
-                        feature = 'feature%d' % ( i + 1 )
-                start = int( elems[1] ) + 1
-                end = int( elems[2] )
-                try:
-                    score = elems[4]
-                except:
-                    score = '0'
-                try:
-                    strand = elems[5]
-                except:
-                    strand = '+'
-                try:
-                    group = elems[3]
-                except:
-                    group = 'group%d' % ( i + 1 )
-                if complete_bed:
-                    out.write( '%s\tbed2gff\t%s\t%d\t%d\t%s\t%s\t.\t%s %s;\n' % ( chrom, feature, start, end, score, strand, feature, group  ) )
-                else:
-                    out.write( '%s\tbed2gff\t%s\t%d\t%d\t%s\t%s\t.\t%s;\n' % ( chrom, feature, start, end, score, strand, group  ) )
-                if complete_bed:
-                    # We have all the info necessary to annotate exons for genes and mRNAs
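-                    # blockStarts values are 0-based offsets from chromStart; since
-                    # `start` is already chromStart + 1, exon_start stays 1-based,
-                    # and subtracting 1 from the size makes exon_end inclusive, as
-                    # GFF requires.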
-                    block_count = int( elems[9] )
-                    block_sizes = elems[10].split( ',' )
-                    block_starts = elems[11].split( ',' )
-                    for j in range( block_count ):
-                        exon_start = int( start ) + int( block_starts[j] )
-                        exon_end = exon_start + int( block_sizes[j] ) - 1
-                        out.write( '%s\tbed2gff\texon\t%d\t%d\t%s\t%s\t.\texon %s;\n' % ( chrom, exon_start, exon_end, score, strand, group ) )
-            except:
-                skipped_lines += 1
-                if not first_skipped_line:
-                    first_skipped_line = i + 1
-        else:
-            skipped_lines += 1
-            if not first_skipped_line:
-                first_skipped_line = i + 1
-    out.close()
-    info_msg = "%i lines converted to GFF version 2.  " % ( i + 1 - skipped_lines )
-    if skipped_lines > 0:
-        info_msg += "Skipped %d blank/comment/invalid lines starting with line #%d." %( skipped_lines, first_skipped_line )
-    print info_msg
-
-if __name__ == "__main__": __main__()
--- a/tools/filters/catWrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-#!/usr/bin/env python
-#By, Guruprasad Ananda.
-
-from galaxy import eggs
-import sys, os
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-    
-def main():
-    outfile = sys.argv[1]
-    infile = sys.argv[2]
-    
-    try:
-        fout = open(sys.argv[1],'w')
-    except:
-        stop_err("Output file cannot be opened for writing.")
-        
-    try:
-        fin = open(sys.argv[2],'r')
-    except:
-        stop_err("Input file cannot be opened for reading.")
-    
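-    # With a single input dataset there is nothing to concatenate: just copy it.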
-    if len(sys.argv) < 4:
-        os.system("cp %s %s" %(infile,outfile))
-        sys.exit()
-    
-    cmdline = "cat %s " %(infile)
-    for inp in sys.argv[3:]:
-        cmdline = cmdline + inp + " "
-    cmdline = cmdline + ">" + outfile
-    try:
-        os.system(cmdline)
-    except:
-        stop_err("Error encountered with cat.")
-        
-if __name__ == "__main__": main()
\ No newline at end of file
--- a/tools/filters/catWrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-<tool id="cat1" name="Concatenate datasets">
-    <description>tail-to-head</description>
-    <command interpreter="python">
-        catWrapper.py 
-        $out_file1 
-        $input1
-        #for $q in $queries
-            ${q.input2}
-        #end for
-    </command>
-    <inputs>
-        <param name="input1" type="data" label="Concatenate Dataset"/>
-        <repeat name="queries" title="Dataset">
-            <param name="input2" type="data" label="Select" />
-        </repeat>
-    </inputs>
-    <outputs>
-        <data name="out_file1" format="input" metadata_source="input1"/>
-    </outputs>
-    <tests>
-        <test>
-            <param name="input1" value="1.bed"/>
-            <param name="input2" value="2.bed"/>
-            <output name="out_file1" file="cat_wrapper_out1.bed"/>
-        </test>
-        <!--TODO: if possible, enhance the underlying test code to handle this test
-            the problem is multiple params with the same name "input2"
-        <test>
-            <param name="input1" value="1.bed"/>
-            <param name="input2" value="2.bed"/>
-            <param name="input2" value="3.bed"/>
-            <output name="out_file1" file="cat_wrapper_out2.bed"/>
-        </test>
-        -->
-    </tests>
-    <help>
-
-.. class:: warningmark
-
-**WARNING:** Be careful not to concatenate datasets of different kinds (e.g., sequences with intervals). This tool does not check if the datasets being concatenated are in the same format. 
-
------
-
-**What it does**
-
-Concatenates datasets
-
------
-
-**Example**
-
-Concatenating Dataset::
-
-    chrX  151087187  151087355  A  0  -
-    chrX  151572400  151572481  B  0  +
-
-with Dataset1::
-
-    chr1  151242630  151242955  X  0  +
-    chr1  151271715  151271999  Y  0  +
-    chr1  151278832  151279227  Z  0  -
-    
-and with Dataset2::
-
-    chr2  100000030  200000955  P  0  +
-    chr2  100000015  200000999  Q  0  +
-
-will result in the following::
-
-    chrX  151087187  151087355  A  0  -
-    chrX  151572400  151572481  B  0  +
-    chr1  151242630  151242955  X  0  +
-    chr1  151271715  151271999  Y  0  +
-    chr1  151278832  151279227  Z  0  -
-    chr2  100000030  200000955  P  0  +
-    chr2  100000015  200000999  Q  0  +
-
-    </help>
-</tool>
--- a/tools/filters/changeCase.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-use warnings;
-
-my $columns = {};
-my $del = "";
-my @in = ();
-my @out = ();
-my $command = "";
-my $field = 0;
-
-# a wrapper for changing the case of columns from within galaxy
-# isaChangeCase.pl [filename] [columns] [delim] [casing] [output]
-
-die "Check arguments: $0 [filename] [columns] [delim] [casing] [output]\n" unless @ARGV == 5;
-
-# process column input
-$ARGV[1] =~ s/\s+//g;
-foreach ( split /,/, $ARGV[1] ) {
-  if (m/^c\d{1,}$/i) {
-    s/c//ig;
-    $columns->{ $_ - 1 } = $_ - 1; # store the 0-based column index (c1 -> 0)
-  }
-}
-
-die "No columns specified, columns are not preceeded with 'c', or commas are not used to separate column numbers: $ARGV[1]\n" if keys %$columns == 0;
-
-my $column_delimiters_href = {
-	'TAB' => q{\t},
-	'COMMA' => ",",
-	'DASH' => "-",
-	'UNDERSCORE' => "_",
-	'PIPE' => q{\|},
-	'DOT' => q{\.},
-	'SPACE' => q{\s+}
-};
-	
-$del = $column_delimiters_href->{$ARGV[2]};
-
-open (OUT, ">$ARGV[4]") or die "Cannot create $ARGV[4]:$!\n";
-open (IN,  "<$ARGV[0]") or die "Cannot open $ARGV[0]:$!\n";
-while (<IN>) {
-  chop;
-  @in = split /$del/; 
-  for ( my $i = 0; $i <= $#in; ++$i) {
-	if (exists $columns->{$i}) {
-		push(@out, $ARGV[3] eq 'up' ? uc($in[$i]) : lc($in[$i]));
-	} else {
-		push(@out, $in[$i]);
-	}
-  }
-  print OUT join("\t",@out), "\n";
-  @out = ();
-}
-close IN;
-
-close OUT;
--- a/tools/filters/changeCase.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,74 +0,0 @@
-<tool id="ChangeCase" name="Change Case">
-  <description> of selected columns</description>
-  <command interpreter="perl">changeCase.pl $input "$cols" $delimiter $casing $out_file1</command>
-  <inputs>
-    <param name="input" format="txt" type="data" label="From"/>
-    <param name="cols" size="10" type="text" value="c1,c2" label="Change case of columns"/>
-    <param name="delimiter" type="select" label="Delimited by">
-      <option value="TAB">Tab</option>
-      <option value="SPACE">Whitespace</option>
-      <option value="DOT">Dot</option>
-      <option value="COMMA">Comma</option>
-      <option value="DASH">Dash</option>
-      <option value="UNDERSCORE">Underscore</option>
-      <option value="PIPE">Pipe</option>
-    </param>
-    <param name="casing" type="select" label="To">
-      <option value="up">Upper case</option>
-      <option value="lo">Lower case</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="1.txt" ftype="txt"/>
-      <param name="cols" value="c1"/>
-      <param name="delimiter" value="SPACE"/>
-      <param name="casing" value="up"/>
-      <output name="out_file1" file="changeCase_out1.tabular"/>
-    </test>
-    <test>
-      <param name="input" value="1.bed" ftype="bed"/>
-      <param name="cols" value="c1"/>
-      <param name="delimiter" value="TAB"/>
-      <param name="casing" value="up"/>
-      <output name="out_file1" file="changeCase_out2.tabular"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-**This tool breaks column assignments.** To re-establish column assignments run the tool and click on the pencil icon in the resulting history item.
-
-.. class:: warningmark
-
-The format of the resulting dataset from this tool is always tabular.
-
------
-
-**What it does**
-
-This tool selects specified columns from a dataset and converts the values of those columns to upper or lower case.
-
-- Columns are specified as **c1**, **c2**, and so on.
-- Columns can be specified in any order (e.g., **c2,c1,c6**)
-
------
-
-**Example**
-
-Changing columns 1 and 3 (delimited by comma) to upper case in::
-
-  apple,is,good
-  windows,is,bad
-
-will result in::
-
-  APPLE is GOOD
-  WINDOWS is BAD
-
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/filters/commWrapper.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-use warnings;
-use File::Temp "tempfile";
-#use POSIX qw(tmpnam);
-
-my ($input1, $input2, $mode, $out_file1) = @ARGV;
-
-my ($fh, $file1) = tempfile();
-my ($fh1,$file2) = tempfile(); 
-
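-# comm(1) expects sorted input, so sort both datasets into the temporary
-# files before comparing them.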
-`sort $input1 > $file1`;
-`sort $input2 > $file2`;
-`comm $mode $file1 $file2 > $out_file1`;
-`rm $file1 ; rm $file2`;
-
-
-
--- a/tools/filters/commWrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-<tool id="Comm1" name="Find Similarities and Differences">
-  <description>between two datasets</description>
-  <command interpreter="perl">commWrapper.pl $input1 $input2 $mode $out_file1</command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Compare Dataset1"/>
-    <param format="tabular" name="input2" type="data" label="with Dataset2"/>
-    <param name="mode" type="select" label="And find">
-      <option value="-23">Lines unique to Dataset1</option>
-      <option value="-12">Lines shared between Dataset1 and Dataset2</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input1" />
-  </outputs>
-  <help>
-This tool is based on the UNIX shell command comm. It compares two datasets and returns their similarities or differences. For example, if you have two datasets::
-  
- a  1
- b  2
- c  3
-
-and::
-
- a  1
- f  6
- h  8
-
-Using this tool with **Lines unique to Dataset1** option will return::
-
- b  2
- c  3
-
-If you use **Lines shared between Dataset1 and Dataset2** option output will look like this::
-
- a  1
-
-</help>
-</tool>
\ No newline at end of file
--- a/tools/filters/compare.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-<tool id="comp1" name="Compare two Datasets" version="1.0.2">
-  <description>to find common or distinct rows</description>
-  <command interpreter="python">joinWrapper.py $input1 $input2 $field1 $field2 $mode $out_file1</command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Compare"/>
-    <param name="field1" label="Using column" type="data_column" data_ref="input1">
-        <validator type="no_options" message="Invalid column choice. Please try again after editing metadata of your input dataset by clicking on the pencil icon next to it."/>
-    </param>
-    <param format="tabular" name="input2" type="data" label="against" />
-    <param name="field2" label="and column" type="data_column" data_ref="input2">
-            <validator type="no_options" message="Invalid column choice. Please try again after editing metadata of your input dataset by clicking on the pencil icon next to it."/>
-    </param>
-    <param name="mode" type="select" label="To find" help="See examples below for explanation of these options">
-      <option value="N">Matching rows of 1st dataset</option>
-      <option value="V">Non Matching rows of 1st dataset</option>
-    </param>
-  </inputs>
-  <outputs>
-     <data format="input" name="out_file1" metadata_source="input1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed"/>
-      <param name="input2" value="2.bed"/>
-      <param name="field1" value="2"/>
-      <param name="field2" value="2"/>
-      <param name="mode" value="N"/>
-      <output name="out_file1" file="fs-compare.dat"/>
-    </test>
-    <!--test case with duplicated key values-->
-    <test>
-      <param name="input1" value="1.bed"/>
-      <param name="input2" value="3.bed"/>
-      <param name="field1" value="1"/>
-      <param name="field2" value="1"/>
-      <param name="mode" value="V"/>
-      <output name="out_file1" file="fs-compare-2.dat"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
------
-
-**Syntax**
-
-This tool finds lines in one dataset that HAVE or DO NOT HAVE a common field with another dataset.
-
------
-
-**Example**
-
-If this is **First dataset**::
-
-  chr1 10 20 geneA 
-  chr1 50 80 geneB
-  chr5 10 40 geneL
-
-and this is **Second dataset**::
-
-  geneA tumor-suppressor
-  geneB Foxp2
-  geneC Gnas1
-  geneE INK4a
-
-Finding lines of the **First dataset** whose 4th column matches the 1st column of the **Second dataset** yields::
-
-  chr1 10 20 geneA 
-  chr1 50 80 geneB
-
-Conversely, using option **Non Matching rows of First dataset** on the same fields will yield::
-
-  chr5 10 40 geneL
-
-</help>
-</tool>
--- a/tools/filters/condense_characters.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-use warnings;
-
-# condenses all consecutive characters of one type
-# convert_characters.pl [input] [character] [output]
-
-die "Check arguments" unless @ARGV == 3;
-
-my $inputfile = $ARGV[0];
-my $character = $ARGV[1];
-my $outputfile = $ARGV[2];
-
-
-my $convert_from;
-my $convert_to;
-
-
-if ($character eq "s")
-{
-    $convert_from = '\s';
-}
-elsif ($character eq "T")
-{
-    $convert_from = '\t';
-}
-elsif ($character eq "Sp")
-{
-    $convert_from = " ";
-}
-elsif ($character eq "Dt")
-{
-    $convert_from = '\.';
-}
-elsif ($character eq "C")
-{
-    $convert_from = ",";
-}
-elsif ($character eq "D")
-{
-    $convert_from = "-";
-}
-elsif ($character eq "U")
-{
-    $convert_from = "_";
-}
-elsif ($character eq "P")
-{
-    $convert_from = '\|';
-}
-else
-{
-    die "Invalid value specified for convert from\n";
-}
-
-
-if ($character eq "T")
-{
-    $convert_to = "\t";
-}
-elsif ($character eq "Sp")
-{
-    $convert_to = " ";
-}
-elsif ($character eq "Dt")
-{
-    $convert_to = "\.";
-}
-elsif ($character eq "C")
-{
-    $convert_to = ",";
-}
-elsif ($character eq "D")
-{
-    $convert_to = "-";
-}
-elsif ($character eq "U")
-{
-    $convert_to = "_";
-}
-elsif ($character eq "P")
-{
-    $convert_to = "|";
-}
-else
-{
-    die "Invalid value specified for Convert to\n";
-}
-
-my $fhIn;
-open ($fhIn, "< $inputfile") or die "Cannot open source file";
-
-my $fhOut;
-open ($fhOut, "> $outputfile");
-
-while (<$fhIn>)
-{
-    my $thisLine = $_;
-    chomp $thisLine;
-    $thisLine =~ s/${convert_from}+/$convert_to/g;
-    print $fhOut $thisLine,"\n";    
-}
-close ($fhIn) or die "Cannot close source file";
-close ($fhOut) or die "Cannot close output file";
--- a/tools/filters/condense_characters.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-<tool id="Condense characters1" name="Condense">
-  <description>consecutive characters</description>
-  <command interpreter="perl">condense_characters.pl $input $character $out_file1</command>
-  <inputs>
-<!--    <display>condense all consecutive $character from $input</display> -->
-    <param name="character" type="select" label="Condense all consecutive">
-      <option value="T">Tabs</option>
-      <option value="Sp">Spaces</option>
-      <option value="Dt">Dots</option>
-      <option value="C">Commas</option>
-      <option value="D">Dashes</option>
-      <option value="U">Underscores</option>
-      <option value="P">Pipes</option>
-    </param>
-    <param format="txt" name="input" type="data" label="in this Query"/>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="character" value="T"/>
-      <param name="input" value="1.bed"/>
-      <output name="out_file1" file="eq-condense.dat"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool condenses all consecutive characters of a specified type.
-
------
-
-**Example**
-
-- Input file::
-
-    geneX,,,10,,,,,20
-    geneY,,5,,,,,12,15,9,
-
-- Condense all consecutive commas. The above file will be converted into::
-
-    geneX,10,20
-    geneY,5,12,15,9
-
-</help>
-</tool>
--- a/tools/filters/convert_characters.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-use warnings;
-
-# converts all characters of one type into another 
-# convert_characters.pl [input] [convert_from] [convert_to] [output]
-
-die "Check argument\n" unless @ARGV == 4;
-
-my $inputfile = $ARGV[0];
-my $convert_from = $ARGV[1];
-my $convert_to = $ARGV[2];
-my $outputfile = $ARGV[3];
-
-if ($convert_from eq "s")
-{
-    $convert_from = '\s';
-}
-elsif ($convert_from eq "T")
-{
-    $convert_from = '\t';
-}
-elsif ($convert_from eq "Sp")
-{
-    $convert_from = '\s';
-}
-elsif ($convert_from eq "Dt")
-{
-    $convert_from = '\.';
-}
-elsif ($convert_from eq "C")
-{
-    $convert_from = ",";
-}
-elsif ($convert_from eq "D")
-{
-    $convert_from = "-";
-}
-elsif ($convert_from eq "U")
-{
-    $convert_from = "_";
-}
-elsif ($convert_from eq "P")
-{
-    $convert_from = '\|';
-}
-else
-{
-    die "Invalid value specified for convert from\n";
-}
-
-
-if ($convert_to eq "T")
-{
-    $convert_to = "\t";
-}
-elsif ($convert_to eq "Sp")
-{
-    $convert_to = '\s';
-}
-elsif ($convert_to eq "Dt")
-{
-    $convert_to = "\.";
-}
-elsif ($convert_to eq "C")
-{
-    $convert_to = ",";
-}
-elsif ($convert_to eq "D")
-{
-    $convert_to = "-";
-}
-elsif ($convert_to eq "U")
-{
-    $convert_to = "_";
-}
-elsif ($convert_to eq "P")
-{
-    $convert_to = "|";
-}
-else
-{
-    die "Invalid value specified for convert to\n";
-}
-
-my $fhIn;
-open ($fhIn, "< $inputfile") or die "Cannot open source file";
-
-my $fhOut;
-open ($fhOut, "> $outputfile");
-
-while (<$fhIn>)
-{
-    my $thisLine = $_;
-    chomp $thisLine;
-    $thisLine =~ s/${convert_from}+/$convert_to/g;
-    print $fhOut $thisLine,"\n";    
-}
-close ($fhIn) or die "Cannot close source file\n";
-close ($fhOut) or die "Cannot close output file\n";
--- a/tools/filters/convert_characters.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-#!/usr/bin/env python
-#By, Guruprasad Ananda.
-
-from galaxy import eggs
-import sys, re
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-    
-def main():
-    if len(sys.argv) != 4:
-        stop_err("usage: convert_characters infile from_char outfile")
-
-    try:
-        fin = open(sys.argv[1],'r')
-    except:
-        stop_err("Input file cannot be opened for reading.")
-    
-    from_char = sys.argv[2]
-    
-    try:
-        fout = open(sys.argv[3],'w')
-    except:
-        stop_err("Output file cannot be opened for writing.")
-    
-    char_dict = {'T':'\t','s':'\s','Dt':'\.','C':',','D':'-','U':'_','P':'\|','Co':':'}
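-    # Keys mirror the option codes offered in convert_characters.xml ('T' tab,
-    # 's' whitespace, 'Dt' dot, 'C' comma, 'D' dash, 'U' underscore, 'P' pipe,
-    # 'Co' colon); values are the regex fragments to match.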
-    from_ch = char_dict[from_char] + '+'    # build an RE matching 1 or more occurrences
-    skipped = 0
-    
-    for line in fin:
-        line = line.strip()
-        try:
-            fout.write("%s\n" %(re.sub(from_ch,'\t',line)))     
-        except:
-            skipped += 1
-            
-    if skipped:
-        print "Skipped %d lines as invalid." %skipped
-    
-if __name__ == "__main__": 
-    main()
\ No newline at end of file
--- a/tools/filters/convert_characters.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-<tool id="Convert characters1" name="Convert">
-  <description>delimiters to TAB</description>
-  <command interpreter="python">convert_characters.py $input $convert_from $out_file1</command>
-  <inputs>
-    <param name="convert_from" type="select" label="Convert all">
-      <option value="s">Whitespaces</option>
-      <option value="T">Tabs</option>
-      <!--<option value="Sp">Spaces</option>-->
-      <option value="Dt">Dots</option>
-      <option value="C">Commas</option>
-      <option value="D">Dashes</option>
-      <option value="U">Underscores</option>
-      <option value="P">Pipes</option>
-      <option value="Co">Colons</option>
-    </param>
-    <param format="txt" name="input" type="data" label="in Query"/>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="convert_from" value="s"/>
-      <param name="input" value="1.bed"/>
-      <output name="out_file1" file="eq-convert.dat"/>
-    </test>
-    <test>
-      <param name="convert_from" value="s"/>
-      <param name="input" value="a.txt"/>
-      <output name="out_file1" file="a.tab"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-Converts all delimiters of a specified type into TABs. Consecutive characters are condensed. For example, if columns are separated by 5 spaces they will be converted into a single tab.
-
------
-
-**Example**
-
-- Input file::
-
-    chrX||151283558|151283724|NM_000808_exon_8_0_chrX_151283559_r|0|-
-    chrX|151370273|151370486|NM_000808_exon_9_0_chrX_151370274_r|0|-
-    chrX|151559494|151559583|NM_018558_exon_1_0_chrX_151559495_f|0|+
-    chrX|151564643|151564711|NM_018558_exon_2_0_chrX_151564644_f||||0|+
-
-- Converting all pipe delimiters of the above file to TABs will get::
-
-    chrX  151283558  151283724  NM_000808_exon_8_0_chrX_151283559_r  0  -
-    chrX  151370273  151370486  NM_000808_exon_9_0_chrX_151370274_r  0  -
-    chrX  151559494  151559583  NM_018558_exon_1_0_chrX_151559495_f  0  +
-    chrX  151564643  151564711  NM_018558_exon_2_0_chrX_151564644_f  0  +
-
-</help>
-</tool>
--- a/tools/filters/cutWrapper.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,77 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-use warnings;
-
-my @columns = ();
-my $del = "";
-my @in = ();
-my @out = ();
-my $command = "";
-my $field = 0;
-
-# a wrapper for cut for use in galaxy
-# cutWrapper.pl [filename] [columns] [delim] [output]
-
-die "Check arguments\n" unless @ARGV == 4;
-
-$ARGV[1] =~ s/\s+//g;
-foreach ( split /,/, $ARGV[1] ) {
-  if (m/^c\d{1,}$/i) {
-    push (@columns, $_);
-    $columns[@columns-1] =~s/c//ig;
-  }
-}
-
-die "No columns specified, columns are not preceded with 'c', or commas are not used to separate column numbers: $ARGV[1]\n" if @columns == 0;
-
-my $column_delimiters_href = {
-  'T' => q{\t},
-  'C' => ",",
-  'D' => "-",
-  'U' => "_",
-  'P' => q{\|},
-  'Dt' => q{\.},
-  'Sp' => q{\s+}
-};
-
-$del = $column_delimiters_href->{$ARGV[2]};
-
-open (OUT, ">$ARGV[3]") or die "Cannot create $ARGV[2]:$!\n";
-open (IN,  "<$ARGV[0]") or die "Cannot open $ARGV[0]:$!\n";
-
-while (my $line=<IN>) {
-   if ($line =~ /^#/) {
-     #Ignore comment lines
-   } else {
-     chop($line);
-     @in = split(/$del/, $line);
-     foreach $field (@columns) {
-       if (defined($in[$field-1])) {
-         push(@out, $in[$field-1]);
-       } else {
-         push(@out, ".");
-       }
-     }    
-     print OUT join("\t",@out), "\n";
-     @out = ();
-   }
-}
-
-#while (<IN>) {
-#  chop;
-#  @in = split /$del/; 
-#  foreach $field (@columns) {
-#    if (defined($in[$field-1])) {
-#      push(@out, $in[$field-1]);
-#    } else {
-#      push(@out, ".");
-#    }
-#  }
-#  print OUT join("\t",@out), "\n";
-#  @out = ();
-#}
-close IN;
-
-close OUT;
-    
--- a/tools/filters/cutWrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,202 +0,0 @@
-<tool id="Cut1" name="Cut" version="1.0.1">
-  <description>columns from a table</description>
-  <command interpreter="perl">cutWrapper.pl $input "$columnList" $delimiter $out_file1</command>
-  <inputs>
-    <param name="columnList" size="10" type="text" value="c1,c2" label="Cut columns"/>
-    <param name="delimiter" type="select" label="Delimited by">
-      <option value="T">Tab</option>
-      <option value="Sp">Whitespace</option>
-      <option value="Dt">Dot</option>
-      <option value="C">Comma</option>
-      <option value="D">Dash</option>
-      <option value="U">Underscore</option>
-      <option value="P">Pipe</option>
-    </param>
-    <param format="txt" name="input" type="data" label="From"/>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1" >
-      <actions>
-        <conditional name="delimiter">
-          <when value="T">
-            <conditional name="input">
-              <when datatype_isinstance="interval">
-                <action type="format" default="tabular">
-                  <option type="from_param" name="columnList" column="0" offset="0"> <!-- chromCol is 1-->
-                    
-                    <filter type="insert_column" column="0" value="interval"/>
-                    
-                    <filter type="insert_column" ref="columnList" /> <!-- startCol -->
-                    
-                    <filter type="insert_column" ref="columnList" /> <!-- endCol -->
-                    
-                    <filter type="multiple_splitter" column="1" separator=","/>
-                    <filter type="column_strip" column="1"/> <!-- get rid of all external whitespace -->
-                    <filter type="string_function" column="1" name="lower" />
-                    <filter type="param_value" column="1" value="^c\d{1,}$" compare="re_search" keep="True"/>
-                    <filter type="column_strip" column="1" strip="c"/> <!-- get rid of c's  -->
-                    <filter type="boolean" column="1" cast="int" />
-                    
-                    <filter type="multiple_splitter" column="2" separator=","/>
-                    <filter type="column_strip" column="2"/> <!-- get rid of all external whitespace -->
-                    <filter type="string_function" column="2" name="lower" />
-                    <filter type="param_value" column="2" value="^c\d{1,}$" compare="re_search" keep="True"/>
-                    <filter type="column_strip" column="2" strip="c"/> <!-- get rid of c's  -->
-                    <filter type="boolean" column="2" cast="int" />
-                    
-                    <filter type="multiple_splitter" column="3" separator=","/>
-                    <filter type="column_strip" column="3"/> <!-- get rid of all external whitespace -->
-                    <filter type="string_function" column="3" name="lower" />
-                    <filter type="param_value" column="3" value="^c\d{1,}$" compare="re_search" keep="True"/>
-                    <filter type="column_strip" column="3" strip="c"/> <!-- get rid of c's  -->
-                    <filter type="boolean" column="3" cast="int" />
-                    
-                    <filter type="metadata_value" ref="input" name="chromCol" column="1" />
-                    <filter type="metadata_value" ref="input" name="startCol" column="2" />
-                    <filter type="metadata_value" ref="input" name="endCol" column="3" />
-                    
-                  </option>
-                </action>
-                
-                <conditional name="out_file1">
-                  <when datatype_isinstance="interval">
-                    <action type="metadata" name="chromCol">
-                      <option type="from_param" name="columnList" column="0" offset="0"> <!-- chromCol is 0-->
-                        <filter type="multiple_splitter" column="0" separator=","/>
-                        <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace -->
-                        <filter type="string_function" column="0" name="lower" />
-                        <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/>
-                        <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's  -->
-                        <filter type="insert_column" value="1" iterate="True" column="0"/>
-                        <filter type="boolean" column="1" cast="int" /> 
-                        <filter type="metadata_value" ref="input" name="chromCol" column="1" />
-                      </option>
-                    </action>
-                    
-                    <action type="metadata" name="startCol">
-                      <option type="from_param" name="columnList" column="0" offset="0"> <!-- startCol is 0-->
-                        <filter type="multiple_splitter" column="0" separator=","/>
-                        <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace -->
-                        <filter type="string_function" column="0" name="lower" />
-                        <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/>
-                        <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's  -->
-                        <filter type="insert_column" value="1" iterate="True" column="0"/>
-                        <filter type="boolean" column="1" cast="int" />
-                        <filter type="metadata_value" ref="input" name="startCol" column="1" />
-                      </option>
-                    </action>
-                    
-                    <action type="metadata" name="endCol">
-                      <option type="from_param" name="columnList" column="0" offset="0"> <!-- endCol is 0-->
-                        <filter type="multiple_splitter" column="0" separator=","/>
-                        <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace -->
-                        <filter type="string_function" column="0" name="lower" />
-                        <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/>
-                        <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's  -->
-                        <filter type="insert_column" value="1" iterate="True" column="0"/>
-                        <filter type="boolean" column="1" cast="int" />
-                        <filter type="metadata_value" ref="input" name="endCol" column="1" />
-                      </option>
-                    </action>
-                  
-                    <action type="metadata" name="nameCol" default="0">
-                      <option type="from_param" name="columnList" column="0" offset="0"> <!-- nameCol is 0-->
-                        <filter type="multiple_splitter" column="0" separator=","/>
-                        <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace -->
-                        <filter type="string_function" column="0" name="lower" />
-                        <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/>
-                        <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's  -->
-                        <filter type="insert_column" value="1" iterate="True" column="0"/>
-                        <filter type="boolean" column="1" cast="int" />
-                        <filter type="metadata_value" ref="input" name="nameCol" column="1" />
-                      </option>
-                    </action>
-                  
-                    <action type="metadata" name="strandCol" default="0">
-                      <option type="from_param" name="columnList" column="0" offset="0"> <!-- strandCol is 0-->
-                        <filter type="multiple_splitter" column="0" separator=","/>
-                        <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace -->
-                        <filter type="string_function" column="0" name="lower" />
-                        <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/>
-                        <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's  -->
-                        <filter type="insert_column" value="1" iterate="True" column="0"/>
-                        <filter type="boolean" column="1" cast="int" />
-                        <filter type="metadata_value" ref="input" name="strandCol" column="1" />
-                      </option>
-                    </action>  
-                  </when>
-                </conditional>
-                
-              </when>
-            </conditional>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <param name="columnList" value="c1,c4,c2,c3"/>
-      <param name="delimiter" value="T"/>
-      <param name="input" value="1.bed"/>
-      <output name="out_file1" file="eq-cut.dat"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-**WARNING: This tool breaks column assignments.** To re-establish column assignments run the tool and click on the pencil icon in the latest history item.
-
-.. class:: infomark
-
-The output of this tool is always in tabular format (e.g., if your original delimiters are commas, they will be replaced with tabs). For example:
-
-  Cutting columns 1 and 3 from::
-
-     apple,is,good
-     windows,is,bad
-
-  will give::
-
-    apple   good
-    windows bad
-
------
-
-**What it does**
-
-This tool selects (cuts out) specified columns from the dataset.
-
-- Columns are specified as **c1**, **c2**, and so on. Column count begins with **1**
-- Columns can be specified in any order (e.g., **c2,c1,c6**)
-- If you specify more columns than are actually present, the missing values are filled with dots
-
------
-
-**Example**
-
-Input dataset (six columns: c1, c2, c3, c4, c5, and c6)::
-
-   chr1 10   1000  gene1 0 +
-   chr2 100  1500  gene2 0 +
-
-**cut** on columns "**c1,c4,c6**" will return::
-
-   chr1 gene1 +
-   chr2 gene2 +
-
-**cut** on columns "**c6,c5,c4,c1**" will return::
-
-   + 0 gene1 chr1 
-   + 0 gene2 chr2
-
-
-**cut** on columns "**c8,c7,c4**" will return::
-
-   . . gene1 
-   . . gene2
-   
-
-</help>
-</tool>
--- a/tools/filters/fileGrep.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-<tool id="fileGrep1" name="Match">
-  <description>a column from one Query against another Query</description>
-  <command>cut -f $col $input1 | grep -f - $match $input2 > $out_file1</command>
-  <inputs>
-    <param name="col" size="2" type="text" value="1" label="Match content of column"/>
-    <param format="tabular" name="input1" type="data" label="From Query1"/>
-    <param format="tabular" name="input2" type="data" label="Against Query2"/>
-    <param name="match" type="select" label="and return rows that">
-      <option value="">Match</option>
-      <option value="-v">Do not match</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input2" />
-  </outputs>
-  <help>
-This tool is based on the UNIX command grep with the -f option. It matches the content of one query against another. Because grep matches substrings, a short identifier can also match rows that contain a longer identifier embedding it. For example, assume you have two queries - one that contains EST accession numbers and some other information::
-
-  AA001229	12	12
-  A001501	7	7
-  AA001641	6	6
-  AA001842	6	6
-  AA002047	6	6
-  AA004638	3	3
-
-and another that is a typical BED file describing genomic location of some ESTs::
-
-  chr7 115443235 115443809 CA947954_exon_0_0_chr7_115443236_f 0	+
-  chr7 115443236 115443347 DB338189_exon_0_0_chr7_115443237_f 0	+
-  chr7 115443347 115443768 DB338189_exon_1_0_chr7_115443348_f 0	+
-  chr7 115443239 115443802 AA001842_exon_0_0_chr7_115443240_f 0	+
-  chr7 115443243 115443347 DB331869_exon_0_0_chr7_115443244_f 0	+
-  chr7 115443347 115443373 DB331869_exon_1_0_chr7_115443348_f 0	+
-
-Using this tool with the **Match** option chosen, you will be able to tell which ESTs in Query1 are also present in Query2; the output will be::
-
-  chr7 115443239 115443802 AA001842_exon_0_0_chr7_115443240_f 0	+
-
-</help>
-</tool>
\ No newline at end of file
--- a/tools/filters/fixedValueColumn.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,34 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-use warnings;
-
-# fixedValueColumn.pl $input $out_file1 "expression" "iterate [yes|no]"
-
-my ($input, $out_file1, $expression, $iterate) = @ARGV;
-my $i = 0;
-my $numeric = 0;
-
-die "Check arguments\n" unless @ARGV == 4;
-
-open (DATA, "<$input") or die "Cannot open $input:$!\n";
-open (OUT,  ">$out_file1") or die "Cannot create $out_file1:$!\n";
-
-if ($expression =~ m/^\d+$/) {
-  $numeric = 1;
-  $i = $expression;
-}
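-# If the expression is numeric, iteration counts upward from that value
-# (1, 2, 3, ...); otherwise "expression-N" is appended, with N starting at 0.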
-
-while (<DATA>) {
-  chop;
-  if ($iterate eq "no") {
-    print OUT "$_\t$expression\n";
-  } else {
-    print OUT "$_\t$i\n" if $numeric == 1;
-    print OUT "$_\t$expression-$i\n" if $numeric == 0;
-    ++$i;
-  }
-}
-
-close DATA;
-close OUT;
--- a/tools/filters/fixedValueColumn.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-<tool id="addValue" name="Add column">
-  <description>to an existing dataset</description>
-  <command interpreter="perl">fixedValueColumn.pl $input $out_file1 "$exp" $iterate</command>
-  <inputs>
-     <param name="exp" size="20" type="text" value="1" label="Add this value"/>
-    <param format="tabular" name="input" type="data" label="to Dataset" help="Dataset missing? See TIP below" />
-    <param name="iterate" type="select" label="Iterate?">
-      <option value="no">NO</option>
-      <option value="yes">YES</option>
-    </param>    
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="exp" value="1"/>
-      <param name="input" value="1.bed"/>
-      <param name="iterate" value="no"/>
-      <output name="out_file1" file="eq-addvalue.dat"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
------
-
-**What it does**
-
-You can enter any value and it will be added as a new column to your dataset
-
------
-
-**Example**
-
-If your original data looks like this::
-
-    chr1 10  100 geneA
-    chr2 200 300 geneB
-    chr2 400 500 geneC
-
-Typing **+** in the text box will generate::
-
-    chr1 10  100 geneA +
-    chr2 200 300 geneB +
-    chr2 400 500 geneC +
-     
-
-You can also add line numbers by selecting **Iterate: YES**. In this case if you enter **1** in the text box you will get::
-
-    chr1 10  100 geneA 1
-    chr2 200 300 geneB 2
-    chr2 400 500 geneC 3
-
-
-
-</help>
-</tool>
--- a/tools/filters/gff/extract_GFF_Features.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-#!/usr/bin/env python
-#Guruprasad Ananda
-"""
-Extract features from GFF file.
-
-usage: %prog input1 out_file1 column features
-"""
-
-import sys, os
-
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def main():   
-    # Parsing Command Line here
-    options, args = doc_optparse.parse( __doc__ )
-    
-    try:
-        inp_file, out_file, column, features = args
-    except:
-        stop_err( "One or more arguments is missing or invalid.\nUsage: prog input output column features" )
-    try:
-        column = int( column )
-    except:
-        stop_err( "Column %s is an invalid column." % column )
-    
-    if features is None:
-        stop_err( "Column %d has no features to display, select another column." %( column + 1 ) )
-
-    fo=open( out_file, 'w' )
-    for i, line in enumerate( file( inp_file ) ):
-        line = line.rstrip( '\r\n' )
-        if line and line.startswith( '#' ):
-            # Keep valid comment lines in the output
-            fo.write( "%s\n" % line )
-        else:
-            try:
-                if line.split( '\t' )[column] in features.split( ',' ):
-                    fo.write( "%s\n" % line )
-            except:
-                pass
-    fo.close()
-            
-    print 'Column %d features: %s' %( column + 1, features )
-
-if __name__ == "__main__":
-    main()       
--- a/tools/filters/gff/extract_GFF_Features.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,114 +0,0 @@
-<tool id="Extract_features1" name="Extract features">
-  <description> from GFF data</description>
-  <command interpreter="python">extract_GFF_Features.py $input1 $out_file1 ${column_choice.col} ${column_choice.feature}</command>
-  <inputs>
-    <param format="gff" name="input1" type="data" label="Select GFF data"/>
-    <conditional name="column_choice">
-      <param name="col" type="select" label="From">
-        <option value="0" selected="true">Column 1 / Sequence name</option>
-        <option value="1">Column 2 / Source</option>
-        <option value="2">Column 3 / Feature</option>
-        <option value="6">Column 7 / Strand</option>
-        <option value="7">Column 8 / Frame</option>
-      </param>
-      <when value="0">
-        <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
-          <options from_dataset="input1">
-            <column name="name" index="0"/>
-            <column name="value" index="0"/>
-            <filter type="unique_value" name="unique" column="0"/>
-          </options>
-        </param> 
-      </when>
-      <when value="1">
-        <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
-          <options from_dataset="input1">
-            <column name="name" index="1"/>
-            <column name="value" index="1"/>
-            <filter type="unique_value" name="unique" column="1"/>
-          </options>
-        </param>        	
-      </when>
-      <when value="2">
-        <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
-          <options from_dataset="input1">
-            <column name="name" index="2"/>
-            <column name="value" index="2"/>
-            <filter type="unique_value" name="unique" column="2"/>
-          </options>
-        </param> 
-      </when>
-      <when value="6">
-        <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
-          <options from_dataset="input1">
-            <column name="name" index="6"/>
-            <column name="value" index="6"/>
-            <filter type="unique_value" name="unique" column="6"/>
-          </options>
-        </param>         	
-      </when>
-      <when value="7">
-        <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
-          <options from_dataset="input1">
-            <column name="name" index="7"/>
-            <column name="value" index="7"/>
-            <filter type="unique_value" name="unique" column="7"/>
-          </options>
-        </param>            	
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input1"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="5.gff"/>
-      <param name="col" value="0" />
-      <param name="feature" value="chr5,chr6,chr7,chr8" />
-      <output name="out_file1" file="Extract_features1_out.gff"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool extracts selected features from GFF data.
-
------
-
-**Example**
-
-Selecting **promoter** from the following GFF data::
-
-    chr22  GeneA  enhancer  10000000  10001000  500  +  .  TGA
-    chr22  GeneA  promoter  10010000  10010100  900  +  .  TGA
-    chr22  GeneB  promoter  10020000  10025000  400  -  .  TGB
-    chr22  GeneB  CCDS2220  10030000  10065000  800  -  .  TGB
-    
-will produce the following output::
-
-    chr22  GeneA  promoter  10010000  10010100  900  +  .  TGA
-    chr22  GeneB  promoter  10020000  10025000  400  -  .  TGB
-
-----
-
-.. class:: infomark
-
-**About formats**
-
-**GFF format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF lines have nine tab-separated fields::
-
-    1. seqname - Must be a chromosome or scaffold.
-    2. source - The program that generated this feature.
-    3. feature - The name of this type of feature. Some examples of standard feature types are "CDS", "start_codon", "stop_codon", and "exon".
-    4. start - The starting position of the feature in the sequence. The first base is numbered 1.
-    5. end - The ending position of the feature (inclusive).
-    6. score - A score between 0 and 1000. If there is no score value, enter ".".
-    7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
-    8. frame - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
-    9. group - All lines with the same group are linked together into a single item.
-
-
-  </help>
-</tool>
--- a/tools/filters/gff/gff_filter_by_attribute.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,163 +0,0 @@
-#!/usr/bin/env python
-# This tool takes a gff file as input and creates filters on attributes based on certain properties.
-# The tool will skip over invalid lines within the file, informing the user about the number of lines skipped.
-# TODO: much of this code is copied from the Filter1 tool (filtering.py in tools/stats/). The commonalities should be 
-# abstracted and leveraged in each filtering tool.
-
-from __future__ import division
-import sys
-from galaxy import eggs
-from galaxy.util.json import to_json_string, from_json_string
-
-# Older py compatibility
-try:
-    set()
-except:
-    from sets import Set as set
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-#
-# Helper functions.
-#
-
-def get_operands( filter_condition ):
-    # Note that the order of all_operators is important
-    items_to_strip = ['+', '-', '**', '*', '//', '/', '%', '<<', '>>', '&', '|', '^', '~', '<=', '<', '>=', '>', '==', '!=', '<>', ' and ', ' or ', ' not ', ' is ', ' is not ', ' in ', ' not in ']
-    for item in items_to_strip:
-        if filter_condition.find( item ) >= 0:
-            filter_condition = filter_condition.replace( item, ' ' )
-    operands = set( filter_condition.split( ' ' ) )
-    return operands
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def check_for_executable( text, description='' ):
-    # Attempt to determine if the condition includes executable stuff and, if so, exit.
-    secured = dir()
-    operands = get_operands( text )
-    for operand in operands:
-        try:
-            check = int( operand )
-        except:
-            if operand in secured:
-                stop_err( "Illegal value '%s' in %s '%s'" % ( operand, description, text ) )
-                
-#
-# Process inputs.
-#
-
-in_fname = sys.argv[1]
-out_fname = sys.argv[2]
-cond_text = sys.argv[3]
-attribute_types = from_json_string( sys.argv[4] )
-
-# Convert types from str to type objects.
-for name, a_type in attribute_types.items():
-    check_for_executable(a_type)
-    attribute_types[ name ] = eval( a_type )
-    
-# Unescape if input has been escaped
-mapped_str = {
-    '__lt__': '<',
-    '__le__': '<=',
-    '__eq__': '==',
-    '__ne__': '!=',
-    '__gt__': '>',
-    '__ge__': '>=',
-    '__sq__': '\'',
-    '__dq__': '"',
-}
-for key, value in mapped_str.items():
-    cond_text = cond_text.replace( key, value )
-        
-# Attempt to determine if the condition includes executable stuff and, if so, exit.
-check_for_executable( cond_text, 'condition')
-
-# Prepare the attribute variable names and the type-cast wrappers that the
-# generated filter code below will use.
-attrs, type_casts = [], []
-for name, attr_type in attribute_types.items():
-    attrs.append( name )
-    type_cast = "get_value('%(name)s', attribute_types['%(name)s'], attribute_values)" % ( {'name': name} )
-    type_casts.append( type_cast )
-    
-attr_str = ', '.join( attrs )    # 'c1, c2, c3, c4'
-type_cast_str = ', '.join( type_casts )  # 'str(c1), int(c2), int(c3), str(c4)'
-wrap = "%s = %s" % ( attr_str, type_cast_str )
-    
-# Stats 
-skipped_lines = 0
-first_invalid_line = 0
-invalid_line = None
-lines_kept = 0
-total_lines = 0
-out = open( out_fname, 'wt' )
-
-# Helper function to safely get and type cast a value in a dict.
-def get_value(name, a_type, values_dict):
-    if name in values_dict:
-        return (a_type)(values_dict[ name ])
-    else:
-        return None
-    
-# Read and filter input file, skipping invalid lines
-code = '''
-for i, line in enumerate( file( in_fname ) ):
-    total_lines += 1
-    line = line.rstrip( '\\r\\n' )
-    if not line or line.startswith( '#' ):
-        skipped_lines += 1
-        if not invalid_line:
-            first_invalid_line = i + 1
-            invalid_line = line
-        continue
-    try:
-        # Place attribute values into variables with attribute
-        # name; type casting is done as well.
-        elems = line.split( '\t' )
-        attribute_values = {}
-        for name_value_pair in elems[8].split(";"):
-            pair = name_value_pair.strip().split(" ")
-            name = pair[0].strip()
-            if name == '':
-                continue
-            # Need to strip double quote from value and typecast.
-            attribute_values[name] = pair[1].strip(" \\"")
-        %s
-        if %s:
-            lines_kept += 1
-            print >> out, line
-    except Exception, e:
-        print e
-        skipped_lines += 1
-        if not invalid_line:
-            first_invalid_line = i + 1
-            invalid_line = line
-''' % ( wrap, cond_text )
-
-valid_filter = True
-try:
-    exec code
-except Exception, e:
-    out.close()
-    if str( e ).startswith( 'invalid syntax' ):
-        valid_filter = False
-        stop_err( 'Filter condition "%s" likely invalid. See tool tips, syntax and examples.' % cond_text )
-    else:
-        stop_err( str( e ) )
-
-if valid_filter:
-    out.close()
-    valid_lines = total_lines - skipped_lines
-    print 'Filtering with %s, ' % ( cond_text )
-    if valid_lines > 0:
-        print 'kept %4.2f%% of %d lines.' % ( 100.0*lines_kept/valid_lines, total_lines )
-    else:
-        print 'Possible invalid filter condition "%s" or non-existent column referenced. See tool tips, syntax and examples.' % cond_text
-    if skipped_lines > 0:
-        print 'Skipped %d invalid lines starting at line #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
--- a/tools/filters/gff/gff_filter_by_attribute.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-<tool id="gff_filter_by_attribute" name="Filter GFF data by attribute" version="0.1">
-  <description>using simple expressions</description>
-  <command interpreter="python">
-    gff_filter_by_attribute.py $input $out_file1 "$cond" '${input.metadata.attribute_types}'
-  </command>
-  <inputs>
-    <param format="gff" name="input" type="data" label="Filter" help="Dataset missing? See TIP below."/>
-    <param name="cond" size="40" type="text" value="gene_id=='uc002loc.1'" label="With following condition" help="Double equal signs, ==, must be used as shown above. To filter for an arbitrary string, use the Select tool.">
-      <validator type="empty_field" message="Enter a valid filtering condition, see syntax and examples below."/>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <tests>
-    <test>
-        <param name="input" value="gff_filter_attr_in1.gff"/>
-        <param name="cond" value="conf_lo>0"/>
-        <output name="out_file1" file="gff_filter_by_attribute_out1.gff"/>
-    </test>
-    <test>
-        <param name="input" value="gff_filter_attr_in1.gff"/>
-        <param name="cond" value="conf_lo==0 or conf_hi>125"/>
-        <output name="out_file1" file="gff_filter_by_attribute_out2.gff"/>
-    </test>
-  </tests>
-
-  <help>
-
-.. class:: warningmark
-
-Double equal signs, ==, must be used as *"equal to"* (e.g., **c1 == 'chr22'**)
-
-.. class:: infomark
-
-**TIP:** Attempting to apply a filtering condition may throw exceptions if the data type (e.g., string, integer) in every line of the attribute being filtered is not appropriate for the condition (e.g., attempting certain numerical calculations on strings).  If an exception is thrown when applying the condition to a line, that line is skipped as invalid for the filter condition.  The number of invalid skipped lines is documented in the resulting history item as a "Condition/data issue".
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
------
-
-**Syntax**
-
-The filter tool allows you to restrict the dataset using simple conditional statements.
-
-- Make sure that multi-character operators contain no white space ( e.g., **&lt;=** is valid while **&lt; =** is not valid )
-- When using 'equal-to' operator **double equal sign '==' must be used** ( e.g., **attribute_name=='chr1'** )
-- Non-numerical values must be included in single or double quotes ( e.g., **attribute_name=='XX22'** )
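-
------
-
-**Example**
-
-Conditions taken from this tool's own test cases (the attribute names come from the test data):
-
-- **conf_lo&gt;0** keeps lines whose *conf_lo* attribute is greater than zero
-- **conf_lo==0 or conf_hi&gt;125** combines two attribute tests with a logical *or*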
-
-</help>
-</tool>
--- a/tools/filters/gff/gff_filter_by_feature_count.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,88 +0,0 @@
-#!/usr/bin/env python
-"""
-Filter a gff file using a criterion based on feature counts for a transcript.
-
-Usage:
-%prog input_name output_name feature_name condition
-"""
-import sys
-from galaxy import eggs
-from galaxy.datatypes.util.gff_util import GFFReaderWrapper
-from bx.intervals.io import GenomicInterval
-
-# Valid operators, ordered so that complex operators (e.g. '>=') are
-# recognized before simple operators (e.g. '>')
-ops = [
-    '>=',
-    '<=',
-    '<',
-    '>',
-    '==',
-    '!='
-]
-
-# Escape sequences for valid operators.
-mapped_ops = {
-    '__ge__': ops[0],
-    '__le__': ops[1],
-    '__lt__': ops[2],
-    '__gt__': ops[3],
-    '__eq__': ops[4],
-    '__ne__': ops[5],
-}
-
-
-def __main__():
-    # Get args.
-    input_name = sys.argv[1]
-    output_name = sys.argv[2]
-    feature_name = sys.argv[3]
-    condition = sys.argv[4]
-    
-    # Unescape operations in condition str.
-    for key, value in mapped_ops.items():
-        condition = condition.replace( key, value )
-    
-    # Error checking: condition should be of the form <operator><number>
-    for op in ops:
-        if op in condition:
-            empty, number_str = condition.split( op )
-            try:
-                number = float( number_str )
-            except:
-                number = None
-            if empty != "" or not number:
-                print >> sys.stderr, "Invalid condition: %s, cannot filter." % condition
-                return
-            break
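-
-    # Illustration: for condition '>=5' the split above yields empty == ''
-    # and number == 5.0, so the check passes; for a malformed condition
-    # such as 'x>=5', empty is non-empty and an error is reported.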
-
-    # Do filtering.
-    kept_features = 0
-    skipped_lines = 0
-    first_skipped_line = 0
-    out = open( output_name, 'w' )
-    for i, feature in enumerate( GFFReaderWrapper( open( input_name ) ) ):
-        if not isinstance( feature, GenomicInterval ):
-            continue
-        count = 0
-        for interval in feature.intervals:
-            if interval.feature == feature_name:
-                count += 1
-        if eval( '%s %s' % ( count, condition ) ):
-            # Keep feature.
-            for interval in feature.intervals:
-                out.write( "\t".join(interval.fields) + '\n' )
-            kept_features += 1
-
-    # Needed because i is 0-based but want to display stats using 1-based.
-    i += 1
-
-    # Clean up.
-    out.close()
-    info_msg = "%i of %i features kept (%.2f%%) using condition %s.  " % \
-        ( kept_features, i, float(kept_features)/i * 100.0, feature_name + condition )
-    if skipped_lines > 0:
-        info_msg += "Skipped %d blank/comment/invalid lines starting with line #%d." %( skipped_lines, first_skipped_line )
-    print info_msg
-
-if __name__ == "__main__": __main__()
--- a/tools/filters/gff/gff_filter_by_feature_count.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-<tool id="gff_filter_by_feature_count" name="Filter GFF data by feature count" version="0.1">
-  <description>using simple expressions</description>
-  <command interpreter="python">
-    gff_filter_by_feature_count.py $input_file1 $out_file1 "$feature_name" "$cond"
-  </command>
-  <inputs>
-    <param format="gff" name="input_file1" type="data" label="Filter"/>
-    <param name="feature_name" type="select" label="Using feature name">
-        <options from_dataset="input_file1">
-            <column name="name" index="2"/>
-            <column name="value" index="2"/>
-            <filter type="unique_value" name="unique" column="2"/>
-        </options>
-    </param>
-    <param name="cond" size="40" type="text" value=">0" label="With following condition">
-      <validator type="empty_field" message="Enter a valid filtering condition, see syntax and examples below."/>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input_file1"/>
-  </outputs>
-  <tests>
-      <!-- Test GTF filtering. -->
-      <test>
-          <param name="input_file1" value="gops_subtract_in1.gff"/>
-          <param name="feature_name" value="exon"/>
-          <param name="cond" value=">1"/>
-          <output name="out_file1" file="gff_filter_by_feature_count_out1.gff"/>
-      </test>
-      <!-- Test GFF3 filtering. -->
-      <test>
-          <param name="input_file1" value="5.gff3"/>
-          <param name="feature_name" value="HSP"/>
-          <param name="cond" value=">=5"/>
-          <output name="out_file1" file="gff_filter_by_feature_count_out2.gff"/>
-      </test>
-  </tests>
-
-  <help>
-
-
-.. class:: infomark
-
-Valid comparison operators are: &gt;, &lt;, &gt;=, &lt;=, !=, and ==
-
------
-
-**Syntax**
-
-The filter tool allows you to restrict the dataset based on transcripts' feature counts.
-
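------
-
-**Example**
-
-From this tool's test cases: selecting feature name **exon** with condition
-**&gt;1** keeps only transcripts that contain more than one exon, while
-feature name **HSP** with condition **&gt;=5** keeps only features containing
-at least five HSPs.
-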
-</help>
-</tool>
--- a/tools/filters/gff/gtf_filter_by_attribute_values_list.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,67 +0,0 @@
-#
-# Filters a GFF file using a list of attribute values. Attribute values must 
-# be in the first column of the file; subsequent columns are ignored.
-# Usage:
-# python gtf_filter_by_attribute_values_list.py <gff_file> <attribute_name> <ids_file> <output_file>
-#
-
-import sys
-
-def parse_gff_attributes( attr_str ):
-    """
-    Parses a GFF/GTF attribute string and returns a dictionary of name-value 
-    pairs. The general format for a GFF3 attributes string is 
-        name1=value1;name2=value2
-    The general format for a GTF attribute string is 
-        name1 "value1" ; name2 "value2"
-    The general format for a GFF attribute string is a single string that
-    denotes the interval's group; in this case, method returns a dictionary 
-    with a single key-value pair, and key name is 'group'
-    """    
-    attributes_list = attr_str.split(";")
-    attributes = {}
-    for name_value_pair in attributes_list:
-        # Try splitting by space and, if necessary, by '=' sign.
-        pair = name_value_pair.strip().split(" ")
-        if len( pair ) == 1:
-            pair = name_value_pair.strip().split("=")
-        if len( pair ) == 1:
-            # Could not split for some reason -- raise exception?
-            continue
-        name = pair[0].strip()
-        if name == '':
-            continue
-        # Need to strip double quote from values
-        value = pair[1].strip(" \"")
-        attributes[ name ] = value
-        
-    if len( attributes ) == 0:
-        # Could not split attributes string, so entire string must be 
-        # 'group' attribute. This is the case for strictly GFF files.
-        attributes['group'] = attr_str
-    return attributes
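-
-# Illustration: parse_gff_attributes('gene_id "GeneA"; transcript_id "T1"')
-# returns {'gene_id': 'GeneA', 'transcript_id': 'T1'}; a plain GFF group
-# string such as 'GeneA' comes back as {'group': 'GeneA'}.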
-
-def filter( gff_file, attribute_name, ids_file, output_file ):
-    # Put ids in dict for quick lookup.
-    ids_dict = {}
-    for line in open( ids_file ):
-        ids_dict[ line.split('\t')[0].strip() ] = True
-
-    # Filter GFF file using ids.
-    output = open( output_file, 'w' )
-    for line in open( gff_file ):
-        fields = line.split( '\t' )
-        attributes = parse_gff_attributes( fields[8] )
-        if ( attribute_name in attributes ) and ( attributes[ attribute_name ] in ids_dict ):
-            output.write( line )
-    output.close()
-        
-if __name__ == "__main__":
-    # Handle args.
-    if len( sys.argv ) != 5:
-        print >> sys.stderr, "usage: python %s <gff_file> <attribute_name> <ids_file> <output_file>"  % sys.argv[0]
-        sys.exit( -1 )
-    gff_file, attribute_name, ids_file, output_file = sys.argv[1:]
-    filter( gff_file, attribute_name, ids_file, output_file )
--- a/tools/filters/gff/gtf_filter_by_attribute_values_list.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-<tool id="gtf_filter_by_attribute_values_list" name="Filter GTF data by attribute values_list" version="0.1">
-    <description></description>
-    <command interpreter="python">
-      gtf_filter_by_attribute_values_list.py $input $attribute_name $ids $output
-    </command>
-    <inputs>
-        <param format="gtf" name="input" type="data" label="Filter"/>
-        <param name="attribute_name" type="select" label="Using attribute name">
-            <option value="gene_id">gene_id</option>
-            <option value="transcript_id">transcript_id</option>
-            <option value="p_id">p_id</option>
-            <option value="tss_id">tss_id</option>
-        </param>
-        <param format="tabular,txt" name="ids" type="data" label="And attribute values"/>
-    </inputs>
-    <outputs>
-        <data format="input" name="output" metadata_source="input"/>
-    </outputs>
-    <tests>
-        <!-- Test filtering with a simple list of values. -->
-        <test>
-            <param name="input" value="gops_subtract_in1.gff"/>
-            <param name="attribute_name" value="gene_id"/>
-            <param name="ids" value="gtf_filter_by_attribute_values_list_in1.txt"/>
-            <output name="output" file="gtf_filter_by_attribute_values_list_out1.gtf"/>
-        </test>
-        <!-- Test filtering with a more complex tabular file. -->
-        <test>
-            <param name="input" value="gtf_filter_by_attribute_values_list_in2.gtf"/>
-            <param name="attribute_name" value="transcript_id"/>
-            <param name="ids" value="gtf_filter_by_attribute_values_list_in3.tabular"/>
-            <output name="output" file="gtf_filter_by_attribute_values_list_out2.gtf"/>
-        </test>
-    </tests>
-    <help>
-
-This tool filters a GTF file using a list of attribute values. The attribute values are
-taken from the first column in the file; additional columns in the file are ignored. An example
-use of this tool is to filter a GTF file using a list of transcript_ids or gene_ids obtained from Cuffdiff.
-
-    </help>
-</tool>
--- a/tools/filters/gff2bed.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-<tool id="gff2bed1" name="GFF-to-BED" version="1.0.1">
-  <description>converter</description>
-  <command interpreter="python">gff_to_bed_converter.py $input $out_file1</command>
-  <inputs>
-    <param format="gff" name="input" type="data" label="Convert this query"/>
-  </inputs>
-  <outputs>
-    <data format="bed" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="5.gff" ftype="gff"/>
-      <output name="out_file1" file="gff2bed_out.bed"/>
-    </test>
-    <test>
-      <param name="input" value="gff2bed_in2.gff" ftype="gff"/>
-      <output name="out_file1" file="gff2bed_out2.bed"/>
-    </test>
-    <test>
-      <!-- Test conversion of gff3 file. -->
-      <param name="input" value="5.gff3" ftype="gff"/>
-      <output name="out_file1" file="gff2bed_out3.bed"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool converts data from GFF format to BED format (scroll down for format description).
-
---------
-
-**Example**
-
-The following data in GFF format::
-
-    chr22  GeneA  enhancer  10000000  10001000  500  +  .  TGA
-    chr22  GeneA  promoter  10010000  10010100  900  +  .  TGA
-
-Will be converted to BED (**note** that 1 is subtracted from the start coordinate)::
-
-    chr22   9999999  10001000   enhancer   0   +
-    chr22  10009999  10010100   promoter   0   +
-
-------
-
-.. class:: infomark
-
-**About formats**
-
-**BED format** Browser Extensible Data format was designed at UCSC for displaying data tracks in the Genome Browser. It has three required fields and several additional optional ones:
-
-The first three BED fields (required) are::
-
-    1. chrom - The name of the chromosome (e.g. chr1, chrY_random).
-    2. chromStart - The starting position in the chromosome. (The first base in a chromosome is numbered 0.)
-    3. chromEnd - The ending position in the chromosome, plus 1 (i.e., a half-open interval).
-
-The additional BED fields (optional) are::
-
-    4. name - The name of the BED line.
-    5. score - A score between 0 and 1000.
-    6. strand - Defines the strand - either '+' or '-'.
-    7. thickStart - The starting position where the feature is drawn thickly at the Genome Browser.
-    8. thickEnd - The ending position where the feature is drawn thickly at the Genome Browser.
-    9. reserved - This should always be set to zero.
-   10. blockCount - The number of blocks (exons) in the BED line.
-   11. blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount.
-   12. blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount.
-   13. expCount - The number of experiments.
-   14. expIds - A comma-separated list of experiment ids. The number of items in this list should correspond to expCount.
-   15. expScores - A comma-separated list of experiment scores. All of the expScores should be relative to expIds. The number of items in this list should correspond to expCount.
-
-**GFF format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF lines have nine tab-separated fields::
-
-    1. seqname - Must be a chromosome or scaffold.
-    2. source - The program that generated this feature.
-    3. feature - The name of this type of feature. Some examples of standard feature types are "CDS", "start_codon", "stop_codon", and "exon".
-    4. start - The starting position of the feature in the sequence. The first base is numbered 1.
-    5. end - The ending position of the feature (inclusive).
-    6. score - A score between 0 and 1000. If there is no score value, enter ".".
-    7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
-    8. frame - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
-    9. group - All lines with the same group are linked together into a single item.
-
-</help>
-</tool>
--- a/tools/filters/gff_to_bed_converter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,133 +0,0 @@
-#!/usr/bin/env python
-import sys
-from galaxy import eggs
-from galaxy.datatypes.util.gff_util import parse_gff_attributes
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def get_bed_line( chrom, name, strand, blocks ):
-    """ Returns a BED line for given data. """
-
-    
-    if len( blocks ) == 1:
-        # Use simple BED format if there is only a single block:
-        #   chrom, chromStart, chromEnd, name, score, strand
-        #
-        start, end = blocks[0]
-        return "%s\t%i\t%i\t%s\t0\t%s\n" % ( chrom, start, end, name, strand )
-
-    #
-    # Build lists for transcript blocks' starts, sizes.
-    #
-    
-    # Get transcript start, end.
-    t_start = sys.maxint
-    t_end = -1
-    for block_start, block_end in blocks:
-        if block_start < t_start:
-            t_start = block_start
-        if block_end > t_end:
-            t_end = block_end
-            
-    # Get block starts, sizes.
-    block_starts = []
-    block_sizes = []
-    for block_start, block_end in blocks:
-        block_starts.append( str( block_start - t_start ) )
-        block_sizes.append( str( block_end - block_start ) )
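-
-    # Illustration: blocks [(100, 200), (350, 400)] give t_start == 100,
-    # block_starts == ['0', '250'] and block_sizes == ['100', '50'];
-    # blockStarts are relative to chromStart, as BED requires.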
-    
-    #
-    # Create BED entry.
-    # Bed format: chrom, chromStart, chromEnd, name, score, strand, \
-    #               thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts
-    #
-    # Render complete feature with thick blocks. There's no clear way to do this unless
-    # we analyze the block names, but making everything thick makes more sense than
-    # making everything thin.
-    #
-    return "%s\t%i\t%i\t%s\t0\t%s\t%i\t%i\t0\t%i\t%s\t%s\n" % \
-            ( chrom, t_start, t_end, name, strand, t_start, t_end, len( block_starts ), 
-                ",".join( block_sizes ), ",".join( block_starts ) )
-
-def __main__():
-    input_name = sys.argv[1]
-    output_name = sys.argv[2]
-    skipped_lines = 0
-    first_skipped_line = 0
-    out = open( output_name, 'w' )
-    i = 0
-    cur_transcript_chrom = None
-    cur_transcript_id = None
-    cur_transcript_strand = None
-    cur_transcripts_blocks = [] # (start, end) for each block.
-    for i, line in enumerate( file( input_name ) ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ):
-            try:
-                # GFF format: chrom source, name, chromStart, chromEnd, score, strand, attributes
-                elems = line.split( '\t' )
-                start = str( long( elems[3] ) - 1 )
-                coords = [ long( start ), long( elems[4] ) ]
-                strand = elems[6]
-                if strand not in ['+', '-']:
-                    strand = '+'
-                attributes = parse_gff_attributes( elems[8] )
-                t_id = attributes.get( "transcript_id", None )
-                    
-                if not t_id:
-                    #
-                    # No transcript ID, so write last transcript and write current line as its own line.
-                    #
-                    
-                    # Write previous transcript.
-                    if cur_transcript_id:
-                        # Write BED entry.
-                        out.write( get_bed_line( cur_transcript_chrom, cur_transcript_id, cur_transcript_strand, cur_transcripts_blocks ) )
-                    
-                    # Replace any spaces in the name with underscores so UCSC will not complain.
-                    name = elems[2].replace(" ", "_")
-                    out.write( get_bed_line( elems[0], name, strand, [ coords ] ) )
-                    continue
-                
-                # There is a transcript ID, so process line at transcript level.
-                if t_id == cur_transcript_id:
-                    # Line is element of transcript and will be a block in the BED entry.
-                    cur_transcripts_blocks.append( coords )
-                    continue
-                    
-                #
-                # Line is part of new transcript; write previous transcript and start
-                # new transcript.
-                #
-                
-                # Write previous transcript.
-                if cur_transcript_id:
-                    # Write BED entry.
-                    out.write( get_bed_line( cur_transcript_chrom, cur_transcript_id, cur_transcript_strand, cur_transcripts_blocks ) )
-
-                # Start new transcript.
-                cur_transcript_chrom = elems[0]
-                cur_transcript_id = t_id
-                cur_transcript_strand = strand
-                cur_transcripts_blocks = []
-                cur_transcripts_blocks.append( coords )    
-            except:
-                skipped_lines += 1
-                if not first_skipped_line:
-                    first_skipped_line = i + 1
-        else:
-            skipped_lines += 1
-            if not first_skipped_line:
-                first_skipped_line = i + 1
-    
-    # Write last transcript.
-    if cur_transcript_id:
-        # Write BED entry.
-        out.write( get_bed_line( cur_transcript_chrom, cur_transcript_id, cur_transcript_strand, cur_transcripts_blocks ) )
-    out.close()
-    info_msg = "%i lines converted to BED.  " % ( i + 1 - skipped_lines )
-    if skipped_lines > 0:
-        info_msg += "Skipped %d blank/comment/invalid lines starting with line #%d." %( skipped_lines, first_skipped_line )
-    print info_msg
-
-if __name__ == "__main__": __main__()
--- a/tools/filters/grep.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
-# Filename: grep.py
-# Author: Ian N. Schenck
-# Version: 8/23/2005
-#
-# This script accepts regular expressions, as well as an "invert"
-# option, and applies the regular expression using grep.  This wrapper
-# provides security and pipeline.
-#
-# Grep is launched based on these inputs:
-# -i		Input file
-# -o		Output file
-# -pattern	RegEx pattern
-# -v	        true or false (output NON-matching lines)
-
-import sys
-import os
-import re
-import string
-import commands
-from tempfile import NamedTemporaryFile
-
-# This function is exceedingly useful, perhaps package for reuse?
-def getopts(argv):
-    opts = {}
-    while argv:
-        if argv[0][0] == '-':
-            opts[argv[0]] = argv[1]
-            argv = argv[2:]
-        else:
-            argv = argv[1:]
-    return opts
-
-def main():
-    args = sys.argv[1:]
-
-    try:
-        opts = getopts(args)
-    except IndexError:
-        print "Usage:"
-        print " -i        Input file"
-        print " -o        Output file"
-        print " -pattern  RegEx pattern"
-        print " -v        true or false (Invert match)"
-        return 0
-
-    outputfile = opts.get("-o")
-    if outputfile is None:
-        print "No output file specified."
-        return -1
-
-    inputfile = opts.get("-i")
-    if inputfile is None:
-        print "No input file specified."
-        return -2
-
-    invert = opts.get("-v")
-    if invert is None:
-        print "Match style (Invert or normal) not specified."
-        return -3
-
-    pattern = opts.get("-pattern")
-    if pattern is None:
-        print "RegEx pattern not specified."
-        return -4
-
-    # All inputs have been specified at this point, now validate.
-
-    # replace if input has been escaped, remove sq
-    # characters that are allowed but need to be escaped
-    mapped_chars = { '>'  : '__gt__',
-                     '<'  : '__lt__',
-                     '\'' : '__sq__',
-                     '"'  : '__dq__',
-                     '['  : '__ob__',
-                     ']'  : '__cb__',
-                     '{'  : '__oc__',
-                     '}'  : '__cc__'
-                     }
-    
-    #with new sanitizing we only need to replace for single quote, but this needs to remain for backwards compatibility
-    for key, value in mapped_chars.items():
-        pattern = pattern.replace(value, key)
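-    # Illustration: a pattern that arrives escaped as '__ob__0-9__cb__+'
-    # is restored to '[0-9]+' before being handed to grep below.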
-    
-    fileRegEx = re.compile("^[A-Za-z0-9./\-_]+$") #why?
-    invertRegEx = re.compile("(true)|(false)") #why?
-
-    if not fileRegEx.match(outputfile):
-        print "Illegal output filename."
-        return -5
-    if not fileRegEx.match(inputfile):
-        print "Illegal input filename."
-        return -6
-    if not invertRegEx.match(invert):
-        print "Illegal invert option."
-        return -7
-
-    # invert grep search?
-    if invert == "true":
-        invertflag = " -v"
-        print "Not matching pattern: %s" % pattern
-    else:
-        invertflag = ""
-        print "Matching pattern: %s" % pattern
-    
-    #Create temp file holding pattern
-    #By using a file to hold the pattern, we don't have to worry about sanitizing the grep command line and can include single quotes in the pattern
-    pattern_file_name = NamedTemporaryFile().name
-    open( pattern_file_name, 'w' ).write( pattern )
-    
-    #generate grep command
-    commandline = "grep -E %s -f %s %s > %s" % ( invertflag, pattern_file_name, inputfile, outputfile )
-    
-    #run grep
-    errorcode, stdout = commands.getstatusoutput(commandline)
-    
-    #remove temp pattern file
-    os.unlink( pattern_file_name )
-    
-    #return error code
-    return errorcode
-
-if __name__ == "__main__":
-    main()
--- a/tools/filters/grep.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-<tool id="Grep1" name="Select" version="1.0.1">
-  <description>lines that match an expression</description>
-  <command interpreter="python">grep.py -i $input -o $out_file1 -pattern '$pattern' -v $invert</command>
-  <inputs>
-    <param format="txt" name="input" type="data" label="Select lines from"/>
-    <param name="invert" type="select" label="that">
-      <option value="false">Matching</option>
-      <option value="true">NOT Matching</option>
-    </param>
-    <param name="pattern" size="40" type="text" value="^chr([0-9A-Za-z])+" label="the pattern" help="here you can enter text or regular expression (for syntax check lower part of this frame)">
-      <sanitizer>
-        <valid initial="string.printable">
-         <remove value="&apos;"/>
-        </valid>
-        <mapping initial="none">
-          <add source="&apos;" target="__sq__"/>
-        </mapping>
-      </sanitizer>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="1.bed"/>
-      <param name="invert" value="false"/>
-      <param name="pattern" value="^chr[0-9]*"/>
-      <output name="out_file1" file="fs-grep.dat"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
------
-
-**Syntax**
-
-The Select tool searches the data for lines that contain, or do not contain, a match to the given pattern. The pattern may be a regular expression: a concise way of describing a set of strings.
-
-- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
-- **\\A** matches the beginning of a string (but not an internal line).
-- **\\d** matches a digit, same as [0-9].
-- **\\D** matches a non-digit.
-- **\\s** matches a whitespace character.
-- **\\S** matches anything BUT a whitespace.
-- **\\t** matches a tab.
-- **\\w** matches an alphanumeric character.
-- **\\W** matches anything but an alphanumeric character.
-- **(** .. **)** groups a particular pattern.
-- **\\Z** matches the end of a string (but not an internal line).
-- **{ }** specifies an expected number of repetitions of the preceding pattern, in one of three forms:
-
-  - **{n}** The preceding item is matched exactly n times.
-  - **{n,}** The preceding item is matched n or more times. 
-  - **{n,m}** The preceding item is matched at least n times but not more than m times. 
-
-- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
-- **.** Matches any single character except a newline.
-- ***** The preceding item will be matched zero or more times.
-- **?** The preceding item is optional and matched at most once.
-- **+** The preceding item will be matched one or more times.
-- **^** has two meanings:
-  - matches the beginning of a line or string. 
-  - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
-- **$** matches the end of a line or string.
-- **\|** Separates alternate possibilities. 
-
------
-
-**Example**
-
-- **^chr([0-9A-Za-z])+** would match lines that begin with chromosomes, such as lines in a BED format file.
-- **(ACGT){1,5}** would match at least 1 "ACGT" and at most 5 "ACGT" consecutively.
-- **([^,][0-9]{1,3})(,[0-9]{3})\*** would match a large integer that is properly separated with commas such as 23,078,651.
-- **(abc)|(def)** would match either "abc" or "def".
-- **^\\W+#** would match any line that is a comment.
-</help>
-</tool>
--- a/tools/filters/gtf2bedgraph.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-<tool id="gtf2bedgraph" name="GTF-to-BEDGraph">
-  <description>converter</description>
-  <command interpreter="python">gtf_to_bedgraph_converter.py $input $out_file1 $attribute_name</command>
-  <inputs>
-    <param format="gtf" name="input" type="data" label="Convert this query"/>
-    <param name="attribute_name" type="text" label="Attribute to Use for Value"/>
-  </inputs>
-  <outputs>
-    <data format="bedgraph" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="gtf2bedgraph_in.gtf" ftype="gtf"/>
-      <param name="attribute_name" value="FPKM"/>
-      <output name="out_file1" file="gtf2bedgraph_out.bedgraph" ftype="bedgraph"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool converts data from GTF format to BEDGraph format (scroll down for format description).
-
---------
-
-**Example**
-
-The following data in GTF format::
-
-    chr22  GeneA  enhancer  10000000  10001000  500  +  .  gene_id "GeneA"; transcript_id "TranscriptAlpha"; FPKM "2.75"; frac "1.000000";
-    chr22  GeneA  promoter  10010000  10010100  900  +  .  gene_id "GeneA"; transcript_id "TranscriptAlpha"; FPKM "2.25"; frac "1.000000";
-
-using the attribute name 'FPKM' will be converted to BEDGraph (**note** that 1 is subtracted from the start coordinate)::
-
-
-    chr22   9999999  10001000   2.75
-    chr22  10009999  10010100   2.25   
-
-------
-
-.. class:: infomark
-
-**About formats**
-
-**GTF format** Gene Transfer Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GTF lines have nine tab-separated fields::
-
-    1. seqname - Must be a chromosome or scaffold.
-    2. source - The program that generated this feature.
-    3. feature - The name of this type of feature. Some examples of standard feature types are "CDS", "start_codon", "stop_codon", and "exon".
-    4. start - The starting position of the feature in the sequence. The first base is numbered 1.
-    5. end - The ending position of the feature (inclusive).
-    6. score - A score between 0 and 1000. If there is no score value, enter ".".
-    7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
-    8. frame - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
-    9. group - The group field is a list of attributes. Each attribute consists of a type/value pair. Attributes must end in a semi-colon, and be separated from any following attribute by exactly one space. The attribute list must begin with the two mandatory attributes: (i) gene_id value - A globally unique identifier for the genomic source of the sequence and (ii) transcript_id value - A globally unique identifier for the predicted transcript.
-    
-**BEDGraph format**
-
-The bedGraph format is line-oriented. BedGraph data are preceded by a track definition line, which adds a number of options for controlling the default display of this track.
-
-For the track definition line, all options are placed in a single line separated by spaces::
-  track type=bedGraph name=track_label description=center_label
-        visibility=display_mode color=r,g,b altColor=r,g,b
-        priority=priority autoScale=on|off alwaysZero=on|off
-        gridDefault=on|off maxHeightPixels=max:default:min
-        graphType=bar|points viewLimits=lower:upper
-        yLineMark=real-value yLineOnOff=on|off
-        windowingFunction=maximum|mean|minimum smoothingWindow=off|2-16
-
-The track type is REQUIRED, and must be bedGraph::
-  type=bedGraph
-
-Following the track definition line are the track data in four-column BED format::
-
-  chromA  chromStartA  chromEndA  dataValueA
-  chromB  chromStartB  chromEndB  dataValueB
-
-</help>
-</tool>
--- a/tools/filters/gtf_to_bedgraph_converter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-#!/usr/bin/env python
-import os, sys, tempfile
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def __main__():
-    # Read parms.
-    input_name = sys.argv[1]
-    output_name = sys.argv[2]
-    attribute_name = sys.argv[3]
-    
-    # Create temp files.
-    tmp_name1 = tempfile.NamedTemporaryFile().name
-    tmp_name2 = tempfile.NamedTemporaryFile().name
-    
-    # Do conversion.
-    skipped_lines = 0
-    first_skipped_line = 0
-    out = open( tmp_name1, 'w' )
-    
-    # Write track data to temporary file.
-    i = 0
-    for i, line in enumerate( file( input_name ) ):
-        line = line.rstrip( '\r\n' )
-        
-        if line and not line.startswith( '#' ):
-            try:
-                elems = line.split( '\t' )
-                start = str( int( elems[3] ) - 1 ) # GTF coordinates are 1-based, BedGraph are 0-based.
-                strand = elems[6]
-                if strand not in ['+', '-']:
-                    strand = '+'
-                attributes_list = elems[8].split(";")
-                attributes = {}
-                for name_value_pair in attributes_list:
-                    pair = name_value_pair.strip().split(" ")
-                    name = pair[0].strip()
-                    if name == '':
-                        continue
-                    # Need to strip double quote from values
-                    value = pair[1].strip(" \"")
-                    attributes[name] = value
-                value = attributes[ attribute_name ]
-                # GTF format: chrom source, name, chromStart, chromEnd, score, strand, frame, attributes.
-                # BedGraph format: chrom, chromStart, chromEnd, value
-                out.write( "%s\t%s\t%s\t%s\n" %( elems[0], start, elems[4], value ) )
-            except:
-                skipped_lines += 1
-                if not first_skipped_line:
-                    first_skipped_line = i + 1
-        else:
-            skipped_lines += 1
-            if not first_skipped_line:
-                first_skipped_line = i + 1
-    out.close()
-    
-    # Sort tmp file by chromosome name and chromosome start to create ordered track data.
-    cmd = "sort -k1,1 -k2,2n < %s > %s" % ( tmp_name1, tmp_name2 )
-    try:
-        os.system(cmd)
-        os.remove(tmp_name1)
-    except Exception, ex:
-        sys.stderr.write( "%s\n" % ex )
-        sys.exit(1)
-        
-    # Create bedgraph file by combining track definition with ordered track data.
-    cmd = "echo 'track type=bedGraph' | cat - %s > %s " % ( tmp_name2, output_name )
-    try:
-        os.system(cmd)
-        os.remove(tmp_name2)
-    except Exception, ex:
-        sys.stderr.write( "%s\n" % ex )
-        sys.exit(1)
-    
-    info_msg = "%i lines converted to BEDGraph.  " % ( i + 1 - skipped_lines )
-    if skipped_lines > 0:
-        info_msg += "Skipped %d blank/comment/invalid lines starting with line #%d." %( skipped_lines, first_skipped_line )
-    print info_msg
-
-if __name__ == "__main__": __main__()
--- a/tools/filters/headWrapper.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-use warnings;
-
-# a wrapper for head for use in galaxy
-# headWrapper.pl [filename] [# lines to show] [output]
-
-die "Check arguments" unless @ARGV == 3;
-die "Line number must be an integer\n" unless $ARGV[1]=~ m/^\d+$/;
-
-open (OUT, ">$ARGV[2]") or die "Cannot create $ARGV[2]:$!\n";
-open (HEAD, "head -n $ARGV[1] $ARGV[0]|") or die "Cannot run head:$!\n";
-while (<HEAD>) {
-    print OUT;
-}
-close OUT;
-close HEAD;
-    
--- a/tools/filters/headWrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-<tool id="Show beginning1" name="Select first">
-  <description>lines from a dataset</description>
-  <command interpreter="perl">headWrapper.pl $input $lineNum $out_file1</command>
-  <inputs>
-    <param name="lineNum" size="5" type="integer" value="10" label="Select first" help="lines"/>
-    <param format="txt" name="input" type="data" label="from"/>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="lineNum" value="10"/>
-      <param name="input" value="1.bed"/>
-      <output name="out_file1" file="eq-showbeginning.dat"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool outputs a specified number of lines from the **beginning** of a dataset.
-
------
-
-**Example**
-
-Selecting 2 lines from this::
-
-    chr7  56632  56652  D17003_CTCF_R6  310  +
-    chr7  56736  56756  D17003_CTCF_R7  354  +
-    chr7  56761  56781  D17003_CTCF_R4  220  +
-    chr7  56772  56792  D17003_CTCF_R7  372  +
-    chr7  56775  56795  D17003_CTCF_R4  207  +
-
-will produce::
-
-    chr7  56632  56652  D17003_CTCF_R6  310  +
-    chr7  56736  56756  D17003_CTCF_R7  354  +
-
-  </help>
-</tool>
--- a/tools/filters/join.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,370 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-"""
-Script to Join Two Files on specified columns.
-
-Takes two tab-delimited files and two column numbers (base 1), and outputs a new tab-delimited file with lines joined by tabs.
-The user can also opt to have non-joining rows of file1 echoed.
-
-"""
-
-import optparse, os, sys, tempfile, struct
-import psyco_full
-
-try:
-    simplejson_exception = None
-    from galaxy import eggs
-    from galaxy.util.bunch import Bunch
-    from galaxy.util import stringify_dictionary_keys
-    import pkg_resources
-    pkg_resources.require("simplejson")
-    import simplejson
-except Exception, e:
-    simplejson_exception = e
-    simplejson = None
-
-
-class OffsetList:
-    def __init__( self, filesize = 0, fmt = None ):
-        self.file = tempfile.NamedTemporaryFile( 'w+b' )
-        if fmt:
-            self.fmt = fmt
-        elif filesize and filesize <= sys.maxint * 2:
-            self.fmt = 'I'
-        else:
-            self.fmt = 'Q'
-        self.fmt_size = struct.calcsize( self.fmt )
-    @property
-    def size( self ):
-        self.file.flush()
-        return self.file_size / self.fmt_size
-    @property
-    def file_size( self ):
-        self.file.flush()
-        return os.stat( self.file.name ).st_size
-    def add_offset( self, offset ):
-        if not isinstance( offset, list ):
-            offset = [offset]
-        self.file.seek( self.file_size )
-        for off in offset:
-            self.file.write( struct.pack( self.fmt, off ) )
-    def get_offsets( self, start = 0 ):
-        self.file.seek( start * self.fmt_size )
-        while True:
-            packed = self.file.read( self.fmt_size )
-            if not packed: break
-            yield struct.unpack( self.fmt, packed )[0]
-    def get_offset_by_index( self, index ):
-        self.file.seek( index * self.fmt_size )
-        return struct.unpack( self.fmt, self.file.read( self.fmt_size ) )[0]
-    def set_offset_at_index( self, index, offset ):
-        if not isinstance( offset, list ):
-            offset = [offset]
-        if index >= self.size:
-            self.add_offset( offset )
-        else:
-            temp_file = tempfile.NamedTemporaryFile( 'w+b' )
-            self.file.seek( 0 )
-            temp_file.write( self.file.read( ( index ) * self.fmt_size ) )
-            for off in offset:
-                temp_file.write( struct.pack( self.fmt, off ) )
-            temp_file.write( self.file.read() )
-            self.file = temp_file
-
-class SortedOffsets( OffsetList ):
-    def __init__( self, indexed_filename, column, split = None ):
-        OffsetList.__init__( self, os.stat( indexed_filename ).st_size )
-        self.indexed_filename = indexed_filename
-        self.indexed_file = open( indexed_filename, 'rb' )
-        self.column = column
-        self.split = split
-        self.last_identifier = None
-        self.last_identifier_merged = None
-        self.last_offset_merged = 0
-    def merge_with_dict( self, new_offset_dict ):
-        if not new_offset_dict: return #no items to merge in
-        keys = new_offset_dict.keys()
-        keys.sort()
-        identifier2 = keys.pop( 0 )
-        
-        result_offsets = OffsetList( fmt = self.fmt )
-        offsets1 = enumerate( self.get_offsets() )
-        try:
-            index1, offset1 = offsets1.next()
-            identifier1 = self.get_identifier_by_offset( offset1 )
-        except StopIteration:
-            offset1 = None
-            identifier1 = None
-            index1 = 0
-        
-        while True:
-            if identifier1 is None and identifier2 is None:
-                self.file = result_offsets.file #self is now merged results
-                return
-            elif identifier1 is None or ( identifier2 and identifier2 < identifier1 ):
-                result_offsets.add_offset( new_offset_dict[identifier2] )
-                if keys:
-                    identifier2 = keys.pop( 0 )
-                else:
-                    identifier2 = None
-            elif identifier2 is None:
-                result_offsets.file.seek( result_offsets.file_size )
-                self.file.seek( index1 * self.fmt_size )
-                result_offsets.file.write( self.file.read() )
-                identifier1 = None
-                offset1 = None
-            else:
-                result_offsets.add_offset( offset1 )
-                try:
-                    index1, offset1 = offsets1.next()
-                    identifier1 = self.get_identifier_by_offset( offset1 )
-                except StopIteration:
-                    offset1 = None
-                    identifier1 = None
-                    index1 += 1
-    # methods to help link offsets to lines, identifiers, etc.
-    def get_identifier_by_line( self, line ):
-        if isinstance( line, str ):
-            fields = line.rstrip( '\r\n' ).split( self.split )
-            if self.column < len( fields ):
-                return fields[self.column]
-        return None
-    def get_line_by_offset( self, offset ):
-        self.indexed_file.seek( offset )
-        return self.indexed_file.readline()
-    def get_identifier_by_offset( self, offset ):
-        return self.get_identifier_by_line( self.get_line_by_offset( offset ) )
-
-#indexed set of offsets, index is built on demand
-class OffsetIndex:
-    def __init__( self, filename, column, split = None, index_depth = 3 ):
-        self.filename = filename
-        self.file = open( filename, 'rb' )
-        self.column = column
-        self.split = split
-        self._offsets = {}
-        self._index = None
-        self.index_depth = index_depth
-    def _build_index( self ):
-        self._index = {}
-        for start_char, sorted_offsets in self._offsets.items():
-            self._index[start_char]={}
-            for i, offset in enumerate( sorted_offsets.get_offsets() ):
-                identifier = sorted_offsets.get_identifier_by_offset( offset )
-                if identifier[0:self.index_depth] not in self._index[start_char]:
-                    self._index[start_char][identifier[0:self.index_depth]] = i
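-        # Illustration: with the default index_depth of 3, an identifier
-        # such as 'chr10' is looked up via self._index['c']['chr'], which
-        # records where identifiers starting with 'chr' begin in the
-        # sorted offset list.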
-    def get_lines_by_identifier( self, identifier ):
-        if not identifier: return
-        #if index doesn't exist, build it
-        if self._index is None: self._build_index()
-        
-        #identifier cannot exist
-        if identifier[0] not in self._index or identifier[0:self.index_depth] not in self._index[identifier[0]]:
-            return
-        #identifier might exist, search for it
-        offset_index = self._index[identifier[0]][identifier[0:self.index_depth]]
-        while True:
-            if offset_index >= self._offsets[identifier[0]].size:
-                return
-            offset = self._offsets[identifier[0]].get_offset_by_index( offset_index )
-            identifier2 = self._offsets[identifier[0]].get_identifier_by_offset( offset )
-            if not identifier2 or identifier2 > identifier:
-                return
-            if identifier2 == identifier:
-                yield self._offsets[identifier[0]].get_line_by_offset( offset )
-            offset_index += 1
-    def get_offsets( self ):
-        keys = self._offsets.keys()
-        keys.sort()
-        for key in keys:
-            for offset in self._offsets[key].get_offsets():
-                yield offset
-    def get_line_by_offset( self, offset ):
-        self.file.seek( offset )
-        return self.file.readline()
-    def get_identifiers_offsets( self ):
-        keys = self._offsets.keys()
-        keys.sort()
-        for key in keys:
-            for offset in self._offsets[key].get_offsets():
-                yield self._offsets[key].get_identifier_by_offset( offset ), offset
-    def get_identifier_by_line( self, line ):
-        if isinstance( line, str ):
-            fields = line.rstrip( '\r\n' ).split( self.split )
-            if self.column < len( fields ):
-                return fields[self.column]
-        return None
-    def merge_with_dict( self, d ):
-        if not d: return #no data to merge
-        self._index = None
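-        #bucket the sorted identifiers by first character, merging each
-        #bucket into its per-character SortedOffsets store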
-        keys = d.keys()
-        keys.sort()
-        identifier = keys.pop( 0 )
-        first_char = identifier[0]
-        temp = { identifier: d[identifier] }
-        while True:
-            if not keys:
-                if first_char not in self._offsets:
-                    self._offsets[first_char] = SortedOffsets( self.filename, self.column, self.split )
-                self._offsets[first_char].merge_with_dict( temp )
-                return
-            identifier = keys.pop( 0 )
-            if identifier[0] == first_char:
-                temp[identifier] = d[identifier]
-            else:
-                if first_char not in self._offsets:
-                    self._offsets[first_char] = SortedOffsets( self.filename, self.column, self.split )
-                self._offsets[first_char].merge_with_dict( temp )
-                temp = { identifier: d[identifier] }
-                first_char = identifier[0]
-
-class BufferedIndex:
-    def __init__( self, filename, column, split = None, buffer = 1000000, index_depth = 3 ):
-        self.index = OffsetIndex( filename, column, split, index_depth )
-        self.buffered_offsets = {}
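-        #scan the file once, recording the byte offset of every line that has
-        #an identifier; every 'buffer' offsets are merged into the index and
-        #any leftovers stay in the in-memory buffer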
-        f = open( filename, 'rb' )
-        offset = f.tell()
-        identified_offset_count = 1
-        while True:
-            offset = f.tell()
-            line = f.readline()
-            if not line: break #EOF
-            identifier = self.index.get_identifier_by_line( line )
-            if identifier:
-                #flush buffered offsets, if buffer size reached
-                if buffer and identified_offset_count % buffer == 0:
-                    self.index.merge_with_dict( self.buffered_offsets )
-                    self.buffered_offsets = {}
-                if identifier not in self.buffered_offsets:
-                    self.buffered_offsets[identifier] = []
-                self.buffered_offsets[identifier].append( offset )
-                identified_offset_count += 1
-        f.close()
-     
-    def get_lines_by_identifier( self, identifier ):
-        for line in self.index.get_lines_by_identifier( identifier ):
-            yield line
-        if identifier in self.buffered_offsets:
-            for offset in self.buffered_offsets[identifier]:
-                yield self.index.get_line_by_offset( offset )
-
-
-def fill_empty_columns( line, split, fill_values ):
-    if not fill_values:
-        return line
-    filled_columns = []
-    for i, field in enumerate( line.split( split ) ):
-        if field or i >= len( fill_values ):
-            filled_columns.append( field )
-        else:
-            filled_columns.append( fill_values[i] )
-    if len( fill_values ) > len( filled_columns ):
-        filled_columns.extend( fill_values[ len( filled_columns ) : ] )
-    return split.join( filled_columns )
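-#e.g. fill_empty_columns( "a\t\tc", "\t", [ ".", ".", ".", "." ] ) returns
-#"a\t.\tc\t." (empty fields filled, missing trailing columns appended)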
-
-
-def join_files( filename1, column1, filename2, column2, out_filename, split = None, buffer = 1000000, keep_unmatched = False, keep_partial = False, index_depth = 3, fill_options = None ):
-    #return identifier based upon line
-    def get_identifier_by_line( line, column, split = None ):
-        if isinstance( line, str ):
-            fields = line.rstrip( '\r\n' ).split( split )
-            if column < len( fields ):
-                return fields[column]
-        return None
-    if fill_options is None:
-        fill_options = Bunch( fill_unjoined_only = True, file1_columns = None, file2_columns = None )
-    out = open( out_filename, 'w+b' )
-    index = BufferedIndex( filename2, column2, split, buffer, index_depth )
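-    #stream file1 line by line, writing one output row per match found for
-    #its identifier in the index built over file2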
-    for line1 in open( filename1, 'rb' ):
-        identifier = get_identifier_by_line( line1, column1, split )
-        if identifier:
-            written = False
-            for line2 in index.get_lines_by_identifier( identifier ):
-                if not fill_options.fill_unjoined_only:
-                    out.write( "%s%s%s\n" % ( fill_empty_columns( line1.rstrip( '\r\n' ), split, fill_options.file1_columns ), split, fill_empty_columns( line2.rstrip( '\r\n' ), split, fill_options.file2_columns ) ) )
-                else:
-                    out.write( "%s%s%s\n" % ( line1.rstrip( '\r\n' ), split, line2.rstrip( '\r\n' ) ) )
-                written = True
-            if not written and keep_unmatched:
-                out.write( fill_empty_columns( line1.rstrip( '\r\n' ), split, fill_options.file1_columns ) )
-                if fill_options:
-                    if fill_options.file2_columns:
-                        out.write( "%s%s" % ( split,  fill_empty_columns( "", split, fill_options.file2_columns ) ) )
-                out.write( "\n" )
-        elif keep_partial:
-            out.write( fill_empty_columns( line1.rstrip( '\r\n' ), split, fill_options.file1_columns ) )
-            if fill_options:
-                if fill_options.file2_columns:
-                    out.write( "%s%s" % ( split,  fill_empty_columns( "", split, fill_options.file2_columns ) ) )
-            out.write( "\n" )
-    out.close()
-
-def main():
-    parser = optparse.OptionParser()
-    parser.add_option(
-        '-b','--buffer',
-        dest='buffer',
-        type='int',default=1000000,
-        help='Number of lines to buffer at a time. Default: 1,000,000 lines. A buffer of 0 will attempt to use memory only.'
-    )
-    parser.add_option(
-        '-d','--index_depth',
-        dest='index_depth',
-        type='int',default=3,
-        help='Depth to use on file-based offset indexing. Default: 3.'
-    )
-    parser.add_option(
-        '-p','--keep_partial',
-        action='store_true',
-        dest='keep_partial',
-        default=False,
-        help='Keep rows in first input which are missing identifiers.')
-    parser.add_option(
-        '-u','--keep_unmatched',
-        action='store_true',
-        dest='keep_unmatched',
-        default=False,
-        help='Keep rows in first input which are not joined with the second input.')
-    parser.add_option(
-        '-f','--fill_options_file',
-        dest='fill_options_file',
-        type='str',default=None,
-        help='Fill empty columns with values from a JSONified file.')
-    
-    
-    options, args = parser.parse_args()
-    
-    fill_options = None
-    if options.fill_options_file is not None:
-        try:
-            if simplejson is None:
-                raise simplejson_exception
-            fill_options = Bunch( **stringify_dictionary_keys( simplejson.load( open( options.fill_options_file ) ) ) ) #simplejson.load( open( options.fill_options_file ) )
-        except Exception, e:
-            print "Warning: Ignoring fill options due to simplejson error (%s)." % e
-    if fill_options is None:
-        fill_options = Bunch()
-    if 'fill_unjoined_only' not in fill_options:
-        fill_options.fill_unjoined_only = True
-    if 'file1_columns' not in fill_options:
-        fill_options.file1_columns = None
-    if 'file2_columns' not in fill_options:
-        fill_options.file2_columns = None
-    
-    
-    try:
-        filename1 = args[0]
-        filename2 = args[1]
-        column1 = int( args[2] ) - 1
-        column2 = int( args[3] ) - 1
-        out_filename = args[4]
-    except:
-        print >> sys.stderr, "Error parsing command line."
-        sys.exit()
-    
-    #Character for splitting fields and joining lines
-    split = "\t"
-    
-    return join_files( filename1, column1, filename2, column2, out_filename, split, options.buffer, options.keep_unmatched, options.keep_partial, options.index_depth, fill_options = fill_options )
-
-if __name__ == "__main__": main()
--- a/tools/filters/joinWrapper.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-use warnings;
-use File::Temp "tempfile";
-
-my ($input1, $input2, $field1, $field2, $mode, $OOption, $out_file1) = @ARGV;
-
-die "No arguments\n" unless @ARGV == 7;
-
-my ($fh1, $file1) = tempfile();
-my ($fh2, $file2) = tempfile(); 
-
-`sort -k $field1 $input1 > $file1`;
-`sort -k $field2 $input2 > $file2`;
-
-my $option = "";
-my @fields = ();
-my $line = "";
-
-if ($OOption eq "Y") {
-  if (defined($fh1)) {
-    $line = <$fh1>;
-  } else {
-    die "Failed to create file $file1\n";
-  }
-  @fields = split /\t/, $line;
-  die "The field you selected does not exist in the input file" if (@fields < $field1);
-  my @optionO = ();
-  my $i = 0;
-  foreach (@fields) {
-    ++$i;
-    push(@optionO, "1.$i");
-  }
-  $option = "-o " . join(",", @optionO);
-} else {
-  $option = "";
-}
-
-$ENV{'LC_ALL'} = 'POSIX';
-
-if ($mode eq "V") {
-  `join -v 1 $option -1 $field1 -2 $field2 $file1 $file2 | tr " " "\t" > $out_file1`;
-} else {
-  `join $option -1 $field1 -2 $field2 $file1 $file2 | tr " " "\t" > $out_file1`;
-}
-
-`rm $file1 ; rm $file2`;
-
-
-
--- a/tools/filters/joinWrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-#!/usr/bin/env python
-#Guruprasad Ananda
-"""
-This tool provides the UNIX "join" functionality.
-"""
-import sys, os, tempfile, subprocess
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-def main():
-    infile1 = sys.argv[1]
-    infile2 = sys.argv[2]
-    field1 = int(sys.argv[3])
-    field2 = int(sys.argv[4])
-    mode =sys.argv[5]
-    outfile = sys.argv[6]
-    
-    tmpfile1 = tempfile.NamedTemporaryFile()
-    tmpfile2 = tempfile.NamedTemporaryFile()
-    
-    try:
-        #Sort the two files based on specified fields
-        os.system("sort -t '	' -k %d,%d -o %s %s" %(field1, field1, tmpfile1.name, infile1))
-        os.system("sort -t '	' -k %d,%d -o %s %s" %(field2, field2, tmpfile2.name, infile2))
-    except Exception, exc:
-        stop_err( 'Initialization error -> %s' %str(exc) )
-        
-    option = ""
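-    #build join's -o FORMAT string ("1.1,1.2,...") from the first data line,
-    #so that only the columns of the first file appear in the output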
-    for line in file(tmpfile1.name):
-        line = line.strip()
-        if line:
-            elems = line.split('\t')
-            for j in range(1,len(elems)+1):
-                if j == 1:
-                    option = "1.1"
-                else:
-                    option = option + ",1." + str(j) 
-            break
-    
-    #check if join has a --version option. BSD join doesn't have this option, while GNU join does.
-    #The return value will be 0 in the latter case, and non-zero in the former.
-    ret = subprocess.call('join --version 2>/dev/null', shell=True) 
-    # check if this is GNU join version 7 or later. If so, we want to skip
-    # checking the order, since join would otherwise raise an error when there
-    # are duplicated items in the two files being joined.
-    if ret == 0: 
-        cl = subprocess.Popen(["join", "--version"], stdout=subprocess.PIPE)
-        (stdout, _) = cl.communicate()
-        version_line = stdout.split("\n")[0]
-        (version, _) = version_line.split()[-1].split(".")
-        if int(version) >= 7:
-            flags = "--nocheck-order"
-        else:
-            flags = ""
-    else:
-        flags = ""
-
-    if mode == "V":
-        cmdline = "join %s -t '	' -v 1 -o %s -1 %d -2 %d %s %s > %s" %(flags, option, field1, field2, tmpfile1.name, tmpfile2.name, outfile)
-    else:
-        cmdline = "join %s -t '	' -o %s -1 %d -2 %d %s %s > %s" %(flags, option, field1, field2, tmpfile1.name, tmpfile2.name, outfile)
-    
-    try:
-        os.system(cmdline) 
-    except Exception, exj:
-        stop_err('Error joining the two datasets -> %s' %str(exj))
-       
-if __name__ == "__main__":
-    main()
--- a/tools/filters/joiner.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,180 +0,0 @@
-<tool id="join1" name="Join two Datasets" version="2.0.2">
-  <description>side by side on a specified field</description>
-  <command interpreter="python">join.py $input1 $input2 $field1 $field2 $out_file1 $unmatched $partial --index_depth=3 --buffer=50000000 --fill_options_file=$fill_options_file</command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Join"/>
-    <param name="field1" label="using column" type="data_column" data_ref="input1" />
-    <param format="tabular" name="input2" type="data" label="with" />
-    <param name="field2" label="and column" type="data_column" data_ref="input2" />
-    <param name="unmatched" type="select" label="Keep lines of first input that do not join with second input">
-      <option value="-u">Yes</option>
-      <option value="" selected="true">No</option>
-    </param>
-    <param name="partial" type="select" label="Keep lines of first input that are incomplete">
-      <option value="-p">Yes</option>
-      <option value="" selected="true">No</option>
-    </param>
-    <conditional name="fill_empty_columns">
-      <param name="fill_empty_columns_switch" type="select" label="Fill empty columns">
-        <option value="no_fill" selected="True">No</option>
-        <option value="fill_empty">Yes</option>
-      </param>
-     <when value="no_fill">
-        <!-- do nothing -->
-     </when>
-     <when value="fill_empty">
-       <param type="select" name="fill_columns_by" label="Only fill unjoined rows">
-         <option value="fill_unjoined_only" selected="True">Yes</option>
-         <option value="fill_all">No</option>
-       </param>
-       <conditional name="do_fill_empty_columns">
-         <param name="column_fill_type" type="select" label="Fill Columns by">
-           <option value="single_fill_value" selected="True">Single fill value</option>
-           <option value="fill_value_by_column">Values by column</option>
-         </param>
-         <when value="single_fill_value">
-           <param type="text" name="fill_value" label="Fill value" value="."/>
-         </when>
-         <when value="fill_value_by_column">
-           <repeat name="column_fill1" title="Fill Column for Input 1">
-             <param name="column_number1" label="Column" type="data_column" data_ref="input1" />
-             <param type="text" name="fill_value1" value="."/>
-           </repeat>
-           <repeat name="column_fill2" title="Fill Column for Input 2">
-             <param name="column_number2" label="Column" type="data_column" data_ref="input2" />
-             <param type="text" name="fill_value2" value="."/>
-           </repeat>
-         </when>
-       </conditional>
-     </when>
-   </conditional>
-  </inputs>
-  <configfiles>
-    <configfile name="fill_options_file">&lt;%
-import simplejson
-%&gt;
-#set $__fill_options = {}
-#if $fill_empty_columns['fill_empty_columns_switch'] == 'fill_empty':
-    #set $__fill_options['fill_unjoined_only'] = $fill_empty_columns['fill_columns_by'].value == 'fill_unjoined_only'
-    #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'single_fill_value':
-        #set $__start_fill = $fill_empty_columns['do_fill_empty_columns']['fill_value'].value
-    #else:
-        #set $__start_fill = ""
-    #end if
-    #set $__fill_options['file1_columns'] = [ __start_fill for i in range( int( $input1.metadata.columns ) ) ]
-    #set $__fill_options['file2_columns'] = [ __start_fill for i in range( int( $input2.metadata.columns ) ) ]
-    #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'fill_value_by_column':
-        #for column_fill1 in $fill_empty_columns['do_fill_empty_columns']['column_fill1']:
-            #set $__fill_options['file1_columns'][ int( column_fill1['column_number1'].value ) - 1 ] = column_fill1['fill_value1'].value
-        #end for
-        #for column_fill2 in $fill_empty_columns['do_fill_empty_columns']['column_fill2']:
-            #set $__fill_options['file2_columns'][ int( column_fill2['column_number2'].value ) - 1 ] = column_fill2['fill_value2'].value
-        #end for
-    #end if
-#end if
-${simplejson.dumps( __fill_options )}
-    </configfile>
-  </configfiles>
-  <outputs>
-     <data format="input" name="out_file1" metadata_source="input1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed"/>
-      <param name="input2" value="2.bed"/>
-      <param name="field1" value="2"/>
-      <param name="field2" value="2"/>
-      <param name="unmatched" value=""/>
-      <param name="partial" value=""/>
-      <param name="fill_empty_columns_switch" value="no_fill"/>
-      <output name="out_file1" file="joiner_out1.bed"/>
-    </test>
-    <test>
-      <param name="input1" value="1.bed"/>
-      <param name="input2" value="2.bed"/>
-      <param name="field1" value="2"/>
-      <param name="field2" value="2"/>
-      <param name="unmatched" value="Yes"/>
-      <param name="partial" value="Yes"/>
-      <param name="fill_empty_columns_switch" value="no_fill"/>
-      <output name="out_file1" file="joiner_out2.bed"/>
-    </test>
-    <test>
-      <param name="input1" value="1.bed"/>
-      <param name="input2" value="2.bed"/>
-      <param name="field1" value="2"/>
-      <param name="field2" value="2"/>
-      <param name="unmatched" value="Yes"/>
-      <param name="partial" value="Yes"/>
-      <param name="fill_empty_columns_switch" value="fill_empty"/>
-      <param name="fill_columns_by" value="fill_all"/>
-      <param name="column_fill_type" value="single_fill_value"/>
-      <param name="fill_value" value="~"/>
-      <output name="out_file1" file="joiner_out3.bed"/>
-    </test>
-    <test>
-      <param name="input1" value="1.bed"/>
-      <param name="input2" value="2.bed"/>
-      <param name="field1" value="2"/>
-      <param name="field2" value="2"/>
-      <param name="unmatched" value="Yes"/>
-      <param name="partial" value="Yes"/>
-      <param name="fill_empty_columns_switch" value="fill_empty"/>
-      <param name="fill_columns_by" value="fill_all"/>
-      <param name="column_fill_type" value="fill_value_by_column"/>
-      <param name="column_number1" value="6"/>
-      <param name="fill_value1" value="+"/>
-      <param name="column_number2" value="1"/>
-      <param name="fill_value2" value="NoChrom"/>
-      <output name="out_file1" file="joiner_out4.bed"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-**This tool will attempt to reuse the metadata from your first input.** To change metadata assignments click on the "edit attributes" link of the history item generated by this tool.
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
------
-
-**Syntax**
-
-This tool joins lines of two datasets on a common field. An empty string ("") is not a valid identifier.
-You may choose to include lines of your first input that do not join with your second input.
-
-- Columns are referenced with a **number**. For example, **3** refers to the 3rd column of a tab-delimited file.
-
------
-
-**Example**
-
-Dataset1::
-
-  chr1 10 20 geneA 
-  chr1 50 80 geneB
-  chr5 10 40 geneL
-
-Dataset2::
-
-  geneA tumor-suppressor
-  geneB Foxp2
-  geneC Gnas1
-  geneE INK4a
-
-Joining the 4th column of Dataset1 with the 1st column of Dataset2 will yield::
-
-  chr1 10 20 geneA geneA tumor-suppressor
-  chr1 50 80 geneB geneB Foxp2
-
-Joining the 4th column of Dataset1 with the 1st column of Dataset2, while keeping all lines from Dataset1, will yield::
-
-  chr1 10 20 geneA geneA tumor-suppressor
-  chr1 50 80 geneB geneB Foxp2
-  chr5 10 40 geneL
-
-</help>
-</tool>
--- a/tools/filters/joiner2.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-<tool id="joiner2" name="Relational join 2">
-  <description>two datasets where a specific column has the same value</description>
-  <command>sort -k $col1 $input1 > $input1.tmp; sort -k $col2 $input2 > $input2.tmp; join -1 $col1 -2 $col2 $input1.tmp $input2.tmp | tr " " "\t" > $out_file1; rm -rf $input1.tmp $input2.tmp </command>
-  <inputs>
-    <param name="input1" label="Combine dataset" format="tabular" type="data" />
-    <param name="col1" label="using column" type="data_column" data_ref="input1" />
-    <param name="input2" label="with dataset"	format="tabular" type="data"/>
-    <param name="col2" label="and column" type="data_column" data_ref="input2" />
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input1" />
-  </outputs>
-</tool>
--- a/tools/filters/lav_to_bed.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-#!/usr/bin/env python
-#Reads a LAV file and writes two BED files.
-import sys
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import bx.align.lav
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def main():
-    try:
-        lav_file = open(sys.argv[1],'r')
-        bed_file1 = open(sys.argv[2],'w')
-        bed_file2 = open(sys.argv[3],'w')
-    except Exception, e:
-        stop_err( str( e ) )
-        
-    lavsRead = 0
-    bedsWritten = 0
-    species = {}
-    # TODO: this is really bad since everything is read into memory.  Can we eliminate this tool?
-    for lavBlock in bx.align.lav.Reader( lav_file ):
-        lavsRead += 1
-        for c in lavBlock.components:
-            spec, chrom = bx.align.lav.src_split( c.src )
-            if bedsWritten < 1:
-                if len( species )==0:
-                    species[spec]=bed_file1
-                elif len( species )==1:
-                    species[spec]=bed_file2
-                else:
-                    continue #this is a pairwise alignment...
-            if spec in species:
-                species[spec].write( "%s\t%i\t%i\t%s_%s\t%i\t%s\n" % ( chrom, c.start, c.end, spec, str( bedsWritten ), 0, c.strand ) )
-        bedsWritten += 1
-        
-
-    for spec,file in species.items():
-        print "#FILE\t%s\t%s" % (file.name, spec)
-    
-    lav_file.close()
-    bed_file1.close()
-    bed_file2.close()
-    
-    print "%d lav blocks read, %d regions written\n" % (lavsRead,bedsWritten)
-
-
-
-if __name__ == "__main__": main()
\ No newline at end of file
--- a/tools/filters/lav_to_bed.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-<tool id="lav_to_bed1" name="LAV to BED">
-  <description>Converts a LAV formatted file to BED format</description>
-  <command interpreter="python">lav_to_bed.py $lav_file $bed_file1 $bed_file2</command>
-  <inputs>
-    <param name="lav_file" type="data" format="lav" label="LAV File" optional="False"/>
-  </inputs>
-  <outputs>
-    <data name="bed_file1" format="bed"/>
-    <data name="bed_file2" format="bed"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="lav_file" value="2.lav" ftype="lav" />
-      <output name="bed_file2" file="lav_to_bed_out_1.bed" />
-      <output name="bed_file2" file="lav_to_bed_out_2.bed" />
-    </test>
-  </tests>
-  <help>
-
-**Syntax**
-
-This tool converts a LAV formatted file to the BED format.
-
-- **LAV format** LAV is an alignment format developed by Webb Miller's group at Penn State University. It is the primary output format for BLASTZ.
-
-- **BED format** Browser Extensible Data format was designed at UCSC for displaying data tracks in the Genome Browser.
-
------
-
-**Example**
-
-- Convert LAV format::
-
-    #:lav
-    s {
-      &quot;/galaxy/data/hg16/seq/chr19.nib&quot; 1 63811651 0 1
-      &quot;/galaxy/data/mm5/seq/chr11.nib&quot; 1 121648857 0 1
-    }
-    h {
-      &quot;> hg16.chr19&quot;
-      &quot;> mm5.chr11 (reverse complement)&quot;
-    }
-    a {
-      s 3500
-      b 3001012 70568380
-      e 3001075 70568443
-      l 3001012 70568380 3001075 70568443 81
-    }
-    a {
-      s 3900
-      b 3008279 70573976
-      e 3008357 70574054
-      l 3008279 70573976 3008357 70574054 78
-    }
-    #:eof
-
-- To two BED formatted files::
-
-    chr19	3001011	3001075	hg16_0	0	+
-    chr19	3008278	3008357	hg16_1	0	+
-    
- **and**::
-    
-    chr11	70568379	70568443	mm5_0	0	+
-    chr11	70573975	70574054	mm5_1	0	+
-  </help>
-  <code file="lav_to_bed_code.py"/>
-</tool>
--- a/tools/filters/lav_to_bed_code.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-#Set build, name, and info for each output BED file
-def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
-    new_stdout = ""
-    filename_to_build = {}
-    for line in stdout.split("\n"):
-        if line.startswith("#FILE"):
-            fields = line.split("\t")
-            filename_to_build[fields[1]]=fields[2].strip()
-        else:
-            new_stdout = "%s%s" % ( new_stdout, line )
-    for name,data in out_data.items():
-        try:
-            data.info = "%s\n%s" % ( new_stdout, stderr )
-            data.dbkey = filename_to_build[data.file_name]
-            data.name = "%s (%s)" % ( data.name, data.dbkey )
-            app.model.context.add( data )
-            app.model.context.flush()
-        except:
-            continue
--- a/tools/filters/mergeCols.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-import sys, re
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def __main__():
-    try:
-        infile =  open ( sys.argv[1], 'r')
-        outfile = open ( sys.argv[2], 'w')
-    except:
-        stop_err( 'Cannot open or create a file\n' )
-        
-    if len( sys.argv ) < 4:
-        stop_err( 'No columns to merge' )
-    else:
-        cols = sys.argv[3:]        
-
-    skipped_lines = 0
-
-    for line in infile:
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ):
-            fields = line.split( '\t' )
-            line += '\t'
-            for col in cols:
-                try:
-                    line += fields[ int( col ) -1 ]
-                except:
-                    skipped_lines += 1
-                    
-            print >>outfile, line
-            
-    if skipped_lines > 0:
-        print 'Skipped %d invalid lines' % skipped_lines
-            
-if __name__ == "__main__" : __main__()
\ No newline at end of file
--- a/tools/filters/mergeCols.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-<tool id="mergeCols1" name="Merge Columns" version="1.0.1">
-  <description>together</description>
-  <command interpreter="python">
-   mergeCols.py 
-      $input1
-      $out_file1
-      $col1
-      $col2
-      #for $col in $columns
-        ${col.datacol}
-      #end for
-  </command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
-    <param name="col1" label="Merge column" type="data_column" data_ref="input1" />
-    <param name="col2" label="with column" type="data_column" data_ref="input1" help="Need to add more columns? Use controls below."/>
-    <repeat name="columns" title="Columns">
-      <param name="datacol" label="Add column" type="data_column" data_ref="input1" />
-    </repeat>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed"/>
-      <param name="col1" value="4" />
-      <param name="col2" value="1" />
-      <param name="datacol" value="6" />
-      <output name="out_file1" file="mergeCols.dat"/>
-    </test>
-  </tests>
-<help>
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
------
-
-**What it does**
-
-This tool merges columns together. Any number of valid columns can be merged in any order.
-
------
-
-**Example**
-
-Input dataset (five columns: c1, c2, c3, c4, and c5)::
-
-   1 10   1000  gene1 chr
-   2 100  1500  gene2 chr
-
-merging columns "**c5,c1**" will return::
-
-   1 10   1000  gene1 chr chr1
-   2 100  1500  gene2 chr chr2
-
-.. class:: warningmark
-   
-Note that all original columns are preserved and the result of merge is added as the rightmost column.
-  </help>
-</tool>
--- a/tools/filters/pasteWrapper.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-use warnings;
-my $command = "";
-# a wrapper for paste for use in galaxy
-# pasteWrapper.pl [filename1] [filename2] [delimiter] [output]
-
-die "Check arguments" unless @ARGV == 4;
-
-if ($ARGV[2] eq 'T') {
-    $command = "paste $ARGV[0] $ARGV[1]";
-} elsif ($ARGV[2] eq 'C') {
-    $command = "paste -d \",\" $ARGV[0] $ARGV[1]";
-} elsif ($ARGV[2] eq 'D') {
-    $command = "paste -d \"-\" $ARGV[0] $ARGV[1]";
-} elsif ($ARGV[2] eq 'U') {
-    $command = "paste -d \"_\" $ARGV[0] $ARGV[1]";
-} elsif ($ARGV[2] eq 'P') {
-    $command = "paste -d \"|\" $ARGV[0] $ARGV[1]";
-} elsif ($ARGV[2] eq 'Dt') {
-    $command = "paste -d \".\" $ARGV[0] $ARGV[1]";
-} elsif ($ARGV[2] eq 'Sp') {
-    $command = "paste -d \" \" $ARGV[0] $ARGV[1]";
-}
-
-open (OUT, ">$ARGV[3]") or die "Cannot create $ARGV[3]:$!\n";
-open (PASTE, "$command |") or die "Cannot run paste:$!\n";
-
-while (<PASTE>) {
-    print OUT;
-}
-close OUT;
-close PASTE;
-    
--- a/tools/filters/pasteWrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-<tool id="Paste1" name="Paste">
-  <description>two files side by side</description>
-  <command interpreter="perl">pasteWrapper.pl $input1 $input2 $delimiter $out_file1</command>
-  <inputs>
-<!--    <display>paste $input1 and $input2 using $delimiter as delimiter</display> -->
-    <param format="txt" name="input1" type="data" label="Paste"/>
-    <param format="txt" name="input2" type="data" label="and"/>
-    <param name="delimiter" type="select" label="Delimit by">
-      <option value="T">Tab</option>
-      <option value="Dt">Dot</option>
-      <option value="C">Comma</option>
-      <option value="D">Dash</option>
-      <option value="U">Underscore</option>
-      <option value="P">Pipe</option>
-      <option value="Sp">Space</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input1">
-      <change_format>
-        <when input_dataset="input1" attribute="ext" value="bed" format="interval"/>
-      </change_format>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed"/>
-      <param name="input2" value="2.bed"/>
-      <param name="delimiter" value="T"/>
-      <output name="out_file1" file="eq-paste.dat"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-Paste preserves column assignments of the first dataset.
-
------
-
-**What it does**
-
-This tool merges two datasets side by side. If the first (left) dataset contains column assignments such as chromosome, start, end and strand, these will be preserved. However, if you would like to change column assignments, click the pencil icon in the history item.
-
------
-
-**Example**
-
-First dataset::
-  
-    a 1
-    a 2
-    a 3
-
-Second dataset::
-
-    20
-    30
-    40
-
-Pasting them together will produce::
-
-    a 1 20
-    a 2 30
-    a 3 40
-
-</help>
-</tool>
--- a/tools/filters/randomlines.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-#!/usr/bin/env python
-# Kanwei Li, 2010
-# Selects N random lines from a file and outputs to another file
-
-import random, sys
-
-def main():
-    infile = open(sys.argv[1], 'r')
-    total_lines = int(sys.argv[2])
-    
-    if total_lines < 1:
-        sys.stderr.write( "Must select at least one line." )
-        sys.exit()
-    
-    kept = []
-    n = 0
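-    # reservoir sampling: the first N lines fill the reservoir; line n > N
-    # then replaces a random kept line with probability N/n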
-    for line in infile:
-        line = line.rstrip("\n")
-        n += 1
-        if (n <= total_lines):
-            kept.append(line)
-        elif random.randint(1, n) <= total_lines:
-            kept.pop(random.randint(0, total_lines-1))
-            kept.append(line)
-    
-    if n < total_lines:
-        sys.stderr.write( "Error: asked to select more lines than there were in the file." )
-        sys.exit()
-        
-    open(sys.argv[3], 'w').write( "\n".join(kept) )
-    
-if __name__ == "__main__":
-    main()
--- a/tools/filters/randomlines.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-<tool id="random_lines1" name="Select random lines">
-  <description>from a file</description>
-  <command interpreter="python">randomlines.py $input $num_lines $out_file1</command>
-  <inputs>
-    <param name="num_lines" size="5" type="integer" value="1" label="Randomly select" help="lines"/>
-    <param format="txt" name="input" type="data" label="from"/>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="num_lines" value="65"/>
-      <param name="input" value="1.bed"/>
-      <output name="out_file1" file="1.bed"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool selects N random lines from a file, with no repeats, and preserving ordering.
-
------
-
-**Example**
-
-Input File::
-
-    chr7  56632  56652   D17003_CTCF_R6  310  +
-    chr7  56736  56756   D17003_CTCF_R7  354  +
-    chr7  56761  56781   D17003_CTCF_R4  220  +
-    chr7  56772  56792   D17003_CTCF_R7  372  +
-    chr7  56775  56795   D17003_CTCF_R4  207  +
-
-Selecting 2 random lines might return this::
-
-    chr7  56736  56756   D17003_CTCF_R7  354  +
-    chr7  56775  56795   D17003_CTCF_R4  207  +
-
-    </help>
-</tool>
--- a/tools/filters/remove_beginning.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-use warnings;
-
-# Removes the specified number of lines from the beginning of the file.
-# remove_beginning.pl [input] [num_lines] [output]
-
-die "Check arguments" unless @ARGV == 3;
-
-my $inputfile = $ARGV[0];
-my $num_lines = $ARGV[1];
-my $outputfile = $ARGV[2];
-
-my $curCount=0;
-
-my $fhIn;
-open ($fhIn, "< $inputfile") or die "Cannot open source file";
-
-my $fhOut;
-open ($fhOut, "> $outputfile") or die "Cannot open output file";
-
-while (<$fhIn>)
-{
-    $curCount++;
-    if ($curCount<=$num_lines)
-    {
-        next;
-    }
-    print $fhOut $_;
-}
-close ($fhIn) or die "Cannot close source file";
-close ($fhOut) or die "Cannot close output file";
--- a/tools/filters/remove_beginning.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-<tool id="Remove beginning1" name="Remove beginning">
-  <description>of a file</description>
-  <command interpreter="perl">remove_beginning.pl $input $num_lines $out_file1</command>
-  <inputs>
-    <param name="num_lines" size="5" type="integer" value="1" label="Remove first" help="lines"/>
-    <param format="txt" name="input" type="data" label="from"/>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="num_lines" value="5"/>
-      <param name="input" value="1.bed"/>
-      <output name="out_file1" file="eq-removebeginning.dat"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool removes a specified number of lines from the beginning of a dataset.
-
------
-
-**Example**
-
-Input File::
-
-    chr7  56632  56652   D17003_CTCF_R6  310  +
-    chr7  56736  56756   D17003_CTCF_R7  354  +
-    chr7  56761  56781   D17003_CTCF_R4  220  +
-    chr7  56772  56792   D17003_CTCF_R7  372  +
-    chr7  56775  56795   D17003_CTCF_R4  207  +
-
-After removing the first 3 lines the dataset will look like this::
-
-    chr7  56772  56792   D17003_CTCF_R7  372  +
-    chr7  56775  56795   D17003_CTCF_R4  207  +
-
-</help>
-</tool>
--- a/tools/filters/sff_extract.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1505 +0,0 @@
-#!/usr/bin/python
-'''This software extracts the seq, qual and ancillary information from an sff
-file, like the ones used by the 454 sequencer.
-
-Optionally, it can also split paired-end reads if given the linker sequence.
-The splitting is done with maximum match, i.e., every occurrence of the linker
-sequence will be removed, even if it occurs multiple times.'''
-
-#copyright Jose Blanca and Bastien Chevreux
-#COMAV institute, Universidad Politecnica de Valencia (UPV)
-#Valencia, Spain
-
-# additions to handle paired end reads by Bastien Chevreux
-# bugfixes for linker specific lengths: Lionel Guy
-
-#This program is free software: you can redistribute it and/or modify
-#it under the terms of the GNU General Public License as published by
-#the Free Software Foundation, either version 3 of the License, or
-#(at your option) any later version.
-#This program is distributed in the hope that it will be useful,
-#but WITHOUT ANY WARRANTY; without even the implied warranty of
-#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#GNU General Public License for more details.
-#You should have received a copy of the GNU General Public License
-#along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-__author__ = 'Jose Blanca and Bastien Chevreux'
-__copyright__ = 'Copyright 2008, Jose Blanca, COMAV, and Bastien Chevreux'
-__license__ = 'GPLv3 or later'
-__version__ = '0.2.8'
-__email__ = 'jblanca@btc.upv.es'
-__status__ = 'beta'
-
-import struct
-import sys
-import os
-import subprocess
-import tempfile
-
-
-fake_sff_name = 'fake_sff_name'
-
-
-# readname as key: lines with matches from SSAHA, one best match
-ssahapematches = {}
-# linker readname as key: length of linker sequence
-linkerlengths = {}
-
-# set to true if something really fishy is going on with the sequences
-stern_warning = True
-
-def read_bin_fragment(struct_def, fileh, offset=0, data=None,
-                                                             byte_padding=None):
-    '''It reads a chunk of a binary file.
-
-    You have to provide the struct, a file object, the offset (where to start
-    reading).
-    Also you can provide an optional dict that will be populated with the
-    extracted data.
-    If a byte_padding is given the number of bytes read will be a multiple of
-    that number, adding the required pad at the end.
-    It returns the number of bytes read and the data dict.
-    '''
-    if data is None:
-        data = {}
-
-    #we read each item
-    bytes_read = 0
-    for item in struct_def:
-        #we go to the place and read
-        fileh.seek(offset + bytes_read)
-        n_bytes = struct.calcsize(item[1])
-        buffer = fileh.read(n_bytes)
-        read = struct.unpack('>' + item[1], buffer)
-        if len(read) == 1:
-            read = read[0]
-        data[item[0]] = read
-        bytes_read += n_bytes
-
-    #if there is byte_padding the bytes_to_read should be a multiple of the
-    #byte_padding
-    if byte_padding is not None:
-        pad = byte_padding
-        bytes_read = ((bytes_read + pad - 1) // pad) * pad
-
-    return (bytes_read, data)
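-#e.g. read_bin_fragment([('magic_number', 'I')], fileh) reads 4 big-endian
-#bytes from offset 0 and returns (4, {'magic_number': <int>})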
-
-
-def check_magic(magic):
-    '''It checks that the magic number of the file matches the sff magic.'''
-    if magic != 779314790:
-        raise RuntimeError('This file does not seem to be an sff file.')
-
-def check_version(version):
-    '''It checks that the version is supported, otherwise it raises an error.'''
-    supported = ('\x00', '\x00', '\x00', '\x01')
-    i = 0
-    for item in version:
-        if version[i] != supported[i]:
-            raise RuntimeError('SFF version not supported. Please contact the author of the software.')
-        i += 1
-
-def read_header(fileh):
-    '''It reads the header from the sff file and returns a dict with the
-    information'''
-    #first we read the first part of the header
-    head_struct = [
-        ('magic_number', 'I'),
-        ('version', 'cccc'),
-        ('index_offset', 'Q'),
-        ('index_length', 'I'),
-        ('number_of_reads', 'I'),
-        ('header_length', 'H'),
-        ('key_length', 'H'),
-        ('number_of_flows_per_read', 'H'),
-        ('flowgram_format_code', 'B'),
-    ]
-    data = {}
-    first_bytes, data = read_bin_fragment(struct_def=head_struct, fileh=fileh,
-                                                            offset=0, data=data)
-    check_magic(data['magic_number'])
-    check_version(data['version'])
-    #now that we know the number_of_flows_per_read and the key_length
-    #we can read the second part of the header
-    struct2 = [
-        ('flow_chars', str(data['number_of_flows_per_read']) + 'c'),
-        ('key_sequence', str(data['key_length']) + 'c')
-    ]
-    read_bin_fragment(struct_def=struct2, fileh=fileh, offset=first_bytes,
-                                                                      data=data)
-    return data
-
-
-def read_sequence(header, fileh, fposition):
-    '''It reads one read from the sff file located at the fposition and
-    returns a dict with the information.'''
-    header_length = header['header_length']
-    index_offset = header['index_offset']
-    index_length = header['index_length']
-
-    #the sequence struct
-    read_header_1 = [
-        ('read_header_length', 'H'),
-        ('name_length', 'H'),
-        ('number_of_bases', 'I'),
-        ('clip_qual_left', 'H'),
-        ('clip_qual_right', 'H'),
-        ('clip_adapter_left', 'H'),
-        ('clip_adapter_right', 'H'),
-    ]
-    def read_header_2(name_length):
-        '''It returns the struct definition for the second part of the header'''
-        return [('name', str(name_length) +'c')]
-    def read_data(number_of_bases):
-        '''It returns the struct definition for the read data section.'''
-        #size = {'c': 1, 'B':1, 'H':2, 'I':4, 'Q':8}
-        if header['flowgram_format_code'] == 1:
-            flow_type = 'H'
-        else:
-            raise RuntimeError('file version not supported')
-        number_of_bases = str(number_of_bases)
-        return [
-            ('flowgram_values', str(header['number_of_flows_per_read']) +
-                                                                     flow_type),
-            ('flow_index_per_base', number_of_bases + 'B'),
-            ('bases', number_of_bases + 'c'),
-            ('quality_scores', number_of_bases + 'B'),
-        ]
-
-    data = {}
-    #we read the first part of the header
-    bytes_read, data = read_bin_fragment(struct_def=read_header_1,
-                                    fileh=fileh, offset=fposition, data=data)
-
-    read_bin_fragment(struct_def=read_header_2(data['name_length']),
-                          fileh=fileh, offset=fposition + bytes_read, data=data)
-    #we join the letters of the name
-    data['name'] = ''.join(data['name'])
-    offset = data['read_header_length']
-    #we read the sequence and the quality
-    read_data_st = read_data(data['number_of_bases'])
-    bytes_read, data = read_bin_fragment(struct_def=read_data_st,
-                                    fileh=fileh, offset=fposition + offset,
-                                    data=data, byte_padding=8)
-    #we join the bases
-    data['bases'] = ''.join(data['bases'])
-
-    #print data
-    #print "pre cqr: ", data['clip_qual_right']
-    #print "pre car: ", data['clip_adapter_right']
-    #print "pre cql: ", data['clip_qual_left']
-    #print "pre cal: ", data['clip_adapter_left']
-
-    # correct for the case where the right clip is <= the left clip;
-    # in this case, both clips are set to 0 (right clip == 0 means
-    #  "whole sequence")
-    if data['clip_qual_right'] <= data['clip_qual_left'] :
-        data['clip_qual_right'] = 0
-        data['clip_qual_left'] = 0
-    if data['clip_adapter_right'] <= data['clip_adapter_left'] :
-        data['clip_adapter_right'] = 0
-        data['clip_adapter_left'] = 0
-
-    #the clipping section follows the NCBI's guidelines Trace Archive RFC
-    #http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=rfc&m=doc&s=rfc
-    #if there's no adapter clip: qual -> vector
-    #else:  qual-> qual
-    #       adapter -> vector
-
-    if not data['clip_adapter_left']:
-        data['clip_adapter_left'], data['clip_qual_left'] = data['clip_qual_left'], data['clip_adapter_left']
-    if not data['clip_adapter_right']:
-        data['clip_adapter_right'], data['clip_qual_right'] = data['clip_qual_right'], data['clip_adapter_right']
-
-    # see whether we have to override the minimum left clips
-    if config['min_leftclip'] > 0:
-        if data['clip_adapter_left'] >0 and data['clip_adapter_left'] < config['min_leftclip']:
-            data['clip_adapter_left'] = config['min_leftclip']
-        if data['clip_qual_left'] >0 and data['clip_qual_left'] < config['min_leftclip']:
-            data['clip_qual_left'] = config['min_leftclip']
-
-        
-    #print "post cqr: ", data['clip_qual_right']
-    #print "post car: ", data['clip_adapter_right']
-    #print "post cql: ", data['clip_qual_left']
-    #print "post cal: ", data['clip_adapter_left']
-
-
-    # for handling the -c (clip) option gently, we already clip here
-    #  and set all clip points to the sequence end points
-    if config['clip']:
-        data['bases'], data['quality_scores'] = clip_read(data)
-
-        data['number_of_bases']=len(data['bases'])
-        data['clip_qual_right'] = data['number_of_bases']
-        data['clip_adapter_right'] = data['number_of_bases']
-        data['clip_qual_left'] = 0
-        data['clip_adapter_left'] = 0
-        
-    return data['read_header_length'] + bytes_read, data
-
-
-def sequences(fileh, header):
-    '''It returns a generator with the data for each read.'''
-    #now we can read all the sequences
-    fposition = header['header_length']    #position in the file
-    reads_read = 0
-    while True:
-        if fposition == header['index_offset']:
-            #we have to skip the index section
-            fposition += header['index_length']
-            continue
-        else:
-            bytes_read, seq_data = read_sequence(header=header, fileh=fileh,
-                                                            fposition=fposition)
-            yield seq_data
-            fposition += bytes_read
-            reads_read += 1
-            if reads_read >= header['number_of_reads']:
-                break
-
-
-def remove_last_xmltag_in_file(fname, tag=None):
-    '''Given an xml file name and a tag, it removes the last tag of the
-    file if it matches the given tag. Tag removal is performed via file
-    truncation.
-    
-    If the given tag is not the last in the file, a RuntimeError will be
-    raised.
-
-    The resulting xml file will not be valid xml. This function is a hack
-    that allows appending records to xml files in a quick and dirty way.
-    '''
-
-    fh = open(fname, 'r+')
-    #we have to read from the end to the start of the file and keep the
-    #string enclosed by </ >
-    i = -1
-    last_tag = []   #the chars that form the last tag
-    start_offset = None     #in which byte does the last tag starts?
-    end_offset = None     #in which byte does the last tag ends?
-    while True:
-        fh.seek(i, 2)
-        char = fh.read(1)
-        if not char.isspace():
-            last_tag.append(char)
-        if char == '>':
-            end_offset = i
-        if char == '<':
-            start_offset = i
-            break
-        i -= 1
-
-    #we have read the last tag backwards
-    last_tag = ''.join(last_tag[::-1])
-    #we remove the </ and >
-    last_tag = last_tag.rstrip('>').lstrip('</')
-    
-    #we check that we're removing the asked tag
-    if tag is not None and tag != last_tag:
-        raise RuntimeError("The given xml tag wasn't the last one in the file")
-
-    # while we are at it: also remove all white spaces in that line :-)
-    i -= 1
-    while True:
-        fh.seek(i, 2)
-        char = fh.read(1)
-        if char != ' ' and char != '\t':
-            break
-        if fh.tell() == 1:
-            break
-        i -= 1
-
-    fh.truncate()
-
-    fh.close()
-    return last_tag
-
-
-def create_basic_xml_info(readname, fname):
-    '''Formats a number of read specific infos into XML format.
-    Currently formatted: the name and the tags set from the command line
-    '''
-    to_print = ['    <trace>\n']
-    to_print.append('        <trace_name>')
-    to_print.append(readname)
-    to_print.append('</trace_name>\n')
-
-    #extra information
-    #do we have extra info for this file?
-    info = None
-    if config['xml_info']:
-        #with this name?
-        if fname in config['xml_info']:
-            info = config['xml_info'][fname]
-        else:
-        #with no name?
-            try:
-                info = config['xml_info'][fake_sff_name]
-            except KeyError:
-                pass
-    #we print the info that we have
-    if info:
-        for key in info:
-            to_print.append('        <' + key + '>' + info[key] + \
-                            '</' + key +'>\n')
-
-    return ''.join(to_print)
-
-
-def create_clip_xml_info(readlen, adapl, adapr, quall, qualr):
-    '''Takes the clip values of the read and formats them into XML.
-    Corrects "wrong" values that might have resulted from
-    simplified calculations earlier in the conversion process
-    (especially during the splitting of paired-end reads)
-    '''
-
-    to_print = [""]
-
-    # if right borders are >= to read length, they don't need
-    #  to be printed
-    if adapr >= readlen:
-        adapr = 0
-    if qualr >= readlen:
-        qualr = 0
-
-    # BaCh
-    # when called via split_paired_end(), some values may be < 0
-    #  (when clip values were 0 previously)
-    # instead of putting tons of if clauses for different calculations there,
-    #  I centralise corrective measure here
-    # set all values <0 to 0
-
-    if adapr < 0:
-        adapr = 0
-    if qualr <0:
-        qualr = 0
-    if adapl < 0:
-        adapl = 0
-    if quall <0:
-        quall = 0
-
-    if quall:
-        to_print.append('        <clip_quality_left>')
-        to_print.append(str(quall))
-        to_print.append('</clip_quality_left>\n')
-    if qualr:
-        to_print.append('        <clip_quality_right>')
-        to_print.append(str(qualr))
-        to_print.append('</clip_quality_right>\n')
-    if adapl:
-        to_print.append('        <clip_vector_left>')
-        to_print.append(str(adapl))
-        to_print.append('</clip_vector_left>\n')
-    if adapr:
-        to_print.append('        <clip_vector_right>')
-        to_print.append(str(adapr))
-        to_print.append('</clip_vector_right>\n')
-    return ''.join(to_print)
-
-
-def create_xml_for_unpaired_read(data, fname):
-    '''Given the data for one read it returns an str with the xml ancillary
-    data.'''
-    to_print = [create_basic_xml_info(data['name'],fname)]
-    #clippings in the XML only if we do not hard clip
-    if not config['clip']:
-        to_print.append(create_clip_xml_info(data['number_of_bases'],data['clip_adapter_left'], data['clip_adapter_right'], data['clip_qual_left'], data['clip_qual_right']));
-    to_print.append('    </trace>\n')
-    return ''.join(to_print)
-
-
-def format_as_fasta(name,seq,qual):
-    name_line = ''.join(('>', name,'\n'))
-    seqstring = ''.join((name_line, seq, '\n'))
-    qual_line = ' '.join([str(q) for q in qual]) 
-    qualstring = ''.join((name_line, qual_line, '\n'))
-    return seqstring, qualstring
-
-def format_as_fastq(name,seq,qual):
-    qual_line = ''.join([chr(q+33) for q in qual]) 
-    #seqstring = ''.join(('@', name,'\n', seq, '\n+', name,'\n', qual_line, '\n'))
-    seqstring = ''.join(('@', name,'\n', seq, '\n+\n', qual_line, '\n'))
-    return seqstring
-
-
-def get_read_data(data):
-    '''Given the data for one read it returns 2 strs with the fasta seq
-    and fasta qual.'''
-    #seq and qual
-    if config['mix_case']:
-        seq = sequence_case(data)
-        qual = data['quality_scores']
-    else :
-        seq = data['bases']
-        qual = data['quality_scores']
-
-    return seq, qual
-
-def extract_read_info(data, fname):
-    '''Given the data for one read it returns 3 strs with the fasta seq, fasta
-    qual and xml ancillary data.'''
-
-    seq,qual = get_read_data(data)
-    seqstring, qualstring = format_as_fasta(data['name'],seq,qual)
-
-    #name_line = ''.join(('>', data['name'],'\n'))
-    #seq = ''.join((name_line, seq, '\n'))
-    #qual_line = ' '.join([str(q) for q in qual]) 
-    #qual = ''.join((name_line, qual_line, '\n'))
-
-    xmlstring = create_xml_for_unpaired_read(data, fname)
-
-    return seqstring, qualstring, xmlstring
-
-def write_sequence(name,seq,qual,seq_fh,qual_fh):
-    '''Writes sequence and quality into FASTA and FASTA qual filehandles
-    (or into FASTQ).
-    If the sequence length is 0, nothing is written.'''
-
-    if len(seq) == 0 : return
-
-    if qual_fh is None:
-        seq_fh.write(format_as_fastq(name,seq,qual))
-    else:
-        seqstring, qualstring = format_as_fasta(name,seq,qual)
-        seq_fh.write(seqstring)
-        qual_fh.write(qualstring)
-    return
-
-def write_unpaired_read(data, sff_fh, seq_fh, qual_fh, xml_fh):
-    '''Writes an unpaired read into FASTA, FASTA qual and XML filehandles
-    (or into FASTQ and XML).
-    If the sequence length is 0, nothing is written.'''
-
-    seq,qual = get_read_data(data)
-    if len(seq) == 0 : return
-
-    write_sequence(data['name'],seq,qual,seq_fh,qual_fh)
-
-    anci = create_xml_for_unpaired_read(data, sff_fh.name)
-    if anci is not None:
-        xml_fh.write(anci)
-    return
-
-
-def reverse_complement(seq):
-    '''Returns the reverse complement of a DNA sequence as string'''
-
-    compdict = {
-        'a': 't', 
-        'c': 'g',
-        'g': 'c',
-        't': 'a',
-        'u': 't',
-        'm': 'k',
-        'r': 'y',
-        'w': 'w',
-        's': 's',
-        'y': 'r',
-        'k': 'm',
-        'v': 'b',
-        'h': 'd',
-        'd': 'h',
-        'b': 'v',
-        'x': 'x',
-        'n': 'n',
-        'A': 'T',
-        'C': 'G',
-        'G': 'C',
-        'T': 'A',
-        'U': 'T',
-        'M': 'K',
-        'R': 'Y',
-        'W': 'W',
-        'S': 'S',
-        'Y': 'R',
-        'K': 'M',
-        'V': 'B',
-        'H': 'D',
-        'D': 'H',
-        'B': 'V',
-        'X': 'X',
-        'N': 'N', 
-        '*': '*'
-        }
-
-    complseq = ''.join([compdict[base] for base in seq])
-    # python hack to reverse a list/string/etc
-    complseq = complseq[::-1]
-    return complseq
-
-
-def mask_sequence(seq, maskchar, fpos, tpos):
-    '''Given a sequence, mask it with maskchar starting at fpos (including) and
-    ending at tpos (excluding)
-    '''
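-    #e.g. mask_sequence('ACGTACGT', 'x', 2, 5) returns 'ACxxxCGT'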
-
-    if len(maskchar) > 1:
-        raise RuntimeError("Internal error: more than one character given to mask_sequence")
-    if fpos<0:
-        fpos = 0
-    if tpos > len(seq):
-        tpos = len(seq)
-
-    newseq = ''.join((seq[:fpos],maskchar*(tpos-fpos), seq[tpos:]))
-
-    return newseq
-
-
-def fragment_sequences(sequence, qualities, splitchar):
-    '''Works like split() on strings, except it does this on a sequence
-    and the corresponding list with quality values.
-    Returns a list of tuples, one per fragment; each tuple holds the fragment
-    sequence as its first and the fragment qualities as its second element'''
-
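-    #e.g. fragment_sequences('AAXXTT', [1, 2, 3, 4, 5, 6], 'X') returns
-    #[('AA', [1, 2]), ('TT', [5, 6])]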
-    # this is slow (due to zip and list appends)... use an iterator over
-    #  the sequence to find variations and splice on seq and qual
-
-    if len(sequence) != len(qualities):
-        print sequence, qualities
-        raise RuntimeError("Internal error: lengths of sequence and qualities don't match")
-
-    retlist = []
-    if len(sequence) == 0:
-        return retlist
-
-    actseq = []
-    actqual = []
-    if sequence[0] != splitchar:
-        inseq = True
-    else:
-        inseq = False
-    for char,qual in zip(sequence,qualities):
-        if inseq:
-            if char != splitchar:
-                actseq.append(char)
-                actqual.append(qual)
-            else:
-                retlist.append((''.join(actseq), actqual))
-                actseq = []
-                actqual = []
-                inseq = False
-        else:
-            if char != splitchar:
-                inseq = True
-                actseq.append(char)
-                actqual.append(qual)
-
-    if inseq and len(actseq):
-        retlist.append((''.join(actseq), actqual))
-        
-    return retlist
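-
-# A quick worked example (illustrative, not part of the original script):
-# splitting on the mask character keeps sequence and qualities in sync:
-#   fragment_sequences('AC##GT', [30, 31, 0, 0, 32, 33], '#')
-#     ->  [('AC', [30, 31]), ('GT', [32, 33])]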
-
-
-def calc_subseq_boundaries(maskedseq, maskchar):
-    '''E.g.:
-       ........xxxxxxxx..........xxxxxxxxxxxxxxxxxxxxx.........
-       to
-         (0,8),(8,16),(16,26),(26,47),(47,56)
-    '''
-
-    blist = []
-    if len(maskedseq) == 0:
-        return blist
-
-    inmask = True
-    if maskedseq[0] != maskchar:
-        inmask = False
-
-    start = 0
-    for spos in range(len(maskedseq)):
-        if inmask and maskedseq[spos] != maskchar:
-            blist.append([start, spos])
-            start = spos
-            inmask = False
-        elif not inmask and maskedseq[spos] == maskchar:
-            blist.append([start, spos])
-            start = spos
-            inmask = True
-
-    blist.append([start, spos+1])
-
-    return blist
-
-
-def correct_for_smallhits(maskedseq, maskchar, linkername):
-    '''If partial hits were found, takes a preventive measure: grows
-    the masked areas by 20 bases in each direction.
-    Returns either the unchanged "maskedseq" or a new sequence
-    with some more characters masked.
-    '''
-    global linkerlengths
-
-    CEBUG = 0
-
-    if CEBUG : print "correct_for_smallhits"
-    if CEBUG : print "Masked seq\n", maskedseq
-    if CEBUG : print "Linkername: ", linkername
-    
-    if len(maskedseq) == 0:
-        return maskedseq
-
-    growl=40
-    growl2=growl/2
-
-    boundaries = calc_subseq_boundaries(maskedseq,maskchar)
-    if CEBUG : print "Boundaries: ", boundaries
-
-    foundpartial = False
-    for bounds in boundaries:
-        if CEBUG : print "\tbounds: ", bounds
-        left, right = bounds
-        if left != 0 and right != len(maskedseq):
-            if maskedseq[left] == maskchar:
-                # allow 10% discrepancy
-                #    -linkerlengths[linkername]/10
-                # that's a kind of safety net if there are slight sequencing 
-                #  errors in the linker itself 
-                if right-left < linkerlengths[linkername]-linkerlengths[linkername]/10:
-                    if CEBUG : print "\t\tPartial: found " + str(right-left) + " gaps, " + linkername + " is " + str(linkerlengths[linkername]) + " nt long."
-                    foundpartial = True
-
-    if not foundpartial:
-        return maskedseq
-
-    # grow
-    newseq = ""
-    for bounds in boundaries:
-        if CEBUG : print "Bounds: ", bounds
-        left, right = bounds
-        if maskedseq[left] == maskchar:
-            newseq += maskedseq[left:right]
-        else:
-            clearstart = 0
-            if left > 0 :
-                clearstart = left+growl2
-            clearstop = len(maskedseq)
-            if right < len(maskedseq):
-                clearstop = right-growl2
-
-            if CEBUG : print "clearstart, clearstop: ",clearstart, clearstop
-
-            if clearstop <= clearstart:
-                newseq += maskchar * (right-left)
-            else:
-                if clearstart != left:
-                    newseq += maskchar * growl2
-                newseq += maskedseq[clearstart:clearstop]
-                if clearstop != right:
-                    newseq += maskchar * growl2
-            
-        #print "newseq\n",newseq
-
-    return newseq
-
-
-def split_paired_end(data, sff_fh, seq_fh, qual_fh, xml_fh):
-    '''Splits a paired end read and writes sequences into FASTA, FASTA qual
-    and XML traceinfo file. Returns the number of sequences created.
-
-    As the linker sequence may be anywhere in the read, including the ends
-    and overlapping with bad quality sequence, we need to perform some
-    computing and eventually set new clip points.
-
-    If the resulting split yields only one sequence (because linker
-    was not present or overlapping with left or right clip), only one
-    sequence will be written with ".fn" appended to the name.
-
-    If the read can be split, two reads will be written. The side left of
-    the linker will be named ".r" and will be written in reverse complement
-    into the file to conform with what approximately all assemblers expect
-    when reading paired-end data: reads in forward direction in file. The side
-    right of the linker will be named ".f"
-
-    If SSAHA found a partial linker (a linker hit shorter than the linker
-    itself), the sequence names will get a "_pl" appended and the reads
-    will furthermore be cut back thoroughly.
-
-    If SSAHA found multiple occurrences of the linker, the names will get
-    an additional "_mlc" within the name to show that there was "multiple
-    linker contamination".
-
-    For multiple or partial linker, the "good" parts of the reads are
-    stored with a ".part<number>" name; additionally, they will not get
-    template information in the XML.
-    '''
-
-    global ssahapematches
-
-    CEBUG = 0
-
-    maskchar = "#"
-
-    if CEBUG : print "Need to split: " + data['name']
-
-    numseqs = 0
-    readname = data['name']
-    readlen = data['number_of_bases']
-
-    leftclip, rightclip = return_merged_clips(data)
-    seq, qual = get_read_data(data)
-
-    if CEBUG : print "Original read:\n",seq
-    
-    maskedseq = seq
-    if leftclip > 0:
-        maskedseq = mask_sequence(maskedseq, maskchar, 0, leftclip-1)
-    if rightclip < len(maskedseq):
-        maskedseq = mask_sequence(maskedseq, maskchar, rightclip, len(maskedseq))
-    
-    
-    if CEBUG : print "Readname:", readname
-    if CEBUG : print "Readlen:", readlen
-    if CEBUG : print "Num matches:", str(len(ssahapematches[data['name']]))
-    if CEBUG : print "matches:", ssahapematches[data['name']]
-
-    for match in ssahapematches[data['name']]:
-        score = int(match[0])
-        linkername = match[2]
-        leftreadhit = int(match[3])
-        rightreadhit = int(match[4])
-        #leftlinkerhit = int(match[5])
-        #rightlinkerhit = int(match[6])
-        #direction = match[7]
-        #hitlen = int(match[8])
-        #hitidentity = float(match[9])
-
-        if CEBUG : print match
-        if CEBUG : print "Match with score:", score
-        if CEBUG : print "Read before:\n", maskedseq
-        maskedseq = mask_sequence(maskedseq, maskchar, leftreadhit-1, rightreadhit)
-        if CEBUG : print "Masked seq:\n", maskedseq
-
-    correctedseq = correct_for_smallhits(maskedseq, maskchar, linkername)
-
-    if len(maskedseq) != len(correctedseq):
-        raise RuntimeError("Internal error: length of maskedseq != length of correctedseq")
-
-    partialhits = False
-    if correctedseq != maskedseq:
-        if CEBUG : print "Partial hits in", readname
-        if CEBUG : print "Original seq:\n", seq
-        if CEBUG : print "Masked seq:\n", maskedseq
-        if CEBUG : print "Corrected seq\n", correctedseq
-        partialhits = True
-        readname += "_pl"
-        maskedseq = correctedseq
-
-    fragments = fragment_sequences(maskedseq, qual, maskchar)
-
-    if CEBUG : print "Fragments (", len(fragments), "): ", fragments
-
-    mlcflag = False
-    #if len(ssahapematches[data['name']]) > 1:
-    #    #print "Multi linker contamination"
-    #    mlcflag = True
-    #    readname += "_mlc"
-
-    if len(fragments) > 2:
-        if CEBUG : print "Multi linker contamination"
-        mlcflag = True
-        readname += "_mlc"
-
-
-    #print fragments
-    if mlcflag or partialhits:
-        fragcounter = 1
-        readname += ".part"
-        for frag in fragments:
-            actseq = frag[0]
-            if len(actseq) >= 20:
-                actqual = frag[1]
-                oname = readname + str(fragcounter)
-                #seq_fh.write(">"+oname+"\n")
-                #seq_fh.write(actseq+"\n")
-                #qual_fh.write(">"+oname+"\n")
-                #qual_fh.write(' '.join((str(q) for q in actqual)))
-                #qual_fh.write("\n")
-                write_sequence(oname,actseq,actqual,seq_fh,qual_fh)
-                to_print = [create_basic_xml_info(oname,sff_fh.name)]
-                # No clipping in XML ... the multiple and partial fragments
-                #  are clipped "hard"
-                # No template ID and trace_end: we don't know the
-                #  orientation of the fragments. Even if there were
-                #  only two, the fact that we had multiple linkers
-                #  says something went wrong, so simply do not
-                #  write any paired-end information for all these fragments
-                to_print.append('    </trace>\n')
-                xml_fh.write(''.join(to_print))
-                numseqs += 1
-                fragcounter += 1
-    else:
-        if len(fragments) > 2:
-            raise RuntimeError("Unexpected: more than two fragments detected in " + readname + ". Please contact the authors.")
-        # nothing will happen for 0 fragments
-        if len(fragments) == 1:
-            #print "Tada1"
-            boundaries = calc_subseq_boundaries(maskedseq,maskchar)
-            if len(boundaries) < 1 or len(boundaries) > 3:
-                raise RuntimeError("Unexpected case: " + str(len(boundaries)) + " boundaries for 1 fragment of " + readname)
-            if len(boundaries) == 3:
-                # case: mask char on both sides of sequence
-                #print "bounds3"
-                data['clip_adapter_left']=1+boundaries[0][1]
-                data['clip_adapter_right']=boundaries[2][0]
-            elif len(boundaries) == 2:
-                # case: mask char left or right of sequence
-                #print "bounds2",
-                if maskedseq[0] == maskchar :
-                    # case: mask char left
-                    #print "left"
-                    data['clip_adapter_left']=1+boundaries[0][1]
-                else:
-                    # case: mask char right
-                    #print "right"
-                    data['clip_adapter_right']=boundaries[1][0]
-            data['name'] = data['name'] + ".fn"
-            write_unpaired_read(data, sff_fh, seq_fh, qual_fh, xml_fh)
-            numseqs = 1
-        elif len(fragments) == 2:
-            #print "Tada2"
-            oname = readname + ".r"
-            seq, qual = get_read_data(data)
-
-            startsearch = False
-            for spos in range(len(maskedseq)):
-                if maskedseq[spos] != maskchar:
-                    startsearch = True
-                else:
-                    if startsearch:
-                        break
-
-            #print "\nspos: ", spos
-            lseq=seq[:spos]
-            #print "lseq:", lseq
-            actseq = reverse_complement(lseq)
-            lreadlen = len(actseq)
-            lqual = qual[:spos]
-            # python hack to reverse a list/string/etc
-            lqual = lqual[::-1]
-
-            #seq_fh.write(">"+oname+"\n")
-            #seq_fh.write(actseq+"\n")
-            #qual_fh.write(">"+oname+"\n")
-            #qual_fh.write(' '.join((str(q) for q in lqual)))
-            #qual_fh.write("\n")
-
-            write_sequence(oname,actseq,lqual,seq_fh,qual_fh)
-
-            to_print = [create_basic_xml_info(oname,sff_fh.name)]
-            to_print.append(create_clip_xml_info(lreadlen, 0, lreadlen+1-data['clip_adapter_left'], 0, lreadlen+1-data['clip_qual_left']))
-            to_print.append('        <template_id>')
-            to_print.append(readname)
-            to_print.append('</template_id>\n')
-            to_print.append('        <trace_end>r</trace_end>\n')
-            to_print.append('    </trace>\n')
-            xml_fh.write(''.join(to_print))
-
-            oname = readname + ".f"
-            startsearch = False
-            for spos in range(len(maskedseq)-1,-1,-1):
-                if maskedseq[spos] != maskchar:
-                    startsearch = True
-                else:
-                    if startsearch:
-                        break
-
-            actseq = seq[spos+1:]
-            actqual = qual[spos+1:]
-        
-            #print "\nspos: ", spos
-            #print "rseq:", actseq
-
-            #seq_fh.write(">"+oname+"\n")
-            #seq_fh.write(actseq+"\n")
-            #qual_fh.write(">"+oname+"\n")
-            #qual_fh.write(' '.join((str(q) for q in actqual)))
-            #qual_fh.write("\n")
-            write_sequence(oname,actseq,actqual,seq_fh,qual_fh)
-            
-            rreadlen = len(actseq)
-            to_print = [create_basic_xml_info(oname,sff_fh.name)]
-            to_print.append(create_clip_xml_info(rreadlen, 0, rreadlen-(readlen-data['clip_adapter_right']), 0, rreadlen-(readlen-data['clip_qual_right'])))
-            to_print.append('        <template_id>')
-            to_print.append(readname)
-            to_print.append('</template_id>\n')
-            to_print.append('        <trace_end>f</trace_end>\n')
-            to_print.append('    </trace>\n')
-            xml_fh.write(''.join(to_print))
-            numseqs = 2
-
-    return numseqs
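-
-# Naming sketch (illustrative, derived from the function above): for a
-# read named 'FOO123',
-#  - a single clean linker hit yields 'FOO123.r' (left side, written in
-#    reverse complement) and 'FOO123.f';
-#  - no usable linker yields one read named 'FOO123.fn';
-#  - partial and/or multiple linker hits yield hard-clipped fragments
-#    named 'FOO123_pl.part1', 'FOO123_mlc.part1', ... which carry no
-#    template information in the XML.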
-
-
-
-def extract_reads_from_sff(config, sff_files):
-    '''Given the configuration and the list of sff_files it writes the seqs,
-    qualities and ancillary data into the output file(s).
-
-    If file for paired-end linker was given, first extracts all sequences
-    of an SFF and searches these against the linker(s) with SSAHA2 to
-    create needed information to split reads.
-    '''
-
-    global ssahapematches
-
-    
-    if len(sff_files) == 0 :
-        raise RuntimeError("No SFF file given?")
-
-    #first make sure all input files exist, are non-empty and readable
-    for sff_file in sff_files:
-        if not os.path.getsize(sff_file):
-            raise RuntimeError('Empty file? : ' + sff_file)
-        fh = open(sff_file, 'r')
-        fh.close()
-
-    openmode = 'w'
-    if config['append']:
-        openmode = 'a'
-
-    seq_fh = open(config['seq_fname'], openmode)
-    xml_fh = open(config['xml_fname'], openmode)
-    if config['want_fastq']:
-        qual_fh = None
-        try:
-            os.remove(config['qual_fname'])
-        except OSError:
-            pass
-    else:
-        qual_fh = open(config['qual_fname'], openmode)
-
-    if not config['append']:
-        xml_fh.write('<?xml version="1.0"?>\n<trace_volume>\n')
-    else:
-        remove_last_xmltag_in_file(config['xml_fname'], "trace_volume")
-
-    #we go through all input files
-    for sff_file in sff_files:
-        #print "Working on '" + sff_file + "':"
-        ssahapematches.clear()
-
-        seqcheckstore = []
-
-        debug = 0
-
-        if not debug and config['pelinker_fname']:
-            #print "Creating temporary sequences from reads in '" + sff_file + "' ... ",
-            sys.stdout.flush()
-
-            if 0 :
-                # for debugging
-                pid = os.getpid()
-                tmpfasta_fname = 'sffe.tmp.'+ str(pid)+'.fasta'
-                tmpfasta_fh = open(tmpfasta_fname, 'w')
-            else:
-                tmpfasta_fh = tempfile.NamedTemporaryFile(prefix = 'sffeseqs_',
-                                                          suffix = '.fasta')
-
-            sff_fh = open(sff_file, 'rb')
-            header_data = read_header(fileh=sff_fh)
-            for seq_data in sequences(fileh=sff_fh, header=header_data):
-                seq,qual = get_read_data(seq_data)
-                seqstring, qualstring = format_as_fasta(seq_data['name'],seq,qual)
-                tmpfasta_fh.write(seqstring)
-                #seq, qual, anci = extract_read_info(seq_data, sff_fh.name)
-                #tmpfasta_fh.write(seq)
-            #print "done."
-            tmpfasta_fh.seek(0)
-
-            if 0 :
-                # for debugging
-                tmpssaha_fname = 'sffe.tmp.'+str(pid)+'.ssaha2'
-                tmpssaha_fh = open(tmpssaha_fname, 'w+')
-            else:
-                tmpssaha_fh = tempfile.NamedTemporaryFile(prefix = 'sffealig_',
-                                                          suffix = '.ssaha2')
-
-            launch_ssaha(config['pelinker_fname'], tmpfasta_fh.name, tmpssaha_fh)
-            tmpfasta_fh.close()
-
-            tmpssaha_fh.seek(0)
-            read_ssaha_data(tmpssaha_fh)
-            tmpssaha_fh.close()
-
-        if debug:
-            tmpssaha_fh = open("sffe.tmp.10634.ssaha2", 'r')
-            read_ssaha_data(tmpssaha_fh)
-
-        #print "Converting '" + sff_file + "' ... ",
-        sys.stdout.flush()
-        sff_fh = open(sff_file, 'rb')
-        #read_header(infile)
-        header_data = read_header(fileh=sff_fh)
-
-        #now convert all reads
-        nseqs_sff = 0
-        nseqs_out = 0
-        for seq_data in sequences(fileh=sff_fh, header=header_data):
-            nseqs_sff += 1
-
-            seq, qual = clip_read(seq_data)
-            seqcheckstore.append(seq[0:50])
-
-            #if nseqs_sff >1000:
-            #    check_for_dubious_startseq(seqcheckstore,sff_file,seq_data)
-            #    sys.exit()
-
-            if ssahapematches.has_key(seq_data['name']):
-                #print "Paired end:",seq_data['name']
-                nseqs_out += split_paired_end(seq_data, sff_fh, seq_fh, qual_fh, xml_fh)
-            else:
-                #print "Normal:",seq_data['name']
-                if config['pelinker_fname']:
-                    seq_data['name'] = seq_data['name'] + ".fn"
-                write_unpaired_read(seq_data, sff_fh, seq_fh, qual_fh, xml_fh)
-                nseqs_out += 1
-        #print "done."
-        #print 'Converted', str(nseqs_sff), 'reads into', str(nseqs_out), 'sequences.'
-        sff_fh.close()
-
-        check_for_dubious_startseq(seqcheckstore,sff_file,seq_data)
-        seqcheckstore = []
-
-    xml_fh.write('</trace_volume>\n')
-
-    xml_fh.close()
-    seq_fh.close()
-    if qual_fh is not None:
-        qual_fh.close()
-
-    return
-
-def check_for_dubious_startseq(seqcheckstore, sffname,seqdata):
-
-    global stern_warning
-
-    foundproblem = ""
-    if len(seqcheckstore) == 0:
-        return
-    for checklen in range(1,len(seqcheckstore[0])):
-        foundinloop = False
-        seqdict = {}
-        for seq in seqcheckstore:
-            shortseq = seq[0:checklen]
-            if shortseq in seqdict:
-                seqdict[shortseq] += 1
-            else:
-                seqdict[shortseq] = 1
-
-        for shortseq, count in seqdict.items():
-            if float(count)/len(seqcheckstore) >= 0.5:
-                foundinloop = True
-                stern_warning = True
-                foundproblem = "\n"+"*" * 80
-                foundproblem += "\nWARNING: "
-                foundproblem += "weird sequences in file " + sffname + "\n\n"
-                foundproblem += "After applying left clips, " + str(count) + " sequences (=" 
-                foundproblem += '%.0f'%(100.0*float(count)/len(seqcheckstore))
-                foundproblem += "%) start with these bases:\n" + shortseq
-                foundproblem += "\n\nThis does not look sane.\n\n"
-                foundproblem += "Countermeasures you *probably* must take:\n"
-                foundproblem += "1) Make your sequence provider aware of that problem and ask whether this can be\n    corrected in the SFF.\n"
-                foundproblem += "2) If you decide that this is not normal and your sequence provider does not\n    react, use the --min_left_clip of sff_extract.\n"
-                left,right = return_merged_clips(seqdata)
-                foundproblem += "    (Probably '--min_left_clip="+ str(left+len(shortseq))+"' but you should cross-check that)\n"
-                foundproblem += "*" * 80 + "\n"
-        if not foundinloop :
-            break
-    if len(foundproblem):
-        print foundproblem
-            
-
-def parse_extra_info(info):
-    '''It parses the information that will go in the xml file.
-
-    There are two formats accepted for the extra information:
-    key1:value1, key2:value2
-    or:
-    file1.sff{key1:value1, key2:value2};file2.sff{key3:value3}
-    '''
-    if not info:
-        return info
-    finfos = info.split(';')    #information for each file
-    data_for_files = {}
-    for finfo in finfos:
-        #we split the file name from the rest
-        items = finfo.split('{')
-        if len(items) == 1:
-            fname = fake_sff_name
-            info = items[0]
-        else:
-            fname = items[0]
-            info = items[1]
-        #now we get each key,value pair in the info
-        info = info.replace('}', '')
-        data = {}
-        for item in info.split(','):
-            key, value = item.strip().split(':')
-            key = key.strip()
-            value = value.strip()
-            data[key] = value
-        data_for_files[fname] = data
-    return data_for_files
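-
-# A quick worked example (illustrative, not part of the original script):
-#   parse_extra_info('a.sff{machine:FLX, run:R1};b.sff{machine:FLX}')
-#     ->  {'a.sff': {'machine': 'FLX', 'run': 'R1'},
-#          'b.sff': {'machine': 'FLX'}}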
-
-
-def return_merged_clips(data):
-    '''It returns the left and right positions to clip.'''
-    def max(a, b):
-        '''It returns the max of the two given numbers.
-
-        It won't take into account the zero values.
-        '''
-        if not a and not b:
-            return None
-        if not a:
-            return b
-        if not b:
-            return a
-        if a >= b:
-            return a
-        else:
-            return b
-    def min(a, b):
-        '''It returns the min of the two given numbers.
-
-        It won't take into account the zero values.
-        '''
-        if not a and not b:
-            return None
-        if not a:
-            return b
-        if not b:
-            return a
-        if a <= b:
-            return a
-        else:
-            return b
-    left = max(data['clip_adapter_left'], data['clip_qual_left'])
-    right = min(data['clip_adapter_right'], data['clip_qual_right'])
-    #maybe both clips were zero
-    if left is None:
-        left = 1
-    if right is None:
-        right = data['number_of_bases']
-    return left, right
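-
-# A quick worked example (illustrative, not part of the original script):
-# zero clip values count as "not set", so with
-#   data = {'clip_adapter_left': 0, 'clip_qual_left': 5,
-#           'clip_adapter_right': 0, 'clip_qual_right': 100,
-#           'number_of_bases': 120}
-# return_merged_clips(data) returns (5, 100); if all four clips are 0 it
-# returns (1, 120).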
-
-def sequence_case(data):
-    '''Given the data for one read it returns the seq with mixed case.
-
-    The regions to be clipped will be lower case and the rest upper case.
-    '''
-    left, right = return_merged_clips(data)
-    seq = data['bases']
-    new_seq = ''.join((seq[:left-1].lower(), seq[left-1:right], seq[right:].lower()))
-    return new_seq
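-
-# A quick worked example (illustrative, not part of the original script):
-# with merged clips (3, 6) and bases 'ACGTACGT', sequence_case() returns
-# 'acGTACgt' (the clipped margins in lower case).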
-
-def clip_read(data):
-    '''Given the data for one read it returns clipped seq and qual.'''
-
-    left, right = return_merged_clips(data)
-    seq = data['bases']
-    qual = data['quality_scores']
-    new_seq = seq[left-1:right]
-    new_qual = qual[left-1:right]
-
-    return new_seq, new_qual
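-
-# A quick worked example (illustrative, not part of the original script):
-# with merged clips (3, 6), bases 'ACGTACGT' and quality scores
-# [10, 20, 30, 40, 50, 60, 70, 80], clip_read() returns
-# ('GTAC', [30, 40, 50, 60]).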
-
-
-
-def tests_for_ssaha(linker_fname):
-    '''Tests whether SSAHA2 can be successfully called.'''
-    
-    try:
-        print "Testing whether SSAHA2 is installed and can be launched ... ",
-        sys.stdout.flush()
-        fh = open('/dev/null', 'w')
-        retcode = subprocess.call(["ssaha2", "-v"], stdout = fh)
-        fh.close()
-        print "ok."
-    except OSError:
-        print "nope? Uh oh ...\n\n"
-        raise RuntimeError('Could not launch ssaha2. Have you installed it? Is it in your path?')
-
-
-def load_linker_sequences(linker_fname):
-    '''Loads all linker sequences into memory, storing only the length
-    of each linker.'''
-    
-    global linkerlengths
-
-    if not os.path.getsize(linker_fname):
-        raise RuntimeError("File empty? '" + linker_fname + "'")
-    fh = open(linker_fname, 'r')
-    linkerseqs = read_fasta(fh)
-    if len(linkerseqs) == 0:
-        raise RuntimeError(linker_fname + ": no sequence found?")
-    for i in linkerseqs:
-        if linkerlengths.has_key(i.name):
-            raise RuntimeError(linker_fname + ": sequence '" + i.name + "' present multiple times. Aborting.")
-        linkerlengths[i.name] = len(i.sequence)
-    fh.close()
-
-
-def launch_ssaha(linker_fname, query_fname, output_fh):
-    '''Launches SSAHA2 on the linker and query file, streaming the SSAHA2
-    output into the output filehandle'''
-
-    try:
-        print "Searching linker sequences with SSAHA2 (this may take a while) ... ",
-        sys.stdout.flush()
-        retcode = subprocess.call(["ssaha2", "-output", "ssaha2", "-solexa", "-kmer", "4", "-skip", "1", linker_fname, query_fname], stdout = output_fh)
-        if retcode:
-            raise RuntimeError('ssaha2 returned a non-zero exit code.')
-        else:
-            print "ok."
-    except:
-        print "\n"
-        raise RuntimeError('An error occurred during the SSAHA2 execution, aborting.')
-
-def read_ssaha_data(ssahadata_fh):
-    '''Given a file handle, reads a file generated with SSAHA2 (with the
-    default output format) and stores all matches as lists in the
-    ssahapematches ("ssaha paired-end matches") dictionary'''
-
-    global ssahapematches
-
-    print "Parsing SSAHA2 result file ... ",
-    sys.stdout.flush()
-
-    for line in ssahadata_fh:
-        if line.startswith('ALIGNMENT'):
-            ml = line.split()
-            if len(ml) != 12 :
-                print "\n", line,
-                raise RuntimeError('Expected 12 elements in the SSAHA2 line with ALIGNMENT keyword, but found ' + str(len(ml)))
-            if not ssahapematches.has_key(ml[2]) :
-                ssahapematches[ml[2]] = []
-            if ml[8] == 'F':
-                #print line,
-
-                # store everything except the first element (output
-                #  format name (ALIGNMENT)) and the last element
-                #  (length)
-                ssahapematches[ml[2]].append(ml[1:-1])
-            else:
-                #print ml
-                ml[4],ml[5] = ml[5],ml[4]
-                #print ml
-                ssahapematches[ml[2]].append(ml[1:-1])
-
-    print "done."
-
-
-##########################################################################
-#
-# BaCh: This block was shamelessly copied from
-#  http://python.genedrift.org/2007/07/04/reading-fasta-files-conclusion/
-# and then subsequently modified to read fasta correctly
-# It's still not foolproof, but should be good enough
-#
-##########################################################################
-
-class Fasta:
-    def __init__(self, name, sequence):
-        self.name = name
-        self.sequence = sequence
-
-def read_fasta(file):
-    items = []
-    aninstance = Fasta('', '')
-    linenum = 0
-    for line in file:
-        linenum += 1
-        if line.startswith(">"):
-            if len(aninstance.sequence):
-                items.append(aninstance)
-                aninstance = Fasta('', '')
-            # name == all characters until the first whitespace
-            #  (split()[0]) but without the starting ">" ([1:])
-            aninstance.name = line.split()[0][1:]
-            aninstance.sequence = ''
-            if len(aninstance.name) == 0:
-                raise RuntimeError(file.name + ': no name in line ' + str(linenum) + '?')
-                
-        else:
-            if len(aninstance.name) == 0:
-                raise RuntimeError(file.name + ': no sequence header at line ' + str(linenum) + '?')
-            aninstance.sequence += line.strip()
-
-    if len(aninstance.name) and len(aninstance.sequence):
-        items.append(aninstance)
-
-    return items
-##########################################################################
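-
-# A quick worked example (illustrative, not part of the original script):
-# a FASTA file containing
-#   >lnk1 example linker
-#   GTTGGAACC
-# yields one Fasta instance with name 'lnk1' (the text up to the first
-# whitespace, without the '>') and sequence 'GTTGGAACC'.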
-
-def version_string():
-    return "sff_extract " + __version__
-
-def read_config():
-    '''It reads the configuration options from the command line arguments and
-    it returns a dict with them.'''
-    from optparse import OptionParser, OptionGroup
-    usage = "usage: %prog [options] sff1 sff2 ..."
-    desc = "Extract sequences from 454 SFF files into FASTA, FASTA quality"\
-           " and XML traceinfo format. When a paired-end linker sequence"\
-           " is given (-l), use SSAHA2 to scan the sequences for the linker,"\
-           " then split the sequences, removing the linker."
-    parser = OptionParser(usage = usage, version = version_string(), description = desc)
-    parser.add_option('-a', '--append', action="store_true", dest='append',
-            help='append output to existing files', default=False)
-    parser.add_option('-i', '--xml_info', dest='xml_info',
-            help='extra info to write in the xml file')
-    parser.add_option("-l", "--linker_file", dest="pelinker_fname",
-            help="FASTA file with paired-end linker sequences", metavar="FILE")
-
-    group = OptionGroup(parser, "Sequence treatment options","")
-    group.add_option('-c', '--clip', action="store_true", dest='clip',
-                     help='clip (completely remove) ends with low qual and/or adaptor sequence', default=False)
-    group.add_option('-u', '--upper_case', action="store_false", dest='mix_case',
-                      help='all bases in upper case, including clipped ends', default=True)
-    group.add_option('', '--min_left_clip', dest='min_leftclip',
-                     metavar="INTEGER", type = "int",
-                     help='if the left clip coming from the SFF is smaller than this value, override it', default=0)
-    group.add_option('-Q', '--fastq', action="store_true", dest='want_fastq',
-                      help='store as FASTQ file instead of FASTA + FASTA quality file', default=False)
-    parser.add_option_group(group)
-
-    group = OptionGroup(parser, "File name options","")
-    group.add_option("-o", "--out_basename", dest="basename",
-            help="base name for all output files")
-    group.add_option("-s", "--seq_file", dest="seq_fname",
-            help="output sequence file name", metavar="FILE")
-    group.add_option("-q", "--qual_file", dest="qual_fname",
-            help="output quality file name", metavar="FILE")
-    group.add_option("-x", "--xml_file", dest="xml_fname",
-            help="output ancillary xml file name", metavar="FILE")
-    parser.add_option_group(group)
-
-    #default fnames
-    #is there an sff file?
-    basename = 'reads'
-    if sys.argv[-1][-4:].lower() == '.sff':
-        basename = sys.argv[-1][:-4]
-    def_seq_fname = basename + '.fasta'
-    def_qual_fname = basename + '.fasta.qual'
-    def_xml_fname = basename + '.xml'
-    def_pelinker_fname = ''
-    parser.set_defaults(seq_fname = def_seq_fname)
-    parser.set_defaults(qual_fname = def_qual_fname)
-    parser.set_defaults(xml_fname = def_xml_fname)
-    parser.set_defaults(pelinker_fname = def_pelinker_fname)
-
-    #we parse the cmd line
-    (options, args) = parser.parse_args()
-
-    #we put the result in a dict
-    global config
-    config = {}
-    for property in dir(options):
-        if property[0] == '_' or property in ('ensure_value', 'read_file', 
-                                                                'read_module'):
-            continue
-        config[property] = getattr(options, property)
-
-    if config['basename'] is None:
-        config['basename']=basename
-
-    #if we have not set a file name with -s, -q or -x we set the basename
-    #based file name
-    if config['want_fastq']:
-        config['qual_fname'] = ''
-        if config['seq_fname'] == def_seq_fname:
-            config['seq_fname'] = config['basename'] + '.fastq'
-    else:
-        if config['seq_fname'] == def_seq_fname:
-            config['seq_fname'] = config['basename'] + '.fasta'
-        if config['qual_fname'] == def_qual_fname:
-            config['qual_fname'] = config['basename'] + '.fasta.qual'
-
-    if config['xml_fname'] == def_xml_fname:
-        config['xml_fname'] = config['basename'] + '.xml'
-
-    #we parse the extra info for the xml file
-    config['xml_info'] = parse_extra_info(config['xml_info'])
-    return config, args
-
-
-
-##########################################################################
-
-
-def testsome():
-    sys.exit()
-    return
-
-
-def debug():
-    try:
-        dummy = 1
-        #debug()
-        #testsome()
-
-        config, args = read_config()
-        load_linker_sequences(config['pelinker_fname'])
-
-        #pid = os.getpid()
-        pid = 15603
-
-        #tmpfasta_fname = 'sffe.tmp.'+ str(pid)+'.fasta'
-        #tmpfasta_fh = open(tmpfasta_fname, 'w')
-        tmpfasta_fname = 'FLVI58L05.fa'
-        tmpfasta_fh = open(tmpfasta_fname, 'r')
-
-        tmpssaha_fname = 'sffe.tmp.'+str(pid)+'.ssaha2'
-        tmpssaha_fh = open(tmpssaha_fname, 'w')
-
-        launch_ssaha(config['pelinker_fname'], tmpfasta_fh.name, tmpssaha_fh)
-
-        tmpssaha_fh = open("sffe.tmp.15603.ssaha2", 'r')    
-        read_ssaha_data(tmpssaha_fh)
-
-        sys.exit()
-
-        extract_reads_from_sff(config, args)
-
-    except (OSError, IOError, RuntimeError), errval:
-        print errval
-        sys.exit()
-
-    sys.exit()
-
-
-def main():
-
-    argv = sys.argv
-    if len(argv) == 1:
-        sys.argv.append('-h')
-        read_config()
-        sys.exit()
-    try:
-        #debug();
-
-        config, args = read_config()
-
-        if config['pelinker_fname']:
-            #tests_for_ssaha(config['pelinker_fname'])
-            load_linker_sequences(config['pelinker_fname'])
-        if len(args) == 0:
-            raise RuntimeError("No SFF file given?")
-        extract_reads_from_sff(config, args)
-    except (OSError, IOError, RuntimeError), errval:
-        print errval
-        return 1
-
-    if stern_warning:
-        return 1
-
-    return 0
-
-
-
-if __name__ == "__main__":
-    sys.exit(main())
--- a/tools/filters/sff_extractor.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-<tool id="Sff_extractor" name="SFF converter" version="1.0.0">
-    <description></description>
-    <command interpreter="python">
-        #if   str($fastq_output) == "fastq_false"  #sff_extract.py $clip --seq_file=$out_file3 --qual_file=$out_file4 --xml_file=$out_file2 $input
-        #elif str($fastq_output) == "fastq_true"   #sff_extract.py $clip --fastq --seq_file=$out_file1 --xml_file=$out_file2 $input
-        #end if#
-    </command>
-    <inputs>
-        <param format="sff" name="input" type="data" label="Extract from this dataset"/>
-        <param name="clip" type="select" label="Completely remove ends with low qual and/or adaptor sequence">
-            <option value="">No</option>
-            <option value="--clip">Yes</option>
-        </param>
-        <param name="fastq_output" type="boolean" truevalue="fastq_true" falsevalue="fastq_false" checked="False" label="Do you want FASTQ file instead of FASTA + FASTA quality file?" />
-    </inputs>
-    <outputs>
-        <data format="fastqsanger" name="out_file1" >
-            <filter>fastq_output is True</filter>
-        </data>
-        <data format="xml" name="out_file2">
-        </data>  
-        <data format="fasta" name="out_file3">
-            <filter>fastq_output is False</filter>
-        </data>
-        <data format="qual" name="out_file4">
-            <filter>fastq_output is False</filter>
-        </data>
-    </outputs>
-    <tests>
-        <test>
-            <param name="input" value="2.sff"/>
-            <param name="clip" value=""/>
-            <param name="fastq_output" value="false"/>
-            <output name="out_file2" file="sff_converter_xml_1.dat"/>
-            <output name="out_file3" file="sff_converter_fasta.dat"/>
-            <output name="out_file4" file="sff_converter_qual.dat"/>
-        </test>
-        <test>
-            <param name="input" value="2.sff"/>
-            <param name="clip" value=""/>
-            <param name="fastq_output" value="true"/>
-            <output name="out_file1" file="sff_converter_fastq.dat"/>
-            <output name="out_file2" file="sff_converter_xml_2.dat"/>
-        </test>
-    </tests>
-    <help>
-
-**What it does**
-
-This tool extracts data from the 454 Sequencer SFF format and creates three files:
-sequences (FASTA),
-qualities (QUAL) and
-clippings (XML).
-
-If FASTQ output is selected, sequences and qualities are written to a single FASTQ file instead of the FASTA and QUAL files.
-
-    </help>
-</tool>
-
-
--- a/tools/filters/sorter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-"""
-Sorts tabular data on one or more columns.
-
-usage: %prog [options]
-   -i, --input=i: Tabular file to be sorted
-   -o, --out_file1=o: Sorted output file
-   -c, --column=c: First column to sort on
-   -s, --style=s: Sort style (numerical or alphabetical)
-   -r, --order=r: Order (ASC or DESC)
-
-usage: %prog input out_file1 column style order [column style ...]
-"""
-
-import os, re, string, sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-
-def main():
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        inputfile = options.input
-        outputfile = '-o %s' % options.out_file1
-        columns = [options.column]
-        styles = [('','n')[options.style == 'num']]
-        orders = [('','r')[options.order == 'DESC']]
-        col_style_orders = sys.argv[6:]
-        if len(col_style_orders) > 1:
-            columns.extend([col_style_orders[i] for i in range(0,len(col_style_orders),3)])
-            styles.extend([('','n')[col_style_orders[i] == 'num'] for i in range(1,len(col_style_orders),3)])
-            orders.extend([('','r')[col_style_orders[i] == 'DESC'] for i in range(2,len(col_style_orders),3)])
-        cols = [ '-k%s,%s%s%s'%(columns[i], columns[i], styles[i], orders[i]) for i in range(len(columns)) ]
-    except Exception, ex:
-        stop_err('Error parsing input parameters\n' + str(ex))
-
-    # Launch sort.
-    cmd = "sort -f -t '	' %s %s %s" % (' '.join(cols), outputfile, inputfile)
-    try:
-        os.system(cmd)
-    except Exception, ex:
-        stop_err('Error running sort command\n' + str(ex))
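-
-    # A worked example (illustrative, not part of the original): with
-    # column=1, style=num, order=ASC plus one extra triple '3 num ASC',
-    # the generated command looks like
-    #   sort -f -t '<tab>' -k1,1n -k3,3n -o sorted.out input.tab
-    # where <tab> stands for the literal tab character embedded in the
-    # cmd string above, and the file names are placeholders.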
-
-if __name__ == "__main__":
-    main()
--- a/tools/filters/sorter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,130 +0,0 @@
-<tool id="sort1" name="Sort" version="1.0.1">
-  <description>data in ascending or descending order</description>
-  <command interpreter="python">
-    sorter.py 
-      --input=$input 
-      --out_file1=$out_file1 
-      --column=$column
-      --style=$style
-      --order=$order 
-      #for $col in $column_set:
-        ${col.other_column}
-        ${col.other_style}
-        ${col.other_order}
-      #end for
-  </command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Sort Query" />
-    <param name="column" label="on column" type="data_column" data_ref="input" accept_default="true" />
-    <param name="style" type="select" label="with flavor">
-      <option value="num">Numerical sort</option>
-      <option value="alpha">Alphabetical sort</option>
-    </param>
-    <param name="order" type="select" label="everything in">
-      <option value="DESC">Descending order</option>
-      <option value="ASC">Ascending order</option>
-    </param>
-    <repeat name="column_set" title="Column selection">
-      <param name="other_column" label="on column" type="data_column" data_ref="input" accept_default="true" />
-      <param name="other_style" type="select" label="with flavor">
-        <option value="num">Numerical sort</option>
-        <option value="alpha">Alphabetical sort</option>
-      </param>
-      <param name="other_order" type="select" label="everything in">
-        <option value="DESC">Descending order</option>
-        <option value="ASC">Ascending order</option>
-      </param>
-    </repeat>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="sort_in1.bed"/>
-      <param name="column" value="1"/>
-      <param name="style" value="num"/>
-      <param name="order" value="ASC"/>
-      <param name="other_column" value="3"/>
-      <param name="other_style" value="num"/>
-      <param name="other_order" value="ASC"/>
-      <output name="out_file1" file="sort_out1.bed"/>
-    </test>
-    <test>
-      <param name="input" value="sort_in1.bed"/>
-      <param name="column" value="3"/>
-      <param name="style" value="alpha"/>
-      <param name="order" value="ASC"/>
-      <param name="other_column" value="1"/>
-      <param name="other_style" value="alpha"/>
-      <param name="other_order" value="ASC"/>
-      <output name="out_file1" file="sort_out2.bed"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
------
-
-**Syntax**
-
-This tool sorts the dataset on any number of columns in either ascending or descending order.
-
-* Numerical sort orders numbers by their magnitude, ignores all characters besides digits, and evaluates a string of digits as the numeric value it signifies.
-* Alphabetical sort is a phonebook-type sort based on the conventional order of letters in an alphabet. Each nth letter is compared with the nth letter of other words in the list, starting at the first letter of each word and advancing to the second, third, fourth, and so on, until the order is established. Therefore, in an alphabetical sort, 2 comes after 100 (1 &lt; 2).
-
------
-
-**Examples**
-
-The list of numbers 4,17,3,5 collates to 3,4,5,17 by numerical sorting, while it collates to 17,3,4,5 by alphabetical sorting.
-
-Sorting the following::
-
-  Q     d    7   II    jhu  45
-  A     kk   4   I     h    111
-  Pd    p    1   ktY   WS   113
-  A     g    10  H     ZZ   856
-  A     edf  4   tw    b    234
-  BBB   rt   10  H     ZZ   100
-  A     rew  10  d     b    1111
-  C     sd   19  YH    aa   10
-  Hah   c    23  ver   bb   467
-  MN    gtr  1   a     X    32
-  N     j    9   a     T    205
-  BBB   rrf  10  b     Z    134
-  odfr  ws   6   Weg   dew  201
-  C     f    3   WW    SW   34
-  A     jhg  4   I     b    345
-  Pd    gf   7   Gthe  de   567
-  rS    hty  90  YY    LOp  89
-  A     g    10  H     h    43
-  A     g    4   I     h    500
-
-on columns 1 (alpha), 3 (num), and 6 (num) in ascending order will yield::
-
-  A     kk   4   I     h    111
-  A     edf  4   tw    b    234
-  A     jhg  4   I     b    345
-  A     g    4   I     h    500
-  A     g    10  H     h    43
-  A     g    10  H     ZZ   856
-  A     rew  10  d     b    1111
-  BBB   rt   10  H     ZZ   100
-  BBB   rrf  10  b     Z    134
-  C     f    3   WW    SW   34
-  C     sd   19  YH    aa   10
-  Hah   c    23  ver   bb   467
-  MN    gtr  1   a     X    32
-  N     j    9   a     T    205
-  odfr  ws   6   Weg   dew  201
-  Pd    p    1   ktY   WS   113
-  Pd    gf   7   Gthe  de   567
-  Q     d    7   II    jhu  45
-  rS    hty  90  YY    LOp  89
-
-  </help>
-</tool>
--- a/tools/filters/tailWrapper.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-use warnings;
-
-# a wrapper for tail for use in galaxy
-# tailWrapper.pl [filename] [# lines to show] [output]
-
-die "Check arguments" unless @ARGV == 3;
-die "Line number should be an integer\n" unless $ARGV[1]=~ m/^\d+$/;
-
-open (OUT, ">$ARGV[2]") or die "Cannot create $ARGV[2]:$!\n";
-open (TAIL, "tail -n $ARGV[1] $ARGV[0]|") or die "Cannot run tail:$!\n";
-while (<TAIL>) {
-    print OUT;
-}
-close OUT;
-close TAIL;
-    
--- a/tools/filters/tailWrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-<tool id="Show tail1" name="Select last">
-  <description>lines from a dataset</description>
-  <command interpreter="perl">tailWrapper.pl $input $lineNum $out_file1</command>
-  <inputs>
-    <param name="lineNum" size="5" type="integer" value="10" label="Select last" help="lines"/>
-    <param format="txt" name="input" type="data" label="from"/>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="lineNum" value="10"/>
-      <param name="input" value="1.bed"/>
-      <output name="out_file1" file="eq-showtail.dat"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool outputs specified number of lines from the **end** of a dataset
-
------
-
-**Example**
-
-- Input File::
-
-    chr7    57134   57154   D17003_CTCF_R7  356     -
-    chr7    57247   57267   D17003_CTCF_R4  207     +
-    chr7    57314   57334   D17003_CTCF_R5  269     +
-    chr7    57341   57361   D17003_CTCF_R7  375     +
-    chr7    57457   57477   D17003_CTCF_R3  188     +
-
-- Show last two lines of above file. The result is::
-
-    chr7    57341   57361   D17003_CTCF_R7  375     +
-    chr7    57457   57477   D17003_CTCF_R3  188     +
-
-  </help>
-</tool>
--- a/tools/filters/trimmer.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import optparse
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def main():
-    usage = """%prog [options]
-    
-options (listed below) default to 'None' if omitted
-    """
-    parser = optparse.OptionParser(usage=usage)
-    
-    parser.add_option(
-        '-a','--ascii',
-        dest='ascii',
-        action='store_true',
-        default = False,
-        help='Use ASCII codes to define ignored beginnings instead of raw characters')
-        
-    parser.add_option(
-        '-q','--fastq',
-        dest='fastq',
-        action='store_true',
-        default = False,
-        help='The input data is in FASTQ format. If selected, the script skips every even line since those contain sequence ids')
-
-    parser.add_option(
-        '-i','--ignore',
-        dest='ignore',
-        help='A comma separated list of ignored beginnings (e.g., ">,@"), or their ASCII codes (e.g., "62,64") if option -a is enabled')
-
-    parser.add_option(
-        '-s','--start',
-        dest='start',
-        default = '1',
-        help='Trim from beginning to here (1-based)')
-
-    parser.add_option(
-        '-e','--end',
-        dest='end',
-        default = '0',
-        help='Trim from here to the end (1-based)')
-
-    parser.add_option(
-        '-f','--file',
-        dest='input_txt',
-        default = False,
-        help='Name of file to be chopped. STDIN is default')
-            
-    parser.add_option(
-        '-c','--column',
-        dest='col',
-        default = '0',
-        help='Column to chop. If 0 = chop the whole line')
-       
-
-    options, args = parser.parse_args()
-    invalid_starts = []
-
-    if options.input_txt:
-        infile = open( options.input_txt, 'r')
-    else:
-        infile = sys.stdin
-
-    if options.ignore and options.ignore != "None":
-        invalid_starts = options.ignore.split(',')
-        
-    if options.ascii and options.ignore and options.ignore != "None":
-        for i, item in enumerate( invalid_starts ):
-            invalid_starts[i] = chr( int( item ) )
-
-    col = int( options.col )
- 
-    for i, line in enumerate( infile ):
-        line = line.rstrip( '\r\n' )
-        if line:
-            
-            if options.fastq and i % 2 == 0:
-                print line
-                continue
-                
-
-            if line[0] not in invalid_starts:
-                if col == 0:
-                    if int( options.end ) > 0:
-                        line = line[ int( options.start )-1 : int( options.end ) ]
-                    else:
-                        line = line[ int( options.start )-1 : ]
-                else:
-                    fields = line.split( '\t' )
-                    if col > len( fields ):
-                        stop_err('Column %d does not exist. Check input parameters\n' % col)
-                        
-                    if int( options.end ) > 0:
-                        fields[col - 1] = fields[col - 1][ int( options.start )-1 : int( options.end ) ]
-                    else:
-                        fields[col - 1] = fields[col - 1][ int( options.start )-1 : ]
-                    line = '\t'.join(fields)
-            print line   
-
-if __name__ == "__main__": main()
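-
-# Usage sketch (illustrative, not part of the original): keep characters
-# 2-6 (1-based, inclusive) of every line of a plain text file:
-#   python trimmer.py -f input.txt -s 2 -e 6 > trimmed.txt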
-
--- a/tools/filters/trimmer.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,120 +0,0 @@
-<tool id="trimmer" name="Trim" version="0.0.1">
-    <description>leading or trailing characters</description>
-    <command interpreter="python">
-    trimmer.py -a -f $input1 -c $col -s $start -e $end -i $ignore $fastq > $out_file1
-    </command>
-    <inputs>
-        <param format="tabular,txt" name="input1" type="data" label="this dataset"/>
-        <param name="col" type="integer" value="0" label="Trim this column only" help="0 = process entire line" />
-        <param name="start" type="integer" size="10" value="1" label="Trim from the beginning to this position" help="1 = do not trim the beginning"/>
-        <param name="end" type="integer" size="10" value="0" label="Remove everything from this position to the end" help="0 = do not trim the end"/>
-        <param name="fastq" type="select" label="Is input dataset in fastq format?" help="If set to YES, the tool will not trim evenly numbered lines (0, 2, 4, etc...)">
-            <option selected="true" value="">No</option>
-            <option value="-q">Yes</option>
-        </param>
-        <param name="ignore" type="select" display="checkboxes" multiple="True" label="Ignore lines beginning with these characters" help="lines beginning with these are not trimmed">
-            <option value="62">&gt;</option>
-            <option value="64">@</option>
-            <option value="43">+</option>
-            <option value="60">&lt;</option>
-            <option value="42">*</option>
-            <option value="45">-</option>
-            <option value="61">=</option>
-            <option value="124">|</option>
-            <option value="63">?</option>
-            <option value="36">$</option>
-            <option value="46">.</option>
-            <option value="58">:</option>
-            <option value="38">&amp;</option>
-            <option value="37">%</option>
-            <option value="94">^</option>
-            <option value="35">&#35;</option>
-         </param>   
-    </inputs>
-    <outputs>
-        <data name="out_file1" format="input" metadata_source="input1"/>
-    </outputs>
-    <tests>
-        <test>
-           <param name="input1" value="trimmer_tab_delimited.dat"/>
-           <param name="col" value="0"/>
-           <param name="start" value="1"/>
-           <param name="end" value="13"/>
-           <param name="ignore" value="62"/>
-           <param name="fastq" value="No"/>
-           <output name="out_file1" file="trimmer_a_f_c0_s1_e13_i62.dat"/>
-        </test>
-        <test>
-           <param name="input1" value="trimmer_tab_delimited.dat"/>
-           <param name="col" value="2"/>
-           <param name="start" value="1"/>
-           <param name="end" value="2"/>
-           <param name="ignore" value="62"/>
-           <param name="fastq" value="No"/>
-           <output name="out_file1" file="trimmer_a_f_c2_s1_e2_i62.dat"/>
-        </test>
-
-    </tests>
-
-    <help>
-
-
-**What it does**
-
-Trims a specified number of characters from each line of a dataset, or from a single column if the dataset is tab-delimited.
-
------
-
-**Example 1**
-
-Trimming this dataset::
-
-  1234567890
-  abcdefghijk
-
-by setting **Trim from the beginning to this position** to *2* and **Remove everything from this position to the end** to *6* will produce::
-
-  23456
-  bcdef
-
------
-
-**Example 2**
-
-Trimming column 2 of this dataset::
-
-  abcde 12345 fghij 67890
-  fghij 67890 abcde 12345
-
-by setting **Trim this column only** to *2*, **Trim from the beginning to this position** to *2*, and **Remove everything from this position to the end** to *4* will produce::
-
-  abcde  234 fghij 67890
-  fghij  789 abcde 12345
-
------
-
-**Trimming FASTQ datasets**
-
-This tool can be used to trim sequences and quality strings in FASTQ datasets. This is done by selecting *Yes* from the **Is input dataset in fastq format?** dropdown. If set to *Yes*, the tool will skip all even-numbered lines (see warning below). For example, trimming the last 5 bases of this dataset::
-
-  @081017-and-081020:1:1:1715:1759
-  GGACTCAGATAGTAATCCACGCTCCTTTAAAATATC
-  +
-  II#IIIIIII$5+.(9IIIIIII$%*$G$A31I&amp;&amp;B
-  
-can be done by setting **Remove everything from this position to the end** to 31::
-
-  @081017-and-081020:1:1:1715:1759
-  GGACTCAGATAGTAATCCACGCTCCTTTAAA
-  +
-  II#IIIIIII$5+.(9IIIIIII$%*$G$A3 
-  
-**Note** that headers are skipped.
-
-.. class:: warningmark
-
-**WARNING:** This tool will only work on properly formatted FASTQ datasets where (1) each read and quality string occupies a single line and (2) '@' (read header) and "+" (quality header) lines alternate with sequence and quality lines exactly as in the above example.
-
-
-    </help>
-</tool>
--- a/tools/filters/ucsc_gene_bed_to_exon_bed.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,152 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Read a table dump in the UCSC gene table format and print a tab separated
-list of intervals corresponding to requested features of each gene.
-
-usage: ucsc_gene_table_to_intervals.py [options]
-
-options:
-  -h, --help                  show this help message and exit
-  -rREGION, --region=REGION
-                              Limit to region: one of coding, utr3, utr5, codon, intron, transcribed [default]
-  -e, --exons                 Only print intervals overlapping an exon
-  -i, --input=inputfile       input file
-  -o, --output=outputfile     output file
-"""
-
-import optparse, string, sys
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():
-
-    # Parse command line    
-    parser = optparse.OptionParser( usage="%prog [options] " )
-    parser.add_option( "-r", "--region", dest="region", default="transcribed",
-                       help="Limit to region: one of coding, utr3, utr5, transcribed [default]" )
-    parser.add_option( "-e", "--exons",  action="store_true", dest="exons",
-                       help="Only print intervals overlapping an exon" )
-    parser.add_option( "-s", "--strand",  action="store_true", dest="strand",
-                       help="Print strand after interval" )
-    parser.add_option( "-i", "--input",  dest="input",  default=None,
-                       help="Input file" )
-    parser.add_option( "-o", "--output", dest="output", default=None,
-                       help="Output file" )
-    options, args = parser.parse_args()
-    assert options.region in ( 'coding', 'utr3', 'utr5', 'transcribed', 'intron', 'codon' ), "Invalid region argument"
-    
-    try:
-        out_file = open (options.output,"w")
-    except:
-        print >> sys.stderr, "Bad output file."
-        sys.exit(0)
-    
-    try:
-        in_file = open (options.input)
-    except:
-        print >> sys.stderr, "Bad input file."
-        sys.exit(0)
-    
-    print "Region:", options.region+";"
-    """print "Only overlap with Exons:",
-    if options.exons:
-        print "Yes"
-    else:
-        print "No"
-    """
-    
-    # Read table and handle each gene
-    for line in in_file:
-        try:
-            if line[0:1] == "#":
-                continue
-            # Parse fields from the gene table
-            fields = line.split( '\t' )
-            chrom     = fields[0]
-            tx_start  = int( fields[1] )
-            tx_end    = int( fields[2] )
-            name      = fields[3]
-            strand    = fields[5].replace(" ","_")
-            cds_start = int( fields[6] )
-            cds_end   = int( fields[7] )
-
-            # Determine the subset of the transcribed region we are interested in
-            if options.region == 'utr3':
-                if strand == '-': region_start, region_end = tx_start, cds_start
-                else: region_start, region_end = cds_end, tx_end 
-            elif options.region == 'utr5':
-                if strand == '-': region_start, region_end = cds_end, tx_end
-                else: region_start, region_end = tx_start, cds_start
-            elif options.region == 'coding' or options.region == 'codon':
-                region_start, region_end = cds_start, cds_end
-            else:
-                region_start, region_end = tx_start, tx_end
-
-            # If only interested in exons, print the portion of each exon overlapping
-            # the region of interest, otherwise print the span of the region
-            # options.exons is always True here (the XML wrapper always passes --exons)
-            if options.exons:
-                exon_starts = map( int, fields[11].rstrip( ',\n' ).split( ',' ) )
-                exon_starts = map((lambda x: x + tx_start ), exon_starts)
-                exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) )
-                exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends)
-
-            # for intron regions:
-            if options.region == 'intron':
-                i=0
-                while i < len(exon_starts)-1:
-                    intron_starts = exon_ends[i]
-                    intron_ends = exon_starts[i+1]
-                    if strand: print_tab_sep(out_file, chrom, intron_starts, intron_ends, name, "0", strand )
-                    else: print_tab_sep(out_file, chrom, intron_starts, intron_ends )
-                    i+=1
-            # for non-intron regions:
-            else:
-                for start, end in zip( exon_starts, exon_ends ):
-                    start = max( start, region_start )
-                    end = min( end, region_end )
-                    if start < end:
-                        if options.region == 'codon':
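-                            # advance start to the first position that is in
-                            # frame with region_start (offset divisible by 3)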
-                            start += (3 - ((start-region_start)%3))%3
-                            c_start = start 
-                            while c_start+3 <= end:
-                                if strand:
-                                    print_tab_sep(out_file, chrom, c_start, c_start+3, name, "0", strand )
-                                else:
-                                    print_tab_sep(out_file, chrom, c_start, c_start+3)
-                                c_start += 3
-                        else:
-                            if strand:
-                                print_tab_sep(out_file, chrom, start, end, name, "0", strand )
-                            else: 
-                                print_tab_sep(out_file, chrom, start, end )
-                    """
-                    else:
-                        if options.region == 'codon':
-                            c_start = start
-                            c_end = end
-                            if c_start > c_end:
-                                t = c_start
-                                c_start = c_end
-                                c_end = t
-                            while c_start+3 <= c_end:
-                                if strand:
-                                    print_tab_sep(out_file, chrom, c_start, c_start+3, name, "0", strand )
-                                else:
-                                    print_tab_sep(out_file, chrom, c_start, c_start+3)
-                                c_start += 3
-                        else:
-                            if strand:
-                                print_tab_sep(out_file, chrom, region_start, region_end, name, "0", strand )
-                            else: 
-                                print_tab_sep(out_file, chrom, region_start, region_end )
-                    """
-        except:
-            continue
-
-def print_tab_sep(out_file, *args ):
-    """Print each item in `args` to `out_file`, separated by tabs."""
-    print >>out_file, string.join( [ str( f ) for f in args ], '\t' )
-
-if __name__ == "__main__": main()
--- a/tools/filters/ucsc_gene_bed_to_exon_bed.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-<tool id="gene2exon1" name="Gene BED To Exon/Intron/Codon BED">
-<description>expander</description>
-  <command interpreter="python">ucsc_gene_bed_to_exon_bed.py --input=$input1 --output=$out_file1 --region=$region "--exons"</command>
-  <inputs>
-    <param name="region" type="select">
-      <label>Extract</label>
-      <option value="transcribed">Coding Exons + UTR Exons</option>
-      <option value="coding">Coding Exons only</option>
-      <option value="utr5">5'-UTR Exons</option>
-      <option value="utr3">3'-UTR Exons</option>
-      <option value="intron">Introns</option>
-      <option value="codon">Codons</option>
-    </param>
-    <param name="input1" type="data" format="bed" label="from" help="this history item must contain a 12 field BED (see below)"/>
-  </inputs>
-  <outputs>
-    <data name="out_file1" format="bed"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="3.bed" /> 
-      <param name="region" value="transcribed" />
-      <output name="out_file1" file="cf-gene2exon.dat"/>
-    </test>
-  </tests>
-<help>
-
-.. class:: warningmark
-
-This tool works only on a BED file that contains at least 12 fields (see **Example** and **About formats** below).  The output will be empty if applied to a BED file with 3 or 6 fields.
-
-------
-
-**What it does**
-
-BED format can be used to represent a single gene in just one line, which contains the information about exons, coding sequence location (CDS), and positions of untranslated regions (UTRs).  This tool *unpacks* this information by converting a single line describing a gene into a collection of lines representing individual exons, introns, UTRs, etc. 
-
--------
-
-**Example**
-
-Extracting **Coding Exons + UTR Exons** from the following two BED lines::
-
-    chr7 127475281 127491632 NM_000230 0 + 127486022 127488767 0 3 29,172,3225,    0,10713,13126
-    chr7 127486011 127488900 D49487    0 + 127486022 127488767 0 2 155,490,        0,2399
-
-will return::
-
-    chr7 127475281 127475310 NM_000230 0 +
-    chr7 127485994 127486166 NM_000230 0 +
-    chr7 127488407 127491632 NM_000230 0 +
-    chr7 127486011 127486166 D49487    0 +
-    chr7 127488410 127488900 D49487    0 +
-
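-.. class:: infomark
-
-The arithmetic behind this unpacking can be condensed to a few lines of
-Python (an illustrative sketch of the wrapped script, with shortened names;
-it assumes a single well-formed 12-field BED line)::
-
-    fields = line.split('\t')
-    chrom, tx_start = fields[0], int(fields[1])
-    sizes  = [int(x) for x in fields[10].rstrip(',\n').split(',')]
-    starts = [int(x) + tx_start for x in fields[11].rstrip(',\n').split(',')]
-    # each exon spans [start, start + size) in half-open BED coordinates
-    exons = [(chrom, s, s + sz) for s, sz in zip(starts, sizes)]
-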
-------
-
-.. class:: infomark
-
-**About formats**
-
-**BED format** Browser Extensible Data format was designed at UCSC for displaying data tracks in the Genome Browser. It has three required fields and additional optional ones. In the specific case of this tool the following fields must be present::
-
-    1. chrom - The name of the chromosome (e.g. chr1, chrY_random).
-    2. chromStart - The starting position in the chromosome. (The first base in a chromosome is numbered 0.)
-    3. chromEnd - The ending position in the chromosome, plus 1 (i.e., a half-open interval).
-    4. name - The name of the BED line.
-    5. score - A score between 0 and 1000.
-    6. strand - Defines the strand - either '+' or '-'.
-    7. thickStart - The starting position where the feature is drawn thickly at the Genome Browser.
-    8. thickEnd - The ending position where the feature is drawn thickly at the Genome Browser.
-    9. reserved - This should always be set to zero.
-   10. blockCount - The number of blocks (exons) in the BED line.
-   11. blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount.
-   12. blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount.
-
-
-</help>
-</tool>
--- a/tools/filters/ucsc_gene_bed_to_intron_bed.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,98 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Read a table dump in the UCSC gene table format and print a tab separated
-list of intervals corresponding to requested features of each gene.
-
-usage: ucsc_gene_table_to_intervals.py [options]
-
-options:
-  -h, --help                  show this help message and exit
-  -rREGION, --region=REGION
-                              Limit to region: one of coding, utr3, utr5, transcribed [default]
-  -e, --exons                 Only print intervals overlapping an exon
-  -i, --input=inputfile       input file
-  -o, --output=outputfile     output file
-"""
-
-import optparse, string, sys
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():
-
-    # Parse command line    
-    parser = optparse.OptionParser( usage="%prog [options] " )
-    #parser.add_option( "-r", "--region", dest="region", default="transcribed",
-    #                   help="Limit to region: one of coding, utr3, utr5, transcribed [default]" )
-    #parser.add_option( "-e", "--exons",  action="store_true", dest="exons",
-    #                   help="Only print intervals overlapping an exon" )
-    parser.add_option( "-s", "--strand",  action="store_true", dest="strand",
-                       help="Print strand after interval" )
-    parser.add_option( "-i", "--input",  dest="input",  default=None,
-                       help="Input file" )
-    parser.add_option( "-o", "--output", dest="output", default=None,
-                       help="Output file" )
-    options, args = parser.parse_args()
-    #assert options.region in ( 'coding', 'utr3', 'utr5', 'transcribed' ), "Invalid region argument"
-    
-    try:
-        out_file = open (options.output,"w")
-    except:
-        print >> sys.stderr, "Bad output file."
-        sys.exit(0)
-    
-    try:
-        in_file = open (options.input)
-    except:
-        print >> sys.stderr, "Bad input file."
-        sys.exit(0)
-    
-    #print "Region:", options.region+";"
-    #print "Only overlap with Exons:",
-    #if options.exons:
-    #    print "Yes"
-    #else:
-    #    print "No"
-    
-    # Read table and handle each gene
-    
-    for line in in_file:
-        try:
-            #print ("len: %d", len(line))
-            if line[0:1] == "#":
-                continue
-
-            # Parse fields from gene table
-            fields = line.split( '\t' )
-            chrom     = fields[0]
-            tx_start  = int( fields[1] )
-            tx_end    = int( fields[2] )
-            name      = fields[3]
-            strand    = fields[5].replace(" ","_")
-            cds_start = int( fields[6] )
-            cds_end   = int( fields[7] )
-
-            exon_starts = map( int, fields[11].rstrip( ',\n' ).split( ',' ) )
-            exon_starts = map((lambda x: x + tx_start ), exon_starts)
-            exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) )
-            exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends)
-
-            i = 0
-            while i < len(exon_starts)-1:
-                intron_starts = exon_ends[i] + 1
-                intron_ends = exon_starts[i+1] - 1
-                if strand: print_tab_sep(out_file, chrom, intron_starts, intron_ends, name, "0", strand )
-                else: print_tab_sep(out_file, chrom, intron_starts, intron_ends )
-                i += 1
-            # If only interested in exons, print the portion of each exon overlapping
-            # the region of interest, otherwise print the span of the region
-
-        except:
-            continue
-
-def print_tab_sep(out_file, *args ):
-    """Print each item in `args` to `out_file`, separated by tabs."""
-    print >>out_file, string.join( [ str( f ) for f in args ], '\t' )
-
-if __name__ == "__main__": main()
--- a/tools/filters/ucsc_gene_bed_to_intron_bed.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-<tool id="gene2intron1" name="Gene BED To Intron BED">
-<description>expander</description>
-  <command interpreter="python">ucsc_gene_bed_to_intron_bed.py --input=$input1 --output=$out_file1</command>
-  <inputs>
-    <param name="input1" type="data" format="interval" label="UCSC gene BED file (12 fields)"/>
-    
-  </inputs>
-  <outputs>
-    <data name="out_file1" format="bed"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="3.bed" /> 
-      <output name="out_file1" file="cf-gene2intron.dat"/>
-    </test>
-  </tests>
-<help>
-
-**Syntax**
-
-This tool converts a 12-field UCSC gene BED file into a list of BED lines giving the introns of each gene.
-
-- **BED format** Browser Extensible Data format was designed at UCSC for displaying data tracks in the Genome Browser. It has three required fields and twelve additional optional ones::
-
-    The first three BED fields (required) are:
-    1. chrom - The name of the chromosome (e.g. chr1, chrY_random).
-    2. chromStart - The starting position in the chromosome. (The first base in a chromosome is numbered 0.)
-    3. chromEnd - The ending position in the chromosome, plus 1 (i.e., a half-open interval).
-
-    The twelve additional BED fields (optional) are:
-    4. name - The name of the BED line.
-    5. score - A score between 0 and 1000.
-    6. strand - Defines the strand - either '+' or '-'.
-    7. thickStart - The starting position where the feature is drawn thickly at the Genome Browser.
-    8. thickEnd - The ending position where the feature is drawn thickly at the Genome Browser.
-    9. reserved - This should always be set to zero.
-   10. blockCount - The number of blocks (exons) in the BED line.
-   11. blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount.
-   12. blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount.
-   13. expCount - The number of experiments.
-   14. expIds - A comma-separated list of experiment ids. The number of items in this list should correspond to expCount.
-   15. expScores - A comma-separated list of experiment scores. All of the expScores should be relative to expIds. The number of items in this list should correspond to expCount.
-
------
-
-**Example**
-
-- A UCSC gene bed format file::
-
-    chr7 127475281 127491632 NM_000230 0 + 127486022 127488767 0 3 29,172,3225,    0,10713,13126
-    chr7 127486011 127488900 D49487    0 + 127486022 127488767 0 2 155,490,        0,2399
-
-- Converts the above file to a list of bed lines, which has the introns::
-
-    chr7 127475311 127485993 NM_000230 0 +
-    chr7 127486167 127488406 NM_000230 0 +
-    chr7 127486167 127488409 D49487    0 +
-
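-- The intron arithmetic mirrors the script above; a minimal sketch (illustrative
-  only, using the tool's exon_end + 1 to next_start - 1 convention)::
-
-    introns = []
-    for i in range(len(exon_starts) - 1):
-        introns.append((chrom, exon_ends[i] + 1, exon_starts[i + 1] - 1))
-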
-</help>
-</tool>
--- a/tools/filters/ucsc_gene_table_to_intervals.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Read a table dump in the UCSC gene table format and print a tab separated
-list of intervals corresponding to requested features of each gene.
-
-usage: ucsc_gene_table_to_intervals.py [options]
-
-options:
-  -h, --help                  show this help message and exit
-  -rREGION, --region=REGION
-                              Limit to region: one of coding, utr3, utr5, transcribed [default]
-  -e, --exons                 Only print intervals overlapping an exon
-  -i, --input=inputfile       input file
-  -o, --output=outputfile     output file
-"""
-
-import optparse, string, sys
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():
-
-    # Parse command line    
-    parser = optparse.OptionParser( usage="%prog [options] " )
-    parser.add_option( "-r", "--region", dest="region", default="transcribed",
-                       help="Limit to region: one of coding, utr3, utr5, transcribed [default]" )
-    parser.add_option( "-e", "--exons",  action="store_true", dest="exons",
-                       help="Only print intervals overlapping an exon" )
-    parser.add_option( "-s", "--strand",  action="store_true", dest="strand",
-                       help="Print strand after interval" )
-    parser.add_option( "-i", "--input",  dest="input",  default=None,
-                       help="Input file" )
-    parser.add_option( "-o", "--output", dest="output", default=None,
-                       help="Output file" )
-    options, args = parser.parse_args()
-    assert options.region in ( 'coding', 'utr3', 'utr5', 'transcribed' ), "Invalid region argument"
-    
-    try:
-        out_file = open (options.output,"w")
-    except:
-        print >> sys.stderr, "Bad output file."
-        sys.exit(0)
-    
-    try:
-        in_file = open (options.input)
-    except:
-        print >> sys.stderr, "Bad input file."
-        sys.exit(0)
-    
-    print "Region:", options.region+";"
-    print "Only overlap with Exons:",
-    if options.exons:
-        print "Yes"
-    else:
-        print "No"
-    
-    # Read table and handle each gene
-    for line in in_file:
-        try:
-            if line[0:1] == "#":
-                continue
-            # Parse fields from gene table
-            fields = line.split( '\t' )
-            name = fields[0]
-            chrom = fields[1]
-            strand = fields[2].replace(" ","_")
-            tx_start = int( fields[3] )
-            tx_end = int( fields[4] )
-            cds_start = int( fields[5] )
-            cds_end = int( fields[6] )
-
-            # Determine the subset of the transcribed region we are interested in
-            if options.region == 'utr3':
-                if strand == '-': region_start, region_end = tx_start, cds_start
-                else: region_start, region_end = cds_end, tx_end 
-            elif options.region == 'utr5':
-                if strand == '-': region_start, region_end = cds_end, tx_end
-                else: region_start, region_end = tx_start, cds_start
-            elif options.region == 'coding':
-                region_start, region_end = cds_start, cds_end
-            else:
-                region_start, region_end = tx_start, tx_end
-
-            # If only interested in exons, print the portion of each exon overlapping
-            # the region of interest, otherwise print the span of the region
-            if options.exons:
-                exon_starts = map( int, fields[8].rstrip( ',\n' ).split( ',' ) )
-                exon_ends = map( int, fields[9].rstrip( ',\n' ).split( ',' ) )
-                for start, end in zip( exon_starts, exon_ends ):
-                    start = max( start, region_start )
-                    end = min( end, region_end )
-                    if start < end:
-                        if strand: print_tab_sep(out_file, chrom, start, end, name, "0", strand )
-                        else: print_tab_sep(out_file, chrom, start, end )
-            else:
-                if strand: print_tab_sep(out_file, chrom, region_start, region_end, name, "0", strand )
-                else: print_tab_sep(out_file, chrom, region_start, region_end )
-        except:
-            continue
-
-def print_tab_sep(out_file, *args ):
-    """Print each item in `args` to `out_file`, separated by tabs."""
-    print >>out_file, string.join( [ str( f ) for f in args ], '\t' )
-
-if __name__ == "__main__": main()
--- a/tools/filters/ucsc_gene_table_to_intervals.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-<tool id="ucsc_gene_table_to_intervals1" name="Gene Table To BED">
-<description>Parse a UCSC Gene Table dump</description>
-  <command interpreter="python">ucsc_gene_table_to_intervals.py --input=$input1 --output=$out_file1 --region=$region $exon</command>
-  <inputs>
-    <param name="input1" type="data" format="interval" label="UCSC Gene Table"/>
-    <param name="region" type="select">
-      <label>Feature Type</label>
-      <option value="transcribed">Transcribed</option>
-      <option value="coding">Coding</option>
-      <option value="utr3">3' UTR</option>
-      <option value="utr5">5' UTR</option>
-    </param>
-    <param name="exon" type="select">
-      <label>Only print intervals overlapping an exon</label>
-      <option value="">False</option>
-      <option value="--exons">True</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data name="out_file1" format="bed"/>
-  </outputs>
-<help>
-Read a table dump in the UCSC gene table format and create a BED file corresponding to the requested feature of each gene.
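-
-The parsing code expects the gene table columns in this order (inferred from
-the script itself; column 8, presumably exonCount, is skipped)::
-
-    name  chrom  strand  txStart  txEnd  cdsStart  cdsEnd  exonCount  exonStarts  exonEnds
-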
-</help>
-</tool>
\ No newline at end of file
--- a/tools/filters/uniq.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-# Filename: uniq.py
-# Author: Ian N. Schenck
-# Version: 19/12/2005
-#
-# This script accepts an input file, an output file, a column
-# delimiter, and a list of columns.  The script then grabs unique
-# lines based on the columns, and returns those records with a count
-# of occurrences of each unique combination, inserted before the columns.
-#
-# This executes the command pipeline:
-#       cut -f $fields | sort | uniq -c
-#
-# -i            Input file
-# -o            Output file
-# -d            Delimiter
-# -c            Column list (Comma Separated)
-
-import sys
-import re
-import string
-import commands
-
-# This function is exceedingly useful, perhaps package for reuse?
-def getopts(argv):
-    opts = {}
-    while argv:
-        if argv[0][0] == '-':
-            opts[argv[0]] = argv[1]
-            argv = argv[2:]
-        else:
-            argv = argv[1:]
-    return opts
-
-def main():
-    args = sys.argv[1:]
-
-    try:
-        opts = getopts(args)
-    except IndexError:
-        print "Usage:"
-        print " -i        Input file"
-        print " -o        Output file"
-        print " -c        Column list (comma separated)"
-        print " -d        Delimiter:"
-        print "                     T   Tab"
-        print "                     C   Comma"
-        print "                     D   Dash"
-        print "                     U   Underscore"
-        print "                     P   Pipe"
-        print "                     Dt  Dot"
-        print "                     Sp  Space"
-        return 0
-
-    outputfile = opts.get("-o")
-    if outputfile == None:
-        print "No output file specified."
-        return -1
-    
-    inputfile = opts.get("-i")
-    if inputfile == None:
-        print "No input file specified."
-        return -2
-
-    delim = opts.get("-d")
-    if delim == None:
-        print "Field delimiter not specified."
-        return -3
-
-    columns = opts.get("-c")
-    if columns == None or columns == 'None':
-        print "Columns not specified."
-        return -4
-
-    # All inputs have been specified at this point, now validate.
-    fileRegEx = re.compile("^[A-Za-z0-9./\-_]+$")
-    columnRegEx = re.compile("([0-9]{1,},?)+")
-
-    if not columnRegEx.match(columns):
-        print "Illegal column specification."
-        return -4
-    if not fileRegEx.match(outputfile):
-        print "Illegal output filename."
-        return -5
-    if not fileRegEx.match(inputfile):
-        print "Illegal input filename."
-        return -6
-
-    column_list = re.split(",",columns)
-    columns_for_display = ""
-    for col in column_list:
-        columns_for_display += "c"+col+", "
-
-    commandline = "cut "
-    # Set delimiter
-    if delim=='C':
-        commandline += "-d \",\" "
-    if delim=='D':
-        commandline += "-d \"-\" "
-    if delim=='U':
-        commandline += "-d \"_\" "
-    if delim=='P':
-        commandline += "-d \"|\" "
-    if delim=='Dt':
-        commandline += "-d \".\" "
-    if delim=='Sp':
-        commandline += "-d \" \" "
-
-    # set columns
-    commandline += "-f " + columns
-    commandline += " " + inputfile + " | sed s/\ //g | sort | uniq -c | sed s/^\ *// | tr \" \" \"\t\" > " + outputfile
-    errorcode, stdout = commands.getstatusoutput(commandline)
-    
-    print "Count of unique values in " + columns_for_display
-    return errorcode
-
-if __name__ == "__main__":
-    main()
--- a/tools/filters/uniq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-<tool id="Count1" name="Count">
-  <description>occurrences of each record</description>
-  <command interpreter="python">uniq.py -i $input -o $out_file1 -c "$column" -d $delim</command>
-  <inputs>
-    <param name="input" type="data" format="tabular" label="from dataset" help="Dataset missing? See TIP below"/>
-    <param name="column" type="data_column" data_ref="input" multiple="True" numerical="False" label="Count occurrences of values in column(s)" help="Multi-select list - hold the appropriate key while clicking to select multiple columns" />
-    <param name="delim" type="select" label="Delimited by">
-      <option value="T">Tab</option>
-      <option value="Sp">Whitespace</option>
-      <option value="Dt">Dot</option>
-      <option value="C">Comma</option>
-      <option value="D">Dash</option>
-      <option value="U">Underscore</option>
-      <option value="P">Pipe</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="1.bed"/>
-      <output name="out_file1" file="uniq_out.dat"/>
-      <param name="column" value="1"/>
-      <param name="delim" value="T"/>
-    </test>
-  </tests>
-  <help>
-  
- .. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
------
-
-**Syntax**
-
-This tool counts occurrences of unique values in selected column(s).
-
-- If multiple columns are selected, counting is performed on each unique group of all values in the selected columns.
-- The first column of the resulting dataset will be the number of occurrences of each unique value (or combination of values) in the selected column(s), followed by the value(s) themselves.
-
------
-
-**Example**
-
-- Input file::
-     
-       chr1   10  100  gene1
-       chr1  105  200  gene2
-       chr1  205  300  gene3
-       chr2   10  100  gene4
-       chr2 1000 1900  gene5
-       chr3   15 1656  gene6
-       chr4   10 1765  gene7
-       chr4   10 1765  gene8
-
-- Counting unique values in column c1 will result in::
-
-       3 chr1
-       2 chr2
-       1 chr3
-       2 chr4   
-
-- Counting unique values in the grouping of columns c2 and c3 will result in::
-
-       2    10    100
-       2    10    1765
-       1    1000  1900
-       1    105   200
-       1    15    1656
-       1    205   300
-
-</help>
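-- The same counting can be reproduced in a few lines of Python (an illustrative
-  sketch only; the tool itself shells out to cut/sort/uniq, and the input path
-  here is a placeholder)::
-
-    from collections import Counter
-
-    counts = Counter()
-    with open('input.tabular') as f:
-        for line in f:
-            fields = line.rstrip('\n').split('\t')
-            counts[(fields[1], fields[2])] += 1   # group on columns c2 and c3
-    for key, n in sorted(counts.items()):
-        print('\t'.join([str(n)] + list(key)))
-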
-</tool>
--- a/tools/filters/wc_gnu.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-<tool id="wc_gnu" name="Line/Word/Character count">
-    <description>of a dataset</description>
-    <command>
-        #set $word_to_arg = { 'characters':'m', 'words':'w', 'lines':'l' }
-        #set $arg_order = [ 'lines', 'words', 'characters' ]
-        #if not isinstance( $options.value, list ):
-            #set $args = [ $options.value ]
-        #else:
-            #set $args = $options.value
-        #end if
-        #if $include_header.value:
-            echo "#${ "\t".join( [ i for i in $arg_order if i in $args ] ) }" &gt; $out_file1
-            &amp;&amp;
-        #end if
-        wc
-        #for $option in $args:
-           -${ word_to_arg[ str(option) ] }
-        #end for
-        $input1 | awk '{ print ${ '"\\t"'.join( [ "$%i" % ( i+1 ) for i in range( len( $args ) ) ] ) } }'
-        &gt;&gt; $out_file1
-    </command>
-    <inputs>
-        <param format="txt" name="input1" type="data" label="Text file"/>
-        <param name="options" type="select" multiple="True" display="checkboxes" label="Desired values">
-            <!-- <option value="bytes" selected="True">Byte count</option> -->
-            <option value="lines" selected="True">Line count</option>
-            <option value="words" selected="True">Word count</option>
-            <option value="characters" selected="True">Character count</option>
-            <validator type="no_options" message="You must pick at least one attribute to count." />
-        </param>
-        <param name="include_header" type="boolean" label="Include Output header" checked="True"/>
-    </inputs>
-    <outputs>
-        <data format="tabular" name="out_file1"/>
-    </outputs>
-    <tests>
-        <test>
-          <param name="input1" value="1.bed"/>
-          <param name="options" value="lines,words,characters"/>
-          <param name="include_header" value="True"/>
-          <output name="out_file1" file="wc_gnu_out_1.tabular"/>
-        </test>
-        <test>
-          <param name="input1" value="1.bed"/>
-          <param name="options" value="lines,words,characters"/>
-          <param name="include_header" value="False"/>
-          <output name="out_file1" file="wc_gnu_out_2.tabular"/>
-        </test>
-    </tests>
-    <help>
-
-**What it does**
-
-This tool outputs counts of specified attributes (lines, words, characters) of a dataset. 
-
------
-
-**Example Output**
-
-::
-
-  #lines  words  characters
-  7499	  41376	 624971
-
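-The counts match what GNU wc reports; a rough Python equivalent (an
-illustrative sketch, not the tool's implementation; the input path is a
-placeholder)::
-
-  with open('input.txt') as f:
-      data = f.read()
-  print('%d\t%d\t%d' % (len(data.splitlines()), len(data.split()), len(data)))
-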
-    </help>
-</tool>
--- a/tools/filters/wig_to_bigwig.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-<tool id="wig_to_bigWig" name="Wig-to-bigWig" version="1.1.0">
-  <description>converter</description>
-  <command>grep -v "^track" $input1 | wigToBigWig stdin $chromInfo $out_file1 
-    #if $settings.settingsType == "full":
-      -blockSize=${settings.blockSize} -itemsPerSlot=${settings.itemsPerSlot} ${settings.clip} ${settings.unc}
-    #else:
-      -clip
-    #end if
-    2&gt;&amp;1 || echo "Error running wigToBigWig." >&amp;2</command>
-  <requirements>
-    <requirement type="package">ucsc_tools</requirement>
-  </requirements>
-  <inputs>
-    <param format="wig" name="input1" type="data" label="Convert">
-      <validator type="unspecified_build" />
-    </param>
-    <conditional name="settings">
-      <param name="settingsType" type="select" label="Converter settings to use" help="Default settings should usually be used.">
-        <option value="preset">Default</option>
-        <option value="full">Full parameter list</option>
-      </param>
-      <when value="preset" />
-      <when value="full">
-        <param name="blockSize" size="4" type="integer" value="256" label="Items to bundle in r-tree" help="Default is 256 (blockSize)" />
-        <param name="itemsPerSlot" size="4" type="integer" value="1024" label="Data points bundled at lowest level" help="Default is 1024 (itemsPerSlot)" />
-        <param name="clip" type="boolean" truevalue="-clip" falsevalue="" checked="True" label="Clip chromosome positions" help="Issue warning messages rather than dying if wig file contains items off end of chromosome. (clip)"/>
-        <param name="unc" type="boolean" truevalue="-unc" falsevalue="" checked="False" label="Do not use compression" help="(unc)"/>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="bigwig" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.wig" dbkey="hg17" />
-      <param name="settingsType" value="full" />
-      <param name="blockSize" value="256" />
-      <param name="itemsPerSlot" value="1024" />
-      <param name="clip" value="True" />
-      <param name="unc" value="False" />
-      <output name="out_file1" file="2.bigwig"/>
-    </test>
-    <test>
-      <param name="input1" value="2.wig" dbkey="hg17" />
-      <param name="settingsType" value="preset" />
-      <output name="out_file1" file="2.bigwig"/>
-    </test>
-  </tests>
-  <help>
-**Syntax**
-
-This tool converts wiggle data into the bigWig format.
-
-- **Wiggle format**: The .wig format is line-oriented. Wiggle data is preceded by a UCSC track definition line.  Following the track definition line is the track data, which can be entered in three different formats described below.
-
-  - **BED format** with no declaration line and four columns of data::
-
-      chromA  chromStartA  chromEndA  dataValueA
-      chromB  chromStartB  chromEndB  dataValueB
-
-  - **variableStep** two-column data; introduced by a declaration line and followed by chromosome positions and data values::
-
-      variableStep  chrom=chrN  [span=windowSize]
-      chromStartA  dataValueA
-      chromStartB  dataValueB
-
-  - **fixedStep** single-column data; introduced by a declaration line and followed by data values::
-
-      fixedStep  chrom=chrN  start=position  step=stepInterval  [span=windowSize]
-      dataValue1
-      dataValue2
-
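------
-
-A rough sketch of the conversion this tool performs (illustrative only; the
-input and chrom.sizes paths are placeholders for the dataset and the build's
-chromInfo file)::
-
-    import subprocess
-
-    # drop track definition lines, then convert with UCSC's wigToBigWig
-    with open('input.wig') as wig, open('no_track.wig', 'w') as out:
-        for line in wig:
-            if not line.startswith('track'):
-                out.write(line)
-    subprocess.check_call(['wigToBigWig', '-clip', 'no_track.wig',
-                           'chrom.sizes', 'output.bigwig'])
-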
-</help>
-</tool>
--- a/tools/filters/wiggle_to_simple.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Read a wiggle track and print out a series of lines containing
-"chrom position score". Ignores track lines, handles bed, variableStep
-and fixedStep wiggle lines.
-"""
-import sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-import bx.wiggle
-from galaxy.tools.exception_handling import *
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def main():
-    if len( sys.argv ) > 1: 
-        in_file = open( sys.argv[1] )
-    else: 
-        in_file = sys.stdin
-    
-    if len( sys.argv ) > 2:
-        out_file = open( sys.argv[2], "w" )
-    else:
-        out_file = sys.stdout
-    
-    try:
-        for fields in bx.wiggle.IntervalReader( UCSCOutWrapper( in_file ) ):
-            out_file.write( "%s\n" % "\t".join( map( str, fields ) ) )
-    except UCSCLimitException:
-        # Wiggle data was truncated, at the very least need to warn the user.
-        print 'Encountered message from UCSC: "Reached output limit of 100000 data values", so be aware your data was truncated.'
-    except ValueError, e:
-        in_file.close()
-        out_file.close()
-        stop_err( str( e ) )
-
-    in_file.close()
-    out_file.close()
-
-if __name__ == "__main__": main()
--- a/tools/filters/wiggle_to_simple.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,88 +0,0 @@
-<tool id="wiggle2simple1" name="Wiggle-to-Interval">
-  <description>converter</description>
-  <command interpreter="python">wiggle_to_simple.py $input $out_file1 </command>
-  <inputs>
-    <param format="wig" name="input" type="data" label="Convert"/>
-  </inputs>
-  <outputs>
-    <data format="interval" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="2.wig" />
-      <output name="out_file1" file="2.interval"/>
-    </test>
-    <test>
-      <param name="input" value="3.wig" />
-      <output name="out_file1" file="3_wig.bed"/>
-    </test>
-  </tests>
-  <help>
-**Syntax**
-
-This tool converts wiggle data into interval format.
-
-- **Wiggle format**: The .wig format is line-oriented. Wiggle data is preceded by a UCSC track definition line.  Following the track definition line is the track data, which can be entered in three different formats described below.
-
-  - **BED format** with no declaration line and four columns of data::
-
-      chromA  chromStartA  chromEndA  dataValueA
-      chromB  chromStartB  chromEndB  dataValueB
-
-  - **variableStep** two-column data; introduced by a declaration line and followed by chromosome positions and data values::
-
-      variableStep  chrom=chrN  [span=windowSize]
-      chromStartA  dataValueA
-      chromStartB  dataValueB
-
-  - **fixedStep** single-column data; introduced by a declaration line and followed by data values::
-
-      fixedStep  chrom=chrN  start=position  step=stepInterval  [span=windowSize]
-      dataValue1
-      dataValue2
-
------
-
-**Example**
-
-- input wiggle format file::
-
-    #track type=wiggle_0 name="Bed Format" description="BED format"
-    chr19 59302000 59302300 -1.0
-    chr19 59302300 59302600 -0.75
-    chr19 59302600 59302900 -0.50
-    chr19 59302900 59303200 -0.25
-    chr19 59303200 59303500 0.0
-    #track type=wiggle_0 name="variableStep" description="variableStep format"
-    variableStep chrom=chr19 span=150
-    59304701 10.0
-    59304901 12.5
-    59305401 15.0
-    59305601 17.5
-    #track type=wiggle_0 name="fixedStep" description="fixed step" visibility=full
-    fixedStep chrom=chr19 start=59307401 step=300 span=200
-    1000
-    900
-    800
-    700
-    600
-
-- convert the above file to interval file::
-
-    chr19	59302000	59302300	+	-1.0
-    chr19	59302300	59302600	+	-0.75
-    chr19	59302600	59302900	+	-0.5
-    chr19	59302900	59303200	+	-0.25
-    chr19	59303200	59303500	+	0.0
-    chr19	59304701	59304851	+	10.0
-    chr19	59304901	59305051	+	12.5
-    chr19	59305401	59305551	+	15.0
-    chr19	59305601	59305751	+	17.5
-    chr19	59307701	59307901	+	1000.0
-    chr19	59308001	59308201	+	900.0
-    chr19	59308301	59308501	+	800.0
-    chr19	59308601	59308801	+	700.0
-    chr19	59308901	59309101	+	600.0
-
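------
-
-Programmatically, the conversion relies on bx-python's wiggle reader; a
-minimal sketch (illustrative only; the field layout matches the interval
-output above)::
-
-    import bx.wiggle
-
-    with open('input.wig') as f:
-        for fields in bx.wiggle.IntervalReader(f):
-            # fields -> (chrom, start, end, strand, value)
-            print('\t'.join(map(str, fields)))
-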
-</help>
-</tool>
Binary file tools/galaxy-loc.tar.gz has changed
--- a/tools/gatk/analyze_covariates.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-<tool id="gatk_analyze_covariates" name="Analyze Covariates" version="0.0.1">
-  <description>- plot residual error versus covariates</description>
-<command interpreter="python">gatk_wrapper.py
-   --stdout "${output_log}"
-   --html_report_from_directory "${output_html}" "${output_html.files_path}"
-   -p 'java 
-    -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/AnalyzeCovariates.jar"
-    -recalFile "${input_recal}"
-    -outputDir "${output_html.files_path}"
-    ##-log "${output_log}"
-    ##-Rscript,--path_to_Rscript path_to_Rscript; on path is good enough
-    -resources "${GALAXY_DATA_INDEX_DIR}/gatk/R"         
-    #if $analysis_param_type.analysis_param_type_selector == "advanced":
-        --ignoreQ "${analysis_param_type.ignore_q}"
-        --numRG "${analysis_param_type.num_read_groups}"
-        --max_quality_score "${analysis_param_type.max_quality_score}"
-        --max_histogram_value "${analysis_param_type.max_histogram_value}"
-         ${analysis_param_type.do_indel_quality}
-    #end if
-   '
-  </command>
-  <inputs>
-    <param name="input_recal" type="data" format="csv" label="Covariates table recalibration file" />
-    <conditional name="analysis_param_type">
-      <param name="analysis_param_type_selector" type="select" label="Basic or Advanced options">
-        <option value="basic" selected="True">Basic</option>
-        <option value="advanced">Advanced</option>
-      </param>
-      <when value="basic">
-        <!-- Do nothing here -->
-      </when>
-      <when value="advanced">
-        <param name="ignore_q" type="integer" value="5" label="Ignore bases with reported quality less than this number."/>
-        <param name="num_read_groups" type="integer" value="-1" label="Only process N read groups."/>
-        <param name="max_quality_score" type="integer" value="50" label="Max quality score"/>
-        <param name="max_histogram_value" type="integer" value="0" label="Max histogram value"/>
-        <param name="do_indel_quality" type="boolean" truevalue="--do_indel_quality" falsevalue="" label="Do indel quality plots"/>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="html" name="output_html" label="${tool.name} on ${on_string} (HTML)" />
-    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
-  </outputs>
-  <tests>
-      <test>
-          <param name="input_recal" value="gatk/gatk_count_covariates/gatk_count_covariates_out_1.csv" ftype="csv" /> 
-          <param name="analysis_param_type_selector" value="basic" />
-          <output name="output_html" file="gatk/gatk_analyze_covariates/gatk_analyze_covariates_out_1.html" />
-          <output name="output_log" file="gatk/gatk_analyze_covariates/gatk_analyze_covariates_out_1.log.contains" compare="contains" />
-      </test>
-  </tests>
-  <help>
-**What it does**
-
-Create collapsed versions of the recal CSV file and call R scripts to plot residual error versus the various covariates.
-
-
-------
-
-Please cite the website "http://addlink.here" as well as:
-
-Add citation here 2011.
-
-------
-
-**Input formats**
-
-GenomeAnalysisTK: AnalyzeCovariates accepts a recal CSV file.
-
-------
-
-**Outputs**
-
-The output is an HTML file with links to PDF graphs and data files; see http://addlink.here for more details.
-
--------
-
-**Settings**::
-
- recal_file           The input recal CSV file to analyze
- output_dir           The directory in which to output all the plots and intermediate data files
- path_to_Rscript      The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript
- path_to_resources    Path to resources folder holding the Sting R scripts.
- ignoreQ              Ignore bases with reported quality less than this number.
- numRG                Only process N read groups. Default value: -1 (process all read groups)
- max_quality_score    The integer value at which to cap the quality scores, default is 50
- max_histogram_value  If supplied, this value will be the max value of the histogram plots
- do_indel_quality     If supplied, indel quality plots will also be generated
-
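-------
-
-A minimal sketch of the underlying invocation (paths are placeholders; the
-Galaxy wrapper assembles the real command line via gatk_wrapper.py)::
-
-    import subprocess
-
-    subprocess.check_call([
-        'java', '-jar', 'AnalyzeCovariates.jar',
-        '-recalFile', 'recal.csv',   # covariates table from Count Covariates
-        '-outputDir', 'plots/',      # PDF plots and intermediate data files
-        '-resources', 'gatk/R',      # folder holding the Sting R scripts
-    ])
-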
-  </help>
-</tool>
--- a/tools/gatk/count_covariates.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,431 +0,0 @@
-<tool id="gatk_count_covariates" name="Count Covariates" version="0.0.1">
-  <description>on BAM files</description>
-  <command interpreter="python">gatk_wrapper.py
-   --stdout "${output_log}"
-   -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
-   -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
-   -p 'java 
-    -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar"
-    -T "CountCovariates"
-    --num_threads 4 ##hard coded, for now
-    -et "NO_ET" ##ET no phone home
-    ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
-    #if $reference_source.reference_source_selector != "history":
-        -R "${reference_source.ref_file.fields.path}"
-    #end if
-    --recal_file "${output_recal}"
-    ${standard_covs}
-    #if $covariates.value:
-        #for $cov in $covariates.value:
-            -cov "${cov}"
-        #end for
-    #end if
-   '
-    
-    #set $snp_dataset_provided = False
-    #if str( $input_dbsnp_rod ) != "None":
-        -d "-D" "${input_dbsnp_rod}" "${input_dbsnp_rod.ext}" "dbsnp_rod"
-        #set $snp_dataset_provided = True
-    #end if
-    #set $rod_binding_names = dict()
-    #for $rod_binding in $rod_bind:
-        #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
-            #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
-        #else
-            #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
-        #end if
-        #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'snps':
-            #set $snp_dataset_provided = True
-        #end if
-        #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
-        -d "-B:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
-        #if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ):
-            -p '--rodToIntervalTrackName "${rod_bind_name}"'
-        #end if
-    #end for
-    
-    ##start standard gatk options
-    #if $gatk_param_type.gatk_param_type_selector == "advanced":
-        #for $sample_metadata in $gatk_param_type.sample_metadata:
-            -p '--sample_metadata "${sample_metadata.sample_metadata_file}"'
-        #end for
-        #for $read_filter in $gatk_param_type.read_filter:
-            -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}"
-            ###raise Exception( str( dir( $read_filter ) ) )
-            #for $name, $param in $read_filter.read_filter_type.iteritems():
-                #if $name not in [ "__current_case__", "read_filter_type_selector" ]:
-                    --${name} "${param}"
-                #end if
-            #end for
-            '
-        #end for
-        #if str( $gatk_param_type.input_intervals ) != "None":
-            -d "-L" "${gatk_param_type.input_intervals}" "${gatk_param_type.input_intervals.ext}" "input_intervals"
-        #end if
-        #if str( $gatk_param_type.input_exclude_intervals ) != "None":
-            -d "-XL" "${gatk_param_type.input_exclude_intervals}" "${gatk_param_type.input_exclude_intervals.ext}" "input_intervals"
-        #end if
-        
-        -p '--BTI_merge_rule "${gatk_param_type.BTI_merge_rule}"'
-        -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"'
-        #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE":
-            -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"'
-        #end if
-        -p '
-        --baq "${gatk_param_type.baq}"
-        --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}"
-        ${gatk_param_type.use_original_qualities}
-        --defaultBaseQualities "${gatk_param_type.default_base_qualities}"
-        --validation_strictness "${gatk_param_type.validation_strictness}"
-        --interval_merging "${gatk_param_type.interval_merging}"
-        '
-        #if str( $gatk_param_type.read_group_black_list ) != "None":
-            -d "-read_group_black_list" "${gatk_param_type.read_group_black_list}" "txt" "input_read_group_black_list"
-        #end if
-    #end if
-    #if str( $reference_source.reference_source_selector ) == "history":
-        -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input"
-    #end if
-    ##end standard gatk options
-    
-    ##start analysis specific options
-    #if $analysis_param_type.analysis_param_type_selector == "advanced":
-        -p '
-        #if $analysis_param_type.default_read_group_type.default_read_group_type_selector == "set":
-            --default_read_group "${analysis_param_type.default_read_group_type.default_read_group}"
-        #end if
-        #if str( $analysis_param_type.default_platform ) != "default":
-            --default_platform "${analysis_param_type.default_platform}"
-        #end if
-        #if str( $analysis_param_type.force_read_group_type.force_read_group_type_selector ) == "set":
-            --force_read_group "${analysis_param_type.force_read_group_type.force_read_group}"
-        #end if
-        #if str( $analysis_param_type.force_platform ) != "default":
-            --force_platform "${analysis_param_type.force_platform}"
-        #end if
-        ${analysis_param_type.exception_if_no_tile}
-        #if str( $analysis_param_type.solid_options_type.solid_options_type_selector ) == "set":
-            #if str( $analysis_param_type.solid_options_type.solid_recal_mode ) != "default":
-                --solid_recal_mode "${analysis_param_type.solid_options_type.solid_recal_mode}" 
-            #end if
-            #if str( $analysis_param_type.solid_options_type.solid_nocall_strategy ) != "default":
-                --solid_nocall_strategy "${analysis_param_type.solid_options_type.solid_nocall_strategy}" 
-            #end if
-        #end if
-        --window_size_nqs "${analysis_param_type.window_size_nqs}"
-        --homopolymer_nback "${analysis_param_type.homopolymer_nback}"
-        '
-    #end if
-    #if not $snp_dataset_provided:
-        -p '--run_without_dbsnp_potentially_ruining_quality'
-    #end if
-  </command>
-  <inputs>
-    <conditional name="reference_source">
-      <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
-        <option value="cached">Locally cached</option>
-        <option value="history">History</option>
-      </param>
-      <when value="cached">
-        <param name="input_bam" type="data" format="bam" label="BAM file">
-          <validator type="unspecified_build" />
-          <validator type="dataset_metadata_in_file" filename="picard_index.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
-        </param>
-        <param name="ref_file" type="select" label="Using reference genome">
-          <options from_data_table="picard_indexes">
-            <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/>
-          </options>
-        </param>
-      </when>
-      <when value="history"> <!-- FIX ME!!!! -->
-        <param name="input_bam" type="data" format="bam" label="BAM file" />
-        <param name="ref_file" type="data" format="fasta" label="Using reference file" />
-      </when>
-    </conditional>
-    <param name="standard_covs" type="boolean" truevalue="--standard_covs" falsevalue="" label="Use the standard set of covariates in addition to the ones selected" />
-    <param name="covariates" type="select" multiple="True" display="checkboxes" label="Covariates to be used in the recalibration" >
-      <!-- might we want to load the available covariates from an external configuration file, since additional ones can be added to local installs? -->
-      <option value="ReadGroupCovariate" />
-      <option value="QualityScoreCovariate" />
-      <option value="CycleCovariate" />
-      <option value="DinucCovariate" />
-      <!-- covariates below were pull from source code, since the list option doesn't seem to work (when tried) -->
-      <option value="HomopolymerCovariate" />
-      <option value="MappingQualityCovariate" />
-      <option value="MinimumNQSCovariate" />
-      <option value="PositionCovariate" />
-      <option value="PrimerRoundCovariate" />
-      <option value="TileCovariate" />
-    </param>
-    <param name="input_dbsnp_rod" type="data" format="gatk_dbsnp" optional="True" label="dbSNP reference ordered data (ROD)" />
-    <repeat name="rod_bind" title="Binding for reference-ordered data">
-        <conditional name="rod_bind_type">
-	      <param name="rod_bind_type_selector" type="select" label="Binding Type">
-	        <option value="snps" selected="True">SNPs</option>
-	        <option value="indels">INDELs</option>
-	        <option value="mask">Mask</option>
-	        <option value="custom">Custom</option>
-	      </param>
-          <when value="snps">
-              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-          </when>
-          <when value="indels">
-              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-          </when>
-          <when value="custom">
-              <param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/>
-              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-          </when>
-        </conditional>
-    </repeat>
-    
-    <conditional name="gatk_param_type">
-      <param name="gatk_param_type_selector" type="select" label="Basic or Advanced GATK options">
-        <option value="basic" selected="True">Basic</option>
-        <option value="advanced">Advanced</option>
-      </param>
-      <when value="basic">
-        <!-- Do nothing here -->
-      </when>
-      <when value="advanced">
-        <repeat name="sample_metadata" title="Sample Metadata">
-            <param name="sample_metadata_file" type="data" format="txt" label="Sample file(s) in JSON format" />
-        </repeat>
-        <repeat name="read_filter" title="Read Filter">
-            <conditional name="read_filter_type">
-		      <param name="read_filter_type_selector" type="select" label="Read Filter Type">
-		        <option value="MaxReadLength" selected="True">MaxReadLength</option>
-		        <option value="ZeroMappingQualityRead">ZeroMappingQualityRead</option>
-		      </param>
-	          <when value="ZeroMappingQualityRead">
-	              <!-- no extra options -->
-	          </when>
-	          <when value="MaxReadLength">
-	              <param name="maxReadLength" type="integer" value="76" label="Max Read Length"/>
-	          </when>
-            </conditional>
-        </repeat>
-        <param name="input_intervals" type="data" format="picard_interval_list" optional="True" label="A list of genomic intervals over which to operate" />
-        <param name="input_exclude_intervals" type="data" format="picard_interval_list" optional="True" label="A list of genomic intervals to exclude from processing" />
-        <param name="BTI_merge_rule" type="select" label="BTI merge rule">
-          <option value="UNION" selected="True">UNION</option>
-          <option value="INTERSECTION">INTERSECTION</option>
-        </param>
-        <conditional name="downsampling_type">
-          <param name="downsampling_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
-            <option value="NONE" selected="True">NONE</option>
-            <option value="ALL_READS">ALL_READS</option>
-            <option value="BY_SAMPLE">BY_SAMPLE</option>
-          </param>
-          <when value="NONE">
-	          <!-- no more options here -->
-	      </when>
-          <when value="ALL_READS">
-	          <conditional name="downsample_to_type">
-              <param name="downsample_to_type_selector" type="select" label="Downsample by fraction or by coverage" help="Downsampling Type">
-	                  <option value="downsample_to_fraction" selected="True">Downsample by Fraction</option>
-	                  <option value="downsample_to_coverage">Downsample by Coverage</option>
-	              </param>
-	              <when value="downsample_to_fraction">
-	                  <param name="downsample_to_value" type="float" label="Fraction [0.0-1.0] of reads to downsample to" value="0.1"/>
-	              </when>
-	              <when value="downsample_to_coverage">
-	                  <param name="downsample_to_value" type="integer" label="Coverage to downsample to at any given locus" value="0"/>
-	              </when>
-	          </conditional>
-	      </when>
-          <when value="BY_SAMPLE">
-	          <conditional name="downsample_to_type">
-              <param name="downsample_to_type_selector" type="select" label="Downsample by fraction or by coverage" help="Downsampling Type">
-	                  <option value="downsample_to_fraction" selected="True">Downsample by Fraction</option>
-	                  <option value="downsample_to_coverage">Downsample by Coverage</option>
-	              </param>
-	              <when value="downsample_to_fraction">
-	                  <param name="downsample_to_value" type="float" label="Fraction [0.0-1.0] of reads to downsample to" value="0.1"/>
-	              </when>
-	              <when value="downsample_to_coverage">
-	                  <param name="downsample_to_value" type="integer" label="Coverage to downsample to at any given locus" value="0"/>
-	              </when>
-	          </conditional>
-	      </when>
-        </conditional>
-        <param name="baq" type="select" label="Type of BAQ calculation to apply in the engine">
-          <option value="OFF" selected="True">OFF</option>
-          <option value="CALCULATE_AS_NECESSARY">CALCULATE_AS_NECESSARY</option>
-          <option value="RECALCULATE">RECALCULATE</option>
-        </param>
-        <param name="baq_gap_open_penalty" type="integer" label="BAQ gap open penalty (Phred Scaled)" value="40" help="Default value is 40. 30 is perhaps better for whole genome call sets."/>
-        <param name="use_original_qualities" type="boolean" truevalue="--useOriginalQualities" falsevalue="" label="Use the original base quality scores from the OQ tag" />
-        <param name="default_base_qualities" type="integer" label="Value to be used for all base quality scores, when some are missing" value="-1"/>
-        <param name="validation_strictness" type="select" label="How strict should we be with validation">
-          <option value="STRICT" selected="True">STRICT</option>
-          <option value="LENIENT">LENIENT</option>
-          <option value="SILENT">SILENT</option>
-        </param>
-        <param name="interval_merging" type="select" label="Interval merging rule">
-          <option value="ALL" selected="True">ALL</option>
-          <option value="OVERLAPPING_ONLY">OVERLAPPING_ONLY</option>
-        </param>
-        <param name="read_group_black_list" type="data" format="txt" optional="True" label="Read group black list" />
-      </when>
-    </conditional>
-    
-    <conditional name="analysis_param_type">
-      <param name="analysis_param_type_selector" type="select" label="Basic or Advanced Analysis options">
-        <option value="basic" selected="True">Basic</option>
-        <option value="advanced">Advanced</option>
-      </param>
-      <when value="basic">
-        <!-- Do nothing here -->
-      </when>
-      <when value="advanced">
-        <conditional name="default_read_group_type">
-          <param name="default_read_group_type_selector" type="select" label="Set default Read Group">
-            <option value="default" selected="True">Don't Set</option>
-            <option value="set">Set</option>
-          </param>
-          <when value="default">
-            <!-- do nothing here -->
-          </when>
-          <when value="set">
-            <param name="default_read_group" type="text" value="Unknown" label="If a read has no read group then default to the provided String"/>
-          </when>
-        </conditional>
-        <param name="default_platform" type="select" label="Set default Platform">
-          <option value="default" selected="True">Don't Set</option>
-          <option value="illumina">illumina</option>
-          <option value="454">454</option>
-          <option value="solid">solid</option>
-        </param>
-        <conditional name="force_read_group_type">
-          <param name="force_read_group_type_selector" type="select" label="Force Read Group">
-            <option value="default" selected="True">Don't Force</option>
-            <option value="set">Force</option>
-          </param>
-          <when value="default">
-            <!-- do nothing here -->
-          </when>
-          <when value="set">
-            <param name="force_read_group" type="text" value="Unknown" label="If provided, the read group ID of EVERY read will be forced to be the provided String."/>
-          </when>
-        </conditional>
-        <param name="force_platform" type="select" label="Force Platform">
-          <option value="default" selected="True">Don't Force</option>
-          <option value="illumina">illumina</option>
-          <option value="454">454</option>
-          <option value="solid">solid</option>
-        </param>
-        <param name="exception_if_no_tile" type="boolean" checked="False" truevalue="--exception_if_no_tile" falsevalue="" label="Throw an exception when no tile can be found"/>
-        <conditional name="solid_options_type">
-          <param name="solid_options_type_selector" type="select" label="Set SOLiD specific options">
-            <option value="default" selected="True">Don't Set</option>
-            <option value="set">Set</option>
-          </param>
-          <when value="default">
-            <!-- do nothing here -->
-          </when>
-          <when value="set">
-            <param name="solid_recal_mode" type="select" label="How should we recalibrate solid bases in which the reference was inserted">
-              <option value="default" selected="True">Don't set</option>
-              <option value="DO_NOTHING">DO_NOTHING</option>
-              <option value="SET_Q_ZERO">SET_Q_ZERO</option>
-              <option value="SET_Q_ZERO_BASE_N">SET_Q_ZERO_BASE_N</option>
-              <option value="REMOVE_REF_BIAS">REMOVE_REF_BIAS</option>
-            </param>
-            <param name="solid_nocall_strategy" type="select" label="Behavior of the recalibrator when it encounters no calls">
-              <option value="default" selected="True">Don't set</option>
-              <option value="THROW_EXCEPTION">THROW_EXCEPTION</option>
-              <option value="LEAVE_READ_UNRECALIBRATED">LEAVE_READ_UNRECALIBRATED</option>
-              <option value="PURGE_READ">PURGE_READ</option>
-            </param>
-          </when>
-        </conditional>
-        <param name="window_size_nqs" type="integer" value="5" label="Window size used by MinimumNQSCovariate"/>
-        <param name="homopolymer_nback" type="integer" value="7" label="number of previous bases to look at in HomopolymerCovariate" />
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="csv" name="output_recal" label="${tool.name} on ${on_string} (Covariate File)" />
-    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
-  </outputs>
-  <tests>
-      <test>
-          <param name="reference_source_selector" value="history" />
-          <param name="ref_file" value="phiX.fasta" ftype="fasta" />
-          <param name="input_bam" value="gatk/gatk_indel_realigner/gatk_indel_realigner_out_1.bam" ftype="bam" />
-          <param name="input_dbsnp_rod"  />
-          <param name="rod_bind_type_selector" value="snps" />
-          <param name="rodToIntervalTrackName" />
-          <param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" />
-          <param name="standard_covs" value="True" />
-          <param name="covariates" value="ReadGroupCovariate,HomopolymerCovariate,MinimumNQSCovariate,PositionCovariate" />
-          <param name="gatk_param_type_selector" value="basic" />
-          <param name="analysis_param_type_selector" value="basic" />
-          <output name="output_recal" file="gatk/gatk_count_covariates/gatk_count_covariates_out_1.csv" /> 
-          <output name="output_log" file="gatk/gatk_count_covariates/gatk_count_covariates_out_1.log.contains" compare="contains" />
-      </test>
-  </tests>
-  <help>
-.. class:: warningmark
-
-"This calculation is critically dependent on being able to skip over known variant sites. Please provide a dbSNP ROD or a VCF file containing known sites of genetic variation."
-However, if you do not provide this file, the '--run_without_dbsnp_potentially_ruining_quality' flag will be automatically used, and the command will be allowed to run.
-  
-**What it does**
-
-     This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal, 
-     operating only at sites that are not in dbSNP. We assume that any reference mismatch seen at such a site is an 
-     error and therefore indicative of poor base quality. The walker generates tables based on various user-specified 
-     covariates (such as read group, reported quality score, machine cycle, and dinucleotide). Because a large amount 
-     of data is collected, one can then calculate an empirical probability of error given the particular covariates 
-     seen at a site, where p(error) = num mismatches / num observations. The output file is a CSV list of (the several 
-     covariate values, num observations, num mismatches, empirical quality score); the first non-comment line of the 
-     output file gives the names of the covariates that were used for this calculation. Note: ReadGroupCovariate and 
-     QualityScoreCovariate are required covariates and will be added for the user regardless of whether they were 
-     specified. Note: this walker is designed to be used in conjunction with TableRecalibrationWalker.
-
-
-------
-
-Please cite the website "http://addlink.here" as well as:
-
-Add citation here 2011.
-
-------
-
-**Input formats**
-
-GenomeAnalysisTK: CountCovariates accepts an aligned BAM input file.
-
-------
-
-**Outputs**
-
-The output is in CSV format, see http://addlink.here for more details.
-
--------
-
-**Settings**::
-
- default_read_group                              If a read has no read group then default to the provided String.
- default_platform                                If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.
- force_read_group                                If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.
- force_platform                                  If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.
- window_size_nqs                                 The window size used by MinimumNQSCovariate for its calculation.
- homopolymer_nback                               The number of previous bases to look at in HomopolymerCovariate.
- exception_if_no_tile                            If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1.
- solid_recal_mode                                How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS.
- solid_nocall_strategy                           Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ.
- out                                             The output CSV file.
- recal_file                                      Filename for the output covariates table recalibration .csv file.
- standard_covs                                   Use the standard set of covariates in addition to the ones listed using the -cov argument.
- run_without_dbsnp_potentially_ruining_quality   If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.
-
-  </help>
-</tool>
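
The empirical quality score described in the CountCovariates help above can be
reproduced with a few lines of Python. This is an illustrative sketch of the
stated formula, p(error) = num mismatches / num observations on the Phred
scale; it is not the walker's actual implementation, and the Q40 cap for
mismatch-free bins is an assumption::

    import math

    def empirical_quality( num_mismatches, num_observations, max_q=40 ):
        #Phred-scaled empirical quality: -10 * log10( p(error) )
        if num_mismatches == 0:
            return max_q #assumed cap when no mismatches are observed
        p_error = num_mismatches / float( num_observations )
        return min( max_q, -10.0 * math.log10( p_error ) )

    print( empirical_quality( 25, 10000 ) ) #25/10000 mismatches -> ~Q26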
--- a/tools/gatk/gatk_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,97 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-
-"""
-A wrapper script for running the GenomeAnalysisTK.jar commands.
-"""
-
-import sys, optparse, os, tempfile, subprocess, shutil
-from string import Template
-
-GALAXY_EXT_TO_GATK_EXT = { 'gatk_interval':'intervals', 'bam_index':'bam.bai', 'gatk_dbsnp':'dbsnp', 'picard_interval_list':'interval_list' } #items not listed here will use the galaxy extension as-is
-GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT #for now, these are the same, but could be different if needed
-DEFAULT_GATK_PREFIX = "gatk_file"
-CHUNK_SIZE = 2**20 #1mb
-
-
-def cleanup_before_exit( tmp_dir ):
-    if tmp_dir and os.path.exists( tmp_dir ):
-        shutil.rmtree( tmp_dir )
-
-def gatk_filename_from_galaxy( galaxy_filename, galaxy_ext, target_dir = None, prefix = None ):
-    suffix = GALAXY_EXT_TO_GATK_EXT.get( galaxy_ext, galaxy_ext )
-    if prefix is None:
-        prefix = DEFAULT_GATK_PREFIX
-    if target_dir is None:
-        target_dir = os.getcwd()
-    gatk_filename = os.path.join( target_dir, "%s.%s" % ( prefix, suffix ) )
-    os.symlink( galaxy_filename, gatk_filename )
-    return gatk_filename
-
-def gatk_filetype_argument_substitution( argument, galaxy_ext ):
-    return argument % dict( file_type = GALAXY_EXT_TO_GATK_FILE_TYPE.get( galaxy_ext, galaxy_ext ) )
-
-def open_file_from_option( filename, mode = 'rb' ):
-    if filename:
-        return open( filename, mode = mode )
-    return None
-
-def html_report_from_directory( html_out, dir ):
-    html_out.write( '<html>\n<head>\n<title>Galaxy - GATK Output</title>\n</head>\n<body>\n<p/>\n<ul>\n' )
-    for fname in sorted( os.listdir( dir ) ):
-        html_out.write(  '<li><a href="%s">%s</a></li>\n' % ( fname, fname ) )
-    html_out.write( '</ul>\n</body>\n</html>\n' )
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option( '-p', '--pass_through', dest='pass_through_options', action='append', type="string", help='These options are passed through directly to GATK, without any modification.' )
-    parser.add_option( '-d', '--dataset', dest='datasets', action='append', type="string", nargs=4, help='"-argument" "original_filename" "galaxy_filetype" "name_prefix"' )
-    parser.add_option( '', '--stdout', dest='stdout', action='store', type="string", default=None, help='If specified, the output of stdout will be written to this file.' )
-    parser.add_option( '', '--stderr', dest='stderr', action='store', type="string", default=None, help='If specified, the output of stderr will be written to this file.' )
-    parser.add_option( '', '--html_report_from_directory', dest='html_report_from_directory', action='append', type="string", nargs=2, help='"Target HTML File" "Directory"')
-    (options, args) = parser.parse_args()
-    
-    tmp_dir = tempfile.mkdtemp()
-    if options.pass_through_options:
-        cmd = ' '.join( options.pass_through_options )
-    else:
-        cmd = ''
-    if options.datasets:
-        for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets:
-            gatk_filename = gatk_filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix )
-            if dataset_arg:
-                cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename )
-    #set up stdout and stderr output options
-    stdout = open_file_from_option( options.stdout, mode = 'wb' )
-    stderr = open_file_from_option( options.stderr, mode = 'wb' )
-    #if no stderr file is specified, we'll use our own
-    if stderr is None:
-        stderr = tempfile.NamedTemporaryFile( dir=tmp_dir )
-        stderr.close()
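-        #closing the NamedTemporaryFile removes it; reopening the same path creates a regular file that persists until cleanup_before_exit()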
-        stderr = open( stderr.name, 'w+b' )
-    
-    proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir )
-    return_code = proc.wait()
-    
-    if return_code:
-        stderr_target = sys.stderr
-    else:
-        stderr_target = sys.stdout
-    stderr.flush()
-    stderr.seek(0)
-    while True:
-        chunk = stderr.read( CHUNK_SIZE )
-        if chunk:
-            stderr_target.write( chunk )
-        else:
-            break
-    stderr.close()
-    #generate html reports
-    if options.html_report_from_directory:
-        for ( html_filename, html_dir ) in options.html_report_from_directory:
-            html_report_from_directory( open( html_filename, 'wb' ), html_dir )
-    
-    cleanup_before_exit( tmp_dir )
-
-if __name__=="__main__": __main__()
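
To make the staging logic above concrete, here is a minimal usage sketch. It
assumes the script is importable as gatk_wrapper; the dataset path is
hypothetical::

    import tempfile
    from gatk_wrapper import gatk_filename_from_galaxy

    tmp_dir = tempfile.mkdtemp()
    #'bam_index' maps to the 'bam.bai' suffix via GALAXY_EXT_TO_GATK_EXT, so the
    #symlink is named gatk_input.bam.bai, next to a gatk_input.bam staged the same way
    staged = gatk_filename_from_galaxy( '/galaxy/files/dataset_42.dat', 'bam_index',
                                        target_dir = tmp_dir, prefix = 'gatk_input' )
    print( staged ) #e.g. /tmp/tmpXXXXXX/gatk_input.bam.bai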
--- a/tools/gatk/indel_realigner.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,332 +0,0 @@
-<tool id="gatk_indel_realigner" name="Indel Realigner" version="0.0.1">
-  <description>- perform local realignment</description>
-  <command interpreter="python">gatk_wrapper.py
-   --stdout "${output_log}"
-   -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
-   -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
-   -p 'java 
-    -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar"
-    -T "IndelRealigner"
-    ##-quiet ##this appears to have no effect...confirmed by gatk programmers
-    -o "${output_bam}"
-    -et "NO_ET" ##ET no phone home
-    ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
-    #if $reference_source.reference_source_selector != "history":
-        -R "${reference_source.ref_file.fields.path}"
-    #end if
-   -LOD "${lod_threshold}"
-    ${knowns_only}
-   '
-   
-    #set $rod_binding_names = dict()
-    #if str( $input_dbsnp_rod ) != "None":
-        -d "-D" "${input_dbsnp_rod}" "${input_dbsnp_rod.ext}" "dbsnp_rod"
-    #end if
-    #for $rod_binding in $rod_bind:
-        #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
-            #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
-        #else
-            #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
-        #end if
-        #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
-        -d "-B:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
-        #if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ):
-            -p '--rodToIntervalTrackName "${rod_bind_name}"'
-        #end if
-    #end for
-   
-    ##start standard gatk options
-    #if $gatk_param_type.gatk_param_type_selector == "advanced":
-        #for $sample_metadata in $gatk_param_type.sample_metadata:
-            -p '--sample_metadata "${sample_metadata.sample_metadata_file}"'
-        #end for
-        #for $read_filter in $gatk_param_type.read_filter:
-            -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}"
-            ###raise Exception( str( dir( $read_filter ) ) )
-            #for $name, $param in $read_filter.read_filter_type.iteritems():
-                #if $name not in [ "__current_case__", "read_filter_type_selector" ]:
-                    --${name} "${param}"
-                #end if
-            #end for
-            '
-        #end for
-        #if str( $gatk_param_type.input_intervals ) != "None":
-            -d "-L" "${gatk_param_type.input_intervals}" "${gatk_param_type.input_intervals.ext}" "input_intervals"
-        #end if
-        #if str( $gatk_param_type.input_exclude_intervals ) != "None":
-            -d "-XL" "${gatk_param_type.input_exclude_intervals}" "${gatk_param_type.input_exclude_intervals.ext}" "input_intervals"
-        #end if
-        -p '--BTI_merge_rule "${gatk_param_type.BTI_merge_rule}"'
-        -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"'
-        #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE":
-            -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"'
-        #end if
-        -p '
-        --baq "${gatk_param_type.baq}"
-        --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}"
-        ${gatk_param_type.use_original_qualities}
-        --defaultBaseQualities "${gatk_param_type.default_base_qualities}"
-        --validation_strictness "${gatk_param_type.validation_strictness}"
-        --interval_merging "${gatk_param_type.interval_merging}"
-        '
-        #if str( $gatk_param_type.read_group_black_list ) != "None":
-            -d "-read_group_black_list" "${gatk_param_type.read_group_black_list}" "txt" "input_read_group_black_list"
-        #end if
-    #end if
-    #if $reference_source.reference_source_selector == "history":
-        -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input"
-    #end if
-    ##end standard gatk options
-    ##start analysis specific options
-    -d "-targetIntervals" "${target_intervals}" "${target_intervals.ext}" "gatk_target_intervals"
-    -p '
-    -targetNotSorted ##always resort input intervals
-    --disable_bam_indexing
-    '
-    #if $analysis_param_type.analysis_param_type_selector == "advanced":
-        -p '
-        --entropyThreshold "${analysis_param_type.entropy_threshold}"
-        ${analysis_param_type.simplify_bam}
-        --maxIsizeForMovement "${analysis_param_type.max_insert_size_for_movement}"
-        --maxPositionalMoveAllowed "${analysis_param_type.max_positional_move_allowed}"
-        --maxConsensuses "${analysis_param_type.max_consensuses}"
-        --maxReadsForConsensuses "${analysis_param_type.max_reads_for_consensuses}"
-        --maxReadsForRealignment "${analysis_param_type.max_reads_for_realignment}"
-        "${analysis_param_type.no_original_alignment_tags}"
-        '
-    #end if
-  </command>
-  <inputs>
-    
-    <conditional name="reference_source">
-      <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
-        <option value="cached">Locally cached</option>
-        <option value="history">History</option>
-      </param>
-      <when value="cached">
-        <param name="input_bam" type="data" format="bam" label="BAM file">
-          <validator type="unspecified_build" />
-          <validator type="dataset_metadata_in_file" filename="picard_index.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
-        </param>
-        <param name="ref_file" type="select" label="Using reference genome">
-          <options from_data_table="picard_indexes">
-            <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/>
-          </options>
-        </param>
-      </when>
-      <when value="history"> <!-- FIX ME!!!! -->
-        <param name="input_bam" type="data" format="bam" label="BAM file" />
-        <param name="ref_file" type="data" format="fasta" label="Using reference file" />
-      </when>
-    </conditional>
-    <param name="target_intervals" type="data" format="gatk_interval,bed,picard_interval_list" label="Restrict realignment to provided intervals" />
-    <param name="input_dbsnp_rod" type="data" format="gatk_dbsnp" optional="True" label="dbSNP reference ordered data (ROD)" />
-    <repeat name="rod_bind" title="Binding for reference-ordered data">
-        <conditional name="rod_bind_type">
-	      <param name="rod_bind_type_selector" type="select" label="Binding Type">
-	        <option value="snps" selected="True">SNPs</option>
-	        <option value="indels">INDELs</option>
-	        <option value="custom">Custom</option>
-	      </param>
-          <when value="snps">
-              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-          </when>
-          <when value="indels">
-              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-          </when>
-          <when value="custom">
-              <param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/>
-              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-          </when>
-        </conditional>
-    </repeat>
-    <param name="lod_threshold" type="float" value="5.0" label="LOD threshold above which the realigner will proceed to realign" />
-    <param name="knowns_only" type="boolean" checked="False" truevalue="-knownsOnly" falsevalue="" label="Use only known indels provided as RODs"/>
-    
-    <conditional name="gatk_param_type">
-      <param name="gatk_param_type_selector" type="select" label="Basic or Advanced GATK options">
-        <option value="basic" selected="True">Basic</option>
-        <option value="advanced">Advanced</option>
-      </param>
-      <when value="basic">
-        <!-- Do nothing here -->
-      </when>
-      <when value="advanced">
-        <repeat name="sample_metadata" title="Sample Metadata">
-            <param name="sample_metadata_file" type="data" format="txt" label="Sample file(s) in JSON format" />
-        </repeat>
-        <repeat name="read_filter" title="Read Filter">
-            <conditional name="read_filter_type">
-		      <param name="read_filter_type_selector" type="select" label="Read Filter Type">
-		        <option value="MaxReadLength" selected="True">MaxReadLength</option>
-		        <option value="ZeroMappingQualityRead">ZeroMappingQualityRead</option>
-		      </param>
-	          <when value="ZeroMappingQualityRead">
-	              <!-- no extra options -->
-	          </when>
-	          <when value="MaxReadLength">
-	              <param name="maxReadLength" type="integer" value="76" label="Max Read Length"/>
-	          </when>
-            </conditional>
-        </repeat>
-        <param name="input_intervals" type="data" format="picard_interval_list" optional="True" label="A list of genomic intervals over which to operate" />
-        <param name="input_exclude_intervals" type="data" format="picard_interval_list" optional="True" label="A list of genomic intervals to exclude from processing" />
-        <param name="BTI_merge_rule" type="select" label="BTI merge rule">
-          <option value="UNION" selected="True">UNION</option>
-          <option value="INTERSECTION">INTERSECTION</option>
-        </param>
-        <conditional name="downsampling_type">
-          <param name="downsampling_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
-            <option value="NONE" selected="True">NONE</option>
-            <option value="ALL_READS">ALL_READS</option>
-            <option value="BY_SAMPLE">BY_SAMPLE</option>
-          </param>
-          <when value="NONE">
-	          <!-- no more options here -->
-	      </when>
-          <when value="ALL_READS">
-	          <conditional name="downsample_to_type">
-	              <param name="downsample_to_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
-	                  <option value="downsample_to_fraction" selected="True">Downsample by Fraction</option>
-	                  <option value="downsample_to_coverage">Downsample by Coverage</option>
-	              </param>
-	              <when value="downsample_to_fraction">
-	                  <param name="downsample_to_value" type="float" label="Fraction [0.0-1.0] of reads to downsample to" value="0.1"/>
-	              </when>
-	              <when value="downsample_to_coverage">
-	                  <param name="downsample_to_value" type="integer" label="Coverage to downsample to at any given locus" value="0"/>
-	              </when>
-	          </conditional>
-	      </when>
-          <when value="BY_SAMPLE">
-	          <conditional name="downsample_to_type">
-	              <param name="downsample_to_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
-	                  <option value="downsample_to_fraction" selected="True">Downsample by Fraction</option>
-	                  <option value="downsample_to_coverage">Downsample by Coverage</option>
-	              </param>
-	              <when value="downsample_to_fraction">
-	                  <param name="downsample_to_value" type="float" label="Fraction [0.0-1.0] of reads to downsample to" value="0.1"/>
-	              </when>
-	              <when value="downsample_to_coverage">
-	                  <param name="downsample_to_value" type="integer" label="Coverage to downsample to at any given locus" value="0"/>
-	              </when>
-	          </conditional>
-	      </when>
-        </conditional>
-        <param name="baq" type="select" label="Type of BAQ calculation to apply in the engine">
-          <option value="OFF" selected="True">OFF</option>
-          <option value="CALCULATE_AS_NECESSARY">CALCULATE_AS_NECESSARY</option>
-          <option value="RECALCULATE">RECALCULATE</option>
-        </param>
-        <param name="baq_gap_open_penalty" type="integer" label="BAQ gap open penalty (Phred Scaled)" value="40" help="Default value is 40. 30 is perhaps better for whole genome call sets."/>
-        <param name="use_original_qualities" type="boolean" truevalue="--useOriginalQualities" falsevalue="" label="Use the original base quality scores from the OQ tag" />
-        <param name="default_base_qualities" type="integer" label="Value to be used for all base quality scores, when some are missing" value="-1"/>
-        <param name="validation_strictness" type="select" label="How strict should we be with validation">
-          <option value="STRICT" selected="True">STRICT</option>
-          <option value="LENIENT">LENIENT</option>
-          <option value="SILENT">SILENT</option>
-        </param>
-        <param name="interval_merging" type="select" label="Interval merging rule">
-          <option value="ALL" selected="True">ALL</option>
-          <option value="OVERLAPPING_ONLY">OVERLAPPING_ONLY</option>
-        </param>
-        <param name="read_group_black_list" type="data" format="txt" optional="True" label="Read group black list" />
-      </when>
-    </conditional>
-    
-    <conditional name="analysis_param_type">
-      <param name="analysis_param_type_selector" type="select" label="Basic or Advanced Analysis options">
-        <option value="basic" selected="True">Basic</option>
-        <option value="advanced">Advanced</option>
-      </param>
-      <when value="basic">
-        <!-- Do nothing here -->
-      </when>
-      <when value="advanced">
-        
-        <param name="entropy_threshold" type="float" value="0.15" label="percentage of mismatching base quality scores at a position to be considered having high entropy" />
-        <param name="simplify_bam" type="boolean" checked="False" truevalue="-simplifyBAM" falsevalue="" label="Simplify BAM"/>
-        
-        <param name="max_insert_size_for_movement" type="integer" value="3000" label="Maximum insert size of read pairs that we attempt to realign" />
-        <param name="max_positional_move_allowed" type="integer" value="200" label="Maximum positional move in basepairs that a read can be adjusted during realignment" />
-        <param name="max_consensuses" type="integer" value="30" label="Max alternate consensuses to try" />
-        <param name="max_reads_for_consensuses" type="integer" value="120" label="Max reads (chosen randomly) used for finding the potential alternate consensuses" />
-        <param name="max_reads_for_realignment" type="integer" value="20000" label="Max reads allowed at an interval for realignment" />
-        <param name="no_original_alignment_tags" type="boolean" checked="False" truevalue="--noOriginalAlignmentTags" falsevalue="" label="Don't output the original cigar or alignment start tags for each realigned read in the output bam"/> 
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="bam" name="output_bam" label="${tool.name} on ${on_string} (BAM)" />
-    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
-  </outputs>
-  <tests>
-      <test>
-          <param name="reference_source_selector" value="history" />
-          <param name="ref_file" value="phiX.fasta" ftype="fasta" />
-          <param name="target_intervals" value="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.gatk_interval" ftype="gatk_interval" />
-          <param name="input_bam" value="gatk/fake_phiX_reads_1.bam" ftype="bam" />
-          <param name="input_dbsnp_rod"  />
-          <param name="rod_bind_type_selector" value="snps" />
-          <param name="rodToIntervalTrackName" />
-          <param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" />
-          <param name="lod_threshold" value="5.0" />
-          <param name="knowns_only" />
-          <param name="gatk_param_type_selector" value="basic" />
-          <param name="analysis_param_type_selector" value="basic" />
-          <output name="output_bam" file="gatk/gatk_indel_realigner/gatk_indel_realigner_out_1.bam" ftype="bam" lines_diff="2" /> 
-          <output name="output_log" file="gatk/gatk_indel_realigner/gatk_indel_realigner_out_1.log.contains" compare="contains" />
-      </test>
-  </tests>
-  <help>
-**What it does**
-
-     Performs local realignment of reads based on misalignments due to the presence of indels. Unlike most mappers, this 
-     walker uses the full alignment context to determine whether an appropriate alternate reference (i.e. indel) exists 
-     and updates SAMRecords accordingly.
-
-------
-
-Please cite the website "http://addlink.here" as well as:
-
-Add citation here 2011.
-
-------
-
-**Input formats**
-
-GenomeAnalysisTK: IndelRealigner accepts an aligned BAM and a list of intervals to realign as input files.
-
-------
-
-**Outputs**
-
-The output is in the BAM format, see http://addlink.here for more details.
-
--------
-
-**Settings**::
-
- targetIntervals                intervals file output from RealignerTargetCreator
- LODThresholdForCleaning        LOD threshold above which the cleaner will clean
- entropyThreshold               percentage of mismatching base qualities at a locus for it to be considered as having high entropy
- out                            the output BAM
- bam_compression                compression level to use for writing BAM files
- disable_bam_indexing           turn off on-the-fly creation of indices for output BAM files
- simplifyBAM                    if provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, and non-primary reads), as well as stripping all extended tags from the kept reads except the read group identifier
- useOnlyKnownIndels             don't run 'Smith-Waterman' to generate alternate consensuses; use only known indels provided as RODs for constructing the alternate references
- maxReadsInMemory               max reads allowed to be kept in memory at a time by the SAMFileWriter; keep it low to minimize memory consumption (but the tool may skip realignment on regions with too much coverage, and if it is too low, it may generate errors during realignment), or keep it high to maximize realignment (but make sure to give Java enough memory)
- maxIsizeForMovement            maximum insert size of read pairs that we attempt to realign
- maxPositionalMoveAllowed       maximum positional move in basepairs that a read can be adjusted during realignment
- maxConsensuses                 max alternate consensuses to try (necessary to improve performance in deep coverage)
- maxReadsForConsensuses         max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage)
- maxReadsForRealignment         max reads allowed at an interval for realignment; if this value is exceeded, realignment is not attempted and the reads are passed to the output file(s) as-is
- noOriginalAlignmentTags        don't output the original cigar or alignment start tags for each realigned read in the output BAM
- targetIntervalsAreNotSorted    this tool assumes that the target interval list is sorted; if the list turns out to be unsorted, it will throw an exception. Use this argument when your interval list is not sorted to instruct the Realigner to first sort it in memory
-
-
-  </help>
-</tool>
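
The rod_bind loops in the command templates above keep duplicate binding names
distinct by suffixing a running count. In plain Python the same bookkeeping
looks like this (the binding names are hypothetical)::

    rod_binding_names = dict()
    for rod_bind_name in [ 'snps', 'indels', 'snps' ]:
        #first occurrence of a name gets index 0, later duplicates get 1, 2, ...
        rod_binding_names[ rod_bind_name ] = rod_binding_names.get( rod_bind_name, -1 ) + 1
        print( 'input_%s_%i' % ( rod_bind_name, rod_binding_names[ rod_bind_name ] ) )
    #prints input_snps_0, input_indels_0, input_snps_1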
--- a/tools/gatk/realigner_target_creator.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,296 +0,0 @@
-<tool id="gatk_realigner_target_creator" name="Realigner Target Creator" version="0.0.1">
-  <description>for use in local realignment</description>
-  <command interpreter="python">gatk_wrapper.py
-   --stdout "${output_log}"
-   -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
-   -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
-   -p 'java 
-    -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar"
-    -T "RealignerTargetCreator"
-    -o "${output_interval}"
-    -et "NO_ET" ##ET no phone home
-    ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
-    #if $reference_source.reference_source_selector != "history":
-        -R "${reference_source.ref_file.fields.path}"
-    #end if
-   '
-    #set $rod_binding_names = dict()
-    #if str( $input_dbsnp_rod ) != "None":
-        -d "-D" "${input_dbsnp_rod}" "${input_dbsnp_rod.ext}" "dbsnp_rod"
-    #end if
-    #for $rod_binding in $rod_bind:
-        #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
-            #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
-        #else
-            #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
-        #end if
-        #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
-        -d "-B:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
-        #if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ):
-            -p '--rodToIntervalTrackName "${rod_bind_name}"'
-        #end if
-    #end for
-   
-    ##start standard gatk options
-    #if $gatk_param_type.gatk_param_type_selector == "advanced":
-        #for $sample_metadata in $gatk_param_type.sample_metadata:
-            -p '--sample_metadata "${sample_metadata.sample_metadata_file}"'
-        #end for
-        #for $read_filter in $gatk_param_type.read_filter:
-            -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}"
-            ###raise Exception( str( dir( $read_filter ) ) )
-            #for $name, $param in $read_filter.read_filter_type.iteritems():
-                #if $name not in [ "__current_case__", "read_filter_type_selector" ]:
-                    --${name} "${param}"
-                #end if
-            #end for
-            '
-        #end for
-        #if str( $gatk_param_type.input_intervals ) != "None":
-            -d "-L" "${gatk_param_type.input_intervals}" "${gatk_param_type.input_intervals.ext}" "input_intervals"
-        #end if
-        #if str( $gatk_param_type.input_exclude_intervals ) != "None":
-            -d "-XL" "${gatk_param_type.input_exclude_intervals}" "${gatk_param_type.input_exclude_intervals.ext}" "input_intervals"
-        #end if
-
-        -p '--BTI_merge_rule "${gatk_param_type.BTI_merge_rule}"'
-        
-        -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"'
-        #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE":
-            -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"'
-        #end if
-        -p '
-        --baq "${gatk_param_type.baq}"
-        --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}"
-        ${gatk_param_type.use_original_qualities}
-        --defaultBaseQualities "${gatk_param_type.default_base_qualities}"
-        --validation_strictness "${gatk_param_type.validation_strictness}"
-        --interval_merging "${gatk_param_type.interval_merging}"
-        '
-        #if str( $gatk_param_type.read_group_black_list ) != "None":
-            -d "-read_group_black_list" "${gatk_param_type.read_group_black_list}" "txt" "input_read_group_black_list"
-        #end if
-    #end if
-    #if $reference_source.reference_source_selector == "history":
-        -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input"
-    #end if
-    ##end standard gatk options
-    ##start analysis specific options
-    #if $analysis_param_type.analysis_param_type_selector == "advanced":
-        -p '
-        --minReadsAtLocus "${analysis_param_type.minReadsAtLocus}"
-        --windowSize "${analysis_param_type.windowSize}"
-        --mismatchFraction "${analysis_param_type.mismatchFraction}"
-        --maxIntervalSize "${analysis_param_type.maxIntervalSize}"
-        '
-    #end if
-  </command>
-  <inputs>
-    <conditional name="reference_source">
-      <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
-        <option value="cached">Locally cached</option>
-        <option value="history">History</option>
-      </param>
-      <when value="cached">
-        <param name="input_bam" type="data" format="bam" label="BAM file">
-          <validator type="unspecified_build" />
-          <validator type="dataset_metadata_in_file" filename="picard_index.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
-        </param>
-        <param name="ref_file" type="select" label="Using reference genome">
-          <options from_data_table="picard_indexes">
-            <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/>
-          </options>
-        </param>
-      </when>
-      <when value="history"> <!-- FIX ME!!!! -->
-        <param name="input_bam" type="data" format="bam" label="BAM file" />
-        <param name="ref_file" type="data" format="fasta" label="Using reference file" />
-      </when>
-    </conditional>
-    
-    <param name="input_dbsnp_rod" type="data" format="gatk_dbsnp" optional="True" label="dbSNP reference ordered data (ROD)" />
-    <repeat name="rod_bind" title="Binding for reference-ordered data">
-        <conditional name="rod_bind_type">
-	      <param name="rod_bind_type_selector" type="select" label="Binding Type">
-	        <option value="snps" selected="True">SNPs</option>
-	        <option value="indels">INDELs</option>
-	        <option value="custom">Custom</option>
-	      </param>
-          <when value="snps">
-              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-          </when>
-          <when value="indels">
-              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-          </when>
-          <when value="custom">
-              <param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/>
-              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-          </when>
-        </conditional>
-    </repeat>
-    
-    <conditional name="gatk_param_type">
-      <param name="gatk_param_type_selector" type="select" label="Basic or Advanced GATK options">
-        <option value="basic" selected="True">Basic</option>
-        <option value="advanced">Advanced</option>
-      </param>
-      <when value="basic">
-        <!-- Do nothing here -->
-      </when>
-      <when value="advanced">
-        <repeat name="sample_metadata" title="Sample Metadata">
-            <param name="sample_metadata_file" type="data" format="txt" label="Sample file(s) in JSON format" />
-        </repeat>
-        <repeat name="read_filter" title="Read Filter">
-            <conditional name="read_filter_type">
-		      <param name="read_filter_type_selector" type="select" label="Read Filter Type">
-		        <option value="MaxReadLength" selected="True">MaxReadLength</option>
-		        <option value="ZeroMappingQualityRead">ZeroMappingQualityRead</option>
-		      </param>
-	          <when value="ZeroMappingQualityRead">
-	              <!-- no extra options -->
-	          </when>
-	          <when value="MaxReadLength">
-	              <param name="maxReadLength" type="integer" value="76" label="Max Read Length"/>
-	          </when>
-            </conditional>
-        </repeat>
-        <param name="input_intervals" type="data" format="picard_interval_list" optional="True" label="A list of genomic intervals over which to operate" />
-        <param name="input_exclude_intervals" type="data" format="picard_interval_list" optional="True" label="A list of genomic intervals to exclude from processing" />
-        
-        <param name="BTI_merge_rule" type="select" label="BTI merge rule">
-          <option value="UNION" selected="True">UNION</option>
-          <option value="INTERSECTION">INTERSECTION</option>
-        </param>
-        
-        <conditional name="downsampling_type">
-          <param name="downsampling_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
-            <option value="NONE" selected="True">NONE</option>
-            <option value="ALL_READS">ALL_READS</option>
-            <option value="BY_SAMPLE">BY_SAMPLE</option>
-          </param>
-          <when value="NONE">
-	          <!-- no more options here -->
-	      </when>
-          <when value="ALL_READS">
-	          <conditional name="downsample_to_type">
-	              <param name="downsample_to_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
-	                  <option value="downsample_to_fraction" selected="True">Downsample by Fraction</option>
-	                  <option value="downsample_to_coverage">Downsample by Coverage</option>
-	              </param>
-	              <when value="downsample_to_fraction">
-	                  <param name="downsample_to_value" type="float" label="Fraction [0.0-1.0] of reads to downsample to" value="0.1"/>
-	              </when>
-	              <when value="downsample_to_coverage">
-	                  <param name="downsample_to_value" type="integer" label="Coverage to downsample to at any given locus" value="0"/>
-	              </when>
-	          </conditional>
-	      </when>
-          <when value="BY_SAMPLE">
-	          <conditional name="downsample_to_type">
-	              <param name="downsample_to_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
-	                  <option value="downsample_to_fraction" selected="True">Downsample by Fraction</option>
-	                  <option value="downsample_to_coverage">Downsample by Coverage</option>
-	              </param>
-	              <when value="downsample_to_fraction">
-	                  <param name="downsample_to_value" type="float" label="Fraction [0.0-1.0] of reads to downsample to" value="0.1"/>
-	              </when>
-	              <when value="downsample_to_coverage">
-	                  <param name="downsample_to_value" type="integer" label="Coverage to downsample to at any given locus" value="0"/>
-	              </when>
-	          </conditional>
-	      </when>
-        </conditional>
-        <param name="baq" type="select" label="Type of BAQ calculation to apply in the engine">
-          <option value="OFF" selected="True">OFF</option>
-          <option value="CALCULATE_AS_NECESSARY">CALCULATE_AS_NECESSARY</option>
-          <option value="RECALCULATE">RECALCULATE</option>
-        </param>
-        <param name="baq_gap_open_penalty" type="integer" label="BAQ gap open penalty (Phred Scaled)" value="40" help="Default value is 40. 30 is perhaps better for whole genome call sets."/>
-        <param name="use_original_qualities" type="boolean" truevalue="--useOriginalQualities" falsevalue="" label="Use the original base quality scores from the OQ tag" />
-        <param name="default_base_qualities" type="integer" label="Value to be used for all base quality scores, when some are missing" value="-1"/>
-        <param name="validation_strictness" type="select" label="How strict should we be with validation">
-          <option value="STRICT" selected="True">STRICT</option>
-          <option value="LENIENT">LENIENT</option>
-          <option value="SILENT">SILENT</option>
-        </param>
-        <param name="interval_merging" type="select" label="Interval merging rule">
-          <option value="ALL" selected="True">ALL</option>
-          <option value="OVERLAPPING_ONLY">OVERLAPPING_ONLY</option>
-        </param>
-        <param name="read_group_black_list" type="data" format="txt" optional="True" label="Read group black list" />
-      </when>
-    </conditional>
-    
-    <conditional name="analysis_param_type">
-      <param name="analysis_param_type_selector" type="select" label="Basic or Advanced Analysis options">
-        <option value="basic" selected="True">Basic</option>
-        <option value="advanced">Advanced</option>
-      </param>
-      <when value="basic">
-        <!-- Do nothing here -->
-      </when>
-      <when value="advanced">
-        <param name="windowSize" type="integer" value="10" label="Window size for calculating entropy or SNP clusters (windowSize)" />
-        <param name="mismatchFraction" type="float" value="0.15" label="Fraction of base qualities needing to mismatch for a position to have high entropy (mismatchFraction)" help="to disable set to &lt;= 0 or &gt; 1"/>
-        <param name="minReadsAtLocus" type="integer" value="4" label="Minimum reads at a locus to enable using the entropy calculation (minReadsAtLocus)" />
-        <param name="maxIntervalSize" type="integer" value="500" label="Maximum interval size" />
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="gatk_interval" name="output_interval" label="${tool.name} on ${on_string} (GATK intervals)" />
-    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
-  </outputs>
-  <tests>
-      <test>
-          <param name="reference_source_selector" value="history" />
-          <param name="ref_file" value="phiX.fasta" ftype="fasta" />
-          <param name="input_bam" value="gatk/fake_phiX_reads_1.bam" ftype="bam" />
-          <param name="input_dbsnp_rod"  />
-          <param name="rod_bind_type_selector" value="snps" />
-          <param name="rodToIntervalTrackName" />
-          <param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" />
-          <param name="gatk_param_type_selector" value="basic" />
-          <param name="analysis_param_type_selector" value="basic" />
-          <output name="output_interval" file="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.gatk_interval" /> 
-          <output name="output_log" file="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.log.contains" compare="contains"/>
-      </test>
-  </tests>
-  <help>
-**What it does**
-
-Emits intervals for the Local Indel Realigner to target for cleaning.  Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string.
-
-------
-
-Please cite the website "http://addlink.here" as well as:
-
-Add citation here 2011.
-
-------
-
-**Input formats**
-
-GenomeAnalysisTK: RealignerTargetCreator accepts an aligned BAM input file.
-
-------
-
-**Outputs**
-
-The output is in GATK Interval format, see http://addlink.here for more details.
-
--------
-
-**Settings**::
-
- windowSize          window size for calculating entropy or SNP clusters
- mismatchFraction    fraction of base qualities needing to mismatch for a position to have high entropy; to disable set to &lt;= 0 or &gt; 1
- minReadsAtLocus     minimum reads at a locus to enable using the entropy calculation
- maxIntervalSize     maximum interval size
-
-  </help>
-</tool>
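
As a rough illustration of what windowSize, mismatchFraction, and
minReadsAtLocus control (a sketch of the idea only, not the walker's actual
algorithm): a window is flagged as high entropy when enough of its base-quality
mass mismatches the reference::

    def is_high_entropy( mismatch_quals, all_quals, mismatch_fraction = 0.15, min_reads = 4 ):
        #to disable the fraction test, set it to <= 0 or > 1, as the help above notes
        if mismatch_fraction <= 0 or mismatch_fraction > 1:
            return False
        if len( all_quals ) < min_reads:
            return False
        return sum( mismatch_quals ) >= mismatch_fraction * sum( all_quals )

    print( is_high_entropy( [ 30, 30 ], [ 30 ] * 10 ) ) #60 >= 0.15 * 300 -> True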
--- a/tools/gatk/table_recalibration.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,400 +0,0 @@
-<tool id="gatk_table_recalibration" name="Table Recalibration" version="0.0.1">
-  <description>on BAM files</description>
-  <command interpreter="python">gatk_wrapper.py
-   --stdout "${output_log}"
-   -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
-   -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
-   -p 'java 
-    -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar"
-    -T "TableRecalibration"
-    -o "${output_bam}"
-    -et "NO_ET" ##ET no phone home
-    ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
-    #if $reference_source.reference_source_selector != "history":
-        -R "${reference_source.ref_file.fields.path}"
-    #end if
-    --recal_file "${input_recal}"
-    --disable_bam_indexing
-   '
-    ##start standard gatk options
-    #if $gatk_param_type.gatk_param_type_selector == "advanced":
-        #for $sample_metadata in $gatk_param_type.sample_metadata:
-            -p '--sample_metadata "${sample_metadata.sample_metadata_file}"'
-        #end for
-        #for $read_filter in $gatk_param_type.read_filter:
-            -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}"
-            ###raise Exception( str( dir( $read_filter ) ) )
-            #for $name, $param in $read_filter.read_filter_type.iteritems():
-                #if $name not in [ "__current_case__", "read_filter_type_selector" ]:
-                    --${name} "${param}"
-                #end if
-            #end for
-            '
-        #end for
-        #if str( $gatk_param_type.input_intervals ) != "None":
-            -d "-L" "${gatk_param_type.input_intervals}" "${gatk_param_type.input_intervals.ext}" "input_intervals"
-        #end if
-        #if str( $gatk_param_type.input_exclude_intervals ) != "None":
-            -d "-XL" "${gatk_param_type.input_exclude_intervals}" "${gatk_param_type.input_exclude_intervals.ext}" "input_intervals"
-        #end if
-        #set $rod_binding_names = dict()
-        #for $rod_binding in $gatk_param_type.rod_bind:
-            #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
-                #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
-            #else
-                #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
-            #end if
-            #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
-            -d "-B:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
-            #if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ):
-                -p '--rodToIntervalTrackName "${rod_bind_name}"'
-            #end if
-        #end for
-        -p '--BTI_merge_rule "${gatk_param_type.BTI_merge_rule}"'
-        #if str( $gatk_param_type.input_dbsnp_rod ) != "None":
-            -d "-D" "${gatk_param_type.input_dbsnp_rod}" "${gatk_param_type.input_dbsnp_rod.ext}" "dbsnp_rod"
-        #end if
-        -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"'
-        #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE":
-            -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"'
-        #end if
-        -p '
-        --baq "${gatk_param_type.baq}"
-        --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}"
-        ${gatk_param_type.use_original_qualities}
-        --defaultBaseQualities "${gatk_param_type.default_base_qualities}"
-        --validation_strictness "${gatk_param_type.validation_strictness}"
-        --interval_merging "${gatk_param_type.interval_merging}"
-        '
-        #if str( $gatk_param_type.read_group_black_list ) != "None":
-            -d "-read_group_black_list" "${gatk_param_type.read_group_black_list}" "txt" "input_read_group_black_list"
-        #end if
-    #end if
-    #if str( $reference_source.reference_source_selector ) == "history":
-        -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input"
-    #end if
-    ##end standard gatk options
-    
-    ##start analysis specific options
-    #if $analysis_param_type.analysis_param_type_selector == "advanced":
-        -p '
-        #if $analysis_param_type.default_read_group_type.default_read_group_type_selector == "set":
-            --default_read_group "${analysis_param_type.default_read_group_type.default_read_group}"
-        #end if
-        #if str( $analysis_param_type.default_platform ) != "default":
-            --default_platform "${analysis_param_type.default_platform}"
-        #end if
-        #if str( $analysis_param_type.force_read_group_type.force_read_group_type_selector ) == "set":
-            --force_read_group "${analysis_param_type.force_read_group_type.force_read_group}"
-        #end if
-        #if str( $analysis_param_type.force_platform ) != "default":
-            --force_platform "${analysis_param_type.force_platform}"
-        #end if
-        ${analysis_param_type.exception_if_no_tile}
-        #if str( $analysis_param_type.solid_options_type.solid_options_type_selector ) == "set":
-            #if str( $analysis_param_type.solid_options_type.solid_recal_mode ) != "default":
-                --solid_recal_mode "${analysis_param_type.solid_options_type.solid_recal_mode}" 
-            #end if
-            #if str( $analysis_param_type.solid_options_type.solid_nocall_strategy ) != "default":
-                --solid_nocall_strategy "${analysis_param_type.solid_options_type.solid_nocall_strategy}" 
-            #end if
-        #end if
-        ${analysis_param_type.simplify_bam}
-        --preserve_qscores_less_than "${analysis_param_type.preserve_qscores_less_than}"
-        --smoothing "${analysis_param_type.smoothing}"
-        --max_quality_score "${analysis_param_type.max_quality_score}"
-        --window_size_nqs "${analysis_param_type.window_size_nqs}"
-        --homopolymer_nback "${analysis_param_type.homopolymer_nback}"
-        ${analysis_param_type.do_not_write_original_quals}
-        '
-    #end if
-  </command>
-  <inputs>
-    <param name="input_recal" type="data" format="csv" label="Covariates table recalibration file" />
-    <conditional name="reference_source">
-      <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
-        <option value="cached">Locally cached</option>
-        <option value="history">History</option>
-      </param>
-      <when value="cached">
-        <param name="input_bam" type="data" format="bam" label="BAM file">
-          <validator type="unspecified_build" />
-          <validator type="dataset_metadata_in_file" filename="picard_index.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
-        </param>
-        <param name="ref_file" type="select" label="Using reference genome">
-          <options from_data_table="picard_indexes">
-            <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/>
-          </options>
-        </param>
-      </when>
-      <when value="history"> <!-- FIX ME!!!! -->
-        <param name="input_bam" type="data" format="bam" label="BAM file" />
-        <param name="ref_file" type="data" format="fasta" label="Using reference file" />
-      </when>
-    </conditional>
-    
-    <conditional name="gatk_param_type">
-      <param name="gatk_param_type_selector" type="select" label="Basic or Advanced GATK options">
-        <option value="basic" selected="True">Basic</option>
-        <option value="advanced">Advanced</option>
-      </param>
-      <when value="basic">
-        <!-- Do nothing here -->
-      </when>
-      <when value="advanced">
-        <repeat name="sample_metadata" title="Sample Metadata">
-            <param name="sample_metadata_file" type="data" format="txt" label="Sample file(s) in JSON format" />
-        </repeat>
-        <repeat name="read_filter" title="Read Filter">
-            <conditional name="read_filter_type">
-		      <param name="read_filter_type_selector" type="select" label="Read Filter Type">
-		        <option value="MaxReadLength" selected="True">MaxReadLength</option>
-		        <option value="ZeroMappingQualityRead">ZeroMappingQualityRead</option>
-		      </param>
-	          <when value="ZeroMappingQualityRead">
-	              <!-- no extra options -->
-	          </when>
-	          <when value="MaxReadLength">
-	              <param name="maxReadLength" type="integer" value="76" label="Max Read Length"/>
-	          </when>
-            </conditional>
-        </repeat>
-        <param name="input_intervals" type="data" format="picard_interval_list" optional="True" label="A list of genomic intervals over which to operate" />
-        <param name="input_exclude_intervals" type="data" format="picard_interval_list" optional="True" label="A list of genomic intervals to exclude from processing" />
-        <repeat name="rod_bind" title="Binding for reference-ordered data">
-            <conditional name="rod_bind_type">
-		      <param name="rod_bind_type_selector" type="select" label="Binding Type">
-		        <option value="snps" selected="True">SNPs</option>
-		        <option value="indels">INDELs</option>
-		        <option value="custom">Custom</option>
-		      </param>
-	          <when value="snps">
-	              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-	              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-	          </when>
-	          <when value="indels">
-	              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-	              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-	          </when>
-	          <when value="custom">
-	              <param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/>
-	              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-	              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-	          </when>
-            </conditional>
-        </repeat>
-        <param name="BTI_merge_rule" type="select" label="BTI merge rule">
-          <option value="UNION" selected="True">UNION</option>
-          <option value="INTERSECTION">INTERSECTION</option>
-        </param>
-        <param name="input_dbsnp_rod" type="data" format="gatk_dbsnp" optional="True" label="dbSNP reference ordered data (ROD)" />
-        <conditional name="downsampling_type">
-          <param name="downsampling_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
-            <option value="NONE" selected="True">NONE</option>
-            <option value="ALL_READS">ALL_READS</option>
-            <option value="BY_SAMPLE">BY_SAMPLE</option>
-          </param>
-          <when value="NONE">
-	          <!-- no more options here -->
-	      </when>
-          <when value="ALL_READS">
-	          <conditional name="downsample_to_type">
-	              <param name="downsample_to_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
-	                  <option value="downsample_to_fraction" selected="True">Downsample by Fraction</option>
-	                  <option value="downsample_to_coverage">Downsample by Coverage</option>
-	              </param>
-	              <when value="downsample_to_fraction">
-	                  <param name="downsample_to_value" type="float" label="Fraction [0.0-1.0] of reads to downsample to" value="0.1"/>
-	              </when>
-	              <when value="downsample_to_coverage">
-	                  <param name="downsample_to_value" type="integer" label="Coverage to downsample to at any given locus" value="0"/>
-	              </when>
-	          </conditional>
-	      </when>
-          <when value="BY_SAMPLE">
-	          <conditional name="downsample_to_type">
-	              <param name="downsample_to_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
-	                  <option value="downsample_to_fraction" selected="True">Downsample by Fraction</option>
-	                  <option value="downsample_to_coverage">Downsample by Coverage</option>
-	              </param>
-	              <when value="downsample_to_fraction">
-	                  <param name="downsample_to_value" type="float" label="Fraction [0.0-1.0] of reads to downsample to" value="0.1"/>
-	              </when>
-	              <when value="downsample_to_coverage">
-	                  <param name="downsample_to_value" type="integer" label="Coverage to downsample to at any given locus" value="0"/>
-	              </when>
-	          </conditional>
-	      </when>
-        </conditional>
-        <param name="baq" type="select" label="Type of BAQ calculation to apply in the engine">
-          <option value="OFF" selected="True">OFF</option>
-          <option value="CALCULATE_AS_NECESSARY">CALCULATE_AS_NECESSARY</option>
-          <option value="RECALCULATE">RECALCULATE</option>
-        </param>
-        <param name="baq_gap_open_penalty" type="integer" label="BAQ gap open penalty (Phred Scaled)" value="40" help="Default value is 40. 30 is perhaps better for whole genome call sets."/>
-        <param name="use_original_qualities" type="boolean" truevalue="--useOriginalQualities" falsevalue="" label="Use the original base quality scores from the OQ tag" />
-        <param name="default_base_qualities" type="integer" label="Value to be used for all base quality scores, when some are missing" value="-1"/>
-        <param name="validation_strictness" type="select" label="How strict should we be with validation">
-          <option value="STRICT" selected="True">STRICT</option>
-          <option value="LENIENT">LENIENT</option>
-          <option value="SILENT">SILENT</option>
-        </param>
-        <param name="interval_merging" type="select" label="Interval merging rule">
-          <option value="ALL" selected="True">ALL</option>
-          <option value="OVERLAPPING_ONLY">OVERLAPPING_ONLY</option>
-        </param>
-        <param name="read_group_black_list" type="data" format="txt" optional="True" label="Read group black list" />
-      </when>
-    </conditional>
-    
-    
-    <conditional name="analysis_param_type">
-      <param name="analysis_param_type_selector" type="select" label="Basic or Advanced Analysis options">
-        <option value="basic" selected="True">Basic</option>
-        <option value="advanced">Advanced</option>
-      </param>
-      <when value="basic">
-        <!-- Do nothing here -->
-      </when>
-      <when value="advanced">
-        <conditional name="default_read_group_type">
-          <param name="default_read_group_type_selector" type="select" label="Set default Read Group">
-            <option value="default" selected="True">Don't Set</option>
-            <option value="set">Set</option>
-          </param>
-          <when value="default">
-            <!-- do nothing here -->
-          </when>
-          <when value="set">
-            <param name="default_read_group" type="text" value="Unknown" label="If a read has no read group then default to the provided String"/>
-          </when>
-        </conditional>
-        <param name="default_platform" type="select" label="Set default Platform">
-          <option value="default" selected="True">Don't Set</option>
-          <option value="illumina">illumina</option>
-          <option value="454">454</option>
-          <option value="solid">solid</option>
-        </param>
-        <conditional name="force_read_group_type">
-          <param name="force_read_group_type_selector" type="select" label="Force Read Group">
-            <option value="default" selected="True">Don't Force</option>
-            <option value="set">Force</option>
-          </param>
-          <when value="default">
-            <!-- do nothing here -->
-          </when>
-          <when value="set">
-            <param name="force_read_group" type="text" value="Unknown" label="If provided, the read group ID of EVERY read will be forced to be the provided String."/>
-          </when>
-        </conditional>
-        <param name="force_platform" type="select" label="Force Platform">
-          <option value="default" selected="True">Don't Force</option>
-          <option value="illumina">illumina</option>
-          <option value="454">454</option>
-          <option value="solid">solid</option>
-        </param>
-        <param name="exception_if_no_tile" type="boolean" checked="False" truevalue="--exception_if_no_tile" falsevalue="" label="Throw an exception when no tile can be found"/>
-        <conditional name="solid_options_type">
-          <param name="solid_options_type_selector" type="select" label="Set SOLiD specific options">
-            <option value="default" selected="True">Don't Set</option>
-            <option value="set">Set</option>
-          </param>
-          <when value="default">
-            <!-- do nothing here -->
-          </when>
-          <when value="set">
-            <param name="solid_recal_mode" type="select" label="How should we recalibrate solid bases in which the reference was inserted">
-              <option value="default" selected="True">Don't set</option>
-              <option value="DO_NOTHING">DO_NOTHING</option>
-              <option value="SET_Q_ZERO">SET_Q_ZERO</option>
-              <option value="SET_Q_ZERO_BASE_N">SET_Q_ZERO_BASE_N</option>
-              <option value="REMOVE_REF_BIAS">REMOVE_REF_BIAS</option>
-            </param>
-            <param name="solid_nocall_strategy" type="select" label="Behavior of the recalibrator when it encounters no calls">
-              <option value="default" selected="True">Don't set</option>
-              <option value="THROW_EXCEPTION">THROW_EXCEPTION</option>
-              <option value="LEAVE_READ_UNRECALIBRATED">LEAVE_READ_UNRECALIBRATED</option>
-              <option value="PURGE_READ">PURGE_READ</option>
-            </param>
-          </when>
-        </conditional>
-        <param name="simplify_bam" type="boolean" checked="False" truevalue="-simplifyBAM" falsevalue="" label="Simplify BAM"/>
-        <param name="window_size_nqs" type="integer" value="5" label="Window size used by MinimumNQSCovariate"/>
-        <param name="homopolymer_nback" type="integer" value="7" label="Number of previous bases to look at in HomopolymerCovariate" />
-        <param name="preserve_qscores_less_than" type="integer" value="5" label="Bases with quality scores less than this threshold won't be recalibrated"/>
-        <param name="smoothing" type="integer" value="1" label="smoothing"/>
-        <param name="max_quality_score" type="integer" value="50" label="Max quality score"/>
-        <param name="do_not_write_original_quals" type="boolean" checked="False" truevalue="--doNotWriteOriginalQuals" falsevalue="" label="Do Not Write Original Quality tag"/>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="bam" name="output_bam" label="${tool.name} on ${on_string} (BAM)" />
-    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
-  </outputs>
-  <tests>
-      <test>
-          <param name="input_recal" value="gatk/gatk_count_covariates/gatk_count_covariates_out_1.csv" ftype="csv" /> 
-          <param name="reference_source_selector" value="history" />
-          <param name="ref_file" value="phiX.fasta" ftype="fasta" />
-          <param name="input_bam" value="gatk/gatk_indel_realigner/gatk_indel_realigner_out_1.bam" ftype="bam" />
-          <param name="gatk_param_type_selector" value="basic" />
-          <param name="analysis_param_type_selector" value="basic" />
-          <output name="output_bam" file="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" lines_diff="2" />
-          <output name="output_log" file="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.log.contains" compare="contains" />
-      </test>
-  </tests>
-  <help>
-**What it does**
-
-     This walker is designed to work as the second pass in a two-pass processing step, doing a by-read traversal. For
-     each base in each read, this walker calculates various user-specified covariates (such as read group, reported
-     quality score, cycle, and dinuc). Using these values as a key into a large hashmap, the walker calculates an
-     empirical base quality score and overwrites the quality score currently in the read. It then outputs a new BAM
-     file with these updated (recalibrated) reads. Note: this walker is designed to be used in conjunction with
-     CovariateCounterWalker and expects as input the recalibration table file generated previously by that walker.
-
-------
-
-Please cite the website "http://addlink.here" as well as:
-
-Add citation here 2011.
-
-------
-
-**Input formats**
-
-GenomeAnalysisTK: TableRecalibration accepts an aligned BAM file and a recalibration CSV file as input.
-
-------
-
-**Outputs**
-
-The output is in BAM format, see http://addlink.here for more details.
-
--------
-
-**Settings**::
-
- default_read_group            If a read has no read group then default to the provided String.
- default_platform              If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.
- force_read_group              If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.
- force_platform                If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.
- window_size_nqs               The window size used by MinimumNQSCovariate for its calculation
- homopolymer_nback             The number of previous bases to look at in HomopolymerCovariate
- exception_if_no_tile          If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1
- solid_recal_mode              How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS (DO_NOTHING|SET_Q_ZERO|SET_Q_ZERO_BASE_N|REMOVE_REF_BIAS)
- solid_nocall_strategy         Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ (THROW_EXCEPTION|LEAVE_READ_UNRECALIBRATED|PURGE_READ)
- recal_file                    Filename for the input covariates table recalibration .csv file
- out                           The output BAM file
- bam_compression               Compression level to use for writing BAM files
- disable_bam_indexing          Turn off on-the-fly creation of indices for output BAM files.
- simplifyBAM                   If provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well as stripping all extended tags from the kept reads except the read group identifier
- preserve_qscores_less_than    Bases with quality scores less than this threshold won't be recalibrated, default=5. In general it's unsafe to change quality scores below 5, since base callers use these values to indicate random or bad bases
- smoothing                     Number of imaginary counts to add to each bin in order to smooth out bins with few data points, default=1
- max_quality_score             The integer value at which to cap the quality scores, default=50
- doNotWriteOriginalQuals       If true, we will not write the original quality (OQ) tag for each read
-
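-------
-
-**Example**
-
-The following is an illustrative sketch (not part of this tool) of the recalibration lookup the
-walker performs: the covariate values of a base form a key into a table of observed counts, from
-which an empirical phred-scaled quality is derived. All names here are hypothetical::
-
-  import math
-
-  # hypothetical table: (read_group, reported_qual, cycle, dinuc) -> (observations, mismatches)
-  recal_table = { ("rg1", 30, 5, "AC"): (10000, 25) }
-
-  def empirical_quality(key, reported_qual, smoothing=1, max_q=50):
-      """Phred-scaled quality from the observed error rate, with additive smoothing."""
-      observations, mismatches = recal_table.get(key, (0, 0))
-      if observations == 0:
-          return reported_qual  # no evidence for this covariate combination
-      error_rate = (mismatches + smoothing) / float(observations + smoothing)
-      return min(max_q, int(round(-10 * math.log10(error_rate))))
-
-  print empirical_quality(("rg1", 30, 5, "AC"), 30)  # prints 26
-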
-  </help>
-</tool>
--- a/tools/gatk/unified_genotyper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,414 +0,0 @@
-<tool id="gatk_unified_genotyper" name="Unified Genotyper" version="0.0.1">
-  <description>SNP and indel caller</description>
-  <command interpreter="python">gatk_wrapper.py
-   --stdout "${output_log}"
-   #for $i, $input_bam in enumerate( $reference_source.input_bams ):
-       -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}"
-       -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index
-   #end for
-   -p 'java 
-    -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar"
-    -T "UnifiedGenotyper"
-    -o "${output_vcf}"
-    -et "NO_ET" ##ET no phone home
-    ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
-    #if $reference_source.reference_source_selector != "history":
-        -R "${reference_source.ref_file.fields.path}"
-    #end if
-    --standard_min_confidence_threshold_for_calling "${standard_min_confidence_threshold_for_calling}"
-    --standard_min_confidence_threshold_for_emitting "${standard_min_confidence_threshold_for_emitting}"
-   '
-    #set $rod_binding_names = dict()
-    #if str( $input_dbsnp_rod ) != "None":
-        -d "-D" "${input_dbsnp_rod}" "${input_dbsnp_rod.ext}" "dbsnp_rod"
-    #end if
-    #for $rod_binding in $rod_bind:
-        #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
-            #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
-        #else
-            #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
-        #end if
-        #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
-        -d "-B:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
-        #if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ):
-            -p '--rodToIntervalTrackName "${rod_bind_name}"'
-        #end if
-    #end for
-   
-    ##start standard gatk options
-    #if $gatk_param_type.gatk_param_type_selector == "advanced":
-        #for $sample_metadata in $gatk_param_type.sample_metadata:
-            -p '--sample_metadata "${sample_metadata.sample_metadata_file}"'
-        #end for
-        #for $read_filter in $gatk_param_type.read_filter:
-            -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}"
-            ###raise Exception( str( dir( $read_filter ) ) )
-            #for $name, $param in $read_filter.read_filter_type.iteritems():
-                #if $name not in [ "__current_case__", "read_filter_type_selector" ]:
-                    --${name} "${param}"
-                #end if
-            #end for
-            '
-        #end for
-        #if str( $gatk_param_type.input_intervals ) != "None":
-            -d "-L" "${gatk_param_type.input_intervals}" "${gatk_param_type.input_intervals.ext}" "input_intervals"
-        #end if
-        #if str( $gatk_param_type.input_exclude_intervals ) != "None":
-            -d "-XL" "${gatk_param_type.input_exclude_intervals}" "${gatk_param_type.input_exclude_intervals.ext}" "input_intervals"
-        #end if
-
-        -p '--BTI_merge_rule "${gatk_param_type.BTI_merge_rule}"'
-        
-        -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"'
-        #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE":
-            -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"'
-        #end if
-        -p '
-        --baq "${gatk_param_type.baq}"
-        --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}"
-        ${gatk_param_type.use_original_qualities}
-        --defaultBaseQualities "${gatk_param_type.default_base_qualities}"
-        --validation_strictness "${gatk_param_type.validation_strictness}"
-        --interval_merging "${gatk_param_type.interval_merging}"
-        '
-        #if str( $gatk_param_type.read_group_black_list ) != "None":
-            -d "-read_group_black_list" "${gatk_param_type.read_group_black_list}" "txt" "input_read_group_black_list"
-        #end if
-    #end if
-    #if $reference_source.reference_source_selector == "history":
-        -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input"
-    #end if
-    ##end standard gatk options
-    ##start analysis specific options
-    #if $analysis_param_type.analysis_param_type_selector == "advanced":
-        -p '
-        --genotype_likelihoods_model "${analysis_param_type.genotype_likelihoods_model}"
-        --p_nonref_model "${analysis_param_type.p_nonref_model}"
-        --heterozygosity "${analysis_param_type.heterozygosity}"
-        --pcr_error_rate "${analysis_param_type.pcr_error_rate}"
-        --genotyping_mode "${analysis_param_type.genotyping_mode}"
-        --output_mode "${analysis_param_type.output_mode}"
-        ${analysis_param_type.noSLOD}
-        --min_base_quality_score "${analysis_param_type.min_base_quality_score}"
-        --min_mapping_quality_score "${analysis_param_type.min_mapping_quality_score}"
-        --max_deletion_fraction "${analysis_param_type.max_deletion_fraction}"
-        --min_indel_count_for_genotyping "${analysis_param_type.min_indel_count_for_genotyping}"
-        --indel_heterozygosity "${analysis_param_type.indel_heterozygosity}"
-        --indelGapContinuationPenalty "${analysis_param_type.indelGapContinuationPenalty}"
-        --indelGapOpenPenalty "${analysis_param_type.indelGapOpenPenalty}"
-        --indelHaplotypeSize "${analysis_param_type.indelHaplotypeSize}"
-        ${analysis_param_type.doContextDependentGapPenalties}
-        #if $analysis_param_type.annotation.value:
-            #for $annotation in $analysis_param_type.annotation.value:
-                --annotation "${annotation}"
-            #end for
-        #end if
-        #if $analysis_param_type.group.value:
-            #for $group in $analysis_param_type.group.value:
-                --group "${group}"
-            #end for
-        #end if
-        '
-    #end if
-  </command>
-  <inputs>
-    <conditional name="reference_source">
-      <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
-        <option value="cached">Locally cached</option>
-        <option value="history">History</option>
-      </param>
-      <when value="cached">
-        <repeat name="input_bams" title="Sample BAM file" min="1">
-            <param name="input_bam" type="data" format="bam" label="BAM file">
-              <validator type="unspecified_build" />
-              <validator type="dataset_metadata_in_file" filename="picard_index.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
-            </param>
-        </repeat>
-        <param name="ref_file" type="select" label="Using reference genome">
-          <options from_data_table="picard_indexes">
-            <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...--> 
-          </options>
-        </param>
-      </when>
-      <when value="history"> <!-- FIX ME!!!! -->
-        <repeat name="input_bams" title="Sample BAM file" min="1">
-            <param name="input_bam" type="data" format="bam" label="BAM file" />
-        </repeat>
-        <param name="ref_file" type="data" format="fasta" label="Using reference file" />
-      </when>
-    </conditional>
-    
-    <param name="input_dbsnp_rod" type="data" format="gatk_dbsnp" optional="True" label="dbSNP reference ordered data (ROD)" />
-    <repeat name="rod_bind" title="Binding for reference-ordered data">
-        <conditional name="rod_bind_type">
-	      <param name="rod_bind_type_selector" type="select" label="Binding Type">
-	        <option value="snps" selected="True">SNPs</option>
-	        <option value="indels">INDELs</option>
-	        <option value="custom">Custom</option>
-	      </param>
-          <when value="snps">
-              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-          </when>
-          <when value="indels">
-              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-          </when>
-          <when value="custom">
-              <param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/>
-              <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
-              <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
-          </when>
-        </conditional>
-    </repeat>
-    
-    <param name="standard_min_confidence_threshold_for_calling" type="float" value="30.0" label="The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called" />
-    <param name="standard_min_confidence_threshold_for_emitting" type="float" value="30.0" label="The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)" />
-
-    
-    <conditional name="gatk_param_type">
-      <param name="gatk_param_type_selector" type="select" label="Basic or Advanced GATK options">
-        <option value="basic" selected="True">Basic</option>
-        <option value="advanced">Advanced</option>
-      </param>
-      <when value="basic">
-        <!-- Do nothing here -->
-      </when>
-      <when value="advanced">
-        <repeat name="sample_metadata" title="Sample Metadata">
-            <param name="sample_metadata_file" type="data" format="txt" label="Sample file(s) in JSON format" />
-        </repeat>
-        <repeat name="read_filter" title="Read Filter">
-            <conditional name="read_filter_type">
-		      <param name="read_filter_type_selector" type="select" label="Read Filter Type">
-		        <option value="MaxReadLength" selected="True">MaxReadLength</option>
-		        <option value="ZeroMappingQualityRead">ZeroMappingQualityRead</option>
-		      </param>
-	          <when value="ZeroMappingQualityRead">
-	              <!-- no extra options -->
-	          </when>
-	          <when value="MaxReadLength">
-	              <param name="maxReadLength" type="integer" value="76" label="Max Read Length"/>
-	          </when>
-            </conditional>
-        </repeat>
-        <param name="input_intervals" type="data" format="picard_interval_list" optional="True" label="A list of genomic intervals over which to operate" />
-        <param name="input_exclude_intervals" type="data" format="picard_interval_list" optional="True" label="A list of genomic intervals to exclude from processing" />
-        
-        <param name="BTI_merge_rule" type="select" label="BTI merge rule">
-          <option value="UNION" selected="True">UNION</option>
-          <option value="INTERSECTION">INTERSECTION</option>
-        </param>
-        
-        <conditional name="downsampling_type">
-          <param name="downsampling_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
-            <option value="NONE" selected="True">NONE</option>
-            <option value="ALL_READS">ALL_READS</option>
-            <option value="BY_SAMPLE">BY_SAMPLE</option>
-          </param>
-          <when value="NONE">
-	          <!-- no more options here -->
-	      </when>
-          <when value="ALL_READS">
-	          <conditional name="downsample_to_type">
-	              <param name="downsample_to_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
-	                  <option value="downsample_to_fraction" selected="True">Downsample by Fraction</option>
-	                  <option value="downsample_to_coverage">Downsample by Coverage</option>
-	              </param>
-	              <when value="downsample_to_fraction">
-	                  <param name="downsample_to_value" type="float" label="Fraction [0.0-1.0] of reads to downsample to" value="0.1"/>
-	              </when>
-	              <when value="downsample_to_coverage">
-	                  <param name="downsample_to_value" type="integer" label="Coverage to downsample to at any given locus" value="0"/>
-	              </when>
-	          </conditional>
-	      </when>
-          <when value="BY_SAMPLE">
-	          <conditional name="downsample_to_type">
-	              <param name="downsample_to_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
-	                  <option value="downsample_to_fraction" selected="True">Downsample by Fraction</option>
-	                  <option value="downsample_to_coverage">Downsample by Coverage</option>
-	              </param>
-	              <when value="downsample_to_fraction">
-	                  <param name="downsample_to_value" type="float" label="Fraction [0.0-1.0] of reads to downsample to" value="0.1"/>
-	              </when>
-	              <when value="downsample_to_coverage">
-	                  <param name="downsample_to_value" type="integer" label="Coverage to downsample to at any given locus" value="0"/>
-	              </when>
-	          </conditional>
-	      </when>
-        </conditional>
-        <param name="baq" type="select" label="Type of BAQ calculation to apply in the engine">
-          <option value="OFF" selected="True">OFF</option>
-          <option value="CALCULATE_AS_NECESSARY">CALCULATE_AS_NECESSARY</option>
-          <option value="RECALCULATE">RECALCULATE</option>
-        </param>
-        <param name="baq_gap_open_penalty" type="integer" label="BAQ gap open penalty (Phred Scaled)" value="40" help="Default value is 40. 30 is perhaps better for whole genome call sets."/>
-        <param name="use_original_qualities" type="boolean" truevalue="--useOriginalQualities" falsevalue="" label="Use the original base quality scores from the OQ tag" />
-        <param name="default_base_qualities" type="integer" label="Value to be used for all base quality scores, when some are missing" value="-1"/>
-        <param name="validation_strictness" type="select" label="How strict should we be with validation">
-          <option value="STRICT" selected="True">STRICT</option>
-          <option value="LENIENT">LENIENT</option>
-          <option value="SILENT">SILENT</option>
-        </param>
-        <param name="interval_merging" type="select" label="Interval merging rule">
-          <option value="ALL" selected="True">ALL</option>
-          <option value="OVERLAPPING_ONLY">OVERLAPPING_ONLY</option>
-        </param>
-        <param name="read_group_black_list" type="data" format="txt" optional="True" label="Read group black list" />
-      </when>
-    </conditional>
-    
-    <conditional name="analysis_param_type">
-      <param name="analysis_param_type_selector" type="select" label="Basic or Advanced Analysis options">
-        <option value="basic" selected="True">Basic</option>
-        <option value="advanced">Advanced</option>
-      </param>
-      <when value="basic">
-        <!-- Do nothing here -->
-      </when>
-      <when value="advanced">
-        <param name="genotype_likelihoods_model" type="select" label="Genotype likelihoods calculation model to employ">
-          <option value="BOTH" selected="True">BOTH</option>
-          <option value="SNP">SNP</option>
-          <option value="INDEL">INDEL</option>
-        </param>
-        <param name="p_nonref_model" type="select" label="Non-reference probability calculation model to employ">
-          <option value="EXACT" selected="True">EXACT</option>
-          <option value="GRID_SEARCH">GRID_SEARCH</option>
-        </param>
-        <param name="heterozygosity" type="float" value="1e-3" label="Heterozygosity value used to compute prior likelihoods for any locus" />
-        <param name="pcr_error_rate" type="float" value="1e-4" label="The PCR error rate to be used for computing fragment-based likelihoods" />
-        <param name="genotyping_mode" type="select" label="How to determine the alternate allele to use for genotyping">
-          <option value="DISCOVERY" selected="True">DISCOVERY</option>
-          <option value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option>
-        </param>
-        <param name="output_mode" type="select" label="Should we output confident genotypes (i.e. including ref calls) or just the variants?">
-          <option value="EMIT_VARIANTS_ONLY" selected="True">EMIT_VARIANTS_ONLY</option>
-          <option value="EMIT_ALL_CONFIDENT_SITES">EMIT_ALL_CONFIDENT_SITES</option>
-          <option value="EMIT_ALL_SITES">EMIT_ALL_SITES</option>
-        </param>
-        <param name="noSLOD" type="boolean" truevalue="--noSLOD" falsevalue="" label="Do not calculate the SLOD" />
-        <param name="min_base_quality_score" type="integer" value="17" label="Minimum base quality required to consider a base for calling" />
-        <param name="min_mapping_quality_score" type="integer" value="20" label="Minimum read mapping quality required to consider a read for calling" />
-        <param name="max_deletion_fraction" type="float" value="0.05" label="Maximum fraction of reads with deletions spanning this locus for it to be callable" help="to disable, set to &lt; 0 or &gt; 1" />
-        <param name="min_indel_count_for_genotyping" type="integer" value="5" label="Minimum number of consensus indels required to trigger genotyping run" />
-        <param name="indel_heterozygosity" type="float" value="0.000125" label="Heterozygosity for indel calling" help="1.0/8000==0.000125"/>
-        <param name="indelGapContinuationPenalty" type="float" value="10.0" label="Indel gap continuation penalty" />
-        <param name="indelGapOpenPenalty" type="float" value="45.0" label="Indel gap open penalty" />
-        <param name="indelHaplotypeSize" type="integer" value="80" label="Indel haplotype size" />
-        <param name="doContextDependentGapPenalties" type="boolean" truevalue="--doContextDependentGapPenalties" falsevalue="" label="Vary gap penalties by context" />
-	    <param name="annotation" type="select" multiple="True" display="checkboxes" label="Annotation Types">
-	      <option value="AlleleBalance">AlleleBalance</option>
-	      <option value="BaseQualityRankSumTest">BaseQualityRankSumTest</option>
-	      <option value="DepthOfCoverage">DepthOfCoverage</option>
-	      <option value="HomopolymerRun">HomopolymerRun</option>
-	      <option value="MappingQualityRankSumTest">MappingQualityRankSumTest</option>
-	      <option value="MappingQualityZero">MappingQualityZero</option>
-	      <option value="QualByDepth">QualByDepth</option>
-	      <option value="RMSMappingQuality">RMSMappingQuality</option>
-	      <option value="SpanningDeletions">SpanningDeletions</option>
-	      <option value="HaplotypeScore">HaplotypeScore</option>
-	    </param>
-	    <param name="group" type="select" multiple="True" display="checkboxes" label="Annotation Interfaces/Groups">
-	      <option value="Standard">Standard</option>
-	      <option value="Experimental">Experimental</option>
-	      <option value="WorkInProgress">WorkInProgress</option>
-	      <!-- <option value="none">none</option> -->
-	    </param>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (VCF)" />
-    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
-  </outputs>
-  <tests>
-      <test>
-          <param name="reference_source_selector" value="history" />
-          <param name="ref_file" value="phiX.fasta" ftype="fasta" />
-          <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" />
-          <param name="input_dbsnp_rod"  />
-          <param name="rod_bind_type_selector" value="snps" />
-          <param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" />
-          <param name="rodToIntervalTrackName" />
-          <param name="standard_min_confidence_threshold_for_calling" value="4" />
-          <param name="standard_min_confidence_threshold_for_emitting" value="4" />
-          <param name="gatk_param_type_selector" value="basic" />
-          <param name="analysis_param_type_selector" value="advanced" />
-          <param name="genotype_likelihoods_model" value="BOTH" />
-          <param name="p_nonref_model" value="EXACT" />
-          <param name="heterozygosity" value="0.001" />
-          <param name="pcr_error_rate" value="0.0001" />
-          <param name="genotyping_mode" value="DISCOVERY" />
-          <param name="output_mode" value="EMIT_ALL_CONFIDENT_SITES" />
-          <param name="noSLOD" />
-          <param name="min_base_quality_score" value="17" />
-          <param name="min_mapping_quality_score" value="20" />
-          <param name="max_deletion_fraction" value="-1" />
-          <param name="min_indel_count_for_genotyping" value="2" />
-          <param name="indel_heterozygosity" value="0.000125" />
-          <param name="indelGapContinuationPenalty" value="10" />
-          <param name="indelGapOpenPenalty" value="3" />
-          <param name="indelHaplotypeSize" value="80" />
-          <param name="doContextDependentGapPenalties" />
-          <!-- <param name="annotation" value="" />
-          <param name="group" value="" /> -->
-          <output name="output_interval" file="gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.vcf" lines_diff="2"/> 
-          <output name="output_log" file="gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.log.contains" compare="contains"/>
-      </test>
-  </tests>
-  <help>
-**What it does**
-
-     A variant caller which unifies the approaches of several disparate callers. It works for both single-sample
-     and multi-sample data, and the user can choose from several different incorporated calculation models.
-
-------
-
-Please cite the website "http://addlink.here" as well as:
-
-Add citation here 2011.
-
-------
-
-**Input formats**
-
-GenomeAnalysisTK: UnifiedGenotyper accepts an aligned BAM input file.
-
-------
-
-**Outputs**
-
-The output is in VCF format, see http://addlink.here for more details.
-
--------
-
-**Settings**::
-
- genotype_likelihoods_model                         Genotype likelihoods calculation model to employ -- BOTH is the default option, while INDEL is also available for calling indels and SNP is available for calling SNPs only (SNP|INDEL|BOTH)
- p_nonref_model                                     Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available. (EXACT|GRID_SEARCH)
- heterozygosity                                     Heterozygosity value used to compute prior likelihoods for any locus
- pcr_error_rate                                     The PCR error rate to be used for computing fragment-based likelihoods
- genotyping_mode                                    How to determine the alternate allele to use for genotyping (DISCOVERY|GENOTYPE_GIVEN_ALLELES)
- output_mode                                        Should we output confident genotypes (i.e. including ref calls) or just the variants? (EMIT_VARIANTS_ONLY|EMIT_ALL_CONFIDENT_SITES|EMIT_ALL_SITES)
- standard_min_confidence_threshold_for_calling      The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called
- standard_min_confidence_threshold_for_emitting     The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)
- noSLOD                                             If provided, we will not calculate the SLOD
- min_base_quality_score                             Minimum base quality required to consider a base for calling
- min_mapping_quality_score                          Minimum read mapping quality required to consider a read for calling
- max_deletion_fraction                              Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to &lt; 0 or &gt; 1; default:0.05]
- min_indel_count_for_genotyping                     Minimum number of consensus indels required to trigger genotyping run
- indel_heterozygosity                               Heterozygosity for indel calling
- indelGapContinuationPenalty                        Indel gap continuation penalty
- indelGapOpenPenalty                                Indel gap open penalty
- indelHaplotypeSize                                 Indel haplotype size
- doContextDependentGapPenalties                     Vary gap penalties by context
- indel_recal_file                                   Filename for the input covariates table recalibration .csv file -- EXPERIMENTAL, DO NOT USE
- indelDebug                                         Output indel debug info
- out                                                File to which variants should be written
- annotation                                         One or more specific annotations to apply to variant calls
- group                                              One or more classes/groups of annotations to apply to variant calls
-
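-------
-
-**Example**
-
-An illustrative sketch (not part of this tool) of how the calling and emitting thresholds
-interact: sites at or above the calling threshold are called, sites between the emitting and
-calling thresholds are emitted but marked as filtered, and sites below the emitting threshold
-are not written at all. Names are hypothetical::
-
-  def classify_site(confidence, call_threshold=30.0, emit_threshold=30.0):
-      """Classify a site by its phred-scaled confidence."""
-      if confidence >= call_threshold:
-          return "PASS"       # called
-      if confidence >= emit_threshold:
-          return "LowQual"    # emitted but filtered
-      return None             # not emitted
-
-  print classify_site(45.0)             # PASS
-  print classify_site(10.0, 30.0, 4.0)  # LowQual
-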
-  </help>
-</tool>
--- a/tools/genetrack/genetrack_indexer.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Wraps genetrack.scripts.tabs2genetrack so the tool can be executed from Galaxy.
-
-usage: %prog input output shift
-"""
-
-import sys, shutil, os
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "GeneTrack" )
-
-from genetrack.scripts import tabs2genetrack
-from genetrack import logger
-
-if __name__ == "__main__":
-    os.environ[ 'LC_ALL' ] = 'C'
-    #os.system( 'export' )
-    
-    parser = tabs2genetrack.option_parser()
-
-    options, args = parser.parse_args()
-
-    # uppercase the format
-    options.format = options.format.upper()
-
-    if options.format not in ('BED', 'GFF'):
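-        # route the usage message to stderr before printing help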
-        sys.stdout = sys.stderr
-        parser.print_help()
-        sys.exit(-1)
-
-    logger.disable(options.verbosity)
-
-    # missing file names
-    if not (options.inpname and options.outname and options.format):
-        parser.print_help()
-        sys.exit(-1)
-    else:
-        tabs2genetrack.transform(inpname=options.inpname, outname=options.outname,\
-            format=options.format, shift=options.shift, index=options.index, options=options)
--- a/tools/genetrack/genetrack_indexer.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-<tool id="bed2genetrack" name="GeneTrack indexer" version="1.0.1">
-  
-  <description>on a BED file</description>
-
-  <command interpreter="python">
-    genetrack_indexer.py -i $input -o $output -s $shift -v 0 -f BED -x
-  </command>
-    
-  <inputs>
-    
-    <param format="bed6" name="input" type="data" help="Input data">
-      <label>Select input bed file</label>
-    </param>
-    
-    <param name="shift" size="4" type="integer" value="0" help="distance in basepairs">
-        <label>Shift at 5' end</label>
-    </param>
-
-    <!-- this parameter is currently not used, may not be feasible to use it
-    <param name="coverage" type="select" label="Full coverage">
-      <option value="no">NO</option>
-      <option value="yes">YES</option>
-    </param>
-    -->
-  
-  </inputs>
-
-  <outputs>  
-    <data format="genetrack" name="output" />
-  </outputs>
-   
-<help>
-**Help**
-
-This tool creates a GeneTrack index from the selected BED file.
-
-**Parameters**
-
-- **Shift at 5' end** should be used when the location of interest is at a fixed distance from
-  the 5' end for **all sequenced fragments**.
-
-  For example, if the sequenced sample consists of
-  mono-nucleosomal DNA (146bp), we should expect
-  each nucleosome midpoint to be located 73 bp from the 5' end of the fragment.
-  Therefore we would enter 73 as the shift parameter. Once corrected, the reads
-  on each strand will coincide and indicate the actual midpoints
-  of the nucleosomes, as illustrated in the example below.
-
-  When shifting, the averaging process in GeneTrack is able to correct for longer or shorter
-  than expected fragment sizes, as long as the errors are reasonably random.
-
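-**Example**
-
-An illustrative sketch (not part of this tool) of the 5' shift described above: reads on the
-forward strand are shifted downstream and reads on the reverse strand upstream, so that after
-shifting both strands coincide at the fragment midpoints. Names are hypothetical::
-
-  def shift_read(start, end, strand, shift=73):
-      """Shift a read by 'shift' bp from its 5' end toward the fragment midpoint."""
-      if strand == '+':
-          return start + shift, end + shift
-      return start - shift, end - shift
-
-  print shift_read(100, 136, '+')  # (173, 209)
-  print shift_read(300, 336, '-')  # (227, 263)
-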
-</help>
-
-</tool>
--- a/tools/genetrack/genetrack_peak_prediction.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Wraps genetrack.scripts.peakpred so the tool can be executed from Galaxy.
-
-usage: %prog input output level sigma mode exclusion strand
-"""
-
-import sys
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "GeneTrack" )
-
-from genetrack.scripts import peakpred
-from genetrack import logger
-
-if __name__ == "__main__":
-
-    parser = peakpred.option_parser()
-
-    options, args = parser.parse_args()
-
-    logger.disable(options.verbosity)
-
-    from genetrack import conf
-
-    # trigger test mode
-    if options.test:
-        options.inpname = conf.testdata('test-hdflib-input.gtrack')
-        options.outname = conf.testdata('predictions.bed')
-
-    # missing file names
-    if not (options.inpname and options.outname):
-        parser.print_help()
-    else:
-        print 'Sigma = %s' % options.sigma
-        print 'Minimum peak = %s' % options.level
-        print 'Peak-to-peak = %s' % options.exclude
-
-        peakpred.predict(options.inpname, options.outname, options)
--- a/tools/genetrack/genetrack_peak_prediction.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-<tool id="predict2genetrack" name="Peak predictor">
-  
-  <description>on GeneTrack index</description>
-
-  <command interpreter="python">
-      genetrack_peak_prediction.py -i $input -o $output --level=$level --sigma=$sigma --mode=$mode --exclusion=$exclusion --strand=$strand -v 0 -x
-  </command>
-    
-  <inputs>
-    
-    <param format="genetrack" name="input" type="data" help="Input data" label="Select input data"/>
- 
-    <param name="method" type="select" label="Smoothing method" help="The function used to average nearby read values">
-      <option value="gauss">Gaussian kernel</option>
-      <!-- <option value="yes">Moving averages</option> -->
-    </param>
-  
-    <param name="sigma" size="4" type="integer" value="10" label="Smoothing factor" help="The interval over which each read is averaged" />
-        
-
-    <param name="mode" type="select" label="Peak prediction" help="Peak prediction method"> 
-      <option value="nolap">Maximal non-overlapping</option>
-      <!-- <option value="above">Above a threshold</option> -->
-      <option value="all">All peaks</option>
-    </param>
-  
-    <param name="exclusion" type="integer" size="4" value="0" help="The minimal distance between peaks"  label="Peak-to-peak distance">
-    </param>
-
-    <param name="level" size="4" type="float" value="1" label="Threshold" help="Return only peaks above this value" />
-    
-    <param name="strand" type="select" label="Strands" help="Combine strand data or predict on each strand separately">
-      <option value="all">Merge strands</option>
-      <!-- <option value="yes1">Above a threshold</option> -->
-      <option value="two">Separate strands</option>
-    </param>
-
-  </inputs>
-
-  <outputs>  
-    <data format="bed" name="output" />
-  </outputs>
-   
-<help>
-**Help**
-
-This tool generates genome-wide peak predictions from a GeneTrack index file.
-
-**Parameters**
-
-- **Smoothing method** the function used to average nearby read values
-
-- **Smoothing factor** the width (sigma) of the smoothing kernel
-
-- **Peak prediction** the method used to identify peaks in the smoothed signal
-
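-**Example**
-
-An illustrative sketch (not part of this tool) of Gaussian smoothing followed by maximal
-non-overlapping peak selection, the two steps this tool combines. Names are hypothetical and
-numpy is assumed to be available::
-
-  import numpy
-
-  def smooth(coverage, sigma=10):
-      """Convolve per-base read counts with a normalized Gaussian kernel."""
-      radius = 3 * sigma
-      xs = numpy.arange(-radius, radius + 1)
-      kernel = numpy.exp(-xs ** 2 / (2.0 * sigma ** 2))
-      return numpy.convolve(coverage, kernel / kernel.sum(), mode='same')
-
-  def predict_peaks(signal, level=1.0, exclusion=0):
-      """Greedily keep the highest positions, enforcing a minimal peak-to-peak distance."""
-      peaks = []
-      for pos in numpy.argsort(signal)[::-1]:
-          if signal[pos] < level:
-              break
-          if all(abs(pos - p) > exclusion for p in peaks):
-              peaks.append(int(pos))
-      return sorted(peaks)
-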
-</help>
-
-</tool>
--- a/tools/genome_diversity/cdblib.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,230 +0,0 @@
-#!/usr/bin/env python2.5
-
-'''
-Manipulate DJB's Constant Databases. These are 2 level disk-based hash tables
-that efficiently handle many keys, while remaining space-efficient.
-
-    http://cr.yp.to/cdb.html
-
-When generated databases are only used with Python code, consider using hash()
-rather than djb_hash() for a tidy speedup.
-'''
-
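-# Illustrative usage sketch (the finalize step below is assumed, not shown in this excerpt):
-#
-#   fp = open('data.cdb', 'wb')
-#   writer = Writer(fp)
-#   writer.put('key', 'value')
-#   writer.finalize()  # assumed: writes the slot tables and the 2048-byte header
-#   fp.close()
-#
-#   reader = Reader(open('data.cdb', 'rb').read())
-#   print reader.get('key')  # -> 'value'
-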
-from _struct import Struct
-from itertools import chain
-
-
-def py_djb_hash(s):
-    '''Return the value of DJB's hash function for the given 8-bit string.'''
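-    # Equivalent to h = (h * 33) ^ byte for each byte, masked to 32 bits ((h << 5) + h == h * 33).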
-    h = 5381
-    for c in s:
-        h = (((h << 5) + h) ^ ord(c)) & 0xffffffff
-    return h
-
-try:
-    from _cdblib import djb_hash
-except ImportError:
-    djb_hash = py_djb_hash
-
-read_2_le4 = Struct('<LL').unpack
-write_2_le4 = Struct('<LL').pack
-
-
-class Reader(object):
-    '''A dictionary-like object for reading a Constant Database accessed
-    through a string or string-like sequence, such as mmap.mmap().'''
-
-    def __init__(self, data, hashfn=djb_hash):
-        '''Create an instance reading from a sequence and using hashfn to hash
-        keys.'''
-        if len(data) < 2048:
-            raise IOError('CDB too small')
-
-        self.data = data
-        self.hashfn = hashfn
-
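-        # The fixed 2048-byte header holds 256 little-endian (table position, slot count) pairs.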
-        self.index = [read_2_le4(data[i:i+8]) for i in xrange(0, 2048, 8)]
-        self.table_start = min(p[0] for p in self.index)
-        # Assume the load factor is 0.5, as in the official CDB implementation.
-        self.length = sum(p[1] >> 1 for p in self.index)
-
-    def iteritems(self):
-        '''Like dict.iteritems(). Items are returned in insertion order.'''
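-        # Records follow the header sequentially: an 8-byte (key length,
-        # data length) pair, then the key bytes, then the data bytes.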
-        pos = 2048
-        while pos < self.table_start:
-            klen, dlen = read_2_le4(self.data[pos:pos+8])
-            pos += 8
-
-            key = self.data[pos:pos+klen]
-            pos += klen
-
-            data = self.data[pos:pos+dlen]
-            pos += dlen
-
-            yield key, data
-
-    def items(self):
-        '''Like dict.items().'''
-        return list(self.iteritems())
-
-    def iterkeys(self):
-        '''Like dict.iterkeys().'''
-        return (p[0] for p in self.iteritems())
-    __iter__ = iterkeys
-
-    def itervalues(self):
-        '''Like dict.itervalues().'''
-        return (p[1] for p in self.iteritems())
-
-    def keys(self):
-        '''Like dict.keys().'''
-        return [p[0] for p in self.iteritems()]
-
-    def values(self):
-        '''Like dict.values().'''
-        return [p[1] for p in self.iteritems()]
-
-    def __getitem__(self, key):
-        '''Like dict.__getitem__().'''
-        value = self.get(key)
-        if value is None:
-            raise KeyError(key)
-        return value
-
-    def has_key(self, key):
-        '''Return True if key exists in the database.'''
-        return self.get(key) is not None
-    __contains__ = has_key
-
-    def __len__(self):
-        '''Return the number of records in the database.'''
-        return self.length
-
-    def gets(self, key):
-        '''Yield values for key in insertion order.'''
-        # Truncate to 32 bits and remove sign.
-        h = self.hashfn(key) & 0xffffffff
-        start, nslots = self.index[h & 0xff]
-
-        if nslots:
-            end = start + (nslots << 3)
-            slot_off = start + (((h >> 8) % nslots) << 3)
-
-            for pos in chain(xrange(slot_off, end, 8),
-                             xrange(start, slot_off, 8)):
-                rec_h, rec_pos = read_2_le4(self.data[pos:pos+8])
-
-                if not rec_h:
-                    break
-                elif rec_h == h:
-                    klen, dlen = read_2_le4(self.data[rec_pos:rec_pos+8])
-                    rec_pos += 8
-
-                    if self.data[rec_pos:rec_pos+klen] == key:
-                        rec_pos += klen
-                        yield self.data[rec_pos:rec_pos+dlen]
-
-    def get(self, key, default=None):
-        '''Get the first value for key, returning default if missing.'''
-        # Avoid exception catch when handling default case; much faster.
-        return chain(self.gets(key), (default,)).next()
-
-    def getint(self, key, default=None, base=0):
-        '''Get the first value for key, converted to an int, returning
-        default if missing.'''
-        value = self.get(key, default)
-        if value is not default:
-            return int(value, base)
-        return value
-
-    def getints(self, key, base=0):
-        '''Yield values for key in insertion order after converting to int.'''
-        return (int(v, base) for v in self.gets(key))
-
-    def getstring(self, key, default=None, encoding='utf-8'):
-        '''Get the first value for key decoded as unicode, returning default if
-        not found.'''
-        value = self.get(key, default)
-        if value is not default:
-            return value.decode(encoding)
-        return value
-
-    def getstrings(self, key, encoding='utf-8'):
-        '''Yield values for key in insertion order after decoding as
-        unicode.'''
-        return (v.decode(encoding) for v in self.gets(key))
-
-
-class Writer(object):
-    '''Object for building new Constant Databases, and writing them to a
-    seekable file-like object.'''
-
-    def __init__(self, fp, hashfn=djb_hash):
-        '''Create an instance writing to a file-like object, using hashfn to
-        hash keys.'''
-        self.fp = fp
-        self.hashfn = hashfn
-
-        fp.write('\x00' * 2048)
-        self._unordered = [[] for i in xrange(256)]
-
-    def put(self, key, value=''):
-        '''Write a string key/value pair to the output file.'''
-        assert type(key) is str and type(value) is str
-
-        pos = self.fp.tell()
-        self.fp.write(write_2_le4(len(key), len(value)))
-        self.fp.write(key)
-        self.fp.write(value)
-
-        h = self.hashfn(key) & 0xffffffff
-        self._unordered[h & 0xff].append((h, pos))
-
-    def puts(self, key, values):
-        '''Write more than one value for the same key to the output file.
-        Equivalent to calling put() in a loop.'''
-        for value in values:
-            self.put(key, value)
-
-    def putint(self, key, value):
-        '''Write an integer as a base-10 string associated with the given key
-        to the output file.'''
-        self.put(key, str(value))
-
-    def putints(self, key, values):
-        '''Write zero or more integers for the same key to the output file.
-        Equivalent to calling putint() in a loop.'''
-        self.puts(key, (str(value) for value in values))
-
-    def putstring(self, key, value, encoding='utf-8'):
-        '''Write a unicode string associated with the given key to the output
-        file, encoding it with the given encoding (UTF-8 by default).'''
-        self.put(key, unicode.encode(value, encoding))
-
-    def putstrings(self, key, values, encoding='utf-8'):
-        '''Write zero or more unicode strings to the output file. Equivalent to
-        calling putstring() in a loop.'''
-        self.puts(key, (unicode.encode(value, encoding) for value in values))
-
-    def finalize(self):
-        '''Write the final hash tables to the output file, and write out its
-        index. The output file remains open upon return.'''
-        index = []
-        for tbl in self._unordered:
-            length = len(tbl) << 1
-            ordered = [(0, 0)] * length
-            for pair in tbl:
-                where = (pair[0] >> 8) % length
-                for i in chain(xrange(where, length), xrange(0, where)):
-                    if not ordered[i][0]:
-                        ordered[i] = pair
-                        break
-
-            index.append((self.fp.tell(), length))
-            for pair in ordered:
-                self.fp.write(write_2_le4(*pair))
-
-        self.fp.seek(0)
-        for pair in index:
-            self.fp.write(write_2_le4(*pair))
-        self.fp = None # prevent double finalize()
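For orientation, here is a minimal round-trip sketch of the Reader/Writer API deleted above. It is Python 2 (matching the module), assumes cdblib.py is importable, and the key and values are hypothetical::

    from StringIO import StringIO
    import cdblib

    fp = StringIO()
    writer = cdblib.Writer(fp)           # writes the 2048-byte index placeholder
    writer.put('color', 'red')
    writer.put('color', 'blue')          # duplicate keys are allowed
    writer.finalize()                    # writes the hash tables and the index

    reader = cdblib.Reader(fp.getvalue())
    print reader.get('color')            # 'red' (first value in insertion order)
    print list(reader.gets('color'))     # ['red', 'blue']
    print len(reader)                    # 2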
--- a/tools/genome_diversity/extract_flanking_dna.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-#!/usr/bin/env python2.5
-
-import os
-import sys
-import traceback
-from optparse import OptionParser
-import genome_diversity as gd
-
-def main_function( parse_arguments=None ):
-    if parse_arguments is None:
-        parse_arguments = lambda arguments: ( None, arguments )
-    def main_decorator( to_decorate ):
-        def decorated_main( arguments=None ):
-            if arguments is None:
-                arguments = sys.argv
-            options, arguments = parse_arguments( arguments )
-            rc = 1
-            try:
-                rc = to_decorate( options, arguments )
-            except Exception, err:
-                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
-                traceback.print_exc()
-            finally:
-                sys.exit( rc )
-        return decorated_main
-    return main_decorator
-
-def parse_arguments( arguments ):
-    parser = OptionParser()
-    parser.add_option('--input',
-                        type='string', dest='input',
-                        help='file of selected SNPs')
-    parser.add_option('--output',
-                        type='string', dest='output',
-                        help='output file')
-    parser.add_option('--snps_loc',
-                        type='string', dest='snps_loc',
-                        help='snps .loc file')
-    parser.add_option('--scaffold_col',
-                        type="int", dest='scaffold_col',
-                        help='scaffold column in the input file')
-    parser.add_option('--pos_col',
-                        type="int", dest='pos_col',
-                        help='position column in the input file')
-    parser.add_option('--output_format',
-                        type="string", dest='output_format',
-                        help='output format, fasta or primer3')
-    parser.add_option('--species',
-                        type="string", dest='species',
-                        help='species')
-    return parser.parse_args( arguments[1:] )
-
-
-@main_function( parse_arguments )
-def main( options, arguments ):
-    if not options.input:
-        raise RuntimeError( 'missing --input option' )
-    if not options.output:
-        raise RuntimeError( 'missing --output option' )
-    if not options.snps_loc:
-        raise RuntimeError( 'missing --snps_loc option' )
-    if not options.scaffold_col:
-        raise RuntimeError( 'missing --scaffold_col option' )
-    if not options.pos_col:
-        raise RuntimeError( 'missing --pos_col option' )
-    if not options.output_format:
-        raise RuntimeError( 'missing --output_format option' )
-    if not options.species:
-        raise RuntimeError( 'missing --species option' )
-    
-    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )
-
-    out_fh = gd._openfile( options.output, 'w' )
-
-    snpcalls_file = gd.get_filename_from_loc( options.species, options.snps_loc )
-    file_root, file_ext = os.path.splitext( snpcalls_file )
-    snpcalls_index_file = file_root + ".cdb"
-    snpcalls = gd.SnpcallsFile( data_file=snpcalls_file, index_file=snpcalls_index_file )
-
-    while snps.next():
-        seq, pos = snps.get_seq_pos()
-        flanking_dna = snpcalls.get_flanking_dna( sequence=seq, position=pos, format=options.output_format )
-        if flanking_dna:
-            out_fh.write( flanking_dna )
-
-    out_fh.close()
-
-if __name__ == "__main__":
-    main()
-
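The main_function() decorator above is boilerplate shared by several of the deleted scripts: it wraps a plain main(options, arguments) so that uncaught exceptions become stderr messages and the return value becomes the process exit status. A minimal sketch of the pattern, with a hypothetical --name option, assuming main_function and OptionParser as defined above::

    def parse_arguments(arguments):
        parser = OptionParser()
        parser.add_option('--name', type='string', dest='name')
        return parser.parse_args(arguments[1:])

    @main_function(parse_arguments)
    def main(options, arguments):
        if not options.name:
            raise RuntimeError('missing --name option')  # reported on stderr
        print 'hello, %s' % options.name
        return 0                                         # becomes the exit status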
--- a/tools/genome_diversity/extract_flanking_dna.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,93 +0,0 @@
-<tool id="gd_extract_flanking_dna" name="Extract" version="1.0.0">
-  <description>DNA flanking chosen SNPs</description>
-
-  <command interpreter="python2.5">
-    extract_flanking_dna.py "--input=$input" "--output=$output" "--snps_loc=${GALAXY_DATA_INDEX_DIR}/gd.snps.loc"
-    #if $override_metadata.choice == "0":
-      "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}"
-    #else
-      "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species"
-    #end if
-    "--output_format=$output_format"
-  </command>
-
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/>
-    <param name="output_format" type="select" format="integer" label="output format">
-        <option value="fasta" selected="true">FastA format</option>
-        <option value="primer3">Primer3 input</option>
-    </param>
-    <conditional name="override_metadata">
-      <param name="choice" type="select" format="integer" label="choose columns">
-        <option value="0" selected="true">No, get columns from metadata</option>
-        <option value="1" >Yes, choose columns</option>
-      </param>
-      <when value="0">
-        <!-- no options -->
-      </when>
-      <when value="1">
-        <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/>
-        <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/>
-        <param name="species" type="select" label="Choose species">
-          <options from_file="gd.species.txt">
-            <column name="name" index="1"/>
-            <column name="value" index="0"/>
-          </options>
-        </param>
-      </when>
-    </conditional>
-  </inputs>
-
-  <outputs>
-    <data format="txt" name="output"/>
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" value="gd.sample.wsf" ftype="wsf"/>
-      <param name="output_format" value="primer3"/>
-      <param name="choice" value="0"/>
-      <output name="output" file="gd.extract_flanking_dna.txt"/>
-    </test>
-  </tests>
-
-  <help>
-**What it does**
-
-  It reports a DNA segment containing each SNP, with up to 200 nucleotides on
-  either side of the SNP position, which is indicated by "n". Fewer nucleotides
-  are reported if the SNP is near an end of the assembled genome fragment.
-
------
-
-**Example**
-
-- input file::
-
-    chr2_75111355_75112576    314  A  C  L  F  chr2   75111676  C  F  15  4  53   2   9  48   Y  96   0.369  0.355  0.396  0
-    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
-    chr10_7434473_7435447    524   T  C  S  S  chr10  7435005   T  S  11  5  90   14  0  69   Y  626  0.066  0.406  0.727  0
-    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
-    chr15_64470252_64471048  89    G  A  Y  Y  chr15  64470341  G  Y  5   6  109  14  0  69   Y  312  0.247  0.998  0.393  0
-    chr18_48070585_48071386  514   C  T  E  K  chr18  48071100  T  K  7   7  46   14  0  69   Y  2    0.200  0.032  0.163  0
-    chr18_50154905_50155664  304   A  G  Y  C  chr18  50155208  A  Y  4   2  17   5   1  22   Y  8    0.022  0.996  0.128  0
-    chr18_57379354_57380496  315   C  T  V  V  chr18  57379669  G  V  11  0  60   9   6  62   Y  726  0.118  0.048  0.014  1
-    chr19_14240610_14242055  232   C  T  A  V  chr19  14240840  C  A  18  8  56   15  5  42   Y  73   0.003  0.153  0.835  0
-    chr19_39866997_39874915  3117  C  T  P  P  chr19  39870110  C  P  3   7  65   14  2  32   Y  6    0.321  0.911  0.462  4
-    etc.
-
-- output file::
-
-    > chr2_75111355_75112576 314 A C
-    TATCTTCATTTTTATTATAGACTCTCTGAACCAATTTGCCCTGAGGCAGACTTTTTAAAGTACTGTGTAATGTATGAAGTCCTTCTGCTCAAGCAAATCATTGGCATGAAAACAGTTGCAAACTTATTGTGAGAGAAGAGTCCAAGAGTTTTAACAGTCTGTAAGTATATAGCCTGTGAGTTTGATTTCCTTCTTGTTTTTnTTCCAGAAACATGATCAGGGGCAAGTTCTATTGGATATAGTCTTCAAGCATCTTGATTTGACTGAGCGTGACTATTTTGGTTTGCAGTTGACTGACGATTCCACTGATAACCCAGTAAGTTTAAGCTGTTGTCTTTCATTGTCATTGCAATTTTTCTGTCTTTATACTAGGTCCTTTCTGATTTACATTGTTCACTGATT
-    > chr8_93901796_93905612 2471 A C
-    GCTGCCGCTGGATTTACTTCTGCTTGGGTCGAGAGCGGGCTGGATGGGTGAAGAGTGGGCTCCCCGGCCCCTGACCAGGCAGGTGCAGACAAGTCGGAAGAAGGCCCGCCGCATCTCCTTGCTGGCCAGCGTGTAGATGACGGGGTTCATGGCAGAGTTGAGCACGGCCAGCACGATGAACCACTGGGCCTTGAACAGGATnGCGCACTCCTTCACCTTGCAGGCCACATCCACAAGGAAAAGGATGAAGAGTGGGGACCAGCAGGCGATGAACACGCTCACCACGATCACCACGGTCCGCAGCAGGGCCATGGACCGCTCTGAGTTGTGCGGGCTGGCCACCCTGCGGCTGCTGGACTTCACCAGGAAGTAGATGCGTGCGTACAGGATCACGATGGTCAC
-    > chr10_7434473_7435447 524 T C
-    ATTATTAACAGAAACATTTCTTTTTCATTACCCAGGGGTTACACTGGTCGTTGATGTTAATCAGTTTTTGGAGAAGGAGAAGCAAAGTGATATTTTGTCTGTTCTGAAGCCTGCCGTTGGTAATACAAATGACGTAATCCCTGAATGTGCTGACAGGTACCATGACGCCCTGGCAAAAGCAAAAGAGCAAAAATCTAGAAGnGGTAAGCATCTTCACTGTTTAGCACAAATTAAATAGCACTTTGAATATGATGATTTCTGTGGTATTGTGTTATCTTACTTTTGAGACAAATAATCGCTTTCAAATGAATATTTCTGAATGTTTGTCATCTCTGGCAAGGAAATTTTTTAGTGTTTCTTTTCCTTTTTTGTCTTTTGGAAATCTGTGATTAACTTGGTGGC
-    > chr14_80021455_80022064 138 G A
-    ACCCAGGGATCAAACCCAGGTCTCCCGCATTGCAGGCGGATTCTTTACTGTCTGAGCCTCCAGGGAAGCCCTCGGGGCTGAAGGGATGGTTATGAAGGTGAGAAACAGGGGCCACCTGTCCCCAAGGTACCTTGCGACnTGCCATCTGCGCTCCACCAGTAAATGGACGTCTTCGATCCTTCTGTTGTTGGCGTAGTGCAAACGTTTGGGAAGGTGCTGTTTCAAGTAAGGCTTAAAGTGCTGGTCTGGTTTTTTACACTGAAATATAAATGGACATTGGATTTTGCAATGGAGAGTCTTCTAGAAGAGTCCAAGACATTCTCTCCAGAAAGCTGAAGG
-    > chr15_64470252_64471048 89 G A
-    TGTGTGTGTGTGTGTGTGTGTGTGCCTGTGTCTGTACATGCACACCACGTGGCCTCACCCAGTGCCCTCAGCTCCATGGTGATGTCCACnTAGCCGTGCTCCGCGCTGTAGTACATGGCCTCCTGGAGGGCCTTGGTGCGCGTCCGGCTCAGGCGCATGGGCCCCTCGCTGCCGCTGCCCTGGCTGGATGCATCGCTCTCTTCCACGCCCTCAGCCAGGATCTCCTCCAGGGACAGCACATCTGCTTTGGCCTGCTGTGGCTGAGTCAGGAGCTTCCTCAGGACGTTCCT
-    etc.
-  </help>
-</tool>
--- a/tools/genome_diversity/extract_primers.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,84 +0,0 @@
-#!/usr/bin/env python2.5
-
-import os
-import sys
-import traceback
-from optparse import OptionParser
-import genome_diversity as gd
-
-def main_function( parse_arguments=None ):
-    if parse_arguments is None:
-        parse_arguments = lambda arguments: ( None, arguments )
-    def main_decorator( to_decorate ):
-        def decorated_main( arguments=None ):
-            if arguments is None:
-                arguments = sys.argv
-            options, arguments = parse_arguments( arguments )
-            rc = 1
-            try:
-                rc = to_decorate( options, arguments )
-            except Exception, err:
-                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
-                traceback.print_exc()
-            finally:
-                sys.exit( rc )
-        return decorated_main
-    return main_decorator
-
-def parse_arguments( arguments ):
-    parser = OptionParser()
-    parser.add_option('--input',
-                        type='string', dest='input',
-                        help='file of selected SNPs')
-    parser.add_option('--output',
-                        type='string', dest='output',
-                        help='output file')
-    parser.add_option('--primers_loc',
-                        type='string', dest='primers_loc',
-                        help='primers .loc file')
-    parser.add_option('--scaffold_col',
-                        type="int", dest='scaffold_col',
-                        help='scaffold column in the input file')
-    parser.add_option('--pos_col',
-                        type="int", dest='pos_col',
-                        help='position column in the input file')
-    parser.add_option('--species',
-                        type="string", dest='species',
-                        help='species')
-    return parser.parse_args( arguments[1:] )
-
-
-@main_function( parse_arguments )
-def main( options, arguments ):
-    if not options.input:
-        raise RuntimeError( 'missing --input option' )
-    if not options.output:
-        raise RuntimeError( 'missing --output option' )
-    if not options.primers_loc:
-        raise RuntimeError( 'missing --primers_loc option' )
-    if not options.scaffold_col:
-        raise RuntimeError( 'missing --scaffold_col option' )
-    if not options.pos_col:
-        raise RuntimeError( 'missing --pos_col option' )
-    if not options.species:
-        raise RuntimeError( 'missing --species option' )
-    
-    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )
-
-    out_fh = gd._openfile( options.output, 'w' )
-
-    primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc )
-    file_root, file_ext = os.path.splitext( primer_data_file )
-    primer_index_file = file_root + ".cdb"
-    primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file )
-
-    while snps.next():
-        seq, pos = snps.get_seq_pos()
-        primer = primers.get_entry( seq, pos )
-        if primer:
-            out_fh.write( primer )
-
-    out_fh.close()
-
-if __name__ == "__main__":
-    main()
-
--- a/tools/genome_diversity/extract_primers.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-<tool id="gd_extract_primers" name="Extract primers" version="1.0.0">
-  <description>for selected SNPs</description>
-
-  <command interpreter="python2.5">
-    extract_primers.py "--input=$input" "--output=$output" "--primers_loc=${GALAXY_DATA_INDEX_DIR}/gd.primers.loc"
-    #if $override_metadata.choice == "0":
-      "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}"
-    #else
-      "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species"
-    #end if
-  </command>
-
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/>
-    <conditional name="override_metadata">
-      <param name="choice" type="select" format="integer" label="choose columns">
-        <option value="0" selected="true">No, get columns from metadata</option>
-        <option value="1" >Yes, choose columns</option>
-      </param>
-      <when value="0">
-        <!-- no options -->
-      </when>
-      <when value="1">
-        <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/>
-        <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/>
-        <param name="species" type="select" label="Choose species">
-          <options from_file="gd.species.txt">
-            <column name="name" index="1"/>
-            <column name="value" index="0"/>
-          </options>
-        </param>
-      </when>
-    </conditional>
-  </inputs>
-
-  <outputs>
-    <data format="txt" name="output"/>
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" value="gd.sample.wsf" ftype="wsf"/>
-      <param name="choice" value="0"/>
-      <output name="output" file="gd.extract_primers.txt"/>
-    </test>
-  </tests>
-
-
-  <help>
-**What it does**
-
-  This tool extracts primers for SNPs in the dataset using the Primer3 program.
-  The first line of output for a given SNP reports the name of the assembled
-  contig, the SNP's position in the contig, the two variant nucleotides, and
-  Primer3's "pair penalty".  The next line, if not blank, names restriction
-  enzymes (from the user-adjustable list) that differentially cut at that
-  site, but do not cut at any other position between and including the
-  primer positions.  The next lines show the SNP's flanking regions, with
-  the SNP position indicated by "n", including the primer positions and an
-  additional 3 nucleotides.
-
------
-
-**Example**
-
-- input file::
-
-    chr5_30800874_30802049    734   G  A  chr5   30801606   A  24  0  99   4  11  97   Y  496  0.502  0.033  0.215  6
-    chr8_55117827_55119487    994   A  G  chr8   55118815   G  25  0  102  4  11  96   Y  22   0.502  0.025  2.365  1
-    chr9_100484836_100485311  355   C  T  chr9   100485200  T  27  0  108  6  17  100  Y  190  0.512  0.880  2.733  4
-    chr12_3635530_3637738     2101  T  C  chr12  3637630    T  25  0  102  4  13  93   Y  169  0.554  0.024  0.366  4
-
-- output file::
-
-    chr5_30800874_30802049 734 G A 0.352964
-     BglII,MboI,Sau3AI,Tru9I,XhoII
-      1 CTGAAGGTGAGCAGGATTCAGGAGACAGAAAACAAAGCCCAGGCCTGCCCAAGGTGGAAA
-           >>>>>>>>>>>>>>>>>>>>
-     
-     61 AGTCTAACAACTCGCCCTCTGCTTAnATCTGAGACTCACAGGGATAATAACACACTTGGT
-     
-     
-     21 CAAGGAATAAACTAGATATTATTCACTCCTCTAGAAGGCTGCCAGGAAAATTGCCTGACT
-                                                             &lt;&lt;&lt;&lt;&lt;&lt;&lt;
-     
-    181 TGAACCTTGGCTCTGA
-        &lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;
-    etc.
-  </help>
-</tool>
--- a/tools/genome_diversity/genome_diversity.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,266 +0,0 @@
-#!/usr/bin/env python2.5
-
-import sys
-import cdblib
-
-def _openfile( filename=None, mode='r' ):
-    try:
-        fh = open( filename, mode )
-    except IOError, err:
-        raise RuntimeError( "can't open file: %s\n" % str( err ) )
-    return fh
-
-def get_filename_from_loc( species=None, filename=None ):
-    fh = _openfile( filename )
-    for line in fh:
-        if line and not line.startswith( '#' ):
-            line = line.rstrip( '\r\n' )
-            if line:
-                elems = line.split( '\t' )
-                if len( elems ) >= 2 and elems[0] == species:
-                    return elems[1]
-    return None
-
-
-class SnpFile( object ):
-    def __init__( self, filename=None, seq_col=1, pos_col=2, ref_seq_col=7, ref_pos_col=8 ):
-        self.filename = filename
-        self.fh = _openfile( filename )
-        self.seq_col = seq_col
-        self.pos_col = pos_col
-        self.ref_seq_col = ref_seq_col
-        self.ref_pos_col = ref_pos_col
-        self.elems = None
-        self.line = None
-        self.comments = []
-
-    def next( self ):
-        while self.fh:
-            try:
-                self.line = self.fh.next()
-            except StopIteration:
-                self.line = None
-                self.elems = None
-                return None
-            if self.line:
-                self.line = self.line.rstrip( '\r\n' )
-                if self.line:
-                    if self.line.startswith( '#' ):
-                        self.comments.append( self.line )
-                    else:
-                        self.elems = self.line.split( '\t' )
-                        return 1
-
-    def get_seq_pos( self ):
-        if self.elems:
-            return self.elems[ self.seq_col - 1 ], self.elems[ self.pos_col - 1 ]
-        else:
-            return None, None
-
-    def get_ref_seq_pos( self ):
-        if self.elems:
-            return self.elems[ self.ref_seq_col - 1 ], self.elems[ self.ref_pos_col - 1 ]
-        else:
-            return None, None
-
-
-class IndexedFile( object ):
-
-    def __init__( self, data_file=None, index_file=None ):
-        self.data_file = data_file
-        self.index_file = index_file
-        self.data_fh = _openfile( data_file )
-        self.index_fh = _openfile( index_file )
-        self._reader = cdblib.Reader( self.index_fh.read(), hash )
-
-    def get_indexed_line( self, key=None ):
-        line = None
-        if key in self._reader:
-            offset = self._reader.getint( key )
-            self.data_fh.seek( offset )
-            try:
-                line = self.data_fh.next()
-            except StopIteration:
-                raise RuntimeError( 'index file out of sync for %s' % key )
-        return line
-
-class PrimersFile( IndexedFile ):
-    def get_primer_header( self, sequence=None, position=None ):
-        key = "%s %s" % ( str( sequence ), str( position ) )
-        header = self.get_indexed_line( key )
-        if header:
-            if header.startswith( '>' ):
-                elems = header.split()
-                if len( elems ) < 3:
-                    raise RuntimeError( 'short primers header for %s' % key )
-                if sequence != elems[1] or str( position ) != elems[2]:
-                    raise RuntimeError( 'primers index for %s finds %s %s' % ( key, elems[1], elems[2] ) )
-            else:
-                raise RuntimeError( 'primers index out of sync for %s' % key )
-        return header
-
-    def get_entry( self, sequence=None, position=None ):
-        entry = self.get_primer_header( sequence, position )
-        if entry:
-            while self.data_fh:
-                try:
-                    line = self.data_fh.next()
-                except StopIteration:
-                    break
-                if line.startswith( '>' ):
-                    break
-                entry += line
-        return entry
-
-    def get_enzymes( self, sequence=None, position=None ):
-        entry = self.get_primer_header( sequence, position )
-        enzyme_list = []
-        if entry:
-            try:
-                line = self.data_fh.next()
-            except StopIteration:
-                raise RuntimeError( 'primers entry for %s %s is truncated' % ( str( sequence ), str( position ) ) )
-            if line.startswith( '>' ):
-                raise RuntimeError( 'primers entry for %s %s is truncated' % ( str( sequence ), str( position ) ) )
-            line = line.rstrip( '\r\n' )
-            if line:
-                enzymes = line.split( ',' )
-                for enzyme in enzymes:
-                    enzyme = enzyme.strip()
-                    if enzyme:
-                        enzyme_list.append( enzyme )
-        return enzyme_list
-
-class SnpcallsFile( IndexedFile ):
-    def get_snp_seq( self, sequence=None, position=None ):
-        key = "%s %s" % ( str( sequence ), str( position ) )
-        line = self.get_indexed_line( key )
-        if line:
-            elems = line.split( '\t' )
-            if len( elems ) < 3:
-                raise RuntimeError( 'short snpcalls line for %s' % key )
-            if sequence != elems[0] or str( position ) != elems[1]:
-                raise RuntimeError( 'snpcalls index for %s finds %s %s' % ( key, elems[0], elems[1] ) )
-            return elems[2]
-        else:
-            return None
-
-    def get_flanking_dna( self, sequence=None, position=None, format='fasta' ):
-        if format != 'fasta' and format != 'primer3':
-            raise RuntimeError( 'invalid format for flanking dna: %s' % str( format ) )
-        seq = self.get_snp_seq( sequence, position )
-        if seq:
-            p = seq.find('[')
-            if p == -1:
-                raise RuntimeError( 'snpcalls entry for %s %s missing left bracket: %s' % ( str( sequence ), str( position ), seq ) )
-            q = seq.find(']', p + 1)
-            if q == -1:
-                raise RuntimeError( 'snpcalls entry for %s %s missing right bracket: %s' % ( str( sequence ), str( position ), seq ) )
-            q += 1
-
-            if format == 'fasta':
-                flanking_seq = '> '
-            else:
-                flanking_seq = 'SEQUENCE_ID='
-
-            flanking_seq += "%s %s %s %s\n" % ( str( sequence ), str( position ), seq[p+1], seq[p+3] )
-
-            if format == 'primer3':
-                flanking_seq += 'SEQUENCE_TEMPLATE='
-
-            flanking_seq += "%sn%s\n" % ( seq[0:p], seq[q:] )
-
-            if format == 'primer3':
-                flanking_seq += "SEQUENCE_TARGET=%d,11\n=\n" % ( p - 5 )
-
-            return flanking_seq
-        else:
-            return None
-
-
-
-class LocationFile( object ):
-    def __init__(self, filename):
-        self.build_map(filename)
-
-    def build_map(self, filename):
-        self.map = {}
-        self.open_file(filename)
-        for line in self.read_lines():
-            elems = line.split('\t', 1)
-            if len(elems) == 2:
-                self.map[ elems[0].strip() ] = elems[1].strip()
-        self.close_file()
-
-    def read_lines(self):
-        for line in self.fh:
-            if not line.startswith('#'):
-                line = line.rstrip('\r\n')
-                yield line
-
-    def open_file(self, filename):
-        self.filename = filename
-        try:
-            self.fh = open(filename, 'r')
-        except IOError, err:
-            print >> sys.stderr, "Error opening location file '%s': %s" % (filename, str(err))
-            sys.exit(1)
-
-    def close_file(self):
-        self.fh.close()
-
-    def loc_file( self, key ):
-        if key in self.map:
-            return self.map[key]
-        else:
-            print >> sys.stderr, "'%s' does not appear in location file '%s'" % (key, self.filename)
-            sys.exit(1)
-        
-class ChrLens( object ):
-    def __init__( self, location_file, species ):
-        self.chrlen_loc = LocationFile( location_file )
-        self.chrlen_filename = self.chrlen_loc.loc_file( species )
-        self.build_map()
-
-    def build_map(self):
-        self.map = {}
-        self.open_file(self.chrlen_filename)
-        for line in self.read_lines():
-            elems = line.split('\t', 1)
-            if len(elems) == 2:
-                chrom = elems[0].strip()
-                chrom_len_text = elems[1].strip()
-                try:
-                    chrom_len = int( chrom_len_text )
-                except ValueError:
-                    print >> sys.stderr, "Bad length '%s' for chromosome '%s' in '%s'" % (chrom_len_text, chrom, self.chrlen_filename)
-                    sys.exit(1)
-                self.map[ chrom ] = chrom_len
-        self.close_file()
-
-    def read_lines(self):
-        for line in self.fh:
-            if not line.startswith('#'):
-                line = line.rstrip('\r\n')
-                yield line
-
-    def open_file(self, filename):
-        self.filename = filename
-        try:
-            self.fh = open(filename, 'r')
-        except IOError, err:
-            print >> sys.stderr, "Error opening chromosome length file '%s': %s" % (filename, str(err))
-            sys.exit(1)
-
-    def close_file(self):
-        self.fh.close()
-
-    def length( self, key ):
-        if key in self.map:
-            return self.map[key]
-        else:
-            return None
-
-    def __iter__( self ):
-        for chrom in self.map:
-            yield chrom
-
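get_flanking_dna() assumes each snpcalls entry marks the SNP's two variant nucleotides in brackets at the SNP position (e.g. "[A/C]"). A standalone Python 2 sketch of that bracket parsing, using a hypothetical sequence and coordinates (real flanks run up to 200 nucleotides)::

    seq = 'ACGTACGTAC[A/C]GGTTGGTT'
    p = seq.find('[')                 # start of the bracketed variant
    q = seq.find(']', p + 1) + 1      # one past the closing bracket
    print '> %s %s %s %s' % ('chr1', 42, seq[p+1], seq[p+3])  # > chr1 42 A C
    print '%sn%s' % (seq[0:p], seq[q:])                       # ACGTACGTACnGGTTGGTT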
--- a/tools/genome_diversity/select_restriction_enzymes.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,103 +0,0 @@
-#!/usr/bin/env python2.5
-
-import os
-import sys
-import traceback
-from optparse import OptionParser
-import genome_diversity as gd
-
-def main_function( parse_arguments=None ):
-    if parse_arguments is None:
-        parse_arguments = lambda arguments: ( None, arguments )
-    def main_decorator( to_decorate ):
-        def decorated_main( arguments=None ):
-            if arguments is None:
-                arguments = sys.argv
-            options, arguments = parse_arguments( arguments )
-            rc = 1
-            try:
-                rc = to_decorate( options, arguments )
-            except Exception, err:
-                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
-                traceback.print_exc()
-            finally:
-                sys.exit( rc )
-        return decorated_main
-    return main_decorator
-
-def parse_arguments( arguments ):
-    parser = OptionParser()
-    parser.add_option('--input',
-                        type='string', dest='input',
-                        help='file of selected SNPs')
-    parser.add_option('--output',
-                        type='string', dest='output',
-                        help='output file')
-    parser.add_option('--primers_loc',
-                        type='string', dest='primers_loc',
-                        help='primers .loc file')
-    parser.add_option('--scaffold_col',
-                        type="int", dest='scaffold_col',
-                        help='scaffold column in the input file')
-    parser.add_option('--pos_col',
-                        type="int", dest='pos_col',
-                        help='position column in the input file')
-    parser.add_option('--enzyme_list',
-                        type="string", dest='enzyme_list_string',
-                        help='comma separated list of enzymes')
-    parser.add_option('--species',
-                        type="string", dest='species',
-                        help='species')
-    return parser.parse_args( arguments[1:] )
-
-
-@main_function( parse_arguments )
-def main( options, arguments ):
-    if not options.input:
-        raise RuntimeError( 'missing --input option' )
-    if not options.output:
-        raise RuntimeError( 'missing --output option' )
-    if not options.primers_loc:
-        raise RuntimeError( 'missing --primers_loc option' )
-    if not options.scaffold_col:
-        raise RuntimeError( 'missing --scaffold_col option' )
-    if not options.pos_col:
-        raise RuntimeError( 'missing --pos_col option' )
-    if not options.enzyme_list_string:
-        raise RuntimeError( 'missing --enzyme_list option' )
-    if not options.species:
-        raise RuntimeError( 'missing --species option' )
-    
-    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )
-
-    out_fh = gd._openfile( options.output, 'w' )
-
-    enzyme_dict = {}
-    for enzyme in options.enzyme_list_string.split( ',' ):
-        enzyme = enzyme.strip()
-        if enzyme:
-            enzyme_dict[enzyme] = 1
-
-    primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc )
-    file_root, file_ext = os.path.splitext( primer_data_file )
-    primer_index_file = file_root + ".cdb"
-    primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file )
-
-    comments_printed = False
-
-    while snps.next():
-        seq, pos = snps.get_seq_pos()
-        enzyme_list = primers.get_enzymes( seq, pos )
-        for enzyme in enzyme_list:
-            if enzyme in enzyme_dict:
-                if not comments_printed:
-                    for comment in snps.comments:
-                        out_fh.write( "%s\n" % comment )
-                    comments_printed = True
-                out_fh.write( "%s\n" % snps.line )
-                break
-
-    out_fh.close()
-
-if __name__ == "__main__":
-    main()
-
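The selection loop above keeps a SNP line as soon as any enzyme reported for it by the primers index is also in the user's chosen set, so the test reduces to a dictionary membership check. A tiny sketch with hypothetical values::

    enzyme_dict = {'BglII': 1, 'MboI': 1}          # the user's chosen enzymes
    reported = ['Tru9I', 'MboI']                   # as returned by primers.get_enzymes()
    print any(e in enzyme_dict for e in reported)  # True -> keep this SNP line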
--- a/tools/genome_diversity/select_restriction_enzymes.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-<tool id="gd_select_restriction_enzymes" name="Specify" version="1.0.0">
-  <description>a set of restriction enzymes</description>
-
-  <command interpreter="python2.5">
-    select_restriction_enzymes.py "--input=$input" "--output=$output" "--primers_loc=${GALAXY_DATA_INDEX_DIR}/gd.primers.loc"
-    #if $override_metadata.choice == "0":
-      "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}"
-    #else
-      "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species"
-    #end if
-    "--enzyme_list=$enzymes"
-  </command>
-
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/>
-    <conditional name="override_metadata">
-      <param name="choice" type="select" format="integer" label="choose columns">
-        <option value="0" selected="true">No, get columns from metadata</option>
-        <option value="1" >Yes, choose columns</option>
-      </param>
-      <when value="0">
-        <!-- no options -->
-      </when>
-      <when value="1">
-        <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/>
-        <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/>
-        <param name="species" type="select" label="Choose species">
-          <options from_file="gd.species.txt">
-            <column name="name" index="1"/>
-            <column name="value" index="0"/>
-          </options>
-        </param>
-      </when>
-    </conditional>
-
-    <param name="enzymes" type="select" display="checkboxes" multiple="true" label="Choose enzymes">
-        <options from_file="gd.restriction_enzymes.txt">
-            <column name="name" index="0"/>
-            <column name="value" index="1"/>
-        </options>
-    </param>
-  </inputs>
-
-  <outputs>
-    <data format="wsf" name="output" metadata_source="input"/>
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" value="gd.sample.wsf" ftype="wsf"/>
-      <param name="choice" value="0"/>
-      <param name="enzymes" value="BanI,BstOI,Hsp92II"/>
-      <output name="output" file="gd.select_restriction_enzymes.wsf"/>
-    </test>
-  </tests>
-
-  <help>
-**What it does**
-
-  It selects the SNPs that are differentially cut by at least one of the
-  specified restriction enzymes. The enzymes are required to cut the amplified
-  segment (for the specified PCR primers) only at the SNP.
-
------
-
-**Example**
-
-- input file::
-
-    chr2_75111355_75112576    314  A  C  L  F  chr2   75111676  C  F  15  4  53   2   9  48   Y  96   0.369  0.355  0.396  0
-    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
-    chr10_7434473_7435447    524   T  C  S  S  chr10  7435005   T  S  11  5  90   14  0  69   Y  626  0.066  0.406  0.727  0
-    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
-    chr15_64470252_64471048  89    G  A  Y  Y  chr15  64470341  G  Y  5   6  109  14  0  69   Y  312  0.247  0.998  0.393  0
-    chr18_48070585_48071386  514   C  T  E  K  chr18  48071100  T  K  7   7  46   14  0  69   Y  2    0.200  0.032  0.163  0
-    chr18_50154905_50155664  304   A  G  Y  C  chr18  50155208  A  Y  4   2  17   5   1  22   Y  8    0.022  0.996  0.128  0
-    chr18_57379354_57380496  315   C  T  V  V  chr18  57379669  G  V  11  0  60   9   6  62   Y  726  0.118  0.048  0.014  1
-    chr19_14240610_14242055  232   C  T  A  V  chr19  14240840  C  A  18  8  56   15  5  42   Y  73   0.003  0.153  0.835  0
-    chr19_39866997_39874915  3117  C  T  P  P  chr19  39870110  C  P  3   7  65   14  2  32   Y  6    0.321  0.911  0.462  4
-    etc.
-
-- output file::
-
-    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
-    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
-    chr18_57379354_57380496  315   C  T  V  V  chr18  57379669  G  V  11  0  60   9   6  62   Y  726  0.118  0.048  0.014  1
-    chr19_39866997_39874915  3117  C  T  P  P  chr19  39870110  C  P  3   7  65   14  2  32   Y  6    0.321  0.911  0.462  4
-    etc.
-  </help>
-</tool>
--- a/tools/genome_diversity/select_snps.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,148 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import math
-from optparse import OptionParser
-import genome_diversity as gd
-
-def main_function(parse_arguments=None):
-    if parse_arguments is None:
-        parse_arguments = lambda arguments: (None, arguments)
-    def main_decorator(to_decorate):
-        def decorated_main(arguments=None):
-            if arguments is None:
-                arguments = sys.argv
-            options, arguments = parse_arguments(arguments)
-            sys.exit(to_decorate(options, arguments))
-        return decorated_main
-    return main_decorator
-
-def parse_arguments(arguments):
-    parser = OptionParser()
-    parser.add_option('--input', dest='input')
-    parser.add_option('--output', dest='output')
-    parser.add_option('--chrlens_loc', dest='chrlens_loc')
-    parser.add_option('--num_snps', dest='num_snps')
-    parser.add_option('--ref_chrom_col', dest='ref_chrom_col')
-    parser.add_option('--ref_pos_col', dest='ref_pos_col')
-    parser.add_option('--species', dest='species')
-    return parser.parse_args(arguments[1:])
-
-@main_function(parse_arguments)
-def main(options, arguments):
-
-    ref_chrom_idx = to_int( options.ref_chrom_col ) - 1
-    ref_pos_idx = to_int( options.ref_pos_col ) - 1
-
-    if (ref_chrom_idx < 1) or (ref_pos_idx < 1) or (ref_chrom_idx == ref_pos_idx):
-        print >> sys.stderr, "Cannot locate reference genome sequence (ref) or reference genome position (rPos) column for this dataset."
-        sys.exit(1)
-
-    chrlens = gd.ChrLens( options.chrlens_loc, options.species )
-
-    total_len = 0
-    for chrom in chrlens:
-        total_len += chrlens.length(chrom)
-
-    total_requested = int( options.num_snps )
-    lines, data, comments = get_snp_lines_data_and_comments( options.input, ref_chrom_idx, ref_pos_idx )
-    selected = select_snps( data, total_len, total_requested )
-    out_data = fix_selection_and_order_like_input(data, selected, total_requested)
-    write_selected_snps( options.output, out_data, lines, comments )
-
-def to_int( value ):
-    try:
-        int_value = int( value )
-    except ValueError:
-        int_value = 0
-    return int_value
-
-def get_snp_lines_data_and_comments( filename, chrom_idx, pos_idx ):
-    fh = open( filename, 'r' )
-    if (chrom_idx >= pos_idx):
-        needed = chrom_idx + 1
-    else:
-        needed = pos_idx + 1
-    lines = []
-    data = []
-    comments = []
-    line_idx = 0
-    line_num = 0
-    for line in fh:
-        line_num += 1
-        line = line.rstrip('\r\n')
-        if line:
-            if line.startswith('#'):
-                comments.append(line)
-            else:
-                elems = line.split('\t')
-                if len(elems) >= needed:
-                    chrom = elems[chrom_idx]
-                    try:
-                        pos = int(elems[pos_idx])
-                    except ValueError:
-                        sys.stderr.write( "bad reference position in line %d column %d: %s\n" % ( line_num, pos_idx+1, elems[pos_idx] ) )
-                        sys.exit(1)
-                    lines.append(line)
-                    chrom_sort = chrom.lstrip('chr')
-                    data.append( [chrom_sort, chrom, pos, line_num, line_idx] )
-                    line_idx += 1
-    fh.close()
-    data = sorted( data, key=lambda x: (x[0], x[2]) )
-    return lines, data, comments
-
-def select_snps( data, total_len, requested ):
-    old_chrom = None
-    next_print = 0
-    selected = []
-    space = total_len / requested
-    for data_idx, datum in enumerate( data ):
-        chrom = datum[1]
-        pos = datum[2]
-        if chrom != old_chrom:
-            old_chrom = chrom
-            next_print = 0
-        if pos >= next_print:
-            selected.append(data_idx)
-            next_print += space
-    return selected
-
-def fix_selection_and_order_like_input(data, selected, requested):
-    total_selected = len( selected )
-    a = float( total_selected ) / requested
-    b = a / 2
-
-    idx_list = []
-    for i in range( requested ):
-        idx = int( math.ceil( i * a + b ) - 1 )
-        idx_list.append( idx )
-
-    out_data = []
-
-    for i, data_idx in enumerate(selected):
-        if total_selected > requested:
-            if i in idx_list:
-                out_data.append(data[data_idx])
-        else:
-            out_data.append(data[data_idx])
-
-    out_data = sorted( out_data, key=lambda x: x[3] )
-
-    return out_data
-
-def write_selected_snps( filename, data, lines, comments ):
-    fh = open( filename, 'w' )
-
-    for comment in comments:
-        fh.write("%s\n" % comment )
-
-    for datum in data:
-        line_idx = datum[4]
-        fh.write("%s\n" % lines[line_idx])
-
-    fh.close()
-
-if __name__ == "__main__":
-    main()
-
-
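select_snps() does the spacing: it walks the position-sorted SNPs and keeps the first SNP at or beyond each multiple of total_len / requested, resetting the counter at every chromosome boundary. A worked sketch with hypothetical numbers, assuming the functions above are in scope::

    # one chromosome, total_len=1000, requested=4 -> spacing of 250
    positions = [10, 120, 260, 300, 530, 900]
    data = [['1', 'chr1', pos, i + 1, i] for i, pos in enumerate(positions)]
    print select_snps(data, 1000, 4)   # [0, 2, 4, 5]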
--- a/tools/genome_diversity/select_snps.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-<tool id="gd_select_snps" name="Select" version="1.0.0">
-  <description>a specified number of SNPs</description>
-
-  <command interpreter="python">
-    select_snps.py "--input=$input" "--output=$output" "--chrlens_loc=${GALAXY_DATA_INDEX_DIR}/gd.chrlens.loc" "--num_snps=$num_snps"
-    #if $override_metadata.choice == "0":
-      "--ref_chrom_col=${input.metadata.ref}" "--ref_pos_col=${input.metadata.rPos}" "--species=${input.metadata.species}"
-    #else
-      "--ref_chrom_col=$ref_col" "--ref_pos_col=$rpos_col" "--species=$species"
-    #end if
-  </command>
-
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/>
-    <param name="num_snps" type="integer" value="10" optional="false" min="1" label="Number of SNPs"/>
-    <conditional name="override_metadata">
-      <param name="choice" type="select" format="integer" label="choose columns">
-        <option value="0" selected="true">No, get columns from metadata</option>
-        <option value="1" >Yes, choose columns</option>
-      </param>
-      <when value="0">
-        <!-- no options -->
-      </when>
-      <when value="1">
-        <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome"/>
-        <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position"/>
-        <param name="species" type="select" label="Choose species">
-          <options from_file="gd.species.txt">
-            <column name="name" index="1"/>
-            <column name="value" index="0"/>
-          </options>
-        </param>
-      </when>
-    </conditional>
-  </inputs>
-
-  <outputs>
-    <data format="wsf" name="output" metadata_source="input"/>
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" value="gd.sample.wsf" ftype="wsf"/>
-      <param name="num_snps" value="5"/>
-      <param name="choice" value="0"/>
-      <output name="output" file="gd.select_snps.wsf"/>
-    </test>
-  </tests>
-
-
-  <help>
-**What it does**
-
-  It attempts to select a specified number of SNPs from the dataset, making them
-  approximately uniformly spaced relative to the reference genome. The number
-  actually selected may be slightly more than the specified number.
-
------
-
-**Example**
-
-- input file::
-
-    chr2_75111355_75112576    314  A  C  L  F  chr2   75111676  C  F  15  4  53   2   9  48   Y  96   0.369  0.355  0.396  0
-    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
-    chr10_7434473_7435447    524   T  C  S  S  chr10  7435005   T  S  11  5  90   14  0  69   Y  626  0.066  0.406  0.727  0
-    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
-    chr15_64470252_64471048  89    G  A  Y  Y  chr15  64470341  G  Y  5   6  109  14  0  69   Y  312  0.247  0.998  0.393  0
-    chr18_48070585_48071386  514   C  T  E  K  chr18  48071100  T  K  7   7  46   14  0  69   Y  2    0.200  0.032  0.163  0
-    chr18_50154905_50155664  304   A  G  Y  C  chr18  50155208  A  Y  4   2  17   5   1  22   Y  8    0.022  0.996  0.128  0
-    chr18_57379354_57380496  315   C  T  V  V  chr18  57379669  G  V  11  0  60   9   6  62   Y  726  0.118  0.048  0.014  1
-    chr19_14240610_14242055  232   C  T  A  V  chr19  14240840  C  A  18  8  56   15  5  42   Y  73   0.003  0.153  0.835  0
-    chr19_39866997_39874915  3117  C  T  P  P  chr19  39870110  C  P  3   7  65   14  2  32   Y  6    0.321  0.911  0.462  4
-    etc.
-
-- output file::
-
-    chr2_75111355_75112576    314  A  C  L  F  chr2   75111676  C  F  15  4  53   2   9  48   Y  96   0.369  0.355  0.396  0
-    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
-    chr10_7434473_7435447    524   T  C  S  S  chr10  7435005   T  S  11  5  90   14  0  69   Y  626  0.066  0.406  0.727  0
-    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
-    chr15_64470252_64471048  89    G  A  Y  Y  chr15  64470341  G  Y  5   6  109  14  0  69   Y  312  0.247  0.998  0.393  0
-    chr18_48070585_48071386  514   C  T  E  K  chr18  48071100  T  K  7   7  46   14  0  69   Y  2    0.200  0.032  0.163  0
-    chr19_14240610_14242055  232   C  T  A  V  chr19  14240840  C  A  18  8  56   15  5  42   Y  73   0.003  0.153  0.835  0
-    etc.
-  </help>
-</tool>
--- a/tools/human_genome_variation/BEAM2_wrapper.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-#!/usr/bin/env bash
-#
-# Galaxy wrapper for Yu Zhang's BEAM2 adds two new options
-#  significance=foo    renames significance.txt to foo after BEAM2 is run
-#  posterior=bar       renames posterior.txt    to bar after BEAM2 is run
-# 
-
-set -e
-
-export PATH=$PATH:$(dirname $0)
-
-## options
-significance=
-posterior=
-new_args=
-map=
-ped=
-
-TFILE="/tmp/BEAM2.$$.tmp"
-
-## separate significance and posterior arguments from arguments to BEAM2
-until [ $# -eq 0 ]
-do
-  case $1 in
-    significance=*)
-      significance=${1#significance=}
-      ;;
-    posterior=*)
-      posterior=${1#posterior=}
-      ;;
-    map=*)
-      map=${1#map=}
-      ;;
-    ped=*)
-      ped=${1#ped=}
-      ;;
-    *)
-      if [ -z "$new_args" ]; then
-        new_args=$1
-      else
-        new_args="$new_args $1"
-      fi
-      ;;
-  esac
-
-  shift
-done
-
-## convert input for use with BEAM2
-lped_to_geno.pl $map $ped > $TFILE
-if [ $? -ne 0 ]; then
-  echo "failed: lped_to_geno.pl $map $ped > $TFILE"
-  exit 1
-fi
-
-## run BEAM2
-BEAM2 $TFILE $new_args 1>/dev/null
-if [ $? -ne 0 ]; then
-  echo "failed: BEAM2 $TFILE $new_args"
-  exit 1
-fi
-
-mergeSnps.pl significance.txt $TFILE
-if [ $? -ne 0 ]; then
-  echo "failed: mergeSnps.pl significance.txt $TFILE"
-  exit 1
-fi
-
-## move output files
-mv significance.txt $significance
-mv posterior.txt $posterior
-
-## cleanup
-rm -f $TFILE
-
--- a/tools/human_genome_variation/beam.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,137 +0,0 @@
-<tool id="hgv_beam" name="BEAM" version="1.0.0">
-  <description>significant single- and multi-locus SNP associations in case-control studies</description>
-
-  <command interpreter="bash">
-    BEAM2_wrapper.sh map=${input.extra_files_path}/${input.metadata.base_name}.map ped=${input.extra_files_path}/${input.metadata.base_name}.ped $burnin $mcmc $pvalue significance=$significance posterior=$posterior
-  </command>
-
-  <inputs>
-    <param format="lped" name="input" type="data" label="Dataset"/>
-    <param name="burnin" label="Number of MCMC burn-in steps" type="integer" value="200" />
-    <param name="mcmc" label="Number of MCMC sampling steps" type="integer" value="200" />
-    <param name="pvalue" label="Significance cutoff (after Bonferroni adjustment)" type="float" value="0.05" />
-  </inputs>
-
-  <outputs>
-    <data format="tabular" name="significance" />
-    <data format="tabular" name="posterior" />
-  </outputs>
-
-  <requirements>
-    <requirement type="package">beam</requirement>
-    <requirement type="binary">mv</requirement>
-    <requirement type="binary">rm</requirement>
-  </requirements>
-
-  <!-- broken.  will be fixed soon.
-  <tests>
-    <test>
-      <param name='input' value='gpass_and_beam_input' ftype='lped' >
-        <metadata name='base_name' value='gpass_and_beam_input' />
-        <composite_data value='gpass_and_beam_input.ped' />
-        <composite_data value='gpass_and_beam_input.map' />
-        <edit_attributes type='name' value='gpass_and_beam_input' />
-      </param>
-      <param name="burnin" value="200"/>
-      <param name="mcmc" value="200"/>
-      <param name="pvalue" value="0.05"/>
-      <output name="significance" file="beam_output1.tab"/>
-      <output name="posterior" file="beam_output2.tab"/>
-    </test>
-  </tests>
-  -->
-
-  <help>
-.. class:: infomark
-
-This tool can take a long time to run, depending on the number of SNPs, the
-sample size, and the number of MCMC steps specified.  If you have hundreds
-of thousands of SNPs, it may take over a day.  The main tasks that slow down
-this tool are searching for interactions and dynamically partitioning the
-SNPs into blocks.  Optimization is certainly possible, but hasn't been done
-yet.  **If your only interest is to detect SNPs with primary effects (i.e.,
-single-SNP associations), please use the GPASS tool instead.**
-
------
-
-**Dataset formats**
-
-The input dataset must be in lped_ format.  The output datasets are both tabular_.
-(`Dataset missing?`_)
-
-.. _lped: ./static/formatHelp.html#lped
-.. _tabular: ./static/formatHelp.html#tabular
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-BEAM (Bayesian Epistasis Association Mapping) uses a Markov chain Monte Carlo (MCMC) method to infer SNP block structures and to detect both single-marker
-and interaction effects from case-control SNP data.
-The tool partitions SNPs into blocks based on linkage disequilibrium (LD).  Because the method is Bayesian, its outputs are posterior probabilities of association along with the block partitions, so both come with uncertainty measures.  The method scales well from small to large sample sizes and is powerful in detecting gene-gene interactions, although it is slow for large datasets.
-
------
-
-**Example**
-
-- input map file::
-
-    1  rs0  0  738547
-    1  rs1  0  5597094
-    1  rs2  0  9424115
-    etc.
-
-- input ped file::
-
-    1 1 0 0 1  1  G G  A A  A A  A A  A A  A G  A A  G G  G G  A A  G G  G G  G G  A A  A A  A G  A A  G G  A G  A G  A A  G G  A A  G G  A A  G G  A G  A A  G G  A A  G G  A A  A G  A G  G G  A G  G G  G G  A A  A G  A A  G G  G G  G G  G G  A G  A A  A A  A A  A A
-    1 1 0 0 1  1  G G  A G  G G  A A  A A  A G  A A  G G  G G  G G  A A  G G  A G  A G  G G  G G  A G  G G  A G  A A  G G  A G  G G  A A  G G  G G  A G  A G  G G  A G  A A  A A  G G  G G  A G  A G  G G  A G  A A  A A  A G  G G  A G  G G  A G  G G  G G  A A  G G  A G
-    etc.
-
-- first output file, significance.txt::
-
-    ID   chr   position  results
-    rs0  chr1  738547    10 20 score= 45.101397 , df= 8 , p= 0.000431 , N=1225
-
-- second output file, posterior.txt::
-
-    id:  chr position  marginal + interaction = total posterior
-    0:   1 738547      0.0000 + 0.0000 = 0.0000
-    1:   1 5597094     0.0000 + 0.0000 = 0.0000
-    2:   1 9424115     0.0000 + 0.0000 = 0.0000
-    3:   1 13879818    0.0000 + 0.0000 = 0.0000
-    4:   1 13934751    0.0000 + 0.0000 = 0.0000
-    5:   1 16803491    0.0000 + 0.0000 = 0.0000
-    6:   1 17236854    0.0000 + 0.0000 = 0.0000
-    7:   1 18445387    0.0000 + 0.0000 = 0.0000
-    8:   1 21222571    0.0000 + 0.0000 = 0.0000
-    etc.
-
-    id:  chr position block_boundary  | allele counts in cases and controls
-    0:   1 738547      1.000          | 156 93 251 | 169 83 248 
-    1:   1 5597094     1.000          | 323 19 158 | 328 16 156 
-    2:   1 9424115     1.000          | 366 6 128 | 369 11 120 
-    3:   1 13879818    1.000          | 252 31 217 | 278 32 190 
-    4:   1 13934751    1.000          | 246 64 190 | 224 58 218 
-    5:   1 16803491    1.000          | 91 160 249 | 91 174 235 
-    6:   1 17236854    1.000          | 252 43 205 | 249 44 207 
-    7:   1 18445387    1.000          | 205 66 229 | 217 56 227 
-    8:   1 21222571    1.000          | 353 9 138 | 352 8 140 
-    etc.
-
-  The "id" field is an internally used index.
-
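-A quick way to pull out the flagged SNPs from the second output file: the
-sketch below is not part of the tool, and the file name and the 0.5 cutoff are
-arbitrary assumptions.  It keeps rows whose total posterior meets the cutoff::
-
-    #!/usr/bin/env perl
-    use strict;
-    use warnings;
-
-    my $cutoff = 0.5;    # arbitrary threshold on the total posterior
-    open(my $fh, '<', 'posterior.txt') or die "Couldn't open posterior.txt, $!\n";
-    while (<$fh>) {
-        # rows look like:  0:   1 738547   0.0000 + 0.0000 = 0.0000
-        if (/^\s*(\d+):\s+(\S+)\s+(\d+)\s+[\d.]+\s*\+\s*[\d.]+\s*=\s*([\d.]+)/) {
-            my ($id, $chr, $pos, $total) = ($1, $2, $3, $4);
-            print "$id\t$chr\t$pos\t$total\n" if $total >= $cutoff;
-        }
-    }
-    close $fh;
-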
------
-
-**References**
-
-Zhang Y, Liu JS. (2007)
-Bayesian inference of epistatic interactions in case-control studies.
-Nat Genet. 39(9):1167-73. Epub 2007 Aug 26.
-
-Zhang Y, Zhang J, Liu JS. (2010)
-Block-based Bayesian epistasis association mapping with application to WTCCC type 1 diabetes data.
-Submitted.
-
-  </help>
-</tool>
--- a/tools/human_genome_variation/ctd.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-use LWP::UserAgent;
-require HTTP::Cookies;
-
-#######################################################
-# ctd.pl 
-# Submit a batch query to CTD and fetch results into galaxy history
-# usage: ctd.pl inFile idCol inputType resultType actionType outFile
-#######################################################
-
-if (!@ARGV or scalar @ARGV != 6) {
-   print "usage: ctd.pl inFile idCol inputType resultType actionType outFile\n";
-   exit;
-}
-
-my $in = shift @ARGV;
-my $col = shift @ARGV;
-if ($col < 1) {
-   print "The column number is with a 1 start\n";
-   exit 1;
-}
-my $type = shift @ARGV;
-my $resType = shift @ARGV;
-my $actType = shift @ARGV;
-my $out = shift @ARGV;
-
-my @data;
-open(FH, $in) or die "Couldn't open $in, $!\n";
-while (<FH>) {
-   chomp;
-   my @f = split(/\t/);
-   if (scalar @f < $col) { 
-      print "ERROR the requested column is not in the file $col\n";
-      exit 1;
-   }
-   push(@data, $f[$col-1]);
-}
-close FH or die "Couldn't close $in, $!\n";
-
-my $url = 'http://ctd.mdibl.org/tools/batchQuery.go';
-#my $url = 'http://globin.bx.psu.edu/cgi-bin/print-query';
-my $d = join("\n", @data);
-# a list preserves order, whereas a hash doesn't, and order matters to CTD
-# submitting the query file itself fails ("can't find file"), so the terms are
-# sent inline via 'inputTerms' instead of via 'queryFile':
-#my @form = ('inputType', $type, 'inputTerms', '', 'report', $resType,
-#   'queryFile', [$in, ''], 'queryFileColumn', $col, 'format', 'tsv', 'action', 'Submit');
-my @form = ('inputType', $type, 'inputTerms', $d, 'report', $resType,
-  'queryFile', '', 'format', 'tsv', 'action', 'Submit');
-if ($resType eq 'cgixns') { #only add if this type
-   push(@form, 'actionTypes', $actType);
-}
-my $ua = LWP::UserAgent->new;
-$ua->cookie_jar(HTTP::Cookies->new( () ));
-$ua->agent('Mozilla/5.0');
-my $page = $ua->post($url, \@form, 'Content_Type'=>'form-data');
-if ($page->is_success) {
-   open(FH, ">", $out) or die "Couldn't open $out, $!\n";
-   print FH "#";
-   print FH $page->content, "\n";
-   close FH or die "Couldn't close $out, $!\n";
-}else {
-   print "ERROR failed to get page from CTD, ", $page->status_line, "\n";
-   print $page->content, "\n";
-   my $req = $page->request();
-   print "Requested \n";
-   foreach my $k(keys %$req) { 
-      if ($k eq '_headers') {
-         my $t = $req->{$k};
-         foreach my $k2 (keys %$t) { print "$k2 => $t->{$k2}\n"; }
-      }else { print "$k => $req->{$k}\n"; }
-   }
-   exit 1;
-}
-exit;
-
--- a/tools/human_genome_variation/ctd.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,320 +0,0 @@
-<tool id="hgv_ctdBatch" name="CTD" version="1.0.0">
-  <description>analysis of chemicals, diseases, or genes</description>
-
-  <command interpreter="perl">
-    ctd.pl $input $numerical_column $inType.inputType
-    #if $inType.inputType == "disease"
-      $inType.report ANY
-    #else if $inType.reportType.report == "cgixns"
-      $inType.reportType.report $inType.reportType.actType
-    #else
-      $inType.reportType.report ANY
-    #end if
-    $out_file1
-  </command>
-
-  <inputs>
-    <param name="input" type="data" format="tabular" label="Dataset" />
-    <param name="numerical_column" type="data_column" data_ref="input" label="Column with identifiers" />
-    <conditional name="inType">
-      <param name="inputType" label="Identifier type" type="select">
-        <option value="chem">Chemicals (MeSH names, synonyms or accession IDs, or CAS RNs)</option>
-        <option value="disease">Diseases (MeSH or OMIM names, synonyms or accession IDs)</option>
-        <option value="gene" selected="true">Genes (NCBI official symbols or accession IDs)</option>
-      </param>
-      <when value="chem">
-        <conditional name='reportType'>
-          <param name="report" label="Data to extract" type="select">
-            <option value="cgixns">Curated chemical-gene interactions</option>
-            <option value="genes">Curated gene associations</option>
-            <option value="pathways">Pathway associations</option>
-            <option value="diseases" selected="true">All disease relationships</option>
-            <option value="diseases_direct">  Direct disease relationships only</option>
-            <option value="diseases_inferred">  Inferred disease relationships only</option>
-            <option value="go">All GO associations</option>
-            <option value="go_p">  GO biological Processes only</option>
-            <option value="go_f">  GO molecular Functions only</option>
-            <option value="go_c">  GO cellular Components only</option>
-          </param>
-          <when value="genes">
-            <!-- do nothing -->
-          </when>
-          <when value="pathways">
-            <!-- do nothing -->
-          </when>
-          <when value="diseases">
-            <!-- do nothing -->
-          </when>
-          <when value="diseases_direct">
-            <!-- do nothing -->
-          </when>
-          <when value="diseases_inferred">
-            <!-- do nothing -->
-          </when>
-          <when value="go">
-            <!-- do nothing -->
-          </when>
-          <when value="go_p">
-            <!-- do nothing -->
-          </when>
-          <when value="go_f">
-            <!-- do nothing -->
-          </when>
-          <when value="go_c">
-            <!-- do nothing -->
-          </when>
-          <when value="cgixns">
-            <param name="actType" label="Interaction type" type="select">
-              <option value="ANY">ANY</option>
-              <option value="abundance">abundance</option>
-              <option value="activity">activity</option>
-              <option value="binding">binding</option>
-              <option value="cotreatment">cotreatment</option>
-              <option value="expression">expression</option>
-              <option value="folding">folding</option>
-              <option value="localization">localization</option>
-              <option value="metabolic processing">metabolic processing</option>
-              <option value="acetylation">- acetylation</option>
-              <option value="acylation">- acylation</option>
-              <option value="alkylation">- alkylation</option>
-              <option value="amination">- amination</option>
-              <option value="carbamoylation">- carbamoylation</option>
-              <option value="carboxylation">- carboxylation</option>
-              <option value="chemical synthesis">- chemical synthesis</option>
-              <option value="degradation">- degradation</option>
-              <option value="cleavage">    - cleavage</option>
-              <option value="hydrolysis">      - hydrolysis</option>
-              <option value="ethylation">- ethylation</option>
-              <option value="glutathionylation">- glutathionylation</option>
-              <option value="glycation">- glycation</option>
-              <option value="glycosylation">- glycosylation</option>
-              <option value="N-linked glycosylation">    - N-linked glycosylation</option>
-              <option value="O-linked glycosylation">    - O-linked glycosylation</option>
-              <option value="glucuronidation">    - glucuronidation</option>
-              <option value="hydroxylation">- hydroxylation</option>
-              <option value="lipidation">- lipidation</option>
-              <option value="farnesylation">    - farnesylation</option>
-              <option value="geranolyation">    - geranolyation</option>
-              <option value="myristolylation">    - myristolylation</option>
-              <option value="palmitoylation">    - palmitoylation</option>
-              <option value="prenylation">    - prenylation</option>
-              <option value="methylation">- methylation</option>
-              <option value="nitrosation">- nitrosation</option>
-              <option value="nucleotidylation">- nucleotidylation</option>
-              <option value="oxidation">- oxidation</option>
-              <option value="phosphorylation">- phosphorylation</option>
-              <option value="reduction">- reduction</option>
-              <option value="ribosylation">- ribosylation</option>
-              <option value="ADP-ribosylation">    - ADP-ribosylation</option>
-              <option value="sulfation">- sulfation</option>
-              <option value="sumoylation">- sumoylation</option>
-              <option value="ubiquitination">- ubiquitination</option>
-              <option value="mutagenesis">mutagenesis</option>
-              <option value="reaction">reaction</option>
-              <option value="response to chemical">response to chemical</option>
-              <option value="splicing">splicing</option>
-              <option value="stability">stability</option>
-              <option value="transport">transport</option>
-              <option value="secretion">- secretion</option>
-              <option value="export">    - export</option>
-              <option value="uptake">- uptake</option>
-              <option value="import">    - import</option>
-            </param>
-          </when>
-        </conditional>
-      </when>
-      <when value="disease">
-        <param name="report" label="Data to extract" type="select">
-          <option value="chems">Chemical associations</option>
-          <option value="chems_direct">Chemical associations direct relationships only</option>
-          <option value="chems_inferred">Chemical associations inferred relationships only</option>
-          <option value="genes">Curated gene associations</option>
-          <option value="pathways">Pathway associations</option>
-        </param>
-      </when>
-      <when value="gene">
-        <conditional name='reportType'>
-          <param name="report" label="Data to extract" type="select">
-            <option value="cgixns">Curated chemical-gene interactions</option>
-            <option value="chems">Curated chemical associations</option>
-            <option value="pathways">Pathway associations</option>
-            <option value="diseases" selected="true">All disease relationships</option>
-            <option value="diseases_direct">  Direct disease relationships only</option>
-            <option value="diseases_inferred">  Inferred disease relationships only</option>
-            <option value="go">All GO associations</option>
-            <option value="go_p">  GO biological Processes only</option>
-            <option value="go_f">  GO molecular Functions only</option>
-            <option value="go_c">  GO cellular Components only</option>
-          </param>
-          <when value="chems">
-            <!-- do nothing -->
-          </when>
-          <when value="pathways">
-            <!-- do nothing -->
-          </when>
-          <when value="diseases">
-            <!-- do nothing -->
-          </when>
-          <when value="diseases_direct">
-            <!-- do nothing -->
-          </when>
-          <when value="diseases_inferred">
-            <!-- do nothing -->
-          </when>
-          <when value="go">
-            <!-- do nothing -->
-          </when>
-          <when value="go_p">
-            <!-- do nothing -->
-          </when>
-          <when value="go_f">
-            <!-- do nothing -->
-          </when>
-          <when value="go_c">
-            <!-- do nothing -->
-          </when>
-          <when value="cgixns">
-            <param name="actType" label="Interaction type" type="select">
-              <option value="ANY">ANY</option>
-              <option value="abundance">abundance</option>
-              <option value="activity">activity</option>
-              <option value="binding">binding</option>
-              <option value="cotreatment">cotreatment</option>
-              <option value="expression">expression</option>
-              <option value="folding">folding</option>
-              <option value="localization">localization</option>
-              <option value="metabolic processing">metabolic processing</option>
-              <option value="acetylation">- acetylation</option>
-              <option value="acylation">- acylation</option>
-              <option value="alkylation">- alkylation</option>
-              <option value="amination">- amination</option>
-              <option value="carbamoylation">- carbamoylation</option>
-              <option value="carboxylation">- carboxylation</option>
-              <option value="chemical synthesis">- chemical synthesis</option>
-              <option value="degradation">- degradation</option>
-              <option value="cleavage">    - cleavage</option>
-              <option value="hydrolysis">      - hydrolysis</option>
-              <option value="ethylation">- ethylation</option>
-              <option value="glutathionylation">- glutathionylation</option>
-              <option value="glycation">- glycation</option>
-              <option value="glycosylation">- glycosylation</option>
-              <option value="N-linked glycosylation">    - N-linked glycosylation</option>
-              <option value="O-linked glycosylation">    - O-linked glycosylation</option>
-              <option value="glucuronidation">    - glucuronidation</option>
-              <option value="hydroxylation">- hydroxylation</option>
-              <option value="lipidation">- lipidation</option>
-              <option value="farnesylation">    - farnesylation</option>
-              <option value="geranolyation">    - geranolyation</option>
-              <option value="myristolylation">    - myristolylation</option>
-              <option value="palmitoylation">    - palmitoylation</option>
-              <option value="prenylation">    - prenylation</option>
-              <option value="methylation">- methylation</option>
-              <option value="nitrosation">- nitrosation</option>
-              <option value="nucleotidylation">- nucleotidylation</option>
-              <option value="oxidation">- oxidation</option>
-              <option value="phosphorylation">- phosphorylation</option>
-              <option value="reduction">- reduction</option>
-              <option value="ribosylation">- ribosylation</option>
-              <option value="ADP-ribosylation">    - ADP-ribosylation</option>
-              <option value="sulfation">- sulfation</option>
-              <option value="sumoylation">- sumoylation</option>
-              <option value="ubiquitination">- ubiquitination</option>
-              <option value="mutagenesis">mutagenesis</option>
-              <option value="reaction">reaction</option>
-              <option value="response to chemical">response to chemical</option>
-              <option value="splicing">splicing</option>
-              <option value="stability">stability</option>
-              <option value="transport">transport</option>
-              <option value="secretion">- secretion</option>
-              <option value="export">    - export</option>
-              <option value="uptake">- uptake</option>
-              <option value="import">    - import</option>
-            </param>
-          </when>
-        </conditional>
-      </when>
-    </conditional>
-  </inputs>
-
-  <outputs>
-    <data format="tabular" name="out_file1" />
-  </outputs>
-
-  <!-- broken for now.  will be fixed soon.
-  <tests>
-    <test>
-      <param name="input" ftype="tabular" value="ctdBatchInput.txt" />
-      <param name="numerical_column" value="1" />
-      <param name="inputType" value="gene" />
-      <param name="report" value="diseases" />
-      <output name="out_file1" file="ctdBatchOut.txt" />
-    </test>
-  </tests>
-  -->
-
-  <help>
-**Dataset formats**
-
-The input and output datasets are tabular_.  
-(`Dataset missing?`_)
-
-.. _tabular: ./static/formatHelp.html#tab
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-This tool extracts data related to the provided list of identifiers
-from the Comparative Toxicogenomics Database (CTD).  The fields
-extracted vary with the type of data requested; the first row
-of the output identifies the columns.
-
-For the curated chemical-gene interactions, you can also choose the
-interaction type from the search-and-select box.  The choices that
-start with '-' are a subset of the choice above them; you can choose
-either the general interaction type or a more specific one.
-
-Website: http://ctd.mdibl.org/
-
------
-
-**Examples**
-
-- input data file::
-    HBB
-
-- select Column = c1, Identifier type = Genes, and Data to extract = All disease relationships
-
-- output file::
-
-    #Input  GeneSymbol  GeneName          GeneID  DiseaseName                  DiseaseID     GeneDiseaseRelation         OmimIDs  PubMedIDs
-    hbb     HBB         hemoglobin, beta  3043    Abnormalities, Drug-Induced  MESH:D000014  inferred via Ethanol                 17676605|18926900
-    hbb     HBB         hemoglobin, beta  3043    Abnormalities, Drug-Induced  MESH:D000014  inferred via Valproic Acid           8875741
-    etc.
-
-Another example: 
-
-- same input file::
-    HBB
-
-- select Column = c1, Identifier type = Genes, Data to extract = Curated chemical-gene interactions, and Interaction type = ANY
-
-- output file::
-
-    #Input  GeneSymbol  GeneName          GeneID  ChemicalName             ChemicalID  CasRN    Organism         OrganismID  Interaction                                         InteractionTypes  PubMedIDs
-    hbb     HBB         hemoglobin, beta  3043    1-nitronaphthalene       C016614     86-57-7  Macaca mulatta   9544        1-nitronaphthalene metabolite binds to HBB protein  binding           16453347
-    hbb     HBB         hemoglobin, beta  3043    2,6-diisocyanatotoluene  C026942     91-08-7  Cavia porcellus  10141       2,6-diisocyanatotoluene binds to HBB protein        binding           8728499
-    etc.
-
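-The output is plain tab-separated text, so it is easy to post-process.  A
-minimal sketch (the file name and the 'binding' filter are assumptions, not
-part of the tool) that keeps only the binding interactions from the second
-example above::
-
-    #!/usr/bin/env perl
-    use strict;
-    use warnings;
-
-    open(my $fh, '<', 'ctd_output.tab') or die "Couldn't open ctd_output.tab, $!\n";
-    my $header = <$fh>;
-    print $header;
-    while (<$fh>) {
-        chomp;
-        my @f = split(/\t/);
-        # InteractionTypes is the 11th column in the example above
-        print "$_\n" if defined $f[10] and $f[10] eq 'binding';
-    }
-    close $fh;
-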
------
-
-**Reference**
-
-Davis AP, Murphy CG, Saraceni-Richards CA, Rosenstein MC, Wiegers TC, Mattingly CJ. (2009)
-Comparative Toxicogenomics Database: a knowledgebase and discovery tool for
-chemical-gene-disease networks.
-Nucleic Acids Res. 37(Database issue):D786-92. Epub 2008 Sep 9.
-
-  </help>
-</tool>
--- a/tools/human_genome_variation/disease_ontology_gene_fuzzy_selector.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-##################################################################
-# Select genes that are associated with the diseases listed in the
-# disease ontology.
-# ontology: http://do-wiki.nubic.northwestern.edu/index.php/Main_Page
-# gene associations by FunDO: http://projects.bioinformatics.northwestern.edu/do_rif/
-# Sept 2010, switch to doLite
-# input: build outfile sourceFileLoc.loc term or partial term
-##################################################################
-
-if (!@ARGV or @ARGV < 3) { 
-   print "usage: disease_ontology_gene_selector.pl build outfile.txt sourceFile.loc [list of terms]\n";
-   exit;
-}
-
-my $build = shift @ARGV;
-my $out = shift @ARGV;
-my $in = shift @ARGV;
-my $term = shift @ARGV;
-$term =~ s/^'//; #remove quotes protecting from shell
-$term =~ s/'$//; 
-my $data;
-open(LOC, $in) or die  "Couldn't open $in, $!\n";
-while (<LOC>) {
-   chomp;
-   if (/^\s*#/) { next; }
-   my @f = split(/\t/);
-   if ($f[0] eq $build) { 
-      if ($f[1] eq 'disease associated genes') { 
-         $data = $f[2]; 
-      }
-   }
-}
-close LOC or die "Couldn't close $in, $!\n";
-if (!$data) { 
-   print "Error $build not found in $in\n";
-   exit; 
-}
-if (!defined $term) { 
-   print "No disease term entered\n";
-   exit;
-}
-
-#start with just fuzzy term matches
-open(OUT, ">", $out) or die "Couldn't open $out, $!\n";
-open(FH, $data) or die "Couldn't open data file $data, $!\n";
-$term =~ s/\s+/|/g; #use OR between words
-while (<FH>) {
-   chomp;
-   my @f = split(/\t/); #chrom start end strand geneName geneID disease
-   if ($f[6] =~ /($term)/i) { 
-      print OUT join("\t", @f), "\n";
-   }elsif ($term eq 'disease') { #print all with disease
-      print OUT join("\t", @f), "\n";
-   }
-}
-close FH or die "Couldn't close data file $data, $!\n";
-close OUT or die "Couldn't close $out, $!\n";
-
-exit;
--- a/tools/human_genome_variation/freebayes.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,124 +0,0 @@
-<?xml version="1.0"?>
-<tool id="freebayes_wrapper" name="Call SNPS with Freebayes" version="0.5.0">
-  <requirements>
-    <requirement type="package">freebayes</requirement>
-  </requirements>
-  <description>Bayesian genetic variant detector</description>
-  <command>
-    ln -s $reference localref.fa;
-    ln -s $bamfile localbam.bam;
-    samtools faidx localref.fa;
-    samtools sort localbam.bam localbam.sorted;
-    mv localbam.sorted.bam localbam.bam;
-    samtools index localbam.bam;
-    freebayes --fasta-reference localref.fa localbam.bam --vcf $output
-            #if $params.source_select == "full":
-                $params.showRefRepeats
-                -T $params.theta
-                -p $params.ploidy
-                $params.pooled
-                $params.mnps
-                $params.nosnps
-                -n $params.bestAlleles
-                $params.allAlleles
-                $params.duplicateReads
-                -M $params.refMapQuality
-                $params.ignoreRefAllele
-                $params.haploidReference
-                -m $params.minMapQuality
-                -q $params.minBaseQuality
-                $params.noFilters
-                -x $params.indelExclusionWindow
-                <!-- -D $readDependenceFactor -->
-                -V $params.diffusionPriorScalar
-                -W $params.postIntegBandwidth
-                -Y $params.postIntegBanddepth
-                -F $params.minAltFraction
-                -C $params.minAltCount
-                -G $params.minAltTotal
-                --min-coverage $params.minCoverage
-            #end if
-  </command>
-  <inputs>
-    <param format="fasta" name="reference" type="data" metadata_name="dbkey" label="Reference File"/>
-    <param format="bam" name="bamfile" type="data" label="Bam Alignment File"/>
-    <conditional name="params">
-      <param name="source_select" type="select" label="Freebayes Settings to Use" help="For straight forward mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
-        <option value="pre_set">Commonly Used</option>
-        <option value="full">Full Parameter List</option>
-      </param>
-      <when value="pre_set"/>
-      <when value="full">
-        <param name="indels" type="select" label="Include insertion and deletion alleles in the analysis">
-          <option value="">No</option>
-          <option value="-i -N --report-all-alternates --left-align-indels">Yes</option>
-        </param>
-        <param name="theta" size="5" type="float" value="0.001" label="Theta" help="The expected mutation rate or pairwise nucleotide diversity among the population under analysis.  This serves as the single parameter to the Ewens Sampling Formula prior model"/>
-        <param name="showRefRepeats" type="select" label="Show Reference Repeats" help="Calculate and show information about reference repeats in the VCF output">
-          <option value="">No</option>
-          <option value="-_">Yes</option>
-        </param>
-        <param name="ploidy" size="5" type="integer" value="2" label="Ploidy" help="Sets the default ploidy for the analysis"/>
-        <param name="pooled" type="select" label="Pooled" help="Assume that samples result from pooled sequencing. When using this flag, set --ploidy to the number of alleles in each sample">
-          <option value="">No</option>
-          <option value="-J">Yes</option>
-        </param>
-        <param name="mnps" type="select" label="Include multi-nuceotide polymorphisms in the analysis">
-          <option value="">No</option>
-          <option value="--mnps">Yes</option>
-        </param>
-        <param name="nosnps" type="select" label="Ignore SNP alleles">
-          <option value="">No</option>
-          <option value="--no-snps">Yes</option>
-        </param>
-        <param name="duplicateReads" type="select" label="Include duplicate-marked alignments in the analysis">
-          <option value="">No</option>
-          <option value="--use-duplicate-reads">Yes</option>
-        </param>
-        <param name="bestAlleles" size="5" type="integer" value="2" label="Use Best N Alleles" help="Evaluate only the best N alleles, ranked by sum of supporting quality scores"/>
-        <param name="allAlleles" type="select" label="Evaluate all possible alleles">
-          <option value="">No</option>
-          <option value="--use-all-alleles">Yes</option>
-        </param>
-        <param name="refMapQuality" size="5" type="integer" value="100" label="Assign mapping quality of Q to the reference allele at each site"/>
-        <param name="refBaseQuality" size="5" type="integer" value="60" label="Reference Base Quality" help="Assign a base quality of Q to the reference allele at each site"/>
-        <param name="minMapQuality" size="5" type="integer" value="10" label="Minimum Mapping Quality" help="Exclude alignments from analysis if they have a mapping quality less than Q"/>
-        <param name="minBaseQuality" size="5" type="integer" value="5" label="Minimum Base Quality" help="Exclude alleles from analysis if their supporting base quality is less than Q"/>
-        <param name="indelExclusionWindow" size="5" type="integer" value="0" label="Indel Exclusion Window" help="Ignore portions of alignments N bases from a putative insertion or deletion allele"/>
-        <param name="ignoreRefAllele" type="select" label="Ignore Reference Allele" help="By default, the reference allele is considered as another sample.  This flag excludes it from the analysis">
-          <option value="">No</option>
-          <option value="--ignore-reference-allele">Yes</option>
-        </param>
-        <param name="haploidReference" type="select" label="Haploid Reference" help="If using the reference sequence as a sample, consider it to be haploid">
-          <option value="">No</option>
-          <option value="--haploid-reference">Yes</option>
-        </param>
-        <param name="noFilters" type="select" label="No Filters" help="Do not use any input base and mapping quality filters. Equivalent to -m 0 -q 0 -R 0 -S 0">
-          <option value="">No</option>
-          <option value="--no-filters">Yes</option>
-        </param>
-        <!-- <param name="readDependenceFactor" size="5" type="float" value="0.9" label="Read Dependence Factor" help="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations"/> -->
-        <param name="diffusionPriorScalar" size="5" type="float" value="1" label="Diffusion Prior Scalar" help="Downgrade the significance of P(genotype combo | allele frequency) by taking the Nth root of this component of the prior"/>
-        <param name="postIntegBandwidth" size="5" type="integer" value="2" label="Posterior Integratoin Bandwidth" help="Integrate all genotype combinations in our posterior space which lie no more than N steps from the most likely combination in terms of data likelihoods, taking the N steps from the most to least likely genotype for each individual"/>
-        <param name="postIntegBanddepth" size="5" type="integer" value="2" label="Posterior Integratoin Banddepth" help="Generate all genotype combinations for which up to this number of samples have up to their -W'th worst genotype according to data likelihood"/>
-        <param name="minAltFraction" size="5" type="integer" value="0" label="Minimum Alternative Fraction" help="Require at least this fraction of observations supporting an alternate allele within a single individual in the in order to evaluate the position"/>
-        <param name="minAltCount" size="5" type="integer" value="1" label="Minimum Alternative Count" help="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position"/>
-        <param name="minAltTotal" size="5" type="integer" value="1" label="Minimum Alternative Total" help="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis"/>
-        <param name="minCoverage" size="5" type="integer" value="0" label="Minimum Coverage" help="Require at least this coverage to process a site"/>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="vcf" name="output" metadata_source="reference" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="reference" ftype="fasta" value="mosaik_test_ref.fasta"/>
-      <param name="bamfile" ftype="bam" value="freebayes_in.bam"/>
-      <param name="source_select" value="pre_set"/>
-      <output name="output" file="freebayes_out.vcf" lines_diff="4"/>
-    </test>
-  </tests>
-  <help>
-This tool uses FreeBayes to call SNPs, given a reference sequence and a BAM alignment file.
-  </help>
-</tool>
--- a/tools/human_genome_variation/funDo.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-<tool id="hgv_funDo" name="FunDO" version="1.0.0">
-  <description>human genes associated with disease terms</description>
-
-  <command interpreter="perl">
-    disease_ontology_gene_fuzzy_selector.pl $build $out_file1 ${GALAXY_DATA_INDEX_DIR}/funDo.loc '$term'
-  </command>
-
-  <inputs>
-    <param name="build" type="select" label="Database build">
-      <options from_file="funDo.loc">
-        <column name="name" index="0"/>
-        <column name="value" index="0"/>
-        <filter type="unique_value" column="0"/>
-      </options>
-    </param>
-    <param name="term" size="40" type="text" label="Disease term(s)" />
-  </inputs>
-
-  <outputs>
-    <data format="interval" name="out_file1">
-    </data>
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="term" value="lung"/>
-      <param name="build" value="hg18"/>
-      <output name="out_file1" file="funDo_output1.interval" />
-    </test>
-  </tests>
-
-  <help>
-**Dataset formats**
-
-There is no input dataset.  The output is in interval_ format.
-
-.. _interval: ./static/formatHelp.html#interval
-
------
-
-**What it does**
-
-This tool searches the disease-term field of the DOLite mappings
-used by the FunDO project and returns a set of genes that 
-are associated with terms matching the specified pattern.  (This is the
-reverse of what FunDO's own server does.)
-
-The search is case insensitive, and selects terms that contain any of
-the given words, either exactly or within a longer word (e.g. "nemia"
-selects not only "anemia", but also "hyperglycinemia", "tyrosinemias",
-and many other things).  Multiple words should be separated by spaces,
-not commas.  As a special case, entering the word "disease" returns all
-genes associated with any disease, even if that word does not actually
-appear in the term field.
-
-Website: http://django.nubic.northwestern.edu/fundo/
-
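-The matching rule described above amounts to one case-insensitive regular
-expression with the words OR'd together.  A minimal stand-alone sketch of just
-that rule (the term and the disease strings are made-up examples)::
-
-    #!/usr/bin/env perl
-    use strict;
-    use warnings;
-
-    my $term = 'lung carcinoma';
-    (my $pattern = $term) =~ s/\s+/|/g;    # OR between words: lung|carcinoma
-    foreach my $disease ('Carcinoma', 'Lung cancer', 'anemia') {
-        print "$disease matches\n" if $disease =~ /($pattern)/i;
-    }
-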
------
-
-**Example**
-
-Typing:: 
-
-    carcinoma
-
-results in::
-
-    1.     2.         3.         4. 5.       6.     7.
-    chr11  89507465   89565427   +  NAALAD2  10003  Adenocarcinoma
-    chr15  50189113   50192264   -  BCL2L10  10017  Carcinoma
-    chr7   150535855  150555250  -  ABCF2    10061  Clear cell carcinoma
-    chr7   150540508  150555250  -  ABCF2    10061  Clear cell carcinoma
-    chr10  134925911  134940397  -  ADAM8    101    Adenocarcinoma
-    chr10  134925911  134940397  -  ADAM8    101    Adenocarcinoma
-    etc.
-
-where the column contents are as follows::
-
- 1. chromosome name
- 2. start position of the gene
- 3. end position of the gene
- 4. strand
- 5. gene name
- 6. Entrez Gene ID
- 7. disease term
-
------
-
-**References**
-
-Du P, Feng G, Flatow J, Song J, Holko M, Kibbe WA, Lin SM. (2009)
-From disease ontology to disease-ontology lite: statistical methods to adapt a general-purpose
-ontology for the test of gene-ontology associations.
-Bioinformatics. 25(12):i63-8.
-
-Osborne JD, Flatow J, Holko M, Lin SM, Kibbe WA, Zhu LJ, Danila MI, Feng G, Chisholm RL. (2009)
-Annotating the human genome with Disease Ontology.
-BMC Genomics. 10 Suppl 1:S6.
-
-  </help>
-</tool>
--- a/tools/human_genome_variation/gpass.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-use File::Basename;
-use File::Temp qw/ tempfile /;
-
-$ENV{'PATH'} .= ':' . dirname($0);
-
-#this is a wrapper for gpass that converts a linkage pedigree file to input 
-#for this program
-
-my($map, $ped, $out, $fdr) = @ARGV;
-
-if (!$map or !$ped or !$out or !$fdr) { die "missing args\n"; }
-
-my($fh, $name) = tempfile();
-#by default this file is removed when these variable go out of scope
-print $fh "map=$map ped=$ped\n";
-close $fh;  #converter will overwrite, just keep name
-
-#run converter 
-system("lped_to_geno.pl $map $ped > $name") == 0
-	or die "system lped_to_geno.pl $map $ped > $name failed\n";
-
-#system("cp $name tmp.middle");
-
-#run GPASS
-system("gpass $name -o $out -fdr $fdr 1>/dev/null") == 0
-	or die "system gpass $name -o $out -fdr $fdr, failed\n";
-
-#merge SNP data with results
-merge();
-
-exit;
-
-########################################
-
-#merge the input and output files so have SNP data with result
-sub merge {
-   open(FH, $out) or die "Couldn't open $out, $!\n";
-   my %res;
-   my @ind;
-   while (<FH>) {
-      chomp;
-      my $line = $_;
-      if ($line =~ /^(\d+)/) { $res{$1} = $line; push(@ind, $1); }
-      else { $res{'index'} = $line; }
-   }
-   close FH;
-   if (!@ind) { return; } #no results, leave alone
-   @ind = sort { $a <=> $b } @ind;
-   $res{'index'} =~ s/Index/#ID\tchr\tposition/;
-   #read input file to get SNP data
-   open(FH, $name) or die "Couldn't open $name, $!\n";
-   my $i = 0; #index is 0 based not counting header line
-   my $c = shift @ind;
-   while (<FH>) {
-      chomp; 
-      if (/^ID/) { next; }
-      my @f = split(/\s+/);
-      if ($i == $c) { 
-         $res{$i} =~ s/^$i/$f[0]\t$f[1]\t$f[2]/;
-         if (!@ind) { last; }
-         $c = shift @ind;
-      }
-      $i++;      
-   }
-   close FH;
-   #now reprint results with SNP data included
-   open(FH, ">", $out) or die "Couldn't write to $out, $!\n";
-   print FH $res{'index'}, "\n";
-   delete $res{'index'};
-   foreach $i (sort { $a <=> $b } keys %res) {
-      print FH $res{$i}, "\n";
-   }
-   close FH;
-}
-
--- a/tools/human_genome_variation/gpass.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-<tool id="hgv_gpass" name="GPASS" version="1.0.0">
-  <description>significant single-SNP associations in case-control studies</description>
-
-  <command interpreter="perl">
-    gpass.pl ${input1.extra_files_path}/${input1.metadata.base_name}.map ${input1.extra_files_path}/${input1.metadata.base_name}.ped $output $fdr
-  </command>
-
-  <inputs>
-    <param name="input1" type="data" format="lped" label="Dataset"/>
-    <param name="fdr" type="float" value="0.05" label="FDR"/>
-  </inputs>
-
-  <outputs>
-    <data name="output" format="tabular" />
-  </outputs>
-
-  <requirements>
-    <requirement type="package">gpass</requirement>
-  </requirements>
-
-  <!-- we need to be able to set the seed for the random number generator
-  <tests>
-    <test>
-      <param name='input1' value='gpass_and_beam_input' ftype='lped' >
-        <metadata name='base_name' value='gpass_and_beam_input' />
-        <composite_data value='gpass_and_beam_input.ped' />
-        <composite_data value='gpass_and_beam_input.map' />
-        <edit_attributes type='name' value='gpass_and_beam_input' />
-      </param>
-      <param name="fdr" value="0.05" />
-      <output name="output" file="gpass_output.txt" />
-    </test>
-  </tests>
-  -->
-
-  <help>
-**Dataset formats**
-
-The input dataset must be in lped_ format, and the output is tabular_.
-(`Dataset missing?`_)
-
-.. _lped: ./static/formatHelp.html#lped
-.. _tabular: ./static/formatHelp.html#tab
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-GPASS (Genome-wide Poisson Approximation for Statistical Significance)
-detects significant single-SNP associations in case-control studies at a user-specified FDR.  Unlike previous methods, this tool can accurately approximate the genome-wide significance and FDR of SNP associations, while adjusting for millions of multiple comparisons, within seconds or minutes.
-
-The program has two main functionalities:
-
-1. Detect significant single-SNP associations at a user-specified false
-   discovery rate (FDR).
-
-   *Note*: a "typical" definition of FDR could be
-            FDR = E(# of false positive SNPs / # of significant SNPs)
-
-   However, this definition is inappropriate for association mapping, since
-   SNPs are highly correlated.  Here the FDR is defined differently, to account
-   for SNP correlations, and thus yields a proper FDR in terms of the
-   "proportion of false positive loci".
-
-2. Approximate the significance of a list of candidate SNPs, adjusting for
-   multiple comparisons. If you have isolated a few SNPs of interest and want 
-   to know their significance in a GWAS, you can supply the GWAS data and let 
-   the program specifically test those SNPs.
-
-
-*Also note*: the SNPs in a study can be neither too few nor too clustered in a
-local region.  A few hundred SNPs, or tens of SNPs spread across different
-regions, will be fine.  The sample size cannot be too small either; around 100
-or more individuals (cases + controls combined) will be fine.  Otherwise, use
-permutation.
-
------
-
-**Example**
-
-- input map file::
-
-    1  rs0  0  738547
-    1  rs1  0  5597094
-    1  rs2  0  9424115
-    etc.
-
-- input ped file::
-
-    1 1 0 0 1  1  G G  A A  A A  A A  A A  A G  A A  G G  G G  A A  G G  G G  G G  A A  A A  A G  A A  G G  A G  A G  A A  G G  A A  G G  A A  G G  A G  A A  G G  A A  G G  A A  A G  A G  G G  A G  G G  G G  A A  A G  A A  G G  G G  G G  G G  A G  A A  A A  A A  A A
-    1 1 0 0 1  1  G G  A G  G G  A A  A A  A G  A A  G G  G G  G G  A A  G G  A G  A G  G G  G G  A G  G G  A G  A A  G G  A G  G G  A A  G G  G G  A G  A G  G G  A G  A A  A A  G G  G G  A G  A G  G G  A G  A A  A A  A G  G G  A G  G G  A G  G G  G G  A A  G G  A G
-    etc.
-
-- output dataset, showing significant SNPs and their p-values and FDR::
-
-    #ID   chr   position   Statistics  adj-Pvalue  FDR
-    rs35  chr1  136606952  4.890849    0.991562    0.682138
-    rs36  chr1  137748344  4.931934    0.991562    0.795827
-    rs44  chr2  14423047   7.712832    0.665086    0.218776
-    etc.
-
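-Because the output is tabular, the most significant rows are easy to extract.
-For example, to keep only SNPs whose FDR column is at or below 0.25 (the file
-name and the cutoff are assumptions, not part of the tool)::
-
-    perl -ane 'print if $. == 1 or (@F >= 6 and $F[5] <= 0.25)' gpass_output.tab
-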
------
-
-**Reference**
-
-Zhang Y, Liu JS. (2010)
-Fast and accurate significance approximation for genome-wide association studies.
-Submitted.
-
-  </help>
-</tool>
--- a/tools/human_genome_variation/hilbertvis.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,109 +0,0 @@
-#!/usr/bin/env bash
-
-input_file="$1"
-output_file="$2"
-chromInfo_file="$3"
-chrom="$4"
-score_col="$5"
-hilbert_curve_level="$6"
-summarization_mode="$7"
-chrom_col="$8"
-start_col="$9"
-end_col="${10}"
-strand_col="${11}"
-
-## use the first sequence if the chrom field is empty
-if [ -z "$chrom" ]; then
-    chrom=$( head -n 1 "$input_file" | cut -f$chrom_col )
-fi
-
-## get sequence length 
-if [ ! -r "$chromInfo_file" ]; then
-    echo "Unable to read chromInfo_file $chromInfo_file" 1>&2
-    exit 1
-fi
-
-chrom_len=$( awk '$1 == chrom {print $2}' chrom=$chrom $chromInfo_file )
-
-## error if we can't find the chrom_len
-if [ -z "$chrom_len" ]; then
-    echo "Can't find length for sequence \"$chrom\" in chromInfo_file $chromInfo_file" 1>&2
-    exit 1
-fi
-
-## make sure chrom_len is positive
-if [ $chrom_len -le 0 ]; then
-    echo "sequence \"$chrom\" length $chrom_len <= 0" 1>&2
-    exit 1
-fi
-
-## modify R script depending on the inclusion of a score column, strand information
-input_cols="\$${start_col}, \$${end_col}"
-col_types='beg=0, end=0, strand=""'
-
-# if strand_col == 0 (strandCol metadata is not set), assume everything's on the plus strand
-if [ $strand_col -ne 0 ]; then
-    input_cols="${input_cols}, \$${strand_col}"
-else
-    input_cols="${input_cols}, \\\"+\\\""
-fi
-
-# set plot value (either from data or use a constant value)
-if [ $score_col -eq -1 ]; then
-    value=1
-else
-    input_cols="${input_cols}, \$${score_col}"
-    col_types="${col_types}, score=0"
-    value='chunk$score[i]'
-fi
-
-R --vanilla &> /dev/null <<endOfCode
-library(HilbertVis);
-
-chrom_len <- ${chrom_len};
-chunk_size <- 1000;
-interval_count <- 0;
-invalid_strand <- 0;
-
-awk_cmd <- paste(
-  "awk 'BEGIN{FS=\"\t\";OFS=\"\t\"}",
-    "\$${chrom_col} == \"${chrom}\"",
-      "{print ${input_cols}}' ${input_file}"
-);
-
-col_types <- list(${col_types});
-vec <- vector(mode="numeric", length=chrom_len);
-conn <- pipe(description=awk_cmd, open="r");
-
-repeat {
-  chunk <- scan(file=conn, what=col_types, sep="\t", nlines=chunk_size, quiet=TRUE);
-
-  if ((rows <- length(chunk\$beg)) == 0)
-        break;
-
-  interval_count <- interval_count + rows;
-
-  for (i in 1:rows) {
-    if (chunk\$strand[i] == '+') {
-      start <- chunk\$beg[i] + 1;
-      stop <- chunk\$end[i];
-    } else if (chunk\$strand[i] == '-') {
-      start <- chrom_len - chunk\$end[i] - 1;
-      stop <- chrom_len - chunk\$beg[i];
-    } else {
-      invalid_strand <- invalid_strand + 1;
-      interval_count <- interval_count - 1;
-      next;
-    }
-    vec[start:stop] <- ${value};
-  }
-}
-
-close(conn);
-
-hMat <- hilbertImage(vec, level=$hilbert_curve_level, mode="$summarization_mode");
-pdf(file="$output_file", onefile=TRUE, width=8, height=10.5, paper="letter");
-showHilbertImage(hMat);
-dev.off();
-endOfCode
-
--- a/tools/human_genome_variation/hilbertvis.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-<tool id="hgv_hilbertvis" name="HVIS" version="1.0.0">
-  <description>visualization of genomic data with the Hilbert curve</description>
-
-  <command interpreter="bash">
-    hilbertvis.sh $input $output $chromInfo "$chrom" $plot_value.score_col $level $mode
-    #if isinstance( $input.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__)
-      1 4 5 7
-    #else
-      ${input.metadata.chromCol} ${input.metadata.startCol} ${input.metadata.endCol} ${input.metadata.strandCol}
-    #end if
-  </command>
-
-  <inputs>
-    <param name="input" type="data" format="interval,gff" label="Dataset">
-      <validator type="unspecified_build"/>
-      <validator type="metadata" check="chromCol" message="chromCol missing"/>
-      <validator type="metadata" check="startCol" message="startCol missing"/>
-      <validator type="metadata" check="endCol" message="endCol missing"/>
-    </param>
-    <param name="chrom" type="text" label="Sequence to plot" help="Name of sequence (from the chromosome column in the dataset) to plot.  If left blank, the first sequence in the dataset will be plotted."/>
-    <conditional name="plot_value">
-      <param name="choice" type="select" label="Value to plot">
-        <option value="score" selected="true">Score column from dataset</option>
-        <option value="exist">Same value for each base (existence)</option>
-      </param>
-      <when value="score">
-        <param name="score_col" type="data_column" data_ref="input" numerical="true" label="Score column"/>
-      </when>
-      <when value="exist">
-        <param name="score_col" type="hidden" value="-1"/>
-      </when>
-    </conditional>
-    <param name="level" type="integer" value="9" label="Level" help="Level of Hilbert curve.  The resulting image will have 2&lt;sup&gt;level&lt;/sup&gt; by 2&lt;sup&gt;level&lt;/sup&gt; pixels.">
-      <validator type="in_range" min="1" message="The level must be an integer &gt;= 1."/>
-    </param>
-    <param name="mode" type="select" label="Summarization mode" help="Method used to determine a value for a point in the plot which covers multiple values in the input.">
-      <option value="max">Maximal value in each bin</option>
-      <option value="min">Minimal value in each bin</option>
-      <option value="absmax" selected="true">Maximal absolute value in each bin</option>
-      <option value="mean">Mean value of each bin</option>
-    </param>
-  </inputs>
-
-  <outputs>
-    <data name="output" format="pdf"/>
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" value="hvis_mkar_chr22.tab"/>
-      <param name="chrom" value="chr22"/>
-      <param name="choice" value="score"/>
-      <param name="score_col" value="15"/>
-      <param name="level" value="9"/>
-      <param name="mode" value="absmax"/>
-      <output name="output" file="hvis_mkar_chr22.pdf" compare="sim_size" delta="7168"/>
-    </test>
-  </tests>
-
-  <help>
-**Dataset formats**
-
-The input format is interval_, and the output is an image in PDF format.
-(`Dataset missing?`_)
-
-.. _interval: ./static/formatHelp.html#interval
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-HilbertVis uses the Hilbert space-filling curve to visualize the structure of
-position-dependent data.  It maps the traditional one-dimensional line
-visualization onto a two-dimensional square.  For example, here is a diagram
-showing the path of a level-2 Hilbert curve.
-
-.. image:: ./static/images/hilbertvisDiagram.png
-
-The shade of each pixel represents the value for the corresponding bin of
-consecutive genomic positions, calculated according to the specified
-summarization mode.  The pixels are arranged so that bins that are close
-to each other on the data vector are represented by pixels that are close
-to each other in the plot.  In particular, adjacent bins are mapped to
-adjacent pixels.  Hence, dark spots in a figure represent a peak; the area
-of the spot in the two-dimensional plot is proportional to the width of the
-peak in the one-dimensional data, and the darkness of the spot corresponds to
-the height of the peak.
-
-The input file is in interval format, and typically contains a column with
-scores or other numbers, such as conservation scores, SNP density, the
-coverage of aligned reads from ChIP-Seq data, etc.
-
-Website: http://www.ebi.ac.uk/huber-srv/hilbert/
-
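-For intuition about the mapping itself, below is a small stand-alone sketch of
-the classic conversion from a linear index to (x, y) on a Hilbert curve.  It
-illustrates the curve, and is not code taken from the HilbertVis package::
-
-    #!/usr/bin/env perl
-    use strict;
-    use warnings;
-
-    # map linear index $d to (x, y) on a curve filling an $n x $n grid
-    # ($n must be a power of 2)
-    sub d2xy {
-        my ($n, $d) = @_;
-        my ($x, $y) = (0, 0);
-        my $t = $d;
-        for (my $s = 1; $s < $n; $s *= 2) {
-            my $rx = 1 & int($t / 2);
-            my $ry = 1 & ($t ^ $rx);
-            if ($ry == 0) {              # rotate the sub-quadrant
-                if ($rx == 1) {
-                    $x = $s - 1 - $x;
-                    $y = $s - 1 - $y;
-                }
-                ($x, $y) = ($y, $x);
-            }
-            $x += $s * $rx;
-            $y += $s * $ry;
-            $t = int($t / 4);
-        }
-        return ($x, $y);
-    }
-
-    # print the 16 cells of a level-2 (4 x 4) curve in visiting order
-    foreach my $d (0 .. 15) {
-        my ($x, $y) = d2xy(4, $d);
-        printf "%2d -> (%d, %d)\n", $d, $x, $y;
-    }
-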
------
-
-**Examples**
-
-Here are some examples from the HilbertVis homepage, using ChIP-Seq data.
-
-.. image:: ./static/images/hilbertvis1.png
-
------
-
-.. image:: ./static/images/hilbertvis2.png
-
------
-
-**Reference**
-
-Anders S. (2009)
-Visualization of genomic data with the Hilbert curve.
-Bioinformatics. 25(10):1231-5. Epub 2009 Mar 17.
-
-  </help>
-</tool>
--- a/tools/human_genome_variation/ldtools.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,111 +0,0 @@
-<tool id="hgv_ldtools" name="LD" version="1.0.0">
-  <description>linkage disequilibrium and tag SNPs</description>
-
-  <command interpreter="bash">
-    ldtools_wrapper.sh rsquare=$rsquare freq=$freq input=$input output=$output
-  </command>
-
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Dataset"/>
-    <param name="rsquare" label="r&lt;sup&gt;2&lt;/sup&gt; threshold" type="float" value="0.64">
-      <validator type="in_range" message="rsquare must be in range [0.00, 1.00]" min="0.00" max="1.00" />
-    </param>
-    <param name="freq" label="Minimum allele frequency threshold" type="float" value="0.00">
-      <validator type="in_range" message="freq must be in range (0.00, 0.50]" min="0.00" max="0.50" />
-    </param>
-  </inputs>
-
-  <outputs>
-    <data format="tabular" name="output" />
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" value="ldInput1.txt" />
-      <param name="rsquare" value="0.64" />
-      <param name="freq" value="0.00" />
-      <output name="output" file="ldOutput1.txt" />
-    </test>
-  </tests>
-
-  <help>
-**Dataset formats**
-
-The input and output datasets are tabular_.
-(`Dataset missing?`_)
-
-.. _tabular: ./static/formatHelp.html#tab
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-This tool can be used to analyze the patterns of linkage disequilibrium
-(LD) between polymorphic sites in a locus.  SNPs are grouped based on the
-threshold level of LD as measured by r\ :sup:`2` (regardless of genomic
-position), and a representative "tag SNP" is reported for each group.
-The other SNPs in the group are in LD with the tag SNP, but not necessarily
-with each other.
-
-The underlying algorithm is the same as the one used in ldSelect (Carlson
-et al. 2004).  However, this tool is implemented to be much faster and more
-efficient than ldSelect.
-
-The input is a tabular file with genotype information for each individual
-at each SNP site, in exactly four columns: site ID, sample ID, and the
-two allele nucleotides.
-
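-The grouping is essentially a greedy set cover: repeatedly pick the SNP whose
-LD neighborhood covers the most not-yet-covered SNPs, and report it as the tag
-for that group.  A toy sketch of that idea (the SNP names and pairwise
-relationships are invented, and this is not the tool's actual implementation)::
-
-    #!/usr/bin/env perl
-    use strict;
-    use warnings;
-
-    # for each SNP, the set of SNPs it exceeds the r^2 threshold with
-    my %ld = (
-        rs1 => { rs2 => 1, rs3 => 1 },
-        rs2 => { rs1 => 1 },
-        rs3 => { rs1 => 1 },
-        rs4 => {},
-    );
-
-    my %covered;
-    while (my @left = grep { !$covered{$_} } keys %ld) {
-        # pick the SNP whose neighborhood covers the most uncovered SNPs
-        my ($tag) = sort {
-            scalar(grep { !$covered{$_} } keys %{$ld{$b}})
-                <=>
-            scalar(grep { !$covered{$_} } keys %{$ld{$a}})
-                or $a cmp $b
-        } @left;
-        my @group = sort grep { !$covered{$_} } keys %{$ld{$tag}};
-        print join("\t", $tag, @group ? join(",", @group) : "-"), "\n";
-        $covered{$_} = 1 for $tag, @group;
-    }
-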
------
-
-**Example**
-
-- input file::
-
-    rs2334386  NA20364  G  T
-    rs2334386  NA20363  G  G
-    rs2334386  NA20360  G  G
-    rs2334386  NA20359  G  G
-    rs2334386  NA20358  G  G
-    rs2334386  NA20356  G  G
-    rs2334386  NA20357  G  G
-    rs2334386  NA20350  G  G
-    rs2334386  NA20349  G  G
-    rs2334386  NA20348  G  G
-    rs2334386  NA20347  G  G
-    rs2334386  NA20346  G  G
-    rs2334386  NA20345  G  G
-    rs2334386  NA20344  G  G
-    rs2334386  NA20342  G  G
-    etc.
-
-- output file::
-
-    rs2238748  rs2793064,rs6518516,rs6518517,rs2283641,rs5993533,rs715590,rs2072123,rs2105421,rs2800954,rs1557847,rs807750,rs807753,rs5993488,rs8138035,rs2800980,rs2525079,rs5992353,rs712966,rs2525036,rs807743,rs1034727,rs807744,rs2074003
-    rs2871023  rs1210715,rs1210711,rs5748189,rs1210709,rs3788298,rs7284649,rs9306217,rs9604954,rs1210703,rs5748179,rs5746727,rs5748190,rs5993603,rs2238766,rs885981,rs2238763,rs5748165,rs9605996,rs9606001,rs5992398
-    rs7292006  rs13447232,rs5993665,rs2073733,rs1057457,rs756658,rs5992395,rs2073760,rs739369,rs9606017,rs739370,rs4493360,rs2073736
-    rs2518840  rs1061325,rs2283646,rs362148,rs1340958,rs361956,rs361991,rs2073754,rs2040771,rs2073740,rs2282684
-    rs2073775  rs10160,rs2800981,rs807751,rs5993492,rs2189490,rs5747997,rs2238743
-    rs5747263  rs12159924,rs2300688,rs4239846,rs3747025,rs3747024,rs3747023,rs2300691
-    rs433576   rs9605439,rs1109052,rs400509,rs401099,rs396012,rs410456,rs385105
-    rs2106145  rs5748131,rs2013516,rs1210684,rs1210685,rs2238767,rs2277837
-    rs2587082  rs2257083,rs2109659,rs2587081,rs5747306,rs2535704,rs2535694
-    rs807667   rs2800974,rs756651,rs762523,rs2800973,rs1018764
-    rs2518866  rs1206542,rs807467,rs807464,rs807462,rs712950
-    rs1110661  rs1110660,rs7286607,rs1110659,rs5992917,rs1110662
-    rs759076   rs5748760,rs5748755,rs5748752,rs4819925,rs933461
-    rs5746487  rs5992895,rs2034113,rs2075455,rs1867353
-    rs5748212  rs5746736,rs4141527,rs5748147,rs5748202
-    etc.
-
------
-
-**Reference**
-
-Carlson CS, Eberle MA, Rieder MJ, Yi Q, Kruglyak L, Nickerson DA. (2004)
-Selecting a maximally informative set of single-nucleotide polymorphisms for
-association analyses using linkage disequilibrium.
-Am J Hum Genet. 74(1):106-20. Epub 2003 Dec 15.
-
-  </help>
-</tool>
--- a/tools/human_genome_variation/ldtools_wrapper.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-#!/usr/bin/env bash
-#
-# Galaxy wrapper for Aakrosh Ratan's ldtools
-# 
-
-set -e
-
-export PATH=$PATH:$(dirname $0)
-
-## pagetag options
-input=
-rsquare=0.64
-freq=0.00
-sample=###
-
-## senatag options
-excluded=###
-required=###
-output=
-
-until [ $# -eq 0 ]
-do
-  case $1 in
-    rsquare=*)
-      rsquare=${1#rsquare=}
-      ;;
-    freq=*)
-      freq=${1#freq=}
-      ;;
-    input=*)
-      input=${1#input=}
-      ;;
-    output=*)
-      output=${1#output=}
-      ;;
-    *)
-      if [ -z "$new_args" ]; then
-        new_args=$1
-      else
-        new_args="$new_args $1"
-      fi
-      ;;
-  esac
-
-  shift
-done
-
-## run pagetag
-## with "set -e" in effect, a bare failing command would exit the script before
-## the status check, so test the command directly
-if ! pagetag.py --rsquare $rsquare --freq $freq $input snps.txt neighborhood.txt &> /dev/null; then
-    echo "failed: pagetag.py --rsquare $rsquare --freq $freq $input snps.txt neighborhood.txt"
-    exit 1
-fi
-
-## run senatag
-if ! senatag.py neighborhood.txt snps.txt > $output 2> /dev/null; then
-    echo "failed: senatag.py neighborhood.txt snps.txt"
-    exit 1
-fi
-
-## cleanup
-rm -f snps.txt neighborhood.txt
-
--- a/tools/human_genome_variation/linkToDavid.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-###################################################
-# linkToDavid.pl
-# Generates a link to David for a list of gene IDs.
-###################################################
- 
-if (!@ARGV or scalar @ARGV != 4) {
-   print "usage: linkToDavid.pl infile.tab 1basedCol idType outfile\n";
-   exit 1;
-}
-
-my $in = shift @ARGV;
-my $col = shift @ARGV;
-my $type = shift @ARGV;
-my $out = shift @ARGV;
-
-if ($col < 1) { 
-   print "ERROR the column number should be 1 based counting\n";
-   exit 1;
-}
-my @gene;
-open(FH, $in) or die "Couldn't open $in, $!\n";
-while (<FH>) {
-   chomp;
-   my @f = split(/\t/);
-   if (scalar @f < $col) {
-      print "ERROR there is no column $col in $in\n";
-      exit 1;
-   }
-   if ($f[$col-1]) { push(@gene, $f[$col-1]); }
-}
-close FH or die "Couldn't close $in, $!\n";
-
-if (scalar @gene > 400) {
-   print "ERROR David only allows 400 genes submitted via a link\n";
-   exit 1;
-}
- 
-my $link = 'http://david.abcc.ncifcrf.gov/api.jsp?type=TYPE&ids=GENELIST&tool=summary';
-
-my $g = join(",", @gene);
-$link =~ s/GENELIST/$g/;
-$link =~ s/TYPE/$type/;
-#print output
-if (length $link > 2048) { 
-   print "ERROR too many genes to fit in URL, please select a smaller set\n";
-   exit;
-}
-open(FH, ">", $out) or die "Couldn't open $out, $!\n";
-print FH "<html><head><title>DAVID link</title></head><body>\n",
-      '<A TARGET=_BLANK HREF="', $link, '">click here to send of identifiers to DAVID</A>', "\n",
-      '</body></html>', "\n";
-close FH or die "Couldn't close $out, $!\n";
-
-exit;
--- a/tools/human_genome_variation/linkToDavid.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,110 +0,0 @@
-<tool id="hgv_david" name="DAVID" version="1.0.0">
-  <description>functional annotation for a list of genes</description>
-
-  <command interpreter="perl">
-    linkToDavid.pl $input $numerical_column $type $out_file1
-  </command>
-
-  <inputs>
-    <param name="input" type="data" format="tabular" label="Dataset" />
-    <param name="numerical_column" type="data_column" data_ref="input" label="Column with identifiers" />
-    <param name="type" label="Identifier type" type="select">
-      <option value="AFFYMETRIX_3PRIME_IVT_ID">AFFYMETRIX_3PRIME_IVT_ID</option>
-      <option value="AFFYMETRIX_EXON_GENE_ID">AFFYMETRIX_EXON_GENE_ID</option>
-      <option value="AFFYMETRIX_SNP_ID">AFFYMETRIX_SNP_ID</option>
-      <option value="AGILENT_CHIP_ID">AGILENT_CHIP_ID</option>
-      <option value="AGILENT_ID">AGILENT_ID</option>
-      <option value="AGILENT_OLIGO_ID">AGILENT_OLIGO_ID</option>
-      <option value="ENSEMBL_GENE_ID">ENSEMBL_GENE_ID</option>
-      <option value="ENSEMBL_TRANSCRIPT_ID">ENSEMBL_TRANSCRIPT_ID</option>
-      <option value="ENTREZ_GENE_ID">ENTREZ_GENE_ID</option>
-      <option value="FLYBASE_GENE_ID">FLYBASE_GENE_ID</option>
-      <option value="FLYBASE_TRANSCRIPT_ID">FLYBASE_TRANSCRIPT_ID</option>
-      <option value="GENBANK_ACCESSION">GENBANK_ACCESSION</option>
-      <option value="GENPEPT_ACCESSION">GENPEPT_ACCESSION</option>
-      <option value="GENOMIC_GI_ACCESSION">GENOMIC_GI_ACCESSION</option>
-      <option value="PROTEIN_GI_ACCESSION">PROTEIN_GI_ACCESSION</option>
-      <option value="ILLUMINA_ID">ILLUMINA_ID</option>
-      <option value="IPI_ID">IPI_ID</option>
-      <option value="MGI_ID">MGI_ID</option>
-      <option value="GENE_SYMBOL" selected="true">GENE_SYMBOL</option>
-      <option value="PFAM_ID">PFAM_ID</option>
-      <option value="PIR_ACCESSION">PIR_ACCESSION</option>
-      <option value="PIR_ID">PIR_ID</option>
-      <option value="PIR_NREF_ID">PIR_NREF_ID</option>
-      <option value="REFSEQ_GENOMIC">REFSEQ_GENOMIC</option>
-      <option value="REFSEQ_MRNA">REFSEQ_MRNA</option>
-      <option value="REFSEQ_PROTEIN">REFSEQ_PROTEIN</option>
-      <option value="REFSEQ_RNA">REFSEQ_RNA</option>
-      <option value="RGD_ID">RGD_ID</option>
-      <option value="SGD_ID">SGD_ID</option>
-      <option value="TAIR_ID">TAIR_ID</option>
-      <option value="UCSC_GENE_ID">UCSC_GENE_ID</option>
-      <option value="UNIGENE">UNIGENE</option>
-      <option value="UNIPROT_ACCESSION">UNIPROT_ACCESSION</option>
-      <option value="UNIPROT_ID">UNIPROT_ID</option>
-      <option value="UNIREF100_ID">UNIREF100_ID</option>
-      <option value="WORMBASE_GENE_ID">WORMBASE_GENE_ID</option>
-      <option value="WORMPEP_ID">WORMPEP_ID</option>
-      <option value="ZFIN_ID">ZFIN_ID</option>
-    </param>
-  </inputs>
-
-  <outputs>
-    <data format="html" name="out_file1" />
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" ftype="tabular" value="linkToDavid.tabular" />
-      <param name="numerical_column" value="1" />
-      <param name="type" value="ENTREZ_GENE_ID" />
-      <output name="out_file1" file="linkToDavid_1.out" />
-    </test>
-  </tests>
-
-  <help>
- .. class:: infomark
-
-The list is limited to 400 IDs.
-
------
-
-**Dataset formats**
-
-The input dataset is in tabular_ format.  The output dataset is html_ with
-a link to the DAVID website as described below.
-(`Dataset missing?`_)
-
-.. _tabular: ./static/formatHelp.html#tab
-.. _html: ./static/formatHelp.html#html
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-This tool creates a link to the Database for Annotation,
-Visualization, and Integrated Discovery (DAVID) website at NIH,
-sending a list of IDs from the selected column of a tabular
-Galaxy dataset.  To follow the created link, click on the
-eye icon once the Galaxy tool has finished running.
-
-DAVID provides a comprehensive set of functional annotation tools
-to help investigators discover biological meaning behind large
-lists of genes.
-
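-The created link is just the DAVID API URL with the identifier type and
-a comma-separated list of IDs substituted in.  A minimal sketch of the
-same construction in Python (the IDs are made up for illustration; the
-400-ID and 2048-character limits are the checks the wrapper itself
-applies)::
-
-    ids = ["3630", "367", "1956"]   # illustrative Entrez Gene IDs
-    assert len(ids) <= 400          # DAVID's limit for linked lists
-    amp = chr(38)                   # the ampersand, avoided literally so this help stays valid XML
-    query = amp.join(["type=ENTREZ_GENE_ID",
-                      "ids=" + ",".join(ids),
-                      "tool=summary"])
-    link = "http://david.abcc.ncifcrf.gov/api.jsp?" + query
-    assert len(link) <= 2048        # keep the URL browser-safe
-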
------
-
-**References**
-
-Huang DW, Sherman BT, Lempicki RA. (2009) Systematic and integrative analysis
-of large gene lists using DAVID bioinformatics resources.
-Nat Protoc. 4(1):44-57.
-
-Dennis G, Sherman BT, Hosack DA, Yang J, Gao W, Lane HC, Lempicki RA. (2003)
-DAVID: database for annotation, visualization, and integrated discovery.
-Genome Biol. 4(5):P3. Epub 2003 Apr 3.
-
-  </help>
-</tool>
--- a/tools/human_genome_variation/linkToGProfile.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-###################################################
-# linkToGProfile.pl
-# Generates a link to g:Profiler for a list of gene IDs.
-# g:Profiler: a web-based toolset for functional profiling of gene lists from large-scale experiments (2007) NAR 35:W193-W200
-###################################################
- 
-if (!@ARGV or scalar @ARGV != 4) {
-   print "usage: linkToGProfile.pl infile.tab 1basedCol idType outfile\n";
-   exit 1;
-}
-
-my $in = shift @ARGV;
-my $col = shift @ARGV;
-my $type = shift @ARGV;
-my $out = shift @ARGV;
-
-if ($col < 1) { 
-   print "ERROR the column number should be 1 based counting\n";
-   exit 1;
-}
-my @gene;
-open(FH, $in) or die "Couldn't open $in, $!\n";
-while (<FH>) {
-   chomp;
-   my @f = split(/\t/);
-   if (scalar @f < $col) {
-      print "ERROR there is no column $col in $in\n";
-      exit 1;
-   }
-   if ($f[$col-1]) { push(@gene, $f[$col-1]); }
-}
-close FH or die "Couldn't close $in, $!\n";
- 
-my $link = 'http://biit.cs.ut.ee/gprofiler/index.cgi?organism=hsapiens&query=GENELIST&r_chr=1&r_start=start&r_end=end&analytical=1&domain_size_type=annotated&term=&significant=1&sort_by_structure=1&user_thr=1.00&output=png&prefix=TYPE';
-$link =~ s/TYPE/$type/;
-my $g = join("+", @gene);
-$link =~ s/GENELIST/$g/;
-#print output
-if (length $link > 2048) { 
-   print "ERROR too many genes to fit in URL, please select a smaller set\n";
-   exit;
-}
-open(FH, ">", $out) or die "Couldn't open $out, $!\n";
-print FH "<html><head><title>g:Profiler link</title></head><body>\n",
-      '<A TARGET=_BLANK HREF="', $link, '">click here to send list of identifiers to g:Profiler</A>', "\n",
-      '</body></html>', "\n";
-close FH or die "Couldn't close $out, $!\n";
-
-#also do link that prints text that could be pulled back into Galaxy?
-exit;
--- a/tools/human_genome_variation/linkToGProfile.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,83 +0,0 @@
-<tool id="hgv_linkToGProfile" name="g:Profiler" version="1.0.0">
-  <description>tools for functional profiling of gene lists</description>
-
-  <command interpreter="perl">
-    linkToGProfile.pl $input $numerical_column $type $out_file1
-  </command>
-
-  <inputs>
-    <param name="input" type="data" format="tabular" label="Dataset" />
-    <param name="numerical_column" type="data_column" data_ref="input" numerical="True" label="Column with identifiers" />
-    <param name="type" label="Identifier type" type="select">
-      <option value="ENTREZGENE_ACC" selected="true">Entrez Gene Acc</option>
-      <option value="MIM_MORBID">OMIM Morbid Map</option>
-      <option value="MIM_GENE">OMIM Gene ID</option>
-      <option value="AFFY_HUGENE_1_0_ST_V1">AFFY_HUGENE_1_0_ST_V1</option>
-      <option value="HGNC_AUTOMATIC_GENE_ACC">HGNC_AUTOMATIC_GENE_ACC</option>
-      <option value="HGNC_MB001_ACC">HGNC_MB001_ACC</option>
-      <option value="HGNC_ACC">HGNC_ACC</option>
-      <option value="WIKIGENE_ACC">WIKIGENE_ACC</option>
-      <option value="DBASS5_ACC">DBASS5_ACC</option>
-      <option value="ILLUMINA_HUMANWG_6_V1">ILLUMINA_HUMANWG_6_V1</option>
-      <option value="AFFY_HUEX_1_0_ST_V2">AFFY_HUEX_1_0_ST_V2</option>
-      <option value="DBASS3_ACC">DBASS3_ACC</option>
-    </param>
-  </inputs>
-
-  <outputs>
-    <data format="html" name="out_file1" />
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" ftype="tabular" value="linkToGProfile.tabular" />
-      <param name="numerical_column" value="2" />
-      <param name="type" value="ENTREZGENE_ACC" />
-      <output name="out_file1" file="linkToGProfile_1.out" />
-    </test>
-  </tests>
-
-  <help>
-**Dataset formats**
-
-The input dataset is tabular_ with a column of identifiers.
-The output dataset is html_ with a link to g:Profiler.
-(`Dataset missing?`_)
-
-.. _tabular: ./static/formatHelp.html#tab
-.. _html: ./static/formatHelp.html#html
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-This tool creates a link to the g:GOSt tool (Gene Group Functional
-Profiling), which is part of the g:Profiler site at the University
-of Tartu in Estonia.  g:GOSt retrieves the most significant Gene
-Ontology (GO) terms, KEGG and REACTOME pathways, and TRANSFAC motifs
-for a user-specified group of genes, proteins, or microarray probes.
-g:GOSt also allows analysis of ranked or ordered lists of genes,
-visual browsing of GO graph structure, interactive visualization of
-retrieved results, and many other features.  Multiple testing
-corrections are applied to extract only statistically significant
-results.
-
-The g:GOSt form is pre-filled with gene, protein, or microarray probe
-IDs from the selected column of a tabular Galaxy dataset.  To follow
-the created link, click on the eye icon when the Galaxy tool has
-finished running.  Once at the g:Profiler site, scroll down to see
-the g:GOSt results.  You can also adjust the options in the g:GOSt
-form to your liking, or use the row of links between the form and
-the results to run other g:Profiler tools using the same list of IDs.
-
------
-
-**Reference**
-
-Reimand J, Kull M, Peterson H, Hansen J, Vilo J. (2007) g:Profiler -- a web-based
-toolset for functional profiling of gene lists from large-scale experiments.
-Nucleic Acids Res. 35(Web Server issue):W193-200. Epub 2007 May 3.
-
-  </help>
-</tool>
--- a/tools/human_genome_variation/lped_to_geno.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-#convert from a MAP and PED file to a genotype file 
-#assumes not many SNPs but lots of individuals
-# transposed formats are used when lots of SNPs (TPED, TFAM)
-
-if (!@ARGV or scalar @ARGV != 2) {
-   print "usage: lped_to_geno.pl infile.map infile.ped > outfile\n";
-   exit 1;
-}
-
-my $map = shift @ARGV;
-my $ped = shift @ARGV;
-
-my @snp; #array to hold SNPs from map file
-open(FH, $map) or die "Couldn't open $map, $!\n";
-while (<FH>) {
-   chomp; 
-   my @f = split(/\s+/); #3 or 4 columns
-   #chrom ID [distance|morgans] position
-   if (!exists $f[3]) { $f[3] = $f[2]; } #only 3 columns
-   #have to leave these in so we know which SNPs to skip later
-   #if ($f[3] < 0) { next; } #way of excluding SNPs
-   #if ($f[0] eq '0') { next; } #unplaced SNP
-   $f[0] = "chr$f[0]";
-   push(@snp, "$f[0]:$f[3]:$f[1]");
-}
-close FH or die "Couldn't finish $map, $!\n";
-
-#rows are individuals, columns are SNPs (7 & up)
-#need to print row per SNP
-my @allele; #alleles to go with @snp
-my @pheno;  #marker for phenotype
-open(FH, $ped) or die "Couldn't open $ped, $!\n";
-while (<FH>) {
-   chomp;
-   my @f = split(/\s+/);
-   if (!defined $f[5]) { die "ERROR undefined phenotype $f[0] $f[1] $f[2] $f[3] $f[4]\n"; }
-   push(@pheno, $f[5]);
-   my $j = 0;
-   for(my $i = 6; $i< $#f; $i+=2) {
-      if (!$allele[$j]) { $allele[$j] = ''; }
-      #can be ACTG or 1234 (for haploview etc) or 0 for missing
-      if ($f[$i] eq '1') { $f[$i] = 'A'; }
-      elsif ($f[$i] eq '2') { $f[$i] = 'C'; }
-      elsif ($f[$i] eq '3') { $f[$i] = 'G'; }
-      elsif ($f[$i] eq '4') { $f[$i] = 'T'; }
-      if ($f[$i+1] eq '1') { $f[$i+1] = 'A'; }
-      elsif ($f[$i+1] eq '2') { $f[$i+1] = 'C'; }
-      elsif ($f[$i+1] eq '3') { $f[$i+1] = 'G'; }
-      elsif ($f[$i+1] eq '4') { $f[$i+1] = 'T'; }
-      $f[$i] = uc($f[$i]);
-      $f[$i+1] = uc($f[$i+1]);
-      $allele[$j] .= " $f[$i]$f[$i+1]"; 
-      $j++;
-   }
-}
-close FH or die "Couldn't close $ped, $!\n";
-
-print "ID Chr Pos";
-foreach (@pheno) { if ($_ > 0) { print " ", $_ - 1; }} #go from 1/2 to 0/1
-print "\n";
-for(my $i =0; $i <= $#snp; $i++) { #foreach snp
-   $allele[$i] =~ /(\w)/;
-   my $nt = $1;
-   my $j = 0;
-   my @t = split(/:/, $snp[$i]);
-   if ($t[0] eq 'chr0' or $t[1] < 0) { next; } #skip this SNP
-   if ($t[0] eq 'chrX') { $t[0] = 'chr23'; }
-   elsif ($t[0] eq 'chrY') { $t[0] = 'chr24'; }
-   elsif ($t[0] eq 'chrXY') { $t[0] = 'chr23'; }
-   elsif ($t[0] eq 'chrMT') { $t[0] = 'chr25'; }
-   print "$t[2] $t[0] $t[1]";
-   $allele[$i] =~ s/^\s+//;
-   foreach my $p (split(/ +/, $allele[$i])) {
-      if ($pheno[$j] > 0) { #pheno 0 or -9 skip
-          #change AA BB AB to 2 0 1
-          if ($p eq "$nt$nt") { print " 2"; }
-          elsif ($p =~ /$nt/) { print " 1"; }
-          else { print " 0"; }
-      }
-      $j++;
-   }
-   print "\n";
-}
-
-exit;
--- a/tools/human_genome_variation/lps.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,304 +0,0 @@
-<tool id="hgv_lps" name="LPS" version="1.0.0">
-  <description>LASSO-Patternsearch algorithm</description>
-
-  <command interpreter="bash">
-    lps_tool_wrapper.sh $lambda_fac $input_file $label_column $output_file $log_file
-    Initialization 0
-    #if $advanced.options == "true":
-      Sample $advanced.sample
-      Verbosity $advanced.verbosity
-      Standardize $advanced.standardize
-      initialLambda $advanced.initialLambda
-      #if $advanced.continuation.continuation == "1":
-        Continuation $advanced.continuation.continuation
-        continuationSteps $advanced.continuation.continuationSteps
-        accurateIntermediates $advanced.continuation.accurateIntermediates
-      #end if
-      printFreq $advanced.printFreq
-      #if $advanced.newton.newton == "1":
-        Newton $advanced.newton.newton
-        NewtonThreshold $advanced.newton.newtonThreshold
-      #end if
-      HessianSampleFraction $advanced.hessianSampleFraction
-      BB 0
-      Monotone 0
-      FullGradient $advanced.fullGradient
-      GradientFraction $advanced.gradientFraction
-      InitialAlpha $advanced.initialAlpha
-      AlphaIncrease $advanced.alphaIncrease
-      AlphaDecrease $advanced.alphaDecrease
-      AlphaMax $advanced.alphaMax
-      c1 $advanced.c1
-      MaxIter $advanced.maxIter
-      StopTol $advanced.stopTol
-      IntermediateTol $advanced.intermediateTol
-      FinalOnly $advanced.finalOnly
-    #end if
-  </command>
-
-  <inputs>
-    <param name="input_file" type="data" format="tabular" label="Dataset"/>
-    <param name="label_column" type="data_column" data_ref="input_file" numerical="true" label="Label column" help="Column containing outcome labels: +1 or -1."/>
-    <param name="lambda_fac" label="Lambda_fac" type="float" value="0.03" help="Target value of the regularization parameter, expressed as a fraction of the calculated lambda_max.">
-      <validator type="in_range" message="0.00 &lt; lambda_fac &lt;= 1.00" min="0.00" max="1.00"/>
-    </param>
-    <conditional name="advanced">
-      <param name="options" type="select" label="Advanced Options">
-        <option value="false" selected="true">Hide advanced options</option>
-        <option value="true">Show advanced options</option>
-      </param>
-      <when value="false">
-        <!-- no options -->
-      </when>
-      <when value="true">
-        <!-- HARDCODED: 'Sample' we don't support passing an array -->
-        <param name="sample" type="float" value="1.0" label="Sample fraction" help="Sample this fraction of the data set.">
-          <validator type="in_range" message="0.0 &lt;= sample &lt;= 1.0" min="0.0" max="1.0"/>
-        </param>
-        <!-- HARDCODED: 'Initialization' = 0 :: Initialize at beta=0 -->
-        <param name="verbosity" type="select" format="integer" label="Verbosity">
-          <option value="0" selected="true">Little output</option>
-          <option value="1">More output</option>
-          <option value="2">Still more output</option>
-        </param>
-        <param name="standardize" type="select" format="integer" label="Standardize" help="Scales and shifts each column so that it has mean zero and variance 1.">
-          <option value="0" selected="true">Don't standardize</option>
-          <option value="1">Standardize</option>
-        </param>
-        <param name="initialLambda" type="float" value="0.8" label="Initial lambda" help="First value of lambda to be used in the continuation scheme, expressed as a fraction of lambda_max.">
-          <validator type="in_range" message="0.0 &lt; initialLambda &lt; 1.0" min="0.0" max="1.0"/>
-        </param>
-        <conditional name="continuation">
-          <param name="continuation" type="select" format="integer" label="Continuation" help="Use continuation strategy to start with a larger value of lambda, decreasing it successively to lambda_fac.">
-            <option value="0" selected="true">Don't use continuation</option>
-            <option value="1">Use continuation</option>
-          </param>
-          <when value="0">
-            <!-- no options -->
-          </when>
-          <when value="1">
-            <param name="continuationSteps" type="integer" value="5" label="Continuation steps" help="Number of lambda values to use in continuation &lt;em&gt;prior&lt;/em&gt; to target value lambda_fac."/>
-
-            <param name="accurateIntermediates" type="select" format="integer" label="Accurate intermediates" help="Indicates whether accurate solutions are required for lambda values other than the target value lambda_fac.">
-              <option value="0" selected="true">Don't need accurate intemediates</option>
-              <option value="1">Calculate accurate intermediates</option>
-            </param>
-          </when>
-        </conditional> <!-- name="continuation" -->
-        <param name="printFreq" type="integer" value="1" label="Print frequency" help="Print a progress report every NI iterations, where NI is the supplied value of this parameter.">
-          <validator type="in_range" message="printFreq &gt;= 1" min="1"/>
-        </param>
-        <conditional name="newton">
-          <param name="newton" type="select" format="integer" label="Projected Newton steps">
-            <option value="0" selected="true">No Newton steps</option>
-            <option value="1">Try projected Newton steps</option>
-          </param>
-          <when value="0">
-            <!-- no options -->
-          </when>
-          <when value="1">
-            <param name="newtonThreshold" type="integer" value="500" label="Newton threshold" help="Maximum size of free variable subvector for Newton."/>
-          </when>
-        </conditional>
-        <param name="hessianSampleFraction" type="float" value="1.0" label="Hessian sample fraction" help="Fraction of terms to use in approximate Hessian calculation.">
-          <validator type="in_range" message="0.01 &lt; hessianSampleFraction &lt;= 1.00" min="0.01" max="1.00"/>
-        </param>
-        <!-- HARDCODED: 'BB' = 0 :: don't use Barzilai-Borwein steps -->
-        <!-- HARDCODED: 'Monotone' = 0 :: don't force monotonicity -->
-        <param name="fullGradient" type="select" format="integer" label="Partial gradient vector selection">
-          <option value="0">Use randomly selected partial gradient, including current active components ("biased")</option>
-          <option value="1">Use full gradient vector at every step</option>
-          <option value="2">Randomly selected partial gradient, without regard to current active set ("unbiased")</option>
-        </param>
-        <param name="gradientFraction" type="float" value="0.1" label="Gradient fraction" help="Fraction of inactive gradient vector to evaluate.">
-          <validator type="in_range" message="0.0 &lt; gradientFraction &lt;= 1" min="0.0" max="1.0"/>
-        </param>
-        <param name="initialAlpha" type="float" value="1.0" label="Initial value of alpha"/>
-        <param name="alphaIncrease" type="float" value="2.0" label="Alpha increase" help="Factor by which to increase alpha after descent not obtained."/>
-        <param name="alphaDecrease" type="float" value="0.8" label="Alpha decrease" help="Factor by which to decrease alpha after successful first-order step."/>
-        <param name="alphaMax" type="float" value="1e12" label="Alpha max" help="Maximum value of alpha; terminate with error if we exceed this."/>
-        <param name="c1" type="float" value="1e-3" help="Parameter defining the margin by which the first-order step is required to decrease before being taken.">
-          <validator type="in_range" message="0.0 &lt; c1 &lt; 1.0" min="0.0" max="1.0"/>
-        </param>
-        <param name="maxIter" type="integer" value="10000" label="Maximum number of iterations" help="Terminate with error if we exceed this."/>
-        <param name="stopTol" type="float" value="1e-6" label="Stop tolerance" help="Convergence tolerance for target value of lambda."/>
-        <param name="intermediateTol" type="float" value="1e-4" label="Intermediate tolerance" help="Convergence tolerance for intermediate values of lambda."/>
-        <param name="finalOnly" type="select" format="integer" label="Final only">
-          <option value="0" selected="true">Return information for all intermediate values</option>
-          <option value="1">Just return information at the last lambda</option>
-        </param>
-      </when> <!-- value="advanced" -->
-    </conditional> <!-- name="advanced" -->
-  </inputs>
-
-  <outputs>
-    <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: results"/>
-    <data name="log_file" format="txt" label="${tool.name} on ${on_string}: log"/>
-  </outputs>
-
-  <requirements>
-    <requirement type="package">lps_tool</requirement>
-  </requirements>
-
-  <tests>
-    <test>
-      <param name="input_file" value="lps_arrhythmia.tabular"/>
-      <param name="label_column" value="280"/>
-      <param name="lambda_fac" value="0.03"/>
-      <param name="options" value="true"/>
-      <param name="sample" value="1.0"/>
-      <param name="verbosity" value="1"/>
-      <param name="standardize" value="0"/>
-      <param name="initialLambda" value="0.9"/>
-      <param name="continuation" value="1"/>
-      <param name="continuationSteps" value="10"/>
-      <param name="accurateIntermediates" value="0"/>
-      <param name="printFreq" value="1"/>
-      <param name="newton" value="1"/>
-      <param name="newtonThreshold" value="500"/>
-      <param name="hessianSampleFraction" value="1.0"/>
-      <param name="fullGradient" value="1"/>
-      <param name="gradientFraction" value="0.5"/>
-      <param name="initialAlpha" value="1.0"/>
-      <param name="alphaIncrease" value="2.0"/>
-      <param name="alphaDecrease" value="0.8"/>
-      <param name="alphaMax" value="1e12"/>
-      <param name="c1" value="1e-3"/>
-      <param name="maxIter" value="2500"/>
-      <param name="stopTol" value="1e-6"/>
-      <param name="intermediateTol" value="1e-6"/>
-      <param name="finalOnly" value="0"/>
-      <output name="ouput_file" file="lps_arrhythmia_beta.tabular"/>
-      <output name="log_file" file="lps_arrhythmia_log.txt"/>
-    </test>
-  </tests>
-
-  <help>
-**Dataset formats**
-
-The input and output datasets are tabular_.  The columns are described below.
-There is a second output dataset (a log) that is in text_ format.
-(`Dataset missing?`_)
-
-.. _tabular: ./static/formatHelp.html#tab
-.. _text: ./static/formatHelp.html#text
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-The LASSO-Patternsearch algorithm fits your dataset to an L1-regularized
-logistic regression model.  A benefit of using L1-regularization is
-that it typically yields a weight vector with relatively few non-zero
-coefficients.
-
-For example, say you have a dataset containing M rows (subjects)
-and N columns (attributes) where one of these N attributes is binary,
-indicating whether or not the subject has some property of interest P.
-In simple terms, LPS calculates a weight for each of the other attributes
-in your dataset.  This weight indicates how "relevant" that attribute
-is for predicting whether or not a given subject has property P.
-The L1-regularization causes most of these weights to be equal to zero,
-which means LPS will find a "small" subset of the remaining N-1 attributes
-in your dataset that can be used to predict P.
-
-In other words, LPS can be used for feature selection.
-
-The input dataset is tabular, and must contain a label column which
-indicates whether or not a given row has property P.  In the current
-version of this tool, P must be encoded using +1 and -1.  The Lambda_fac
-parameter ranges from 0 to 1, and controls how sparse the weight
-vector will be.  At the low end, when Lambda_fac = 0, there will be
-no regularization.  At the high end, when Lambda_fac = 1, there will be
-"too much" regularization, and all of the weights will equal zero.
-
-The LPS tool creates two output datasets.  The first, called the results
-file, is a tabular dataset containing one column of weights for each
-value of the regularization parameter lambda that was tried.  The weight
-columns are in order from left to right by decreasing values of lambda.
-The first N-1 rows in each column are the weights for the N-1 attributes
-in your input dataset.  The final row is a constant, the intercept.
-
-Let **x** be a row from your input dataset and let **b** be a column
-from the results file.  To compute the probability that row **x** has
-a label value of +1:
-
-  Probability(row **x** has label value = +1) = 1 / [1 + exp{**x** \* **b**\[1..N-1\] + **b**\[N\]}]
-
-where **x** \* **b**\[1..N-1\] is the dot product of the row with the first N-1 weights.
-
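-For instance, a minimal sketch of this computation in Python (the row
-and weight values are made up for illustration)::
-
-    import math
-
-    x = [1.0, 0.0, 1.0]         # one input row (N-1 = 3 attributes)
-    b = [0.5, -1.2, 0.0, 0.3]   # one results column: 3 weights plus the intercept
-
-    z = sum(xi * bi for xi, bi in zip(x, b[:-1])) + b[-1]
-    p = 1.0 / (1.0 + math.exp(z))   # probability of label +1, per the formula above
-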
-The second output dataset, called the log file, is a text file which
-contains additional data about the fitted L1-regularized logistic
-regression model.  These data include the number of features, the
-computed value of lambda_max, the actual values of lambda used, the
-optimal values of the log-likelihood and regularized log-likelihood
-functions, the number of non-zeros, and the number of iterations.
-
-Website: http://pages.cs.wisc.edu/~swright/LPS/
-
------
-
-**Example**
-
-- input file::
-
-    +1   1   0   0   0   0   1   0   1   1   ...
-    +1   1   1   1   0   0   1   0   1   1   ...
-    +1   1   0   1   0   1   0   1   0   1   ...
-    etc.
-
-- output results file::
-
-    0
-    0
-    0
-    0
-    0.025541
-    etc.
-
-- output log file::
-
-    Data set has 100 vectors with 50 features.
-      calculateLambdaMax: n=50, m=100, m+=50, m-=50
-      computed value of lambda_max: 5.0000e-01
-     
-    lambda=2.96e-02 solution:
-      optimal log-likelihood function value: 6.46e-01
-      optimal *regularized* log-likelihood function value: 6.79e-01
-      number of nonzeros at the optimum:      5
-      number of iterations required:     43
-    etc.
-
------
-
-**References**
-
-Koh K, Kim S-J, Boyd S. (2007)
-An interior-point method for large-scale l1-regularized logistic regression.
-Journal of Machine Learning Research. 8:1519-1555.
-
-Shi W, Wahba G, Wright S, Lee K, Klein R, Klein B. (2008)
-LASSO-Patternsearch algorithm with application to ophthalmology and genomic data.
-Stat Interface. 1(1):137-153.
-
-<!--
-Wright S, Nowak R, Figueiredo M. (2009)
-Sparse reconstruction by separable approximation.
-IEEE Transactions on Signal Processing. 57:2479-2493.
-
-Shi J, Yin W, Osher S, Sajda P. (2010)
-A fast hybrid algorithm for large scale l1-regularized logistic regression.
-Journal of Machine Learning Research. 11:713-741.
-
-Byrd R, Chin G, Neveitt W, Nocedal J. (2010)
-On the use of stochastic Hessian information in unconstrained optimization.
-Technical Report. Northwestern University. June 16, 2010.
-
-Wright S. (2010)
-Accelerated block-coordinate relaxation for regularized optimization.
-Technical Report. University of Wisconsin. August 10, 2010.
--->
-
-  </help>
-</tool>
--- a/tools/human_genome_variation/lps_tool_wrapper.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-#!/usr/bin/env bash
-# script for execution of deployed applications
-#
-# Sets up the MCR environment for the current $ARCH and executes 
-# the specified command.
-#
-
-export PATH=$PATH:$(dirname $0)
-
-MCRROOT=${MCRROOT:-/galaxy/software/linux2.6-x86_64/bin/MCR-7.11/v711}
-MWE_ARCH=glnxa64
-
-if [ "$MWE_ARCH" = "sol64" ] ; then
-  LD_LIBRARY_PATH=.:/usr/lib/lwp:${MCRROOT}/runtime/glnxa64
-else
-  LD_LIBRARY_PATH=.:${MCRROOT}/runtime/glnxa64
-fi
-
-LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${MCRROOT}/bin/glnxa64
-LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${MCRROOT}/sys/os/glnxa64
-
-if [ "$MWE_ARCH" = "maci" -o "$MWE_ARCH" = "maci64" ]; then
-  DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/System/Library/Frameworks/JavaVM.framework/JavaVM:/System/Library/Frameworks/JavaVM.framework/Libraries
-else
-  MCRJRE=${MCRROOT}/sys/java/jre/glnxa64/jre/lib/amd64
-  LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${MCRJRE}/native_threads
-  LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${MCRJRE}/server
-  LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${MCRJRE}/client
-  LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${MCRJRE}
-fi
-
-XAPPLRESDIR=${MCRROOT}/X11/app-defaults
-
-export LD_LIBRARY_PATH XAPPLRESDIR
-
-lps_tool "$@"
-
-exit $?
--- a/tools/human_genome_variation/mergeSnps.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-#this merges the significance output with the SNPs so users get more than an index
-
-my($out, $snp) = @ARGV;
-
-if (!$out or !$snp) { die "missing args\n"; }
-
-#merge SNP data with results
-merge();
-
-exit;
-
-########################################
-
-#merge the input and output files so we have the SNP data with each result
-sub merge {
-   open(FH, $out) or die "Couldn't open $out, $!\n";
-   my %res;
-   my @ind;
-   while (<FH>) {
-      chomp;
-      my $line = $_;
-      #0:      10 score= 14.224153 , df= 2 , p= 0.040760 , N=50
-      if ($line =~ /^(\d+):\s+(.*)/) { $res{$1} = $2; push(@ind, $1); }
-   }
-   close FH;
-   if (!@ind) { return; } #no results, leave alone
-   @ind = sort { $a <=> $b } @ind;
-   #read input file to get SNP data
-   open(FH, $snp) or die "Couldn't open $snp, $!\n";
-   my $i = 0; #0 based, not counting ID line
-   my $c = shift @ind;
-   while (<FH>) {
-      chomp; 
-      if (/^ID/) { next; }
-      my @f = split(/\s+/);
-      if ($i == $c) { 
-         $res{$i} = "$f[0]\t$f[1]\t$f[2]\t$res{$i}";
-         if (!@ind) { last; }
-         $c = shift @ind;
-      }
-      $i++;      
-   }
-   close FH;
-   #now reprint results with SNP data included
-   open(FH, ">", $out) or die "Couldn't write to $out, $!\n";
-   print FH "ID\tchr\tposition\tresults\n";
-   foreach $i (sort { $a <=> $b } keys %res) {
-      print FH $res{$i}, "\n";
-   }
-   close FH;
-}
-
--- a/tools/human_genome_variation/pagetag.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,297 +0,0 @@
-#!/usr/bin/env python
-
-"""
-This accepts as input a file of the following format:
-
-    Site   Sample   Allele1   Allele2
-
-for example:
-
-    000834   D001    G       G
-    000834   D002    G       G
-    000834   D003    G       G
-    000834   D004    G       G
-    000834   D005    N       N
-    000834   E001    G       G
-    000834   E002    G       G
-    000834   E003    G       G
-    000834   E004    G       G
-    000834   E005    G       G
-    000963   D001    T       T
-    000963   D002    T       T
-    000963   D003    T       T
-    000963   D004    T       T
-    000963   D005    N       N
-    000963   E001    T       T
-    000963   E002    N       N
-    000963   E003    G       T
-    000963   E004    G       G
-    000963   E005    G       T
-
-and an rsquare threshold, and outputs two files:
-
-a) a file of input snps (one on each line). A SNP is identified by the "Site"
-column in the input file
-
-b) a file where each line has the following:
-    SNP     list
-where SNP is one of the SNPs and the "list" is a comma-separated list of SNPs
-that exceed the rsquare threshold with the first SNP.
-"""
-
-from sys import argv, stderr, exit
-from getopt import getopt, GetoptError
-
-__author__ = "Aakrosh Ratan"
-__email__  = "ratan@bx.psu.edu"
-
-# do we want the debug information to be printed?
-debug_flag = False
-
-# denote different combos of alleles in code
-HOMC  = str(1)
-HOMR  = str(2)
-HETE  = str(3)
-OTHER = str(4)
-
-indexcalculator = {(HOMC,HOMC) : 0,
-                   (HOMC,HOMR) : 1,
-                   (HOMC,HETE) : 2,
-                   (HOMR,HOMC) : 3,
-                   (HOMR,HOMR) : 4,
-                   (HOMR,HETE) : 5,
-                   (HETE,HOMC) : 6,
-                   (HETE,HOMR) : 7,
-                   (HETE,HETE) : 8}
-
-def read_inputfile(filename, samples):
-    input = {}
-
-    file = open(filename, "r")
-
-    for line in file:
-        position,sample,allele1,allele2 = line.split()
-
-        # if the user specified a list of samples, then only use those samples
-        if samples != None and sample not in samples: continue
-            
-        if position in input:
-            v = input[position]
-            v[sample] = (allele1,allele2)
-        else:
-            v = {sample : (allele1, allele2)}
-            input[position] = v
-
-    file.close()
-    return input
-
-def annotate_locus(input, minorallelefrequency, snpsfile):
-    locus = {}
-    for k,v in input.items():
-        genotypes = [x for x in v.values()]
-        alleles   = [y for x in genotypes for y in x]
-        alleleset = list(set(alleles) - set(["N","X"]))
-
-        if len(alleleset) == 2:
-            genotypevec = ""
-            num1 = len([x for x in alleles if x == alleleset[0]])
-            num2 = len([x for x in alleles if x == alleleset[1]])
- 
-            if num1 > num2: 
-                major = alleleset[0]    
-                minor = alleleset[1]
-                minorfreq = (num2 * 1.0)/(num1 + num2)
-            else:
-                major = alleleset[1] 
-                minor = alleleset[0]
-                minorfreq = (num1 * 1.0)/(num1 + num2)
-
-            if minorfreq < minorallelefrequency: continue
-               
-            for gen in genotypes:
-                if gen == (major,major):
-                    genotypevec += HOMC 
-                elif gen == (minor,minor):
-                    genotypevec += HOMR
-                elif gen == (major, minor) or gen == (minor, major):
-                    genotypevec += HETE
-                else:  
-                    genotypevec += OTHER
-
-            locus[k] = genotypevec,minorfreq
-        elif len(alleleset) > 2:
-            print >> snpsfile, k
-    return locus
-
-def calculateLD(loci, rsqthreshold):
-    snps = list(loci)
-    rsquare = {}
-
-    for index,loc1 in enumerate(snps):
-        for loc2 in snps[index + 1:]:
-            matrix = [0]*9
-
-            vec1 = loci[loc1][0]
-            vec2 = loci[loc2][0]
-
-            for gen in zip(vec1,vec2):
-                if gen[0] == OTHER or gen[1] == OTHER: continue
-                matrix[indexcalculator[gen]] += 1
-
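-            # x11..x22 count unambiguously phased haplotypes: a double
-            # homozygote contributes two haplotypes of one type, a single
-            # heterozygote contributes one of each resolvable type, and
-            # double heterozygotes (matrix[8]) remain ambiguous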
-            n   = sum(matrix)
-            x11 = 2*matrix[0] + matrix[2] + matrix[6]
-            x12 = 2*matrix[1] + matrix[2] + matrix[7]
-            x21 = 2*matrix[3] + matrix[6] + matrix[5]
-            x22 = 2*matrix[4] + matrix[5] + matrix[7]
-
-            p   = (x11 + x12 + matrix[8] * 1.0) / (2 * n)
-            q   = (x11 + x21 + matrix[8] * 1.0) / (2 * n)
-     
-            p11    = p * q
-
-            oldp11 = p11
-            range  = 0.0
-            converged         = False
-            convergentcounter = 0
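-            # EM iteration: starting from linkage equilibrium (p11 = p*q),
-            # re-estimate the major-major haplotype frequency from the
-            # ambiguous double heterozygotes until it converges; below,
-            # D = p11 - p*q and r^2 = D^2 / (p*q*(1-p)*(1-q))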
-            if p11 > 0.0:
-                while converged == False and convergentcounter < 100:
-                    if (1.0 - p - q + p11) != 0.0 and oldp11 != 0.0:    
-                        num = matrix[8] * p11 * (1.0 - p - q + p11)
-                        den = p11 * (1.0 - p - q + p11) + (p - p11)*(q - p11)
-                        p11 = (x11 + (num/den))/(2.0*n)
-                        range = p11/oldp11
-                        if range >= 0.9999 and range <= 1.001:
-                            converged = True
-                        oldp11 = p11
-                        convergentcounter += 1 
-                    else:
-                        converged = True
- 
-            dvalue = 0.0
-            if converged == True:
-                dvalue = p11 - (p * q)
-    
-            if dvalue != 0.0:
-                rsq = (dvalue**2)/(p*q*(1-p)*(1-q))
-                if rsq >= rsqthreshold:
-                    rsquare["%s %s" % (loc1,loc2)] = rsq
-
-    return rsquare
-
-def main(inputfile, snpsfile, neighborhoodfile, \
-         rsquare, minorallelefrequency, samples):
-    # read the input file
-    input = read_inputfile(inputfile, samples)     
-    print >> stderr, "Read %d locations" % len(input)
-
-    # open the snpsfile to print
-    file = open(snpsfile, "w")
-
-    # annotate the inputs, remove the abnormal loci (which do not have
-    # 2 alleles), and add the major and minor allele to each locus
-    loci = annotate_locus(input, minorallelefrequency, file)
-    print >> stderr, "Read %d interesting locations" % len(loci)
-        
-    # print all the interesting loci as candidate snps
-    for k in loci.keys(): print >> file, k
-    file.close() 
-    print >> stderr, "Finished creating the snpsfile"
-
-    # calculate the LD values and store it if it exceeds the threshold
-    lds = calculateLD(loci, rsquare)
-    print >> stderr, "Calculated all the LD values"
-
-    # create a list of SNPs   
-    snps   = {}
-    ldvals = {}
-    for k,v in lds.items():
-        s1,s2 = k.split()
-        if s1 in snps: snps[s1].append(s2)
-        else         : snps[s1] = [s2]    
-        if s2 in snps: snps[s2].append(s1)
-        else         : snps[s2] = [s1]    
-
-        if s1 in ldvals: ldvals[s1].append(str(v))
-        else           : ldvals[s1] = [str(v)]
-        if s2 in ldvals: ldvals[s2].append(str(v))
-        else           : ldvals[s2] = [str(v)]
-           
-    # print the snps to the output file
-    file = open(neighborhoodfile, "w")
-
-    for k,v in snps.items():
-        ldv = ldvals[k]
-        if debug_flag == True:
-            print >> file, "%s\t%s\t%s" % (k, ",".join(v), ",".join(ldv))
-        else:            
-            print >> file, "%s\t%s" % (k, ",".join(v))
-
-    file.close()
- 
-
-def read_list(filename):
-    file = open(filename, "r")
-    list = {}    
-
-    for line in file:
-        list[line.strip()] = 1
-
-    file.close()
-    return list
-
-def usage():
-    f = stderr
-    print >> f, "usage:"
-    print >> f, "pagetag [options] input.txt snps.txt neighborhood.txt"
-    print >> f, "where input.txt is the prettybase file"
-    print >> f, "where snps.txt is the first output file with the snps"
-    print >> f, "where neighborhood.txt is the output neighborhood file"
-    print >> f, "where the options are:"
-    print >> f, "-h,--help : print usage and quit"
-    print >> f, "-d,--debug: print debug information"
-    print >> f, "-r,--rsquare: the rsquare threshold (default : 0.64)"
-    print >> f, "-f,--freq : the minimum MAF required (default: 0.0)"
-    print >> f, "-s,--sample : a list of samples to be clustered"   
-
-if __name__ == "__main__":
-    try:
-        opts, args = getopt(argv[1:], "hds:r:f:",\
-                    ["help", "debug", "rsquare=","freq=", "sample="])
-    except GetoptError, err:
-        print str(err)
-        usage()
-        exit(2) 
-
-    rsquare = 0.64
-    minorallelefrequency = 0.0
-    samples = None
-
-    for o, a in opts:
-        if o in ("-h", "--help"):
-            usage()
-            exit()
-        elif o in ("-d", "--debug"):
-            debug_flag = True
-        elif o in ("-r", "--rsquare"):
-            rsquare = float(a)
-        elif o in ("-f", "--freq"):
-            minorallelefrequency = float(a)
-        elif o in ("-s", "--sample"):
-            samples = read_list(a)
-        else:
-            assert False, "unhandled option"
-
-    if rsquare < 0.00 or rsquare > 1.00: 
-        print >> stderr, "input value of rsquare should be in [0.00, 1.00]"
-        exit(3)
-
-    if minorallelefrequency < 0.0 or minorallelefrequency > 0.5:
-        print >> stderr, "input value of MAF should be (0.00,0.50]"
-        exit(4)
-
-    if len(args) != 3:
-        usage()
-        exit(5)
-
-    main(args[0], args[1], args[2], rsquare, minorallelefrequency, samples)
--- a/tools/human_genome_variation/pass.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,126 +0,0 @@
-<tool id="hgv_pass" name="PASS" version="1.0.0">
-  <description>significant transcription factor binding sites from ChIP data</description>
-
-  <command interpreter="bash">
-    pass_wrapper.sh "$input" "$min_window" "$max_window" "$false_num" "$output"
-  </command>
-
-  <inputs>
-    <param format="gff" name="input" type="data" label="Dataset"/>
-    <param name="min_window" label="Smallest window size (by # of probes)" type="integer" value="2" />
-    <param name="max_window" label="Largest window size (by # of probes)" type="integer" value="6" />
-    <param name="false_num" label="Expected total number of false positive intervals to be called" type="float" value="5.0" help="N.B.: this is a &lt;em&gt;count&lt;/em&gt;, not a rate." />
-  </inputs>
-
-  <outputs>
-    <data format="tabular" name="output" />
-  </outputs>
-
-  <requirements>
-    <requirement type="package">pass</requirement>
-    <requirement type="binary">sed</requirement>
-  </requirements>
-
-  <!-- we need to be able to set the seed for the random number generator
-  <tests>
-    <test>
-      <param name="input" ftype="gff" value="pass_input.gff"/>
-      <param name="min_window" value="2"/>
-      <param name="max_window" value="6"/>
-      <param name="false_num" value="5"/>
-      <output name="output" file="pass_output.tab"/>
-    </test>
-  </tests>
-  -->
-
-  <help>
-**Dataset formats**
-
-The input is in GFF_ format, and the output is tabular_.
-(`Dataset missing?`_)
-
-.. _GFF: ./static/formatHelp.html#gff
-.. _tabular: ./static/formatHelp.html#tab
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-PASS (Poisson Approximation for Statistical Significance) detects
-significant transcription factor binding sites in the genome from
-ChIP data.  This is probably the only peak-calling method that
-accurately controls the false-positive rate and FDR in ChIP data,
-which is important given the huge discrepancy in results obtained
-from different peak-calling algorithms.  At the same time, this
-method achieves a similar or better power than previous methods.
-
-<!-- we don't have wrapper support for the "prior" file yet
-Another unique feature of this method is that it allows varying
-thresholds to be used for peak calling at different genomic
-locations.  For example, if a position lies in an open chromatin
-region, is depleted of nucleosome positioning, or a co-binding
-protein has been detected within the neighborhood, then the position
-is more likely to be bound by the target protein of interest, and
-hence a lower threshold will be used to call significant peaks.
-As a result, weak but real binding sites can be detected.
--->
-
------
-
-**Hints**
-
-- ChIP-Seq data:
-
-  If the data is from ChIP-Seq, you need to convert the ChIP-Seq values
-  into z-scores before using this program.  It is also recommended that
-  you group read counts within a neighborhood together, e.g. in tiled
-  windows of 30bp.  In this way, the ChIP-Seq data will resemble
-  ChIP-chip data in format (see the sketch after these hints).
-
-- Choosing window size options:
-
-  The window size is related to the probe tiling density.  For example,
-  if the probes are tiled at every 100bp, then setting the smallest
-  window = 2 and largest window = 6 is appropriate, because the DNA
-  fragment size is around 300-500bp.
-
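-A minimal sketch of the z-score conversion from the first hint, in
-Python (the 30bp window counts are made up for illustration)::
-
-    # sum read counts into 30bp windows, then standardize to z-scores
-    counts = [12, 7, 30, 5, 22, 9]          # one read count per window
-    n = float(len(counts))
-    mean = sum(counts) / n
-    sd = (sum((c - mean) ** 2 for c in counts) / n) ** 0.5
-    zscores = [(c - mean) / sd for c in counts]
-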
------
-
-**Example**
-
-- input file::
-
-    chr7  Nimblegen  ID  40307603  40307652  1.668944     .  .  .
-    chr7  Nimblegen  ID  40307703  40307752  0.8041307    .  .  .
-    chr7  Nimblegen  ID  40307808  40307865  -1.089931    .  .  .
-    chr7  Nimblegen  ID  40307920  40307969  1.055044     .  .  .
-    chr7  Nimblegen  ID  40308005  40308068  2.447853     .  .  .
-    chr7  Nimblegen  ID  40308125  40308174  0.1638694    .  .  .
-    chr7  Nimblegen  ID  40308223  40308275  -0.04796628  .  .  .
-    chr7  Nimblegen  ID  40308318  40308367  0.9335709    .  .  .
-    chr7  Nimblegen  ID  40308526  40308584  0.5143972    .  .  .
-    chr7  Nimblegen  ID  40308611  40308660  -1.089931    .  .  .
-    etc.
-
-  In GFF, a value of dot '.' is used to mean "not applicable".
-
-- output file::
-
-    ID  Chr   Start     End       WinSz  PeakValue  # of FPs  FDR
-    1   chr7  40310931  40311266  4      1.663446   0.248817  0.248817
-
------
-
-**References**
-
-Zhang Y. (2008)
-Poisson approximation for significance in genome-wide ChIP-chip tiling arrays.
-Bioinformatics. 24(24):2825-31. Epub 2008 Oct 25.
-
-Chen KB, Zhang Y. (2010)
-A varying threshold method for ChIP peak calling using multiple sources of information.
-Submitted.
-
-  </help>
-</tool>
--- a/tools/human_genome_variation/pass_wrapper.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-#!/usr/bin/env bash
-
-export PATH=$PATH:$(dirname $0)
-
-input=$1
-min_window=$2
-max_window=$3
-false_num=$4
-output=$5
-
-pass "$input" "$min_window" "$max_window" "$false_num" "$output" >/dev/null
-sed -i -e 's/\t\t*/\t/g' "$output"
-
--- a/tools/human_genome_variation/senatag.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,243 +0,0 @@
-#!/usr/bin/env python
-
-"""
-This tool takes the following file pairs as input:
-a) input_snp  : A file with identifiers for SNPs (one on each line)
-b) ldfile     : A file where each line has the following
-                snp     list
-                where "snp" is an identifier for one SNP and the "list" is a 
-                comma separated list of all the other snps that are in LD with
-                it (as per some threshold of rsquare)
-
-The output is a set of tag SNPs for the given datasets
-
-The algorithm is as follows:
-
-a) Construct a graph for each population, where each node is a SNP and two nodes
-are connected using an edge iff they are in LD.
-b) For each SNP, count the total number of connected nodes, which have not yet
-been visited.
-c) Find the SNP with the highest count and assign it to be a tag SNP.
-d) Mark that SNP and all the snps connected to it as "visited". This should be
-done for each population.
-e) Continue steps b-d until all SNPs in all populations have been visited.
-"""
-
-from sys import argv, stderr, exit
-from getopt import getopt, GetoptError
-
-import os
-import heapq
-
-__author__ = "Aakrosh Ratan"
-__email__  = "ratan@bx.psu.edu"
-
-# do we want the debug information to be printed?
-debug_flag = False
-
-class node:
-    def __init__(self, name):
-        self.name    = name
-        self.edges   = []
-        self.visited = False
-
-    # return the number of nodes connected to this node, that have yet to be
-    # visited
-    def num_not_visited(self):
-        num = 0
-        for n in self.edges:
-            if n.visited == False: num += 1
-        return num 
-
-    def __cmp__(self, other):
-        return other.num_not_visited() - self.num_not_visited()
-
-    def __str__(self):  
-        return self.name
-
-class graph:
-    def __init__(self):
-        self.nodes = {}
-
-    def __str__(self):
-        string = ""
-        for n1 in self.nodes.values():
-            n2s = [x.name for x in n1.edges]
-            string += "%s %s\n" % (n1.name, ",".join(n2s))
-        return string[:-1]
-
-    def add_node(self, n):
-        self.nodes[n.name] = n
-
-    def add_edges(self, n1, n2):
-        assert n1.name in self.nodes
-        assert n2.name in self.nodes
-        n1.edges.append(n2)
-        n2.edges.append(n1)
-
-    def check_graph(self):
-        for n in self.nodes.values():
-            ms = [x for x in n.edges]
-            for m in ms:
-                if n not in m.edges:
-                    print >> stderr, "check : %s - %s" % (n,m)
-
-def construct_graph(ldfile, snpfile):
-    # construct the initial graph. add all the SNPs as nodes
-    g = graph()
-    file = open(snpfile, "r")
-
-    for line in file:
-        # ignore empty lines and add the remainder to the graph
-        if len(line.strip()) == 0: continue
-        n = node(line.strip())           
-        g.add_node(n)
-
-    file.close()
-    print >> stderr, "Added %d nodes to a graph" % len(g.nodes)
-  
-    # now add all the edges
-    file   = open(ldfile, "r")
-
-    for line in file:
-        tokens = line.split()
-        assert len(tokens) == 2
-
-        # if this node is in the graph, then we need to construct an edge from
-        # this node to all the nodes which are highly related to it
-        if tokens[0] in g.nodes:
-            n1  = g.nodes[tokens[0]]
-            n2s = [g.nodes[x] for x in tokens[1].split(",")]
-
-            for n2 in n2s:
-                g.add_edges(n1, n2)
-
-    file.close()
-    print >> stderr, "Added all edges to the graph"
-
-    return g
-     
-def check_output(g, tagsnps):
-    # find all the nodes in the graph 
-    allsnps = [x.name for x in g.nodes.values()]
-
-    # find the nodes that are covered by our tagsnps
-    mysnps = [x.name for x in tagsnps]
-
-    for n in tagsnps:
-        for m in n.edges:
-                mysnps.append(m.name)
-
-    mysnps = list(set(mysnps))
-
-    if set(allsnps) != set(mysnps):
-        diff = list(set(allsnps) - set(mysnps))
-        print >> stderr, "%s are not covered" % ",".join(diff)
-
-def main(ldfile, snpsfile, required, excluded):
-    # construct the graph
-    g = construct_graph(ldfile, snpsfile)
-    if debug_flag == True: g.check_graph()
-
-    tagsnps   = []
-    neighbors = {}
-
-    # take care of the SNPs that are required to be TagSNPs
-    for s in required:
-        t = g.nodes[s]
-
-        t.visited = True
-        ns = []
- 
-        for n in t.edges:
-            if n.visited == False: ns.append(n.name)
-            n.visited = True 
-        
-        tagsnps.append(t)
-        neighbors[t.name] = list(set(ns))
-
-    # find the tag SNPs for this graph
-    data = [x for x in g.nodes.values()]
-    heapq.heapify(data)
-
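-    # greedy selection (steps b-d above): repeatedly take the SNP that
-    # covers the most unvisited neighbors, re-heapifying after each pick
-    # because num_not_visited() changes as nodes are marked visited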
-    while data:
-        s = heapq.heappop(data)
-
-        if s.visited == True or s.name in excluded: continue
-
-        s.visited = True
-        ns = []
-
-        for n in s.edges:
-            if n.visited == False: ns.append(n.name)
-            n.visited = True
-            
-        tagsnps.append(s)
-        neighbors[s.name] = list(set(ns))
-
-        heapq.heapify(data)
-
-    for s in tagsnps:
-        if len(neighbors[s.name]) > 0: 
-            print "%s\t%s" % (s, ",".join(neighbors[s.name]))
-            continue
-        print s
-        
-    if debug_flag == True: check_output(g, tagsnps) 
-       
-def read_list(filename):
-    assert os.path.exists(filename) == True
-    file = open(filename, "r")
-    list = {}
-
-    for line in file:
-        list[line.strip()] = 1
-
-    file.close()
-    return list
-           
-def usage():
-    f = stderr
-    print >> f, "usage:"
-    print >> f, "senatag [options] neighborhood.txt inputsnps.txt"
-    print >> f, "where inputsnps.txt is a file of snps from one population"
-    print >> f, "where neighborhood.txt is neighborhood details for the pop."
-    print >> f, "where the options are:"
-    print >> f, "-h,--help : print usage and quit"
-    print >> f, "-d,--debug: print debug information"
-    print >> f, "-e,--excluded : file with names of SNPs that cannot be TagSNPs"
-    print >> f, "-r,--required : file with names of SNPs that should be TagSNPs"
-
-if __name__ == "__main__":
-    try:
-        opts, args = getopt(argv[1:], "hdr:e:",\
-                     ["help", "debug", "required=", "excluded="])
-    except GetoptError, err:
-        print str(err)
-        usage()
-        exit(2) 
-
-    required = {}
-    excluded = {}
-
-    for o, a in opts:
-        if o in ("-h", "--help"):
-            usage()
-            exit()
-        elif o in ("-d", "--debug"):
-            debug_flag = True
-        elif o in ("-r", "--required"):
-            required = read_list(a)
-        elif o in ("-e", "--excluded"):
-            excluded = read_list(a)
-        else:
-            assert False, "unhandled option"
-
-    if len(args) != 2:
-        usage()
-        exit(3)
-
-    assert os.path.exists(args[0]) == True
-    assert os.path.exists(args[1]) == True
-    
-    main(args[0], args[1], required, excluded)
--- a/tools/human_genome_variation/sift.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,174 +0,0 @@
-<tool id="hgv_sift" name="SIFT" version="1.0.0">
-  <description>predictions of functional sites</description>
-
-  <command interpreter="bash">
-    sift_variants_wrapper.sh "$input" "$output" "${input.metadata.dbkey}" "${GALAXY_DATA_INDEX_DIR}/sift_db.loc" "$chrom_col" "$pos_col" "$base" "$allele_col" "$strand_source.strand_col" "$comment_source.comment_col" "$output_opts"
-  </command>
-
-  <inputs>
-    <param name="input" type="data" format="tabular" label="Dataset">
-      <validator type="unspecified_build"/>
-      <validator type="dataset_metadata_in_file" filename="sift_db.loc" metadata_name="dbkey" metadata_column="0" message="Data is currently not available for the specified build."/>
-    </param>
-    <param name="chrom_col"  type="data_column" data_ref="input" label="Column with chromosome"/>
-    <param name="pos_col"    type="data_column" data_ref="input" numerical="true" label="Column with position"/>
-    <param name="base" type="select" label="Position coordinates are">
-      <option value="1" selected="true">one-based</option>
-      <option value="0">zero-based</option>
-    </param>
-    <param name="allele_col" type="data_column" data_ref="input" label="Column with allele"/>
-    <conditional name="strand_source">
-      <param name="strand_choice" type="select" label="Strand info">
-        <option value="data_column" selected="true">a column in the dataset</option>
-        <option value="all_pos">all on sense/forward/+ strand</option>
-        <option value="all_neg">all on antisense/reverse/- strand</option>
-      </param>
-      <when value="data_column">
-        <param name="strand_col" type="data_column" data_ref="input" label="Column with strand"/>
-      </when>
-      <when value="all_pos">
-        <param name="strand_col" type="hidden" value="+"/>
-      </when>
-      <when value="all_neg">
-        <param name="strand_col" type="hidden" value="-"/>
-      </when>
-    </conditional>
-    <conditional name="comment_source">
-      <param name="comment_choice" type="select" label="Include comment column">
-        <option value="no" selected="true">no</option>
-        <option value="yes">yes</option>
-      </param>
-      <when value="no">
-        <param name="comment_col" type="hidden" value="-"/>
-      </when>
-      <when value="yes">
-        <param name="comment_col" type="data_column" data_ref="input" label="Column with comment"/>
-      </when>
-    </conditional>
-    <param name="output_opts" type="select" multiple="true" display="checkboxes" label="Include the following additional fields in the output">
-      <option value="A">Ensembl Gene ID</option>
-      <option value="B">Gene Name</option>
-      <option value="C">Gene Description</option>
-      <option value="D">Ensembl Protein Family ID</option>
-      <option value="E">Ensembl Protein Family Description</option>
-      <option value="F">Ensembl Transcript Status (Known / Novel)</option>
-      <option value="G">Protein Family Size</option>
-      <option value="H">Ka/Ks (Human-mouse)</option>
-      <option value="I">Ka/Ks (Human-macaque)</option>
-      <option value="J">OMIM Disease</option>
-      <option value="K">Allele Frequencies (All Hapmap Populations - weighted average)</option>
-      <option value="L">Allele Frequencies (CEU Hapmap population)</option>
-    </param>
-  </inputs>
-
-  <outputs>
-    <data format="tabular" name="output" />
-  </outputs>
-
-  <requirements>
-    <requirement type="binary">awk</requirement>
-    <requirement type="binary">rm</requirement>
-    <requirement type="binary">sed</requirement>
-  </requirements>
-
-  <tests>
-    <test>
-      <param name="input" value="sift_variants.tab" ftype="tabular" dbkey="hg18"/>
-      <param name="chrom_col" value="1"/>
-      <param name="pos_col" value="3"/>
-      <param name="base" value="1"/>
-      <param name="allele_col" value="5"/>
-      <param name="strand_choice" value="data_column"/>
-      <param name="strand_col" value="4"/>
-      <param name="output_opts" value="A"/>
-      <output name="output" file="sift_variants_result.tab"/>
-    </test>
-  </tests>
-
-  <help>
-.. class:: warningmark
-
-This currently works only for builds hg18 or hg19.
-
------
-
-**Dataset formats**
-
-The input and output datasets are tabular_. 
-(`Dataset missing?`_)
-
-.. _tabular: ./static/formatHelp.html#tab
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-SIFT predicts whether an amino-acid substitution affects protein function,
-based on sequence homology and the physical properties of amino acids.
-SIFT can be applied to naturally occurring non-synonymous polymorphisms
-and laboratory-induced missense mutations.  This tool uses SQLite databases
-containing pre-computed SIFT scores and annotations for all possible nucleotide
-substitutions at each position in the human exome.  Allele frequency data
-are from the HapMap frequency database, and additional transcript and 
-gene-level data are from Ensembl BioMart.
-
-The input dataset must contain columns for the chromosome, position, and
-alleles.  The alleles must be two nucleotides separated by '/',
-usually the reference allele and the allele of interest.
-The strand must either be given in another column or be the same for all rows.
-The output contains a standard set of columns plus the additional ones that
-have been selected from the list above.
-
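-As a minimal illustration (Python; the function and its defaults are
-hypothetical, not part of the tool), this is the kind of conversion the
-wrapper script performs on each input row before handing it to SIFT::
-
-    def to_sift_row(fields, chrom_col=0, pos_col=1, strand_col=2,
-                    allele_col=3, one_based=True):
-        chrom = fields[chrom_col]
-        if chrom.lower().startswith("chr"):
-            chrom = chrom[3:]                  # SIFT expects "3", not "chr3"
-        pos = int(fields[pos_col])
-        beg, end = (pos - 1, pos) if one_based else (pos, pos + 1)
-        strand = "1" if fields[strand_col] == "+" else "-1"
-        return ",".join([chrom, str(beg), str(end), strand, fields[allele_col]])
-
-    to_sift_row("chr3 81780820 + T/C".split())
-    # returns "3,81780819,81780820,1,T/C"
-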
-Website: http://sift.jcvi.org/
-
------
-
-**Example**
-
-- input file::
-
-    chr3   81780820   +  T/C
-    chr2   230341630  +  G/A
-    chr2   43881517   +  A/T
-    chr2   43857514   +  T/C
-    chr6   88375602   +  G/A
-    chr22  29307353   -  T/A
-    chr10  115912482  -  G/T
-    chr10  115900918  -  C/T
-    chr16  69875502   +  G/T
-    etc.
-
-- output file::
-
-    #Chrom  Position   Strand  Allele  Codons   Transcript ID    Protein ID       Substitution  Region    dbSNP ID      SNP Type       Prediction  Score  Median Info  Num seqs at position  User Comment
-    chr3    81780820   +       T/C     AGA-gGA  ENST00000264326  ENSP00000264326  R190G         EXON CDS  rs2229519:C   Nonsynonymous  DAMAGING    0.04   3.06         149
-    chr2    230341630  +       G/T     -        ENST00000389045  ENSP00000373697  NA            EXON CDS  rs1803846:A   Unknown        Not scored  NA     NA           NA
-    chr2    43881517   +       A/T     ATA-tTA  ENST00000260605  ENSP00000260605  I230L         EXON CDS  rs11556157:T  Nonsynonymous  TOLERATED   0.47   3.19         7
-    chr2    43857514   +       T/C     TTT-TcT  ENST00000260605  ENSP00000260605  F33S          EXON CDS  rs2288709:C   Nonsynonymous  TOLERATED   0.61   3.33         6
-    chr6    88375602   +       G/A     GTT-aTT  ENST00000257789  ENSP00000257789  V217I         EXON CDS  rs2307389:A   Nonsynonymous  TOLERATED   0.75   3.17         13
-    chr22   29307353   +       T/A     ACC-tCC  ENST00000335214  ENSP00000334612  T264S         EXON CDS  rs42942:A     Nonsynonymous  TOLERATED   0.4    3.14         23
-    chr10   115912482  +       C/A     CGA-CtA  ENST00000369285  ENSP00000358291  R179L         EXON CDS  rs12782946:T  Nonsynonymous  TOLERATED   0.06   4.32         2
-    chr10   115900918  +       G/A     CAA-tAA  ENST00000369287  ENSP00000358293  Q271*         EXON CDS  rs7095762:T   Nonsynonymous  N/A         N/A    N/A          N/A
-    chr16   69875502   +       G/T     ACA-AaA  ENST00000338099  ENSP00000337512  T608K         EXON CDS  rs3096381:T   Nonsynonymous  TOLERATED   0.12   3.41         3
-    etc.
-
------
-
-**References**
-
-Ng PC, Henikoff S. (2001) Predicting deleterious amino acid substitutions.
-Genome Res. 11(5):863-74.
-
-Ng PC, Henikoff S. (2002) Accounting for human polymorphisms predicted to affect protein function.
-Genome Res. 12(3):436-46.
-
-Ng PC, Henikoff S. (2003) SIFT: Predicting amino acid changes that affect protein function.
-Nucleic Acids Res. 31(13):3812-4.
-
-Kumar P, Henikoff S, Ng PC. (2009) Predicting the effects of coding non-synonymous variants
-on protein function using the SIFT algorithm.
-Nat Protoc. 4(7):1073-81. Epub 2009 Jun 25.
-
-  </help>
-</tool>
--- a/tools/human_genome_variation/sift_variants_wrapper.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,184 +0,0 @@
-#!/usr/bin/env bash
-
-input_file=$1
-output_file=$2
-org=$3
-db_loc=$4
-chrom_col=$5
-pos_col=$6
-base=$7
-allele_col=$8
-strand_col=$9
-comment_col=${10}
-output_opts=${11}
-
-working_dir=$PWD
-sift_input="$working_dir/sift_input.txt"
-sift_output="$working_dir/sift_output.txt"
-
-################################################################################
-## make sure input file column selections are mutually exclusive              ##
-################################################################################
-ERROR=0
-declare -a col_use
-
-function check_col () {
-    local col=$1
-    local use=$2
-    local int=$3
-
-    if [ -n "${col//[0-9]}" ]; then
-        if [ $int -eq 1 ]; then
-            echo "ERROR: invalid value for $use column: $col" 1>&2
-            ERROR=1
-        fi
-        return
-    fi
-
-    local cur=${col_use[$col]}
-    if [ -n "$cur" ]; then
-        echo "ERROR: $use column is the same as $cur column" 1>&2
-        col_use[$col]="${cur},$use"
-        ERROR=1
-    else
-        col_use[$col]=$use
-    fi
-}
-
-check_col $chrom_col   'chromosome' 1
-check_col $pos_col     'position'   1
-check_col $allele_col  'allele'     1
-check_col $strand_col  'strand'     0
-check_col $comment_col 'comment'    0
-
-if [ $ERROR -ne 0 ]; then
-    exit 1
-fi
-
-################################################################################
-## get/check the db directory from the argument org,db_loc                    ##
-################################################################################
-db_dir=$( awk '$1 == org { print $2 }' org=$org $db_loc )
-
-if [ -z "$db_dir" ]; then
-    echo "Can't find dbkey \"$org\" in loc file \"$db_loc\"" 1>&2
-    exit 1
-fi
-
-if [ ! -d "$db_dir" ]; then
-    echo "Can't access SIFT database directory \"$db_dir\"" 1>&2
-    exit 1
-fi
-
-################################################################################
-## create input file for SIFT_exome_nssnvs.pl                                 ##
-################################################################################
-if [ ! -r "$input_file" ]; then
-    echo "Can't read input file \"$input_file\"" 1>&2
-    exit 1
-fi
-
-if [ $base -eq 0 ]; then
-    beg_col="$pos_col"
-    end_col="$pos_col + 1"
-    pos_adj='$2 = $2 - 1'
-else
-    beg_col="$pos_col - 1"
-    end_col="$pos_col"
-    pos_adj=''
-fi
-
-strand_cvt=''
-if [ "$strand_col" = "+" ]; then
-    strand='"1"'
-elif [ "$strand_col" = "-" ]; then
-    strand='"-1"'
-else
-    strand="\$$strand_col"
-    strand_cvt='if ('"${strand}"' == "+") {'"${strand}"' = "1"} else if ('"${strand}"' == "-") {'"${strand}"' = "-1"}'
-fi
-
-print_row='print $'"${chrom_col}"', $'"${beg_col}"', $'"${end_col}"', '"${strand}"', $'"${allele_col}"''
-if [ "$comment_col" != "-" ]; then
-    print_row=''"${print_row}"', $'"${comment_col}"''
-fi
-
-awk '
-BEGIN {FS="\t";OFS=","}
-$'"${chrom_col}"' ~ /^[cC][hH][rR]/ {$'"${chrom_col}"' = substr($'"${chrom_col}"',4)}
-{
-    '"${strand_cvt}"'
-    '"${print_row}"'
-}
-' "$input_file" > "$sift_input"
-
-################################################################################
-## run SIFT_exome_nssnvs.pl command line program                              ##
-################################################################################
-if [ "$output_opts" = "None" ]; then
-    output_opts=""
-else
-    output_opts=$( echo "$output_opts" | sed -e 's/,/ 1 -/g' )
-    output_opts="-$output_opts 1"
-fi
-
-SIFT_exome_nssnvs.pl -i "$sift_input" -d "$db_dir" -o "$working_dir" $output_opts &> "$sift_output"
-if [ $? -ne 0 ]; then
-    echo "failed: SIFT_exome_nssnvs.pl -i \"$sift_input\" -d \"$db_dir\" -o \"$working_dir\" $output_opts"
-    exit 1
-fi
-
-################################################################################
-## locate the SIFT_exome_nssnvs.pl output file                                ##
-################################################################################
-sift_pid=$( sed -n -e 's/^.*Your job id is \([0-9][0-9]*\) and is currently running.*$/\1/p' "$sift_output" )
-
-if [ -z "$sift_pid" ]; then
-    echo "Can't find SIFT pid in \"$sift_output\"" 1>&2
-    exit 1
-fi
-
-sift_outdir="$working_dir/$sift_pid"
-if [ ! -d "$sift_outdir" ]; then
-    echo "Can't access SIFT output directory \"$sift_outdir\"" 1>&2
-    exit 1
-fi
-
-sift_outfile="$sift_outdir/${sift_pid}_predictions.tsv"
-if [ ! -r "$sift_outfile" ]; then
-    echo "Can't access SIFT output file \"$sift_outfile\"" 1>&2
-    exit 1
-fi
-
-################################################################################
-## create galaxy output file                                                  ##
-################################################################################
-awk '
-BEGIN {FS="\t";OFS="\t"}
-NR == 1 {
-    $12 = "Num seqs at position"
-    $1 = "Chrom\tPosition\tStrand\tAllele"
-    print
-}
-NR != 1 {
-    $1 = "chr" $1
-    gsub(/,/, "\t", $1)
-    print
-}
-' "$sift_outfile" | awk '
-BEGIN {FS="\t";OFS="\t"}
-NR == 1 {
-    print "#" $0
-}
-NR != 1 {
-    if ($3 == "1") {$3 = "+"} else if ($3 == "-1") {$3 = "-"}
-    '"${pos_adj}"'
-    print
-}
-' > "$output_file"
-
-################################################################################
-## cleanup                                                                    ##
-################################################################################
-rm -rf "$sift_outdir" "$sift_input" "$sift_output"
-
--- a/tools/human_genome_variation/snpFreq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-<tool id="hgv_snpFreq" name="snpFreq" version="1.0.0">
-  <description>significant SNPs in case-control data</description>
-
-  <command interpreter="perl">
-    snpFreq2.pl $input $group1_1 $group1_2 $group1_3 $group2_1 $group2_2 $group2_3 0.05 $output
-  </command>
-
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Dataset" />
-    <param name="group1_1" label="Column with genotype 1 count for group 1" type="data_column" data_ref="input" />
-    <param name="group1_2" label="Column with genotype 2 count for group 1" type="data_column" data_ref="input" />
-    <param name="group1_3" label="Column with genotype 3 count for group 1" type="data_column" data_ref="input" />
-    <param name="group2_1" label="Column with genotype 1 count for group 2" type="data_column" data_ref="input" />
-    <param name="group2_2" label="Column with genotype 2 count for group 2" type="data_column" data_ref="input" />
-    <param name="group2_3" label="Column with genotype 3 count for group 2" type="data_column" data_ref="input" />
-  </inputs>
-
-  <outputs>
-    <data format="tabular" name="output" />
-  </outputs>
-
-  <requirements>
-    <requirement type="binary">R</requirement>
-  </requirements>
-
-  <tests>
-    <test>
-      <param name="input" ftype="tabular" value="snpFreqInput.txt" dbkey="hg18" />
-      <param name="group1_1" value="4" />
-      <param name="group1_2" value="5" />
-      <param name="group1_3" value="6" />
-      <param name="group2_1" value="7" />
-      <param name="group2_2" value="8" />
-      <param name="group2_3" value="9" />
-      <output name="output" file="snpFreqTestOut.txt" />
-    </test>
-  </tests>
-
-  <help>
-
-**Dataset formats**
-
-The input is tabular_, with six columns of genotype counts.  The output is also tabular,
-and includes all of the input data plus the additional columns described below.
-(`Dataset missing?`_)
-
-.. _tabular: ./static/formatHelp.html#tab
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-This tool performs a basic analysis of bi-allelic SNPs in case-control
-data, using the R statistical environment and Fisher's exact test to
-identify SNPs with a significant difference in the genotype frequencies
-between the two groups.  R's "qvalue" package is used to correct for
-multiple testing.
-
-The input file includes counts for each genotype (AA aa Aa) for each
-group at each SNP position.  The assignment of codes (1 2 3) to these
-genotypes is arbitrary, as long as it is consistent for both groups.
-Any other input columns are ignored in the computation, but are copied
-to the output.  The output appends eight additional columns: the
-expected counts of the three genotypes for each group, the p-value,
-and the q-value.
-
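-As an illustrative sketch (Python; not part of the tool itself), the
-expected counts appended for one SNP come from the genotype totals;
-the values match the first row of the example below::
-
-    def expected_counts(g1, g2):
-        # g1, g2: observed counts of the three genotypes in each group
-        n = float(sum(g1) + sum(g2))
-        totals = [a + b for a, b in zip(g1, g2)]   # per-genotype totals
-        exp1 = [round(t * sum(g1) / n, 3) for t in totals]
-        exp2 = [round(t * sum(g2) / n, 3) for t in totals]
-        return exp1, exp2
-
-    expected_counts([38, 4, 15], [56, 0, 1])
-    # returns ([47.0, 2.0, 8.0], [47.0, 2.0, 8.0])
-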
------
-
-**Example**
-
-- input file::
-
-    chr1  210  211  38  4  15  56  0   1   x
-    chr1  228  229  55  0  2   56  0   1   x
-    chr1  230  231  46  0  11  55  0   2   x
-    chr1  234  235  43  0  14  55  0   2   x
-    chr1  236  237  55  0  2   13  10  34  x
-    chr1  437  438  55  0  2   46  0   11  x
-    chr1  439  440  56  0  1   55  0   2   x
-    chr1  449  450  56  0  1   13  20  24  x
-    chr1  518  519  56  0  1   38  4   15  x
-
-Here the group 1 genotype counts are in columns 4 - 6, while those
-for group 2 are in columns 7 - 9.
-
-Note that the "x" column has no meaning.  It was added to this example
-to show that extra columns can be included, and to make it easier
-to see where the new columns are appended in the output.
-
-- output file::
-
-    chr1  210  211  38  4  15  56  0   1   x  47    2   8     47    2   8     1.50219088598917e-05  6.32501425679652e-06
-    chr1  228  229  55  0  2   56  0   1   x  55.5  0   1.5   55.5  0   1.5   1                     0.210526315789474
-    chr1  230  231  46  0  11  55  0   2   x  50.5  0   6.5   50.5  0   6.5   0.0155644201009862    0.00409590002657532
-    chr1  234  235  43  0  14  55  0   2   x  49    0   8     49    0   8     0.00210854461554067   0.000739840215979182
-    chr1  236  237  55  0  2   13  10  34  x  34    5   18    34    5   18    6.14613878554783e-17  4.31307984950725e-17
-    chr1  437  438  55  0  2   46  0   11  x  50.5  0   6.5   50.5  0   6.5   0.0155644201009862    0.00409590002657532
-    chr1  439  440  56  0  1   55  0   2   x  55.5  0   1.5   55.5  0   1.5   1                     0.210526315789474
-    chr1  449  450  56  0  1   13  20  24  x  34.5  10  12.5  34.5  10  12.5  2.25757007974134e-18  2.37638955762246e-18
-    chr1  518  519  56  0  1   38  4   15  x  47    2   8     47    2   8     1.50219088598917e-05  6.32501425679652e-06
-
-  </help>
-</tool>
--- a/tools/human_genome_variation/snpFreq2.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,107 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-#expected input: path to file, columns of counts (2 sets of 3), threshold, output file
-if (!@ARGV or scalar @ARGV != 9) {
-   print "usage: snpFreq2.pl /path/to/snps.txt <6 column numbers (1-based) with genotype counts, first one group then the other> threshold outfile\n";
-   exit 1;
-}
-
-#get and verify inputs
-my $file = shift @ARGV;
-my $a1 = shift @ARGV;
-if ($a1 =~ /\D/ or $a1 < 1) {
-   print "Error the column number, must be an integer greater than or equal to 1. Got $a1\n";
-   exit 1;
-}
-my $a2 = shift @ARGV;
-if ($a2 =~ /\D/ or $a2 < 1) {
-   print "Error the column number, must be an integer greater than or equal to 1. Got $a2\n";
-   exit 1;
-}
-my $a3 = shift @ARGV;
-if ($a3 =~ /\D/ or $a3 < 1) {
-   print "Error the column number, must be an integer greater than or equal to 1. Got $a3\n";
-   exit 1;
-}
-my $b1 = shift @ARGV;
-if ($b1 =~ /\D/ or $b1 < 1) {
-   print "Error the column number, must be an integer greater than or equal to 1. Got $b1\n";
-   exit 1;
-}
-my $b2 = shift @ARGV;
-if ($b2 =~ /\D/ or $b2 < 1) {
-   print "Error the column number, must be an integer greater than or equal to 1. Got $b2\n";
-   exit 1;
-}
-my $b3 = shift @ARGV;
-if ($b3 =~ /\D/ or $b3 < 1) {
-   print "Error the column number, must be an integer greater than or equal to 1. Got $b3\n";
-   exit 1;
-}
-my $thresh = shift @ARGV;
-if ($thresh !~ /^\d*\.?\d+$/) {
-   print "Error: the threshold must be a number. Got $thresh\n";
-   exit 1;
-} elsif ($thresh > .3) {
-   print "Error: the threshold cannot be greater than 0.3. Got $thresh\n";
-   exit 1;
-}
-my $outfile = shift @ARGV;
-
-#run Fisher's exact test (using R) on the whole table
-my $cmd = qq|options(warn=-1)
-           tab <- read.table('$file', sep="\t")
-           size <- length(tab[,1])
-           width <- length(tab[1,])
-           x <- 1:size
-           y <- matrix(data=0, nr=size, nc=6)
-           for(i in 1:size) {
-              m <- matrix(c(tab[i,$a1], tab[i,$b1], tab[i,$a2], tab[i,$b2], tab[i,$a3], tab[i,$b3]), nrow=2)
-              t <- fisher.test(m)
-              x[i] <- t\$p.value
-              if (x[i] >= 1) {
-                  x[i] <- .999
-              }
-              n <- (tab[i,$a1] + tab[i,$a2] + tab[i,$a3] + tab[i,$b1] + tab[i,$b2] + tab[i,$b3])
-              n_a <- (tab[i,$a1] + tab[i,$a2] + tab[i,$a3])
-              y[i,1] <- ((tab[i,$a1] + tab[i,$b1])*(n_a))/n
-              y[i,1] <- round(y[i,1],3)
-              y[i,2] <- ((tab[i,$a2] + tab[i,$b2])*(n_a))/n
-              y[i,2] <- round(y[i,2],3)
-              y[i,3] <- ((tab[i,$a3] + tab[i,$b3])*(n_a))/n
-              y[i,3] <- round(y[i,3],3)
-              n_b <- (tab[i,$b1] + tab[i,$b2] + tab[i,$b3])
-              y[i,4] <- ((tab[i,$a1] + tab[i,$b1])*(n_b))/n
-              y[i,4] <- round(y[i,4],3)
-              y[i,5] <- ((tab[i,$a2] + tab[i,$b2])*(n_b))/n
-              y[i,5] <- round(y[i,5],3)
-              y[i,6] <- ((tab[i,$a3] + tab[i,$b3])*(n_b))/n
-              y[i,6] <- round(y[i,6],3)
-           }|;
-           #results <- data.frame(tab[1:size,1:width], x[1:size])
-           #write.table(results, file="$outfile", row.names = FALSE ,col.names = FALSE,quote = FALSE, sep="\t")
-           #q()|;
-
-my $cmd2 = qq|suppressPackageStartupMessages(library(qvalue))
-              qobj <- qvalue(x[1:size], lambda=seq(0,0.90,$thresh), pi0.method="bootstrap", fdr.level=0.1, robust=FALSE, smooth.log.pi0 = FALSE)
-              q <- qobj\$qvalues
-              results <- data.frame(tab[1:size,1:width], y[1:size,1:6], x[1:size], q[1:size])
-              write.table(results, file="$outfile", row.names = FALSE ,col.names = FALSE,quote = FALSE, sep="\t")
-              q()|;
-
-#for TESTING
-my $pr = qq|results <- data.frame(tab[1:size,1:width], y[1:size,1:6], x[1:size])
-            write.table(results, file="$outfile", row.names = FALSE ,col.names = FALSE,quote = FALSE, sep="\t")
-              q()|;
-
-open(FT, "| R --slave --vanilla") 
-   or die "Couldn't call fisher.text, $!\n";
-print FT $cmd, "\n"; #fisher test
-print FT $cmd2, "\n"; #qvalues and results
-#print FT $pr, "\n";
-close FT or die "Couldn't finish fisher.test, $!\n";
-
-exit;
--- a/tools/hyphy/hyphy_branch_lengths_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-#Dan Blankenberg
-#takes a command-line tree definition and a multiple FASTA alignment file and runs the branch length analysis
-import os, sys
-from galaxy import eggs
-from galaxy.tools.util import hyphy_util
-
-#Retrieve hyphy path, this will need to be the same across the cluster
-tool_data = sys.argv.pop()
-HYPHY_PATH = os.path.join( tool_data, "HYPHY" )
-HYPHY_EXECUTABLE = os.path.join( HYPHY_PATH, "HYPHY" )
-
-#Read command line arguments
-input_filename = os.path.abspath(sys.argv[1].strip())
-output_filename = os.path.abspath(sys.argv[2].strip())
-tree_contents = sys.argv[3].strip()
-nuc_model = sys.argv[4].strip()
-base_freq = sys.argv[5].strip()
-model_options = sys.argv[6].strip()
-
-#Set up Temporary files for hyphy run
-#set up tree file
-tree_filename = hyphy_util.get_filled_temp_filename(tree_contents)
-
-#Guess if this is a single or multiple FASTA input file
-found_blank = False
-is_multiple = False
-for line in open(input_filename):
-    line = line.strip()
-    if line == "": found_blank = True
-    elif line.startswith(">") and found_blank:
-        is_multiple = True
-        break
-    else: found_blank = False
-
-#set up BranchLengths file
-BranchLengths_filename = hyphy_util.get_filled_temp_filename(hyphy_util.BranchLengths)
-if is_multiple: 
-    os.unlink(BranchLengths_filename)
-    BranchLengths_filename = hyphy_util.get_filled_temp_filename(hyphy_util.BranchLengthsMF)
-    print "Multiple Alignment Analyses"
-else: print "Single Alignment Analyses"
-
-#setup Config file
-config_filename = hyphy_util.get_branch_lengths_config_filename(input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename)
-
-#Run Hyphy
-hyphy_cmd = "%s BASEPATH=%s USEPATH=/dev/null %s" % (HYPHY_EXECUTABLE, HYPHY_PATH, config_filename)
-hyphy = os.popen(hyphy_cmd, 'r')
-#print hyphy.read()
-hyphy.close()
-
-#remove temporary files
-os.unlink(BranchLengths_filename)
-os.unlink(tree_filename)
-os.unlink(config_filename)
--- a/tools/hyphy/hyphy_branch_lengths_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,95 +0,0 @@
-<?xml version="1.2.1"?>
-<tool name="Branch Lengths" id="hyphy_branch_lengths_wrapper1">
-	
-	<description>Estimation</description>
-	
-	<command interpreter="python">hyphy_branch_lengths_wrapper.py $input1 $out_file1 "$tree" "$model" "$base_freq" "Global" ${GALAXY_DATA_INDEX_DIR}</command>
-	
-    <inputs>
-        <page>
-            <param format="fasta" name="input1" type="data" label="Fasta file"/>
-            <param name="tree" type="text" label="Tree Definition" size="20" help="For example: ((hg17,panTro1),(mm5,rn3),canFam1)"/>
-            <param name="model" type="select" label="Substitution Model">
-      	        <option value="000000">F81</option>
-                <option value="010010">HKY85</option>
-                <option value="012345">REV</option>
-            </param>
-<!--            <param name="model_options" type="select" label="Model Options">
-      	        <option value="Local">All model parameters are estimated independently for each branch</option>
-                <option value="Global">Model parameters are shared by all branches, branch lengths are estimated independently</option>
-                <option value="Global w/variation">Model parameters are shared by all branches, branch lengths come from a user-chosen distribution, whose parameters are estimated</option>
-                <option value="Global w/variation+HM">Model parameters are shared by all branches, branch lengths come from a user-chosen distribution, whose parameters is estimated; rates at adjacent sites are correlated via a simple Hidden Markov model with an autocorrelation parameter lambda</option>
-            </param> -->
-            <param name="base_freq" type="select" label="Base Frequencies">
-      	        <option value="Observed">Nucleotide frequencies collected from the data file will be used as equilibrium frequencies</option>
-                <option value="Equal">Equal (.25) frequencies are used as equilibrium frequencies</option>
-            </param>
-        </page>
-    </inputs>
-	<outputs>
-		<data name="out_file1" format="tabular" />
-	</outputs>
-    <tests>
-      <test>
-        <param name="input1" value="branchlength_in.fasta"/>
-        <param name="tree" value="((hg17,panTro1),(mm5,rn3),canFam1)"/>
-        <param name="model" value="012345"/>
-        <param name="base_freq" value="Observed"/>
-        <output name="out_file1" file="branchlength_out.tabular"/>
-      </test>
-    </tests>
-	<help>
-This tool takes a single or multiple FASTA alignment file and estimates branch lengths using HYPHY_, a maximum likelihood analysis package.
-
-For the tree definition, you only need to specify the species build names. For example, you could use the tree *((hg17,panTro1),(mm5,rn3),canFam1)*, if your FASTA file looks like this::
-
-    &gt;hg17.chr7(+):26907301-26907310|hg17_0
-    GTGGGAGGT
-    &gt;panTro1.chr6(+):28037319-28037328|panTro1_0
-    GTGGGAGGT
-    &gt;mm5.chr6(+):52104022-52104031|mm5_0
-    GTGGGAGGT
-    &gt;rn3.chr4(+):80734395-80734404|rn3_0
-    GTGGGAGGT
-    &gt;canFam1.chr14(+):42826409-42826418|canFam1_0
-    GTGGGAGGT
-
-    &gt;hg17.chr7(+):26907310-26907326|hg17_1
-    AGTCAGAGTGTCTGAG
-    &gt;panTro1.chr6(+):28037328-28037344|panTro1_1
-    AGTCAGAGTGTCTGAG
-    &gt;mm5.chr6(+):52104031-52104047|mm5_1
-    AGTCAGAGTGTCTGAG
-    &gt;rn3.chr4(+):80734404-80734420|rn3_1
-    AGTCAGAGTATCTGAG
-    &gt;canFam1.chr14(+):42826418-42826434|canFam1_1
-    AGTCAGAGTGTCTGAG
-
-    &gt;hg17.chr7(+):26907326-26907338|hg17_2
-    GTAGAAGACCCC
-    &gt;panTro1.chr6(+):28037344-28037356|panTro1_2
-    GTAGAAGACCCC
-    &gt;mm5.chr6(+):52104047-52104059|mm5_2
-    GTAGACGATGCC
-    &gt;rn3.chr4(+):80734420-80734432|rn3_2
-    GTAGATGATGCG
-    &gt;canFam1.chr14(+):42826434-42826446|canFam1_2
-    GTAGAAGACCCC
-
-    &gt;hg17.chr7(+):26907338-26907654|hg17_3
-    GGGGAAGGAACGCAGGGCGAAGAGCTGGACTTCTCTGAGGAT---TCCTCGGCCTTCTCGT-----CGTTTCCTGG----CGGGGTGGCCGGAGAGATGGGCAAGAGACCCTCCTTCTCACGTTTCTTTTGCTTCATTCGGCGGTTCTGGAACCAGATCTTCACTTGGGTCTCGTTGAGCTGCAGGGATGCAGCGATCTCCACCCTGCGGGCGCGCGTCAGGTACTTGTTGAAGTGGAACTCCTTCTCCAGTTCCGTGAGCTGCTTGGTAGTGAAGTTGGTGCGCACCGCGTTGGGTTGACCCAGGTAGCCGTACTCTCCAACTTTCC
-    &gt;panTro1.chr6(+):28037356-28037672|panTro1_3
-    GGGGAAGGAACGCAGGGCGAAGAGCTGGACTTCTCTGAGGAT---TCCTCGGCCTTCTCGT-----CGTTTCCTGG----CGGGGTGGCCGGAGAGATGGGCAAGAGACCCTCCTTCTCACGTTTCTTTTGCTTCATTCGGCGGTTCTGGAACCAGATCTTCACTTGGGTCTCGTTGAGCTGCAGGGATGCAGCGATCTCCACCCTGCGGGCGCGCGTCAGGTACTTGTTGAAGTGGAACTCCTTCTCCAGTTCCGTGAGCTGCTTGGTAGTGAAGTTGGTGCGCACCGCGTTGGGTTGACCCAGGTAGCCGTACTCTCCAACTTTCC
-    &gt;mm5.chr6(+):52104059-52104375|mm5_3
-    GGAGAAGGGGCACTGGGCGAGGGGCTAGATTTCTCAGATGAT---TCTTCCGTTTTCTCAT-----CGCTGCCAGG----AGGAGTGGCAGGGGAGATGGGCAGGAGCCCCTCCTTCTCACGCTTCTTCTGCTTCATGCGGCGATTCTGGAACCAGATCTTCACCTGGGTCTCATTGAGCTGTAGGGACGCGGCAATCTCCACCCTGCGCGCTCGTGTAAGGTACTTGTTGAAGTGGAACTCCTTCTCCAGCTCTGTGAGCTGCTTGGTGGTGAAATTGGTGCGCACTGCGTTGGGTTGACCCACGTAGCCGTACTCTCCAACTTTCC
-    &gt;rn3.chr4(+):80734432-80734748|rn3_3
-    GGAGAAGGGGCGCTGGGCGAGGAGCTGGATTTCTCAGATGAT---TCTTCAGTTTTCTCAT-----CGCTTCCAGG----AGGGGTGGCGGGTGAAATGGGCAAGAGCCCCTCTTTCTCGCGCTTCTTCTGCTTCATGCGGCGATTCTGGAACCAGATCTTCACCTGGGTCTCATTGAGTTGCAGGGACGCGGCTATCTCCACCCTGCGGGCTCTTGTTAGGTACTTGTTGAAGTGGAACTCCTTCTCCAGCTCTGTGAGCTGCTTGGTGGTGAAGTTGGTGCGCACTGCGTTGGGTTGACCCACGTAGCCATACTCTCCAACTTTCC
-    &gt;canFam1.chr14(+):42826446-42826762|canFam1_3
-    GGAGACGGAATGCAGGGCGAGGAGCTGGATTTCTCTGAAGAT---TCCTCCGCCTTCTCCT-----CACTTCCTGG----CGGGGTGGCAGGGGAGATGGGCAAAAGGCCCTCTTTCTCTCGTTTCTTCTGCTTCATCCGGCGGTTCTGGAACCAGATCTTCACCTGGGTCTCGTTGAGCTGCAGGGATGCTGCGATCTCCACCCTGCGGGCGCGGGTCAGATACTTATTGAAGTGGAACTCCTTTTCCAGCTCGGTGAGCTGCTTGGTGGTGAAGTTGGTACGCACTGCATTCGGTTGACCCACGTAGCCGTACTCTCCAACTTTCC
-    
-
-
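-A hypothetical helper (Python; not part of this tool) showing how the
-build names used in the tree definition can be recovered from such
-FASTA headers::
-
-    def build_names(fasta_path):
-        names = []
-        for line in open(fasta_path):
-            if line.startswith(">"):
-                name = line[1:].split(".")[0]   # ">hg17.chr7(+):..." gives "hg17"
-                if name not in names:
-                    names.append(name)
-        return names
-
-The tree definition must use exactly these names.
-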
-.. _HYPHY: http://www.hyphy.org
-	</help>
-</tool>
-
--- a/tools/hyphy/hyphy_dnds_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-#Guru
-#takes a FASTA alignment and a tree definition and estimates the dN/dS ratio
-import os, sys
-from galaxy import eggs
-from galaxy.tools.util import hyphy_util
-
-#Retrieve hyphy path, this will need to be the same across the cluster
-tool_data = sys.argv.pop()
-HYPHY_PATH = os.path.join( tool_data, "HYPHY" )
-HYPHY_EXECUTABLE = os.path.join( HYPHY_PATH, "HYPHY" )
-
-#Read command line arguments
-input_filename = os.path.abspath(sys.argv[1].strip())
-output_filename = os.path.abspath(sys.argv[2].strip())
-tree_contents = sys.argv[3].strip()
-nuc_model = sys.argv[4].strip()
-analysis = sys.argv[5].strip()
-
-if tree_contents == "":
-    print >> sys.stderr, "Please specify a valid tree definition."
-    sys.exit(1)
-        
-tree_filename = hyphy_util.get_filled_temp_filename(tree_contents)
-
-if analysis == "local":
-    fitter_filename = hyphy_util.get_filled_temp_filename(hyphy_util.SimpleLocalFitter)
-else:
-    fitter_filename = hyphy_util.get_filled_temp_filename(hyphy_util.SimpleGlobalFitter)
-
-tabwriter_filename = hyphy_util.get_filled_temp_filename(hyphy_util.TabWriter)
-FastaReader_filename = hyphy_util.get_filled_temp_filename(hyphy_util.FastaReader)
-#setup Config file
-config_filename = hyphy_util.get_dnds_config_filename(fitter_filename, tabwriter_filename, "Universal", tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename)
-
-#Run Hyphy
-hyphy_cmd = "%s BASEPATH=%s USEPATH=/dev/null %s" % (HYPHY_EXECUTABLE, HYPHY_PATH, config_filename)
-hyphy = os.popen(hyphy_cmd, 'r')
-#print hyphy.read()
-hyphy.close()
-
-#remove temporary files
-os.unlink(fitter_filename)
-os.unlink(tabwriter_filename)
-os.unlink(tree_filename)
-os.unlink(FastaReader_filename)
-os.unlink(config_filename)
-
-if nuc_model == "000000":
-    model = "F81"
-elif nuc_model == "010010":
-    model = "HKY85"
-else:
-    model = "REV"
-    
-print "Analysis: %s; Model: %s; Tree: %s" %(analysis, model, tree_contents)
--- a/tools/hyphy/hyphy_dnds_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-<?xml version="1.1.1"?>
-<tool name="dN/dS Ratio" id="hyphy_dnds_wrapper1">
-	
-	<description>Estimation</description>
-	
-	<command interpreter="python">hyphy_dnds_wrapper.py $input1 $out_file1 "$tree" "$model" $analysis ${GALAXY_DATA_INDEX_DIR}</command>
-	
-    <inputs>
-        <page>
-            <param format="fasta" name="input1" type="data" label="Fasta file"/>
-            <param name="analysis" type="select" label="Analysis to run on every gene">
-      	    	<option value="global">Global</option>
-      	        <option value="local">Local</option>
-            </param>
-            <param name="tree" type="text" label="Tree Definition in Newick format" size="20" help="For example: ((hg17,panTro1),(mm5,rn3),canFam1)"/>
-            <param name="model" type="select" label="Substitution Model">
-      	        <option value="000000">F81</option>
-                <option value="010010">HKY85</option>
-                <option value="012345">REV</option>
-            </param>
-        </page>
-    </inputs>
-	<outputs>
-		<data name="out_file1" format="tabular" />
-	</outputs>
-    <tests>
-      <test>
-        <param name="input1" value="dnds_inp.fasta"/>
-        <param name="tree" value="((human, chimp), mouse)"/>
-        <param name="model" value="000000"/>
-        <param name="analysis" value="global"/>
-        <output name="out_file1" file="dnds_out.tabular"/>
-      </test>
-    </tests>
-	<help>
-
-.. class:: infomark
-
-This tool takes a FASTA alignment file and estimates the dN/dS ratio using HYPHY_, a maximum likelihood analysis package.
-
------
-
-.. class:: warningmark
-
-The tool returns an error message if no tree definition, or an invalid one, is supplied.
-Any block that does not contain as many species as the tree definition will be omitted from the output.
-
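-A hypothetical check (Python; not part of the wrapper) that counts the
-sequences in each blank-line-separated alignment block, making it easy
-to spot blocks that would be omitted for containing fewer species than
-the tree definition::
-
-    def block_sizes(fasta_path):
-        sizes, count = [], 0
-        for line in open(fasta_path):
-            line = line.strip()
-            if not line:                 # a blank line ends the current block
-                if count:
-                    sizes.append(count)
-                count = 0
-            elif line.startswith(">"):   # one header per sequence
-                count += 1
-        if count:
-            sizes.append(count)
-        return sizes
-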
------
-
-For the tree definition, you only need to specify the species build names. For example, you could use the tree *((hg17,panTro1),(mm5,rn3),canFam1)*, if your FASTA file looks like the example below. You may also use the **Neighbor Joining Tree Builder** tool to obtain the tree definition::
-
-    &gt;hg17.chr7(+):26907301-26907310|hg17_0
-    GTGGGAGGT
-    &gt;panTro1.chr6(+):28037319-28037328|panTro1_0
-    GTGGGAGGT
-    &gt;mm5.chr6(+):52104022-52104031|mm5_0
-    GTGGGAGGT
-    &gt;rn3.chr4(+):80734395-80734404|rn3_0
-    GTGGGAGGT
-    &gt;canFam1.chr14(+):42826409-42826418|canFam1_0
-    GTGGGAGGT
-
-    &gt;hg17.chr7(+):26907310-26907326|hg17_1
-    AGTCAGAGTGTCTGAG
-    &gt;panTro1.chr6(+):28037328-28037344|panTro1_1
-    AGTCAGAGTGTCTGAG
-    &gt;mm5.chr6(+):52104031-52104047|mm5_1
-    AGTCAGAGTGTCTGAG
-    &gt;rn3.chr4(+):80734404-80734420|rn3_1
-    AGTCAGAGTATCTGAG
-    &gt;canFam1.chr14(+):42826418-42826434|canFam1_1
-    AGTCAGAGTGTCTGAG
-
-    &gt;hg17.chr7(+):26907326-26907338|hg17_2
-    GTAGAAGACCCC
-    &gt;panTro1.chr6(+):28037344-28037356|panTro1_2
-    GTAGAAGACCCC
-    &gt;mm5.chr6(+):52104047-52104059|mm5_2
-    GTAGACGATGCC
-    &gt;rn3.chr4(+):80734420-80734432|rn3_2
-    GTAGATGATGCG
-    &gt;canFam1.chr14(+):42826434-42826446|canFam1_2
-    GTAGAAGACCCC
-
-    &gt;hg17.chr7(+):26907338-26907654|hg17_3
-    GGGGAAGGAACGCAGGGCGAAGAGCTGGACTTCTCTGAGGAT---TCCTCGGCCTTCTCGT-----CGTTTCCTGG----CGGGGTGGCCGGAGAGATGGGCAAGAGACCCTCCTTCTCACGTTTCTTTTGCTTCATTCGGCGGTTCTGGAACCAGATCTTCACTTGGGTCTCGTTGAGCTGCAGGGATGCAGCGATCTCCACCCTGCGGGCGCGCGTCAGGTACTTGTTGAAGTGGAACTCCTTCTCCAGTTCCGTGAGCTGCTTGGTAGTGAAGTTGGTGCGCACCGCGTTGGGTTGACCCAGGTAGCCGTACTCTCCAACTTTCC
-    &gt;panTro1.chr6(+):28037356-28037672|panTro1_3
-    GGGGAAGGAACGCAGGGCGAAGAGCTGGACTTCTCTGAGGAT---TCCTCGGCCTTCTCGT-----CGTTTCCTGG----CGGGGTGGCCGGAGAGATGGGCAAGAGACCCTCCTTCTCACGTTTCTTTTGCTTCATTCGGCGGTTCTGGAACCAGATCTTCACTTGGGTCTCGTTGAGCTGCAGGGATGCAGCGATCTCCACCCTGCGGGCGCGCGTCAGGTACTTGTTGAAGTGGAACTCCTTCTCCAGTTCCGTGAGCTGCTTGGTAGTGAAGTTGGTGCGCACCGCGTTGGGTTGACCCAGGTAGCCGTACTCTCCAACTTTCC
-    &gt;mm5.chr6(+):52104059-52104375|mm5_3
-    GGAGAAGGGGCACTGGGCGAGGGGCTAGATTTCTCAGATGAT---TCTTCCGTTTTCTCAT-----CGCTGCCAGG----AGGAGTGGCAGGGGAGATGGGCAGGAGCCCCTCCTTCTCACGCTTCTTCTGCTTCATGCGGCGATTCTGGAACCAGATCTTCACCTGGGTCTCATTGAGCTGTAGGGACGCGGCAATCTCCACCCTGCGCGCTCGTGTAAGGTACTTGTTGAAGTGGAACTCCTTCTCCAGCTCTGTGAGCTGCTTGGTGGTGAAATTGGTGCGCACTGCGTTGGGTTGACCCACGTAGCCGTACTCTCCAACTTTCC
-    &gt;rn3.chr4(+):80734432-80734748|rn3_3
-    GGAGAAGGGGCGCTGGGCGAGGAGCTGGATTTCTCAGATGAT---TCTTCAGTTTTCTCAT-----CGCTTCCAGG----AGGGGTGGCGGGTGAAATGGGCAAGAGCCCCTCTTTCTCGCGCTTCTTCTGCTTCATGCGGCGATTCTGGAACCAGATCTTCACCTGGGTCTCATTGAGTTGCAGGGACGCGGCTATCTCCACCCTGCGGGCTCTTGTTAGGTACTTGTTGAAGTGGAACTCCTTCTCCAGCTCTGTGAGCTGCTTGGTGGTGAAGTTGGTGCGCACTGCGTTGGGTTGACCCACGTAGCCATACTCTCCAACTTTCC
-    &gt;canFam1.chr14(+):42826446-42826762|canFam1_3
-    GGAGACGGAATGCAGGGCGAGGAGCTGGATTTCTCTGAAGAT---TCCTCCGCCTTCTCCT-----CACTTCCTGG----CGGGGTGGCAGGGGAGATGGGCAAAAGGCCCTCTTTCTCTCGTTTCTTCTGCTTCATCCGGCGGTTCTGGAACCAGATCTTCACCTGGGTCTCGTTGAGCTGCAGGGATGCTGCGATCTCCACCCTGCGGGCGCGGGTCAGATACTTATTGAAGTGGAACTCCTTTTCCAGCTCGGTGAGCTGCTTGGTGGTGAAGTTGGTACGCACTGCATTCGGTTGACCCACGTAGCCGTACTCTCCAACTTTCC
-    
-
-
-.. _HYPHY: http://www.hyphy.org
-	</help>
-</tool>
-
--- a/tools/hyphy/hyphy_nj_tree_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-#Dan Blankenberg
-#takes fasta alignments, a distance metric and builds neighbor joining trees
-import os, sys
-from galaxy import eggs
-from galaxy.tools.util import hyphy_util
-
-#Retrieve hyphy path, this will need to be the same across the cluster
-tool_data = sys.argv.pop()
-HYPHY_PATH = os.path.join( tool_data, "HYPHY" )
-HYPHY_EXECUTABLE = os.path.join( HYPHY_PATH, "HYPHY" )
-
-#Read command line arguments
-input_filename = os.path.abspath(sys.argv[1].strip())
-output_filename1 = os.path.abspath(sys.argv[2].strip())
-output_filename2 = os.path.abspath(sys.argv[3].strip())
-distance_metric = sys.argv[4].strip()
-temp_ps_filename = hyphy_util.get_filled_temp_filename("")
-
-#Guess if this is a single or multiple FASTA input file
-found_blank = False
-is_multiple = False
-for line in open(input_filename):
-    line = line.strip()
-    if line == "": found_blank = True
-    elif line.startswith(">") and found_blank:
-        is_multiple = True
-        break
-    else: found_blank = False
-
-NJ_tree_shared_ibf = hyphy_util.get_filled_temp_filename(hyphy_util.NJ_tree_shared_ibf)
-
-#set up NJ_tree file
-NJ_tree_filename = hyphy_util.get_filled_temp_filename(hyphy_util.get_NJ_tree(NJ_tree_shared_ibf))
-#setup Config file
-config_filename = hyphy_util.get_nj_tree_config_filename(input_filename, distance_metric, output_filename1, temp_ps_filename, NJ_tree_filename)
-if is_multiple: 
-    os.unlink(NJ_tree_filename)
-    os.unlink(config_filename)
-    NJ_tree_filename = hyphy_util.get_filled_temp_filename(hyphy_util.get_NJ_treeMF(NJ_tree_shared_ibf))
-    config_filename = hyphy_util.get_nj_treeMF_config_filename(input_filename, output_filename1, temp_ps_filename, distance_metric, NJ_tree_filename)
-    print "Multiple Alignment Analyses"
-else: print "Single Alignment Analyses"
-
-
-#Run Hyphy
-hyphy_cmd = "%s BASEPATH=%s USEPATH=/dev/null %s" % (HYPHY_EXECUTABLE, HYPHY_PATH, config_filename)
-hyphy = os.popen(hyphy_cmd, 'r')
-#print hyphy.read()
-hyphy.close()
-
-#remove temporary files
-os.unlink(NJ_tree_filename)
-os.unlink(config_filename)
-
-
-#Convert PS to PDF
-if os.path.getsize(temp_ps_filename)>0: temp = os.popen("ps2pdf %s %s" % (temp_ps_filename, output_filename2), 'r').close()
-os.unlink(temp_ps_filename)
--- a/tools/hyphy/hyphy_nj_tree_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-<?xml version="1.1.1"?>
-<tool name="Neighbor Joining Tree" id="hyphy_nj_tree_wrapper1">
-    
-    <description>Builder</description>
-    
-    <command interpreter="python">hyphy_nj_tree_wrapper.py $input1 $out_file1 $out_file2 $distance_metric ${GALAXY_DATA_INDEX_DIR}</command>
-    
-    <inputs>
-        <page>
-            <param format="fasta" name="input1" type="data" label="Fasta file"/>
-            <param name="distance_metric" type="select" label="Distance Model">
-                  <option value="TN93">Tamura-Nei (93)</option>
-                <!-- <option value="TN93_RV">Tamura-Nei (93) distance and rate variation (unequal character frequencies, A->G, C->T and transversional bias corrections, gamma distributed rate variation from site to site)</option> -->
-                <!-- <option value="TN84">Tajima-Nei (84) distance (unequal character frequencies)</option> -->
-                <!-- <option value="K2P_RV">Kimura 2 parameter and rate variation (equal character frequencies, transition/trasversion bias correction, gamma distributed rate variation from site to site)</option> -->
-                <option value="K2P">Kimura 2 parameter</option>
-                <option value="JC69">Jukes-Cantor</option>
-                <!-- <option value="T3P">Tamura 3-parameter (correction for GC content bias and transition/trasversion bias)</option> -->
-                <!-- <option value="p_Distance">Number of observed substitutions per site</option> -->
-                <!-- <option value="Unaligned_LZ">Distance measure for unaligned sequences based on Lempel Ziv measure of information content</option> -->
-                <!-- <option value="Unaligned_LZ_FR">Distance measure for unaligned sequences based on Lempel Ziv measure of information content using the best choice forward and reverse string orientations</option> -->
-            </param>
-        </page>
-    </inputs>
-    <outputs>
-        <data name="out_file1" format="tabular" />
-        <data name="out_file2" format="pdf" />
-    </outputs>
-    <requirements>
-      <requirement type="binary">ps2pdf</requirement>
-    </requirements>
-    <tests>
-      <test>
-        <param name="input1" value="nj_tree_inp.fasta"/>
-        <param name="distance_metric" value="TN93"/>
-        <output name="out_file1" file="nj_tree_newick_out.tabular"/>
-        <output name="out_file2" file="nj_tree_pdf_out.pdf"/> 
-      </test>
-    </tests>
-    <help>
-This tool takes a single or multiple FASTA alignment file and builds neighbor-joining trees using HYPHY_, a maximum likelihood analysis package.
-
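-As an illustration of the distance models offered above (Python; not
-part of the tool), the Jukes-Cantor (JC69) distance is a simple closed
-form in the fraction p of mismatched sites::
-
-    import math
-
-    def jc69_distance(p):
-        # d = -(3/4) * ln(1 - (4/3) * p); defined only for p below 0.75
-        return -0.75 * math.log(1.0 - (4.0 / 3.0) * p)
-
-    jc69_distance(0.1)   # roughly 0.1073
-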
-.. _HYPHY: http://www.hyphy.org
-    </help>
-</tool>
-
--- a/tools/ilmn_pacbio/abyss.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-<tool id="abyss" name="ABySS" version="1.0.0">
-  <description>Short-read de Bruijn assembly</description>
-  <command interpreter="python">
-    quake_wrapper.py -k $k -r $input1 -p 8 > $output1
-  </command>
-  <inputs>
-    <param name="input1" format="fastq" type="data" label="Select FASTQ file to correct" />
-    <param name="k" type="integer" value="16" label="Size of k-mers to correct" />
-  </inputs>
-  <outputs>
-    <data format="fastq" name="output1" label="Error-corrected reads from ${on_string}" />
-  </outputs>
-  <help>
-
-**What it does**
-
-TBD.  Calls ABySS assembler
-
-**Parameter list**
-
-k
-
-**Output**
-
-Corrected reads
-
-  </help>
-</tool>
-
-
--- a/tools/ilmn_pacbio/assembly_stats.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,83 +0,0 @@
-#!/usr/bin/env python
-#
-#Copyright (c) 2011, Pacific Biosciences of California, Inc.
-#
-#All rights reserved.
-#
-#Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
-#    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
-#    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
-#    * Neither the name of Pacific Biosciences nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
-#
-#THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS CONTRIBUTORS BE LIABLE FOR ANY
-#DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-import sys, os
-from optparse import OptionParser
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( 'bx-python' )
-from bx.seq.fasta import FastaReader
-
-def getStats( fastaFile, genomeLength, minContigLength ):
-    lengths = []
-    stats = { "Num" : 0,
-              "Sum" : 0, 
-              "Max" : 0, 
-              "Avg" : 0,
-              "N50" : 0,
-              "99%" : 0 }
-    fasta_reader = FastaReader( open( fastaFile, 'rb' ) )
-    while True:
-        seq = fasta_reader.next()
-        if not seq:
-            break
-        if seq.length < minContigLength:
-            continue
-        lengths.append( seq.length )
-    if lengths:
-        stats[ 'Num' ] = len( lengths )
-        stats[ 'Sum' ] = sum( lengths )
-        stats[ 'Max' ] = max( lengths )
-        stats[ 'Avg' ] = int( sum( lengths ) / float( len( lengths ) ) )
-        stats[ 'N50' ] = 0
-        stats[ '99%' ] = 0
-        if genomeLength == 0:
-            genomeLength = sum( lengths )
-        lengths.sort()
-        lengths.reverse()
-        lenSum = 0
-        stats[ "99%" ] = len( lengths )
-        for idx, length in enumerate( lengths ):
-            lenSum += length
-            if ( lenSum > genomeLength / 2 ):
-                stats[ "N50" ] = length
-                break
-        lenSum = 0
-        for idx, length in enumerate( lengths ):
-            lenSum += length
-            if lenSum > genomeLength * 0.99:
-                stats[ "99%" ] = idx + 1
-                break
-    return stats
-
-def __main__():
-    #Parse Command Line
-    usage = 'Usage: %prog input output --minContigLength'
-    parser = OptionParser( usage=usage )
-    parser.add_option( "--minContigLength", dest="minContigLength", help="Minimum length of contigs to analyze" )
-    parser.add_option( "--genomeLength", dest="genomeLength", help="Length of genome for which to calculate N50s" )
-    parser.set_defaults( minContigLength=0, genomeLength=0 )
-    options, args = parser.parse_args()
-    input_fasta_file = args[ 0 ]
-    output_tabular_file = args[ 1 ]
-    statKeys = "Num Sum Max Avg N50 99%".split( " " )
-    stats = getStats( input_fasta_file, int( options.genomeLength ), int( options.minContigLength ) )
-    fout = open( output_tabular_file, "w" )
-    fout.write( "%s\n" % "\t".join( map( lambda key: str( stats[ key ] ), statKeys ) ) )
-    fout.close()
-
-if __name__=="__main__": __main__()
--- a/tools/ilmn_pacbio/assembly_stats.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-<tool id="assembly_stats" name="Assembly Statistics" version="1.0.0">
-    <description>Calculate common measures of assembly quality</description>
-    <command interpreter="python">
-        assembly_stats.py $input1 $output1 --minContigLength=${minLength}
-    </command>
-    <inputs>
-        <param name="input1" format="fasta" type="data" label="Select FASTA file containing contigs"/>
-        <param name="minLength" type="integer" value="0" label="Minimum length of contigs to consider"/>
-    </inputs>
-    <outputs>
-        <data name="output1" format="tabular" label="Assembly statistics for ${on_string}"/>
-    </outputs>
-    <tests>
-        <test>
-            <param name="input1" value="3.fasta" ftype="fasta"/>
-            <param name="minLength" value="100"/>
-            <output name="output1" ftype="tabular" file="assembly_stats.tabular" />
-        </test>
-    </tests>
-    <help>
-
-**What it does**
-
-Reports standard measures of *de novo* assembly quality such as the number of contigs, the sum of contig lengths, the mean contig length, and N50.
-
-**Parameter list**
-
-Minimum length
-    Only include contigs of this size or greater for calculating statistics.
-
-**Output**
-
-Num contigs
-    Total number of contigs in the assembly
-
-Sum of contig lengths
-    Total sum of contig lengths
-
-Maximum contig length
-    Maximum of the contig lengths
-
-Mean contig length
-    Average contig length
-
-N50
-    Contig length at which 50% of the assembly is contained in contigs of this size or greater.
-
-99%
-    Number of contigs accounting for 99% of the observed assembly.
-
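-A minimal sketch of the N50 computation described above (Python,
-mirroring the logic in assembly_stats.py; illustrative only)::
-
-    def n50(lengths, genome_length=None):
-        total = genome_length or sum(lengths)
-        running = 0
-        for length in sorted(lengths, reverse=True):
-            running += length
-            if running > total / 2.0:
-                return length
-        return 0
-
-    n50([10, 20, 30, 40])   # returns 30, since 40 + 30 exceeds 100/2
-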
-    </help>
-</tool>
-
-
--- a/tools/ilmn_pacbio/cov_model.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,238 +0,0 @@
-#!/usr/bin/env python
-from optparse import OptionParser, SUPPRESS_HELP
-import os, random, quake
-
-############################################################
-# cov_model.py
-#
-# Given a file of kmer counts, reports the cutoff to use
-# to separate trusted/untrusted kmers.
-############################################################
-
-############################################################
-# main
-############################################################
-def main():
-    usage = 'usage: %prog [options] <counts file>'
-    parser = OptionParser(usage)
-    parser.add_option('--int', dest='count_kmers', action='store_true', default=False, help='Kmers were counted as integers w/o the use of quality values [default: %default]')
-    parser.add_option('--ratio', dest='ratio', type='int', default=200, help='Likelihood ratio to set trusted/untrusted cutoff [default: %default]')
-    parser.add_option('--no_sample', dest='no_sample', action='store_true', default=False, help='Do not sample kmer coverages into kmers.txt because it is already done [default: %default]')
-    # help='Model kmer coverage as a function of GC content of kmers [default: %default]'
-    parser.add_option('--gc', dest='model_gc', action='store_true', default=False, help=SUPPRESS_HELP)
-    (options, args) = parser.parse_args()
-
-    if len(args) != 1:
-        parser.error('Must provide k-mer counts file')
-    else:
-        ctsf = args[0]
-
-    if options.count_kmers:
-        model_cutoff(ctsf, options.ratio)
-        print 'Cutoff: %s' % open('cutoff.txt').readline().rstrip()
-        
-    else:
-        if options.model_gc:
-            model_q_gc_cutoffs(ctsf, 25000, options.ratio)
-        else:
-            model_q_cutoff(ctsf, 50000, options.ratio, options.no_sample)
-            print 'Cutoff: %s' % open('cutoff.txt').readline().rstrip()
-
-
-############################################################
-# model_cutoff
-#
-# Make a histogram of kmers to give to R to learn the cutoff
-############################################################
-def model_cutoff(ctsf, ratio):
-    # make kmer histogram
-    cov_max = 0
-    for line in open(ctsf):
-        cov = int(line.split()[1])
-        if cov > cov_max:
-            cov_max = cov
-
-    kmer_hist = [0]*cov_max
-    for line in open(ctsf):
-        cov = int(line.split()[1])
-        kmer_hist[cov-1] += 1
-
-    cov_out = open('kmers.hist', 'w')
-    for cov in range(0,cov_max):
-        if kmer_hist[cov]:
-            print >> cov_out, '%d\t%d' % (cov+1,kmer_hist[cov])
-    cov_out.close()
-
-    os.system('R --slave --args %d < %s/cov_model.r 2> r.log' % (ratio,quake.quake_dir))
-
-
-############################################################
-# model_q_cutoff
-#
-# Sample kmers to give to R to learn the cutoff
-# 'div100' is necessary when the number of kmers is too 
-# large for random.sample, so we only consider every 100th
-# kmer.
-############################################################
-def model_q_cutoff(ctsf, sample, ratio, no_sample=False):
-    if not no_sample:
-        # count number of kmer coverages
-        num_covs = 0
-        for line in open(ctsf):
-            num_covs += 1
-
-        # choose random kmer coverages
-        div100 = False
-        if sample >= num_covs:
-            rand_covs = range(num_covs)
-        else:
-            if num_covs > 1000000000:
-                div100 = True
-                rand_covs = random.sample(xrange(num_covs/100), sample)
-            else:
-                rand_covs = random.sample(xrange(num_covs), sample)
-        rand_covs.sort()
-
-        # print to file
-        out = open('kmers.txt', 'w')
-        kmer_i = 0
-        rand_i = 0
-        for line in open(ctsf):
-            if div100:
-                if kmer_i % 100 == 0 and kmer_i/100 == rand_covs[rand_i]:
-                    print >> out, line.split()[1]
-                    rand_i += 1
-                    if rand_i >= sample:
-                        break
-            else:
-                if kmer_i == rand_covs[rand_i]:
-                    print >> out, line.split()[1]
-                    rand_i += 1
-                    if rand_i >= sample:
-                        break
-            kmer_i += 1
-        out.close()
-
-    os.system('R --slave --args %d < %s/cov_model_qmer.r 2> r.log' % (ratio,quake.quake_dir))
-
-
-############################################################
-# model_q_gc_cutoffs
-#
-# Sample kmers to give to R to learn the cutoff for each
-# GC value
-############################################################
-def model_q_gc_cutoffs(ctsf, sample, ratio):
-    # count the number of kmer coverages at each AT content
-    k = len(open(ctsf).readline().split()[0])
-    num_covs_at = [0]*(k+1)
-    for line in open(ctsf):
-        kmer = line.split()[0]
-        num_covs_at[count_at(kmer)] += 1
-
-    # for each AT bin
-    at_cutoffs = []
-    for at in range(1,k):
-        # sample covs
-        if sample >= num_covs_at[at]:
-            rand_covs = range(num_covs_at[at])
-        else:
-            rand_covs = random.sample(xrange(num_covs_at[at]), sample)
-        rand_covs.sort()
-
-        # print to file
-        out = open('kmers.txt', 'w')
-        kmer_i = 0
-        rand_i = 0
-        for line in open(ctsf):
-            (kmer,cov) = line.split()
-            if count_at(kmer) == at:
-                if kmer_i == rand_covs[rand_i]:
-                    print >> out, cov
-                    rand_i += 1
-                    if rand_i >= sample:
-                        break
-                kmer_i += 1
-        out.close()
-        
-        os.system('R --slave --args %d < %s/cov_model_qmer.r 2> r%d.log' % (ratio,quake.quake_dir,at))
-
-        at_cutoffs.append( open('cutoff.txt').readline().rstrip() )
-        if at in [1,k-1]:   # set the extremes to the next closest value
-            at_cutoffs.append( open('cutoff.txt').readline().rstrip() )
-
-        os.system('mv kmers.txt kmers.at%d.txt' % at)
-        os.system('mv cutoff.txt cutoff.at%d.txt' % at)
-
-    out = open('cutoffs.gc.txt','w')
-    print >> out, '\n'.join(at_cutoffs)
-    out.close()
-
-
-############################################################
-# model_q_gc_cutoffs_bigmem
-#
-# Sample kmers to give to R to learn the cutoff for each
-# GC value
-############################################################
-def model_q_gc_cutoffs_bigmem(ctsf, sample, ratio):
-    # input coverages
-    k = 0
-    for line in open(ctsf):
-        (kmer,cov) = line.split()
-        if k == 0:
-            k = len(kmer)
-            at_covs = ['']*(k+1)
-        else:
-            at = count_at(kmer)
-            if at_covs[at]:
-                at_covs[at].append(cov)
-            else:
-                at_covs[at] = [cov]
-
-    for at in range(1,k):
-        print '%d %d' % (at,len(at_covs[at]))
-
-    # for each AT bin
-    at_cutoffs = []
-    for at in range(1,k):
-        # sample covs
-        if sample >= len(at_covs[at]):
-            rand_covs = at_covs[at]
-        else:
-            rand_covs = random.sample(at_covs[at], sample)
-
-        # print to file
-        out = open('kmers.txt', 'w')
-        for rc in rand_covs:
-            print >> out, rc
-        out.close()
-
-        os.system('R --slave --args %d < %s/cov_model_qmer.r 2> r%d.log' % (ratio,quake.quake_dir,at))
-
-        at_cutoffs.append( open('cutoff.txt').readline().rstrip() )
-        if at in [1,k-1]:   # set the extremes to the next closest value
-            at_cutoffs.append( open('cutoff.txt').readline().rstrip() )
-
-        os.system('mv kmers.txt kmers.at%d.txt' % at)
-        os.system('mv cutoff.txt cutoff.at%d.txt' % at)
-
-    out = open('cutoffs.gc.txt','w')
-    print >> out, '\n'.join(at_cutoffs)
-    out.close()
-        
-    
-############################################################
-# count_at
-#
-# Count A's and T's in the given sequence
-############################################################
-def count_at(seq):
-    return len([nt for nt in seq if nt in ['A','T']])
-
-
-############################################################
-# __main__
-############################################################
-if __name__ == '__main__':
-    main()
--- a/tools/ilmn_pacbio/quake.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,136 +0,0 @@
-#!/usr/bin/env python
-from optparse import OptionParser, SUPPRESS_HELP
-import os, random, sys
-import cov_model
-
-############################################################
-# quake.py
-#
-# Launch pipeline to correct errors in Illumina sequencing
-# reads.
-############################################################
-
-#r_dir = '/nfshomes/dakelley/research/error_correction/bin'
-quake_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
-
-############################################################
-# main
-############################################################
-def main():
-    usage = 'usage: %prog [options]'
-    parser = OptionParser(usage)
-    parser.add_option('-r', dest='readsf', help='Fastq file of reads')
-    parser.add_option('-f', dest='reads_listf', help='File containing fastq file names, one per line, or two per line for paired-end reads.')
-    parser.add_option('-k', dest='k', type='int', help='Size of k-mers to correct')
-    parser.add_option('-p', dest='proc', type='int', default=4, help='Number of processes [default: %default]')
-    parser.add_option('-q', dest='quality_scale', type='int', default=-1, help='Quality value ascii scale, generally 64 or 33. If not specified, it will guess.')
-    parser.add_option('--no_count', dest='no_count', action='store_true', default=False, help='Kmers are already counted and in expected file [reads file].qcts or [reads file].cts [default: %default]')
-    parser.add_option('--no_cut', dest='no_cut', action='store_true', default=False, help='Coverage model is optimized and cutoff was printed to expected file cutoff.txt [default: %default]')
-    parser.add_option('--int', dest='counted_kmers', action='store_true', default=False, help='Kmers were counted as integers w/o the use of quality values [default: %default]')
-    parser.add_option('--ratio', dest='ratio', type='int', default=200, help='Likelihood ratio to set trusted/untrusted cutoff.  Generally set between 10-1000 with lower numbers suggesting a lower threshold. [default: %default]')
-    # help='Model kmer coverage as a function of GC content of kmers [default: %default]'
-    parser.add_option('--gc', dest='model_gc', action='store_true', default=False, help=SUPPRESS_HELP)
-    parser.add_option('--headers', action='store_true', default=False, help='Output original read headers (i.e. pass --headers to correct)' )
-    (options, args) = parser.parse_args()
-
-    if not options.readsf and not options.reads_listf:
-        parser.error('Must provide fastq file of reads with -r or file with list of fastq files of reads with -f')
-    if not options.k:
-        parser.error('Must provide k-mer size with -k')
-    if options.quality_scale == -1:
-        options.quality_scale = guess_quality_scale(options.readsf, options.reads_listf)
-
-    if options.counted_kmers:
-        cts_suf = 'cts'
-    else:
-        cts_suf = 'qcts'
-    if options.readsf:
-        ctsf = '%s.%s' % (os.path.splitext( os.path.split(options.readsf)[1] )[0], cts_suf)
-        reads_str = '-r %s' % options.readsf
-    else:
-        ctsf = '%s.%s' % (os.path.split(options.reads_listf)[1], cts_suf)
-        reads_str = '-f %s' % options.reads_listf
-
-    if not options.no_count and not options.no_cut:
-        count_kmers(options.readsf, options.reads_listf, options.k, ctsf, options.quality_scale)
-
-    if not options.no_cut:
-        # model coverage
-        if options.counted_kmers:
-            cov_model.model_cutoff(ctsf, options.ratio)
-        else:
-            if options.model_gc:
-                cov_model.model_q_gc_cutoffs(ctsf, 10000, options.ratio)
-            else:
-                cov_model.model_q_cutoff(ctsf, 25000, options.ratio)
-
-
-    if options.model_gc:
-        # run correct C++ code
-        os.system('%s/correct %s -k %d -m %s -a cutoffs.gc.txt -p %d -q %d' % (quake_dir,reads_str, options.k, ctsf, options.proc, options.quality_scale))
-
-    else:
-        cutoff = open('cutoff.txt').readline().rstrip()
-
-        # run correct C++ code
-        headers = '--headers' if options.headers else ''
-        os.system('%s/correct %s %s -k %d -m %s -c %s -p %d -q %d' % (quake_dir,headers, reads_str, options.k, ctsf, cutoff, options.proc, options.quality_scale))
-
-
-################################################################################
-# guess_quality_scale
-# Guess at ascii scale of quality values by examining
-# a bunch of reads and looking for quality values < 64,
-# in which case we set it to 33.
-################################################################################
-def guess_quality_scale(readsf, reads_listf):
-    reads_to_check = 1000
-    if not readsf:
-        readsf = open(reads_listf).readline().split()[0]
-
-    fqf = open(readsf)
-    reads_checked = 0
-    header = fqf.readline()
-    while header and reads_checked < reads_to_check:
-        seq = fqf.readline()
-        mid = fqf.readline()
-        qual = fqf.readline().rstrip()
-        reads_checked += 1
-        for q in qual:
-            if ord(q) < 64:
-                print 'Guessing quality values are on ascii 33 scale'
-                return 33
-        header = fqf.readline()
-
-    print 'Guessing quality values are on ascii 64 scale'
-    return 64
-        
-
-
-############################################################
-# count_kmers
-#
-# Count kmers in the reads file using AMOS count-kmers or
-# count-qmers
-############################################################
-def count_kmers(readsf, reads_listf, k, ctsf, quality_scale):
-    # find files
-    fq_files = []
-    if readsf:
-        fq_files.append(readsf)
-    else:
-        for line in open(reads_listf):
-            for fqf in line.split():
-                fq_files.append(fqf)
-
-    if ctsf[-4:] == 'qcts':
-        os.system('cat %s | %s/count-qmers -k %d -q %d > %s' % (' '.join(fq_files), quake_dir, k, quality_scale, ctsf))
-    else:
-        os.system('cat %s | %s/count-kmers -k %d > %s' % (' '.join(fq_files), quake_dir, k, ctsf))
-    
-            
-############################################################
-# __main__
-############################################################
-if __name__ == '__main__':
-    main()
--- a/tools/ilmn_pacbio/quake.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-<tool id="quake" name="Quake" version="1.0.0">
-  <description>Quality-aware error correction</description>
-  <command interpreter="python">
-    quake_wrapper.py --default_cutoff=10 --headers -k $k -f $fofnfile -p 12 > $output1
-  </command>
-  <inputs>
-    <param name="input1" format="fastq" type="data" label="Select FASTQ file to correct" />
-    <param name="k" type="integer" value="16" label="Size of k-mers to correct" />
-  </inputs>
-  <configfiles>
-    <configfile name="fofnfile">
-${input1.file_name}
-    </configfile>
-  </configfiles>
-  <outputs>
-    <data format="fastq" name="output1" label="Error-corrected reads from ${on_string}" />
-  </outputs>
-  <help>
-
-**What it does**
-
-Applies the Quake_ algorithm for quality-aware correction of
-substitution errors in short reads.
-
-Kelley DR, Schatz MC, Salzberg SL.
-"Quake: quality-aware detection and correction of sequencing errors."
-*Genome Biol.* 2010;11(11):R116.
-
-.. _Quake: http://www.cbcb.umd.edu/software/quake
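-
-Outside of Galaxy, the equivalent correction run can be launched directly
-through the wrapper this tool calls (paths are illustrative)::
-
-  python quake_wrapper.py --default_cutoff=10 --headers -k 16 -f reads.fofn -p 12 > corrected.fastq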
-
-**Parameter list**
-
-k
-    k-mer size for detecting spurious k-mers versus true k-mers from
-    the genome.  Recommendations for choosing a value of k can be found
-    here_; a rough worked example follows below.
-
-.. _here: http://www.cbcb.umd.edu/software/quake/faq.html
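-
-As a rough sketch of that guidance (the FAQ linked above is authoritative),
-the Quake authors suggest choosing k so that 4^k is on the order of 200
-times the genome size::
-
-  import math
-  genome_size = 4.6e6  # illustrative, roughly E. coli
-  k = int(round(math.log(200 * genome_size, 4)))  # -> 15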
-
-**Output**
-
-A FASTQ file of corrected and trimmed reads.
-  </help>
-</tool>
--- a/tools/ilmn_pacbio/quake_pe.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-<tool id="quake_pe" name="Quake PE" version="1.0.0">
-  <description>Quality-aware error correction for paired-end reads</description>
-  <command interpreter="python">
-    quake_wrapper.py --default_cutoff=$cutoff --headers -k $k -f $fofnfile -p 12 --output=$output1,$output2
-  </command>
-  <inputs>
-    <param name="input1" format="fastq" type="data" label="FASTQ file for forward reads" />
-    <param name="input2" format="fastq" type="data" label="FASTQ file for reverse reads" />
-    <param name="k" type="integer" value="16" label="Size of k-mers to correct" />
-    <param name="cutoff" type="integer" value="0" label="Default coverage cutoff if estimation fails"/>
-  </inputs>
-  <configfiles>
-    <configfile name="fofnfile">${input1.file_name} ${input2.file_name}
-    </configfile>
-  </configfiles>
-  <outputs>
-    <data format="fastq" name="output1" label="Error-corrected forward reads from ${on_string}" />
-    <data format="fastq" name="output2" label="Error-corrected reverse reads from ${on_string}" />
-  </outputs>
-  <help>
-
-**What it does**
-
-Applies the Quake_ algorithm for quality-aware correction of
-substitution errors in short reads.  This form of the tool is customized
-for correcting paired-end reads.
-
-Kelley DR, Schatz MC, Salzberg SL.
-"Quake: quality-aware detection and correction of sequencing errors."
-*Genome Biol.* 2010;11(11):R116.
-
-.. _Quake: http://www.cbcb.umd.edu/software/quake
-
-**Parameter list**
-
-K-mer size
-    k-mer size for detecting spurious k-mers versus true k-mers from
-    the genome.  Recommendations for choosing a value of k can be found
-    here_.
-
-Default coverage cutoff
-    If the appropriate coverage cutoff cannot be found, Quake can be
-    forced to proceed anyway with the supplied cutoff.  In this case,
-    the optimal cutoff can be estimated by examining the k-mer coverage
-    histogram by eye; a sketch for printing that histogram follows below.
-
-.. _here: http://www.cbcb.umd.edu/software/quake/faq.html
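-
-A minimal sketch for printing that histogram, assuming the counts file
-holds whitespace-separated ``kmer count`` pairs per line (verify against
-your .qcts/.cts output)::
-
-  import sys
-  from collections import Counter
-
-  hist = Counter()
-  for line in open(sys.argv[1]):
-      kmer, count = line.split()
-      hist[int(round(float(count)))] += 1
-  for cov in sorted(hist):
-      print('%d\t%d' % (cov, hist[cov]))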
-
-**Output**
-
-A FASTQ file of corrected and trimmed reads.
-  </help>
-</tool>
--- a/tools/ilmn_pacbio/quake_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,132 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright (c) 2011, Pacific Biosciences of California, Inc.
-#
-# All rights reserved.
-#
-#Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
-#    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
-#    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
-#    * Neither the name of Pacific Biosciences nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
-#
-#THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS CONTRIBUTORS BE LIABLE FOR ANY
-#DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-import sys
-import os
-import subprocess
-
-QUAKE_EXE = os.path.join( os.path.dirname(os.path.abspath(sys.argv[0])), 'quake.py' )
-cmdLine = sys.argv
-cmdLine.pop(0)
-
-#
-# horribly not robust, but it was a pain to rewrite everything with
-# optparse
-#
-j = -1
-cut = 0
-for i,arg in enumerate(cmdLine):
-    if '--default_cutoff' in arg:
-        j = i
-        cut = int(arg.split('=')[1])
-if j>=0:
-    cmdLine = cmdLine[:j] + cmdLine[j+1:]
-
-j = -1
-output=''
-for i,arg in enumerate(cmdLine):
-    if '--output' in arg:
-        j = i
-        output = arg.split('=')[1]
-if j>=0:
-    cmdLine = cmdLine[:j] + cmdLine[j+1:]
-
-def backticks( cmd, merge_stderr=True ):
-    """
-    Simulates the perl backticks (``) command with error-handling support
-    Returns ( command output as sequence of strings, error code, error message )
-    """
-    if merge_stderr:
-        _stderr = subprocess.STDOUT
-    else:
-        _stderr = subprocess.PIPE
-
-    p = subprocess.Popen( cmd, shell=True, stdin=subprocess.PIPE,
-                          stdout=subprocess.PIPE, stderr=_stderr,
-                          close_fds=True )
-
-    out = [ l[:-1] for l in p.stdout.readlines() ]
-
-    p.stdout.close()
-    if not merge_stderr:
-        p.stderr.close()
-
-    # need to allow process to terminate
-    p.wait()
-
-    errCode = p.returncode and p.returncode or 0
-    if p.returncode>0:
-        errorMessage = os.linesep.join(out)
-        output = []
-    else:
-        errorMessage = ''
-        output = out
-
-    return output, errCode, errorMessage
-
-def to_stdout():
-    def toCorFastq(f):
-        stem, ext = os.path.splitext( os.path.basename(f) )
-        dir = os.path.dirname(f)
-        corFastq = os.path.join(dir,'%s.cor%s' % (stem,ext) )
-        if not os.path.exists(corFastq):
-            print >>sys.stderr, "Can't find path %s" % corFastq
-            sys.exit(1)
-        return corFastq
-    if '-r' in cmdLine:
-        fastqFile = cmdLine[ cmdLine.index('-r')+1 ]
-        corFastq = toCorFastq(fastqFile)
-        infile = open( corFastq, 'r' )
-        for line in infile:
-            sys.stdout.write( line )
-        infile.close()
-    else:
-        fofnFile = cmdLine[ cmdLine.index('-f')+1 ]
-        infile = open(fofnFile,'r')
-        for line in infile:
-            line = line.strip()
-            if len(line)>0:
-                fastqFiles = line.split()
-                break
-        infile.close()
-        outs = output.split(',')
-        for o,f in zip(outs,fastqFiles):
-            cf = toCorFastq(f)
-            os.system( 'cp %s %s' % ( cf, o ) )
-
-def run():
-    cmd = '%s %s' % ( QUAKE_EXE, " ".join(cmdLine) )
-    output, errCode, errMsg = backticks( cmd )
-
-    if errCode==0:
-        to_stdout()
-    else:
-        # if Quake exits with an error in cutoff determination we  
-        # can force correction if requested
-        if 'cutoff.txt' in errMsg and cut>0:
-            outfile = open( 'cutoff.txt', 'w' )
-            print >>outfile, str(cut)
-            outfile.close()
-            cmd = '%s --no_count --no_cut %s' % ( QUAKE_EXE, " ".join(cmdLine) )
-            output, errCode, errMsg = backticks( cmd )
-        if errCode==0:
-            to_stdout()
-        else:
-            print >>sys.stderr, errMsg
-            sys.exit(1)
-
-if __name__=='__main__': run()
--- a/tools/ilmn_pacbio/smrtpipe.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-#!/usr/bin/env python
-# EASY-INSTALL-SCRIPT: 'pbpy==0.1','smrtpipe.py'
-__requires__ = 'pbpy==0.1'
-import pkg_resources
-pkg_resources.run_script('pbpy==0.1', 'smrtpipe.py')
--- a/tools/ilmn_pacbio/smrtpipe_filter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,67 +0,0 @@
-<tool id="smrtpipe_filter" name="SMRTpipe Filter" version="1.0.0">
-  <description>Produce filtered reads from a set of PacBio primary analysis outputs.</description>
-  <command interpreter="python">
-    smrtpipe_galaxy.py --output=data/filtered_subreads.fasta --galaxy_output=${outfile} ${iniFile}
-  </command>
-  <inputs>
-    <conditional name="source">
-      <param name="input_source" type="select" label="Choose the source for the analysis inputs">
-        <option value="path">Path to fofn or multiple bas.h5 paths</option>
-        <option value="history">History</option>
-      </param>
-      <when value="path">
-        <repeat name="inputFiles" title="Input files">
-          <param name="path" type="text" label="File path" size="75"/>
-        </repeat>
-      </when>
-      <when value="history">
-        <param name="input1" type="data" format="tabular" label="File containing input paths" />
-      </when>
-    </conditional>
-    <param name="minimum_readlength" type="integer" value="50" label="Minimum raw readlength" />
-    <param name="minimum_readscore" type="float" value="0.75" label="Minimum read quality" />
-  </inputs>
-  <configfiles>
-    <configfile name="iniFile">
-[input]
-#if $source.input_source=="history":
-#for $l in open($source.input1.file_name,'r'):
-$l
-#end for
-#else
-#for $p in $source.inputFiles
-${p.path}
-#end for
-#end if
-
-[S_Filter]
-filters=MinRL=${minimum_readlength},MinReadScore=${minimum_readscore}
-    </configfile>
-  </configfiles>
-  <outputs>
-    <data name="outfile" format="fasta" label="Filtered subreads" />
-  </outputs>
-  <help>
-
-**What it does**
-
-Filters PacBio bas.h5 files and produces a FASTA file of filtered subreads.
-
-In PacBio SMRT sequencing, the template format is a SMRTbell: a circular
-molecule with adapters at two locations in the circle.  The subreads are the
-portions of the read between adapters.
-
-**Parameter list**
-
-Minimum readlength
-    Only keep reads from ZMWs that produced this many bases or more.
-
-Minimum read quality
-    Only keep reads with overall quality scores of this value or more.  The read quality score is a *de novo* prediction of the accuracy of the read.
-
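-With the defaults above, the generated smrtpipe.ini handed to
-smrtpipe_galaxy.py looks roughly like this (the input path is
-illustrative)::
-
- [input]
- /path/to/input.fofn
-
- [S_Filter]
- filters=MinRL=50,MinReadScore=0.75
-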
-**Output**
-
-FASTA file of filtered reads.
-
-  </help>
-</tool>
--- a/tools/ilmn_pacbio/smrtpipe_galaxy.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,265 +0,0 @@
-#!/usr/bin/python
-import sys
-import os
-import subprocess
-import optparse as op
-import xml.etree.cElementTree as et
-
-TRACE=False
-#
-# Turn on tracing to dump out __input__.xml and __settings__.xml somewhere
-#
-#TRACE=True
-#TRACE_PATH='/home/UNIXHOME/jsorenson'
-
-class SmrtpipeGalaxy:
-    """Wrapper for running smrtpipe under galaxy"""
-    def __init__( self, argv ):
-        self.__parseOptions( argv )
-
-    def __parseOptions( self, argv ):
-        usage = 'Usage: %prog [--help] [options] smrtpipe.ini'
-        parser = op.OptionParser( usage=usage, description=SmrtpipeGalaxy.__doc__ )
-        parser.add_option( "--output",
-                           help="Designate a file generated by smrtpipe as the expected output for galaxy" )
-        parser.add_option( "--nproc", type="int",
-                           help="Number of processes to use (-D NPROC)" )
-        parser.add_option( "--galaxy_output",
-                           help="File name provided by galaxy where output should be placed" )
-        parser.add_option( "--dry_run", action="store_true", 
-                           help="Create auxiliary XML files and exit" )
-        parser.add_option( "--dat_extension", 
-                           help="Soft link .dat files to have this extension (some pipelines require certain extensions)" ) 
-
-        parser.set_defaults( output=None, dry_run=False, galaxy_output=None,
-            dat_extension=None, nproc=0 )
-        self.options, self.args = parser.parse_args( argv )
-
-        if len(self.args)!=2:
-            parser.error( 'Expected 1 argument' )
-
-        self.configFile = self.args[1]
-
-    def __parseConfig( self ):
-        infile = open( self.configFile, 'r' )
-        section = None
-        sections = []
-        for line in infile:
-            l = line.strip()
-            if len(l)==0 or l.startswith('#'):
-                continue
-            if l.startswith('[') and l.endswith(']'):
-                section = section_factory( l[1:-1] )
-                sections.append(section)
-                continue
-            if section is None:
-                continue
-            if '=' in l:
-                section.addParameterLine(l)
-            else:
-                section.addLine(l)
-        infile.close()
-        return sections
-
-    def transferOutput( self ):
-        if not self.options.output or not self.options.galaxy_output:
-            return True, ''
-        if not os.path.exists(self.options.output):
-            return False, "Can't find file %s (job error?)" % self.options.output
-        os.system( 'cp %s %s' % (self.options.output, self.options.galaxy_output ))
-        return True, ''
-
-    def run( self ):
-        if not os.path.exists( self.configFile ):
-            print >>sys.stderr, "Can't find config file %s" % self.configFile
-            return 1
-
-        sections = self.__parseConfig()
-
-        if len(sections)==0:
-            print >>sys.stderr, "No sections found in %s" % self.configFile
-            return 1
-        if sections[0].name != 'input':
-            print >>sys.stderr, "No [input] section found in %s" % self.configFile
-            return 1
-
-        INPUT_FILE = '__input__.xml'
-        SETTINGS_FILE = '__settings__.xml'
-
-        sections[0].softLinkDats( self.options.dat_extension )
-        inputXml = sections[0].makeXmlElement()
-        write_xml_to_file( INPUT_FILE, inputXml )
-        if TRACE:
-            write_xml_to_file( os.path.join(TRACE_PATH,INPUT_FILE), inputXml )
-
-        settings = et.Element( 'smrtpipeSettings' )
-        for s in sections[1:]:
-            s.makeXmlElement( settings )
-
-        write_xml_to_file( SETTINGS_FILE, settings )
-        if TRACE:
-            write_xml_to_file( os.path.join(TRACE_PATH,SETTINGS_FILE), settings )
-
-        nproc = '-D NPROC=%d' % self.options.nproc if self.options.nproc>0 else ''
-        cmd = 'smrtpipe.py %s --params=%s xml:%s > smrtpipe.err 2>&1' % \
-            ( nproc, SETTINGS_FILE, INPUT_FILE )
-
-        if self.options.dry_run:
-            print 'Command to run:'
-            print cmd
-            return 0
-
-        out, errCode, errMsg = backticks( cmd )
-        if errCode!=0:
-            print >>sys.stderr, "error while running: %s" % cmd
-            print >>sys.stderr, errMsg
-            if os.path.exists('log/smrtpipe.log'):
-                print >>sys.stderr, 'Log:'
-                infile = open('log/smrtpipe.log','r')
-                for line in infile: sys.stderr.write(line)
-                infile.close()
-            return errCode
-
-        success, errMsg = self.transferOutput()
-        if not success:
-            print >>sys.stderr, errMsg
-            return 1
-
-        return 0
-
-def write_xml_to_file( fileName, root ):
-    outfile = open( fileName, 'w' )
-    outfile.write( '<?xml version="1.0"?>\n' )
-    outfile.write( et.tostring(root) + '\n' )
-    outfile.close()
-
-def section_factory( name ):
-    if name=='input':
-        return InputSection(name)
-    else:
-        return Section(name)
-
-class Section:
-    def __init__( self, name ):
-        self._name = name
-        self._lines = []        
-        self._vars = {}
-
-    @property
-    def name(self):
-        return self._name
-
-    def addLine( self, line ):
-        self._lines.append(line)
-
-    def addParameterLine( self, line ):
-        self.addLine(line)
-        i = line.find( '=' )
-        key = line[:i].strip()
-        value = line[i+1:].strip()
-        self._vars[key] = value
-
-    def makeXmlElement( self, settings ):
-        if self._name=='global':
-            root = et.SubElement( settings, "protocol", {'name':'generic'} )
-        else:
-            root = et.SubElement( settings, "module", {'name':self._name} )
-        for k,v in self._vars.iteritems():
-            param = et.SubElement( root, 'param', {'name':k} )
-            val = et.SubElement( param, 'value' )
-            val.text = v
-        return None
-
-    def __str__( self ):
-        "for debugging"
-        buffer = [ 'S { name=' ]
-        buffer.append(self._name)
-        buffer.append('; lines=%s' % ','.join(self._lines) )
-        for k,v in self._vars.iteritems():
-            buffer.append('; %s=%s' % (k,v) )
-        buffer.append(' }')
-        return ''.join(buffer)
-
-class InputSection( Section ):
-    def __init__( self, name ):
-        Section.__init__(self,name)
-
-    def softLinkDats( self, newExtension ):
-        if not newExtension:
-            return
-        newLines = []
-        for l in self._lines:
-            if ':' in l:
-                protocol = l[:l.find(':')+1]
-                file = l[l.find(':')+1:]
-            else:
-                protocol = ''
-                file = l
-            if os.path.exists(file) and file.endswith('.dat'):
-                newFile = '%s.%s' % ( file, newExtension )
-                if not os.path.exists(newFile):
-                    os.system( 'ln -s %s %s' % ( file, newFile ) )
-                newLines.append(protocol+newFile)
-            else:
-                newLines.append(l)
-        self._lines = newLines
-
-    def makeXmlElement( self, parent=None ):
-        root = et.Element( "pacbioAnalysisInputs" )
-        data = et.SubElement( root, 'dataReferences' )
-        iRef = 0
-        for l in self._lines:
-            def add(x,iRef):
-                if len(x)==0: return iRef
-                node = et.SubElement( data, 'url' )
-                if ':' in x:
-                    node.attrib[ 'ref' ] = x
-                else:
-                    node.attrib[ 'ref' ] = 'run:0000000-%04d' % iRef
-                    node2 = et.SubElement( node, 'location' )
-                    node2.text = x
-                return iRef+1
-            if l.endswith('fofn') and os.path.exists(l):
-                infile = open(l,'r')
-                for j,line in enumerate(infile): iRef=add(line.strip(),iRef)
-                infile.close()
-            else:
-                iRef=add(l,iRef)
-        return root
-
-def backticks( cmd, merge_stderr=True ):
-    """
-    Simulates the perl backticks (``) command with error-handling support
-    Returns ( command output as sequence of strings, error code, error message )
-    """
-    if merge_stderr:
-        _stderr = subprocess.STDOUT
-    else:
-        _stderr = subprocess.PIPE
-
-    p = subprocess.Popen( cmd, shell=True, stdin=subprocess.PIPE,
-                          stdout=subprocess.PIPE, stderr=_stderr,
-                          close_fds=True )
-
-    out = [ l[:-1] for l in p.stdout.readlines() ]
-
-    p.stdout.close()
-    if not merge_stderr:
-        p.stderr.close()
-
-    # need to allow process to terminate
-    p.wait()
-
-    errCode = p.returncode and p.returncode or 0
-    if p.returncode>0:
-        errorMessage = os.linesep.join(out)
-        output = []
-    else:
-        errorMessage = ''
-        output = out
-        
-    return output, errCode, errorMessage
-
-if __name__=='__main__':
-    app = SmrtpipeGalaxy( sys.argv )
-    sys.exit( app.run() )
--- a/tools/ilmn_pacbio/smrtpipe_hybrid.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-<tool id="smrtpipe_hybrid" name="AHA" version="1.0.0">
-  <description>Assemble contigs from a set of contigs and PacBio reads.</description>
-  <command interpreter="python">
-    smrtpipe_galaxy.py --nproc=24 --dat_extension=fasta --output=data/scaffold.fasta --galaxy_output=${outfile} ${iniFile}
-  </command>
-  <!--
-  <command>cp ${iniFile} ${outfile}</command>
-  -->
-  <inputs>
-    <param name="contigs" format="fasta" type="data" label="Starting Contigs"/>
-    <param name="reads" format="fasta" type="data" label="PacBio Reads"/>
-    <param name="schedule" type="text" value="6,3,75;6,3,75;5,3,75;5,3,75;6,2,75;6,2,75;5,2,75;5,2,75" label="Parameter Schedule" size="60"/>
-  </inputs>
-  <configfiles>
-    <configfile name="iniFile">
-[input]
-assembled_contigs:${contigs}
-file:${reads}
-
-[HybridAssembly]
-instrumentModel=RS
-cleanup=False
-untangler=pacbio
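-## Galaxy's default parameter sanitizer is assumed to map ';' in the text
-## param to 'X'; the replace below restores the intended separators.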
-#set $schedule2 = $schedule.replace('X',';')
-paramSchedule=${schedule2}
-dontFillin=False
-longReadsAsStrobe=True
-exactQueryIds=True
-rm4Opts=-minMatch 7 -minFrac 0.1 -minPctIdentity 65 -bestn 10 -noSplitSubreads
-numberProcesses=16
-cluster=False
-minRepeatLength=100000
-    </configfile>
-  </configfiles>
-  <outputs>
-    <data name="outfile" format="fasta" label="Hybrid assembly contigs from ${on_string}"/>
-  </outputs>
-  <help>
-
-**What it does**
-
-The AHA assembly algorithm is an AMOS_-based pipeline
-for finishing bacterial-sized
-genomes using draft contigs and PacBio reads.
-
-.. _AMOS: http://sourceforge.net/apps/mediawiki/amos
-
-**Parameter list**
-
-Parameter schedule
-    The parameter schedule is a semicolon-delimited list of triples.  Each triple represents an iteration of hybrid assembly (alignment/scaffolding/gap-filling).  The three parameters for each iteration are the Z-score, the number of reads required to define a link, and the minimum length of subreads used in links; an example is given below.
-
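-For example, the default schedule begins ``6,3,75;6,3,75;5,3,75``: three
-iterations, the first two requiring a Z-score of 6 and the third a Z-score
-of 5, each needing 3 reads to define a link and considering only subreads
-of at least 75 bp in links.
-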
-**Output**
-
-FASTA file containing scaffolded and gap-filled contigs resulting from the 
-hybrid assembly.
-
-  </help>
-</tool>
--- a/tools/ilmn_pacbio/soap_denovo.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-<tool id="soap_denovo" name="SOAPdenovo" version="1.0.0">
-  <description>Short-read de novo assembly</description>
-  <!--
-      # SOAPdenovo-127mer all -s ${soap_config} -o assembly -K ${k} -p 8 -d -D
-      # cat ${soap_config} > ${output1}
-      # cp ${soap_config} ${output1} &amp;&amp;
-  -->
-  <command>
-      SOAPdenovo-127mer all -s ${soap_config} -o assembly -K ${k} -p 24 -d -D -R
-  </command>
-  <inputs>
-    <conditional name="inputs">
-      <param name="read_type" type="select" label="Illumina read type">
-        <option value="single">Single fragment</option>
-        <option value="paired">Paired-end</option>
-      </param>
-      <when value="single">
-        <param name="input1" format="fastq" type="data" label="FASTQ file for reads"/>
-      </when>
-      <when value="paired">
-        <param name="input1" format="fastq" type="data" label="FASTQ file for forward reads"/>
-        <param name="input2" format="fastq" type="data" label="FASTQ file for reverse reads"/>
-        <param name="d" type="integer" value="500" label="Estimated insert size for paired-end reads" />
-      </when>
-    </conditional>
-    <param name="k" type="integer" value="23" label="Size of k for forming the de Bruijn overlap graph" />
-  </inputs>
-  <configfiles>
-    <configfile name="soap_config">max_rd_len=105
-[LIB]
-#if $inputs.read_type == "single"
-q=${inputs.input1.file_name}
-#else
-avg_ins=${inputs.d}
-asm_flags=3
-reverse_seq=0
-q1=${inputs.input1.file_name}
-q2=${inputs.input2.file_name}
-#end if
-    </configfile>
-  </configfiles>
-  <outputs>
-    <data name="assembled_contigs" format="fasta" from_work_dir="assembly.scafSeq" label="Assembled contigs from ${on_string}" />
-  </outputs>
-  <help>
-
-**What it does**
-
-Runs SOAPdenovo_ to generate a genome assembly
-using single-fragment or paired-end short reads.
-
-Li R, Zhu H, Ruan J, Qian W, Fang X, Shi Z, Li Y, Li S, Shan G, Kristiansen K, Li S, Yang H, Wang J, Wang J.
-"De novo assembly of human genomes with massively parallel short read sequencing."
-*Genome Res.* 2010 Feb;20(2):265-72.
-
-.. _SOAPdenovo: http://soap.genomics.org.cn/soapdenovo.html
-
-**Parameter list**
-
-k
-    k-mer size for constructing the de Bruijn graph.  The appropriate size of k is genome and data set dependent, but a good starting choice might be 75% of the read length.
-
-Insert size
-    For paired-end libraries, the expected insert size.
-
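-For a paired-end run with the defaults above, the generated SOAPdenovo
-config file looks roughly like this (read paths are illustrative)::
-
- max_rd_len=105
- [LIB]
- avg_ins=500
- asm_flags=3
- reverse_seq=0
- q1=/path/to/forward.fastq
- q2=/path/to/reverse.fastq
-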
-**Output**
-
-FASTA file of assembled scaffolds (SOAPdenovo's assembly.scafSeq file).
-
-  </help>
-</tool>
-
-
--- a/tools/indels/indel_analysis.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,227 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Given an input sam file, provides analysis of the indels.
-
-usage: %prog [options] [input3 sum3[ input4 sum4[ input5 sum5[...]]]]
-   -i, --input=i: The sam file to analyze
-   -t, --threshold=t: The deletion frequency threshold
-   -I, --out_ins=I: The interval output file showing insertions
-   -D, --out_del=D: The interval output file showing deletions
-"""
-
-import re, sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-def add_to_mis_matches( mis_matches, pos, bases ):
-    """
-    Adds the bases and counts to the mis_matches dict
-    """
-    for j, base in enumerate( bases ):
-        try:
-            mis_matches[ pos + j ][ base ] += 1
-        except KeyError:
-            try:
-                mis_matches[ pos + j ][ base ] = 1
-            except KeyError:
-                mis_matches[ pos + j ] = { base: 1 }
-
-def __main__():
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    # prep output files
-    out_ins = open( options.out_ins, 'wb' )
-    out_del = open( options.out_del, 'wb' )
-    # patterns
-    pat = re.compile( '^((?P<lmatch>\d+)M(?P<ins_del_width>\d+)(?P<ins_del>[ID])(?P<rmatch>\d+)M)$|((?P<match_width>\d+)M)$' )
-    pat_multi = re.compile( '(\d+[MIDNSHP])(\d+[MIDNSHP])(\d+[MIDNSHP])+' )
-    # for tracking occurrences at each position of the reference
-    mis_matches = {}
-    indels = {}
-    multi_indel_lines = 0
-    # go through all lines in input file
-    for i,line in enumerate( open( options.input, 'rb' ) ):
-        if line.strip() and not line.startswith( '#' ) and not line.startswith( '@' ) :
-            split_line = line.split( '\t' )
-            chrom = split_line[2].strip()
-            pos = int( split_line[3].strip() )
-            cigar = split_line[5].strip()
-            bases = split_line[9].strip()
-            # skip unmapped reads (no reference position)
-            if chrom == '*':
-                continue
-            # find matches like 3M2D7M or 7M3I10M
-            match = {}
-            m = pat.match( cigar )
-            # unprocessable CIGAR
-            if not m:
-                m = pat_multi.match( cigar )
-                # skip this line if no match
-                if not m:
-                    continue
-                # account for multiple indels or operations we don't process
-                else:
-                    multi_indel_lines += 1
-            # get matching parts for the indel or full match if matching
-            else:
-                if not mis_matches.has_key( chrom ):
-                    mis_matches[ chrom ] = {}
-                    indels[ chrom ] = { 'D': {}, 'I': {} }
-                parts = m.groupdict()
-                if parts[ 'match_width' ] or ( parts[ 'lmatch' ] and parts[ 'ins_del_width' ] and parts[ 'rmatch' ] ):
-                    match = parts
-            # see if matches meet filter requirements
-            if match:
-                # match/mismatch
-                if parts[ 'match_width' ]:
-                    add_to_mis_matches( mis_matches[ chrom ], pos, bases )
-                # indel
-                else:
-                    # pieces of CIGAR string
-                    left = int( match[ 'lmatch' ] )
-                    middle = int( match[ 'ins_del_width' ] )
-                    right = int( match[ 'rmatch' ] )
-                    left_bases = bases[ : left ]
-                    if match[ 'ins_del' ] == 'I':
-                        middle_bases = bases[ left : left + middle ]
-                    else:
-                        middle_bases = ''
-                    right_bases = bases[ -right : ]
-                    start = pos + left
-                    # add data to ref_pos dict for match/mismatch bases on left and on right
-                    add_to_mis_matches( mis_matches[ chrom ], pos, left_bases )
-                    if match[ 'ins_del' ] == 'I':
-                        add_to_mis_matches( mis_matches[ chrom ], start, right_bases )
-                    else:
-                        add_to_mis_matches( mis_matches[ chrom ], start + middle, right_bases )
-                    # for insertions, count instances of particular inserted bases
-                    if match[ 'ins_del' ] == 'I':
-                        if indels[ chrom ][ 'I' ].has_key( start ):
-                            try:
-                                indels[ chrom ][ 'I' ][ start ][ middle_bases ] += 1
-                            except KeyError:
-                                indels[ chrom ][ 'I' ][ start ][ middle_bases ] = 1
-                        else:
-                            indels[ chrom ][ 'I' ][ start ] = { middle_bases: 1 }
-                    # for deletions, count number of deletions bases
-                    else:
-                        if indels[ chrom ][ 'D' ].has_key( start ):
-                            try:
-                                indels[ chrom ][ 'D' ][ start ][ middle ] += 1
-                            except KeyError:
-                                indels[ chrom ][ 'D' ][ start ][ middle ] = 1
-                        else:
-                            indels[ chrom ][ 'D' ][ start ] = { middle: 1 }
-    # compute deletion frequencies and insertion frequencies for checking against threshold
-    freqs = {}
-    ins_freqs = {}
-    chroms = mis_matches.keys()
-    chroms.sort()
-    for chrom in chroms:
-        freqs[ chrom ] = {}
-        ins_freqs[ chrom ] = {}
-        poses = mis_matches[ chrom ].keys()
-        poses.extend( indels[ chrom ][ 'D' ].keys() )
-        poses.extend( indels[ chrom ][ 'I' ].keys() )
-        poses = list( set( poses ) )
-        for pos in poses:
-            # all reads touching this particular position
-            freqs[ chrom ][ pos ] = {}
-            sum_counts = 0.0
-            sum_counts_end = 0.0
-            # get basic counts (match/mismatch)
-            try:
-                sum_counts += float( sum( mis_matches[ chrom ][ pos ].values() ) )
-            except KeyError:
-                pass
-            try:
-                sum_counts_end += float( sum( mis_matches[ chrom ][ pos + 1 ].values() ) )
-            except KeyError:
-                pass
-            # add deletions also touching this position
-            try:
-                sum_counts += float( sum( indels[ chrom ][ 'D' ][ pos ].values() ) )
-            except KeyError:
-                pass
-            try:
-                sum_counts_end += float( sum( indels[ chrom ][ 'D' ][ pos + 1 ].values() ) )
-            except KeyError:
-                pass
-            freqs[ chrom ][ pos ][ 'total' ] = sum_counts
-            # calculate actual frequencies
-            # deletions
-            # frequencies for deletions
-            try:
-                for d in indels[ chrom ][ 'D' ][ pos ].keys():
-                    freqs[ chrom ][ pos ][ d ] = indels[ chrom ][ 'D' ][ pos ][ d ] / sum_counts
-            except KeyError:
-                pass
-            # frequencies for matches/mismatches
-            try:
-                for base in mis_matches[ chrom ][ pos ].keys():
-                    try:
-                        prop = float( mis_matches[ chrom ][ pos ][ base ] ) / sum_counts
-                        freqs[ chrom ][ pos ][ base ] = prop
-                    except ZeroDivisionError:
-                        freqs[ chrom ][ pos ][ base ] = 0.0
-            except KeyError:
-                pass
-            # insertions
-            try:
-                for bases in indels[ chrom ][ 'I' ][ pos ].keys():
-                    prop_start = indels[ chrom ][ 'I' ][ pos ][ bases ] / ( indels[ chrom ][ 'I' ][ pos ][ bases ] + sum_counts )
-                    try:
-                        prop_end = indels[ chrom ][ 'I' ][ pos ][ bases ] / ( indels[ chrom ][ 'I' ][ pos ][ bases ] + sum_counts_end )
-                    except ZeroDivisionError:
-                        prop_end = 0.0
-                    try:
-                        ins_freqs[ chrom ][ pos ][ bases ] = [ prop_start, prop_end ]
-                    except KeyError:
-                        ins_freqs[ chrom ][ pos ] = { bases: [ prop_start, prop_end ] }
-            except KeyError:
-                pass
-    # output to files if meet threshold requirement
-    threshold = float( options.threshold )
-    #out_del.write( '#Chrom\tStart\tEnd\t#Del\t#Reads\t%TotReads\n' )
-    #out_ins.write( '#Chrom\tStart\tEnd\tInsBases\t#Reads\t%TotReadsAtStart\t%ReadsAtEnd\n' )
-    for chrom in chroms:
-        # deletions file
-        poses = indels[ chrom ][ 'D' ].keys()
-        poses.sort()
-        for pos in poses:
-            start = pos
-            dels = indels[ chrom ][ 'D' ][ start ].keys()
-            dels.sort()
-            for d in dels:
-                end = start + d
-                prop = freqs[ chrom ][ start ][ d ]
-                if prop > threshold :
-                    out_del.write( '%s\t%s\t%s\t%s\t%.2f\n' % ( chrom, start, end, indels[ chrom ][ 'D' ][ pos ][ d ], 100.0 * prop ) )
-        # insertions file
-        poses = indels[ chrom ][ 'I' ].keys()
-        poses.sort()
-        for pos in poses:
-            start = pos
-            end = pos + 1
-            ins_bases = indels[ chrom ][ 'I' ][ start ].keys()
-            ins_bases.sort()
-            for bases in ins_bases:
-                prop_start = ins_freqs[ chrom ][ start ][ bases ][0]
-                prop_end = ins_freqs[ chrom ][ start ][ bases ][1]
-                if prop_start > threshold or prop_end > threshold:
-                    out_ins.write( '%s\t%s\t%s\t%s\t%s\t%.2f\t%.2f\n' % ( chrom, start, end, bases, indels[ chrom ][ 'I' ][ start ][ bases ], 100.0 * prop_start, 100.0 * prop_end ) )
-    # close out files
-    out_del.close()
-    out_ins.close()
-    # if skipped lines because of more than one indel, output message
-    if multi_indel_lines > 0:
-        sys.stdout.write( '%s alignments were skipped because they contained more than one indel.' % multi_indel_lines )
-
-if __name__=="__main__": __main__()
--- a/tools/indels/indel_analysis.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,167 +0,0 @@
-<tool id="indel_analysis" name="Indel Analysis" version="1.0.0">
-  <description></description>
-  <command interpreter="python">
-    indel_analysis.py
-      --input=$input1
-      --threshold=$threshold
-      --out_ins=$out_ins
-      --out_del=$out_del
-  </command>
-  <inputs>
-    <param format="sam" name="input1" type="data" label="Select sam file to analyze" />
-    <param name="threshold" type="float" value="0.015" size="5" label="Frequency threshold" help="Cutoff" />
-  </inputs>
-  <outputs>
-    <data format="interval" name="out_del" />
-    <data format="interval" name="out_ins" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="indel_analysis_in1.sam" ftype="sam"/>
-      <param name="threshold" value="0.017"/>
-      <output name="out_del" file="indel_analysis_out1.interval" ftype="interval"/>
-      <output name="out_ins" file="indel_analysis_out2.interval" ftype="interval"/>
-    </test>
-    <test>
-      <param name="input1" value="indel_analysis_in2.sam" ftype="sam"/>
-      <param name="threshold" value="0.09"/>
-      <output name="out_del" file="indel_analysis_out3.interval" ftype="interval"/>
-      <output name="out_ins" file="indel_analysis_out4.interval" ftype="interval"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-Given an input sam file, this tool provides analysis of the indels. It filters out matches that do not meet the frequency threshold. The way this frequency of occurrence is calculated is different for deletions and insertions. The CIGAR string's "M" can indicate an exact match or a mismatch. For SAM containing the following bits of information (assuming the reference "ACTGCTCGAT")::
-
- CHROM  POS   CIGAR  SEQ
-   ref    3  2M1I3M  TACTTC
-   ref    1  2M1D3M  ACGCT
-   ref    4  4M2I3M  GTTCAAGAT
-   ref    2  2M2D3M  CTCCG
-   ref    1  3M1D4M  AACCTGG
-   ref    6  3M1I2M  TTCAAT
-   ref    5  3M1I3M  CTCTGTT
-   ref    7      4M  CTAT
-   ref    5      5M  CGCTA
-   ref    3  2M1D2M  TGCC
-
-The following totals would be calculated (this is an intermediate step and not output)::
-
- -------------------------------------------------------------------------------------------------------
-  POS  BASE  NUMREADS  DELPROPCALC  DELPROP  INSPROPSTARTCALC  INSSTARTPROP  INSPROPENDCALC  INSENDPROP
- -------------------------------------------------------------------------------------------------------
-    1     A         2          2/2     1.00               ---           ---             ---         ---
-    2     A         1          1/3     0.33               ---           ---             ---         ---
-          C         2          2/3     0.67               ---           ---             ---         ---
-    3     C         1          1/5     0.20               ---           ---             ---         ---
-          T         3          3/5     0.60               ---           ---             ---         ---
-          -         1          1/5     0.20               ---           ---             ---         ---
-    4     A         1          1/6     0.17               ---           ---             ---         ---
-          G         3          3/6     0.50               ---           ---             ---         ---
-          -         1          1/6     0.17               ---           ---             ---         ---
-         --         1          1/6     0.17               ---           ---             ---         ---
-    5     C         4          4/7     0.57               ---           ---             ---         ---
-          T         2          2/7     0.29               ---           ---             ---         ---
-          -         1          1/7     0.14               ---           ---             ---         ---
-         +C         1          ---      ---               1/7          0.14             1/9        0.11
-    6     C         2          2/9     0.22               ---           ---             ---         ---
-          G         1          1/9     0.11               ---           ---             ---         ---
-          T         6          6/9     0.67               ---           ---             ---         ---
-    7     C         7          7/9     0.78               ---           ---             ---         ---
-          G         1          1/9     0.11               ---           ---             ---         ---
-          T         1          1/9     0.11               ---           ---             ---         ---
-    8     C         1          1/7     0.14               ---           ---             ---         ---
-          G         4          4/7     0.57               ---           ---             ---         ---
-          T         2          2/7     0.29               ---           ---             ---         ---
-         +T         1          ---      ---               1/8          0.13             1/6        0.17
-        +AA         1          ---      ---               1/8          0.13             1/6        0.17
-    9     A         4          4/5     0.80               ---           ---             ---         ---
-          T         1          1/5     0.20               ---           ---             ---         ---
-         +A         1          ---      ---               1/6          0.17             1/5        0.20
-   10     T         4          4/4     1.00               ---           ---             ---         ---
-
-The general idea for calculating these is that we want to find out the proportion of times a particular event occurred at a position among all reads that touch that base in some way. First, the basic total number of reads at a given position is the number of reads with each particular base plus the number of reads with a deletion at that given position (including the bases that are "mismatches"). Note that deletions of two bases and one base would be counted completely separately. Insertions are not counted in this total. For position 4 above, the reference base is G, and there are 3 occurrences of it along with one mismatching base, A. Also, there is a 1-base deletion and another 2-base deletion. So there are a total of 6 matches/mismatches/deletions, and the proportions for each base are 1/6 = 0.17 (A) and 3/6 = 0.50 (G), and for each deletion it is 1/6 = 0.17.
-
-Insertions are slightly more complicated. We actually want to get the frequency of occurrence for both the associated start and end positions, since an insertion appears between those two bases. Each insertion is regarded individually, and the total number of occurrences of that insertion is divided by the sum of the number of its occurrences and the basic total for either the start or end. So for the insertions at position 8, there are a total of 7 matches/mismatches/deletions at position 8, and two insertions that each occur once, so each has an INSSTARTPROP of 1/8 = 0.13. For the end position there are 5 matches/mismatches/deletions, so the INSENDPROP is 1/6 = 0.17 for both insertions (T and AA).
-
-These proportions (DELPROP and either INSSTARTPROP or INSENDPROP) need to be greater than the threshold frequency specified by the user in order for that base, deletion, or insertion to be included in the output.  A short sketch of this arithmetic follows.
-
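-A minimal sketch of the same arithmetic for position 4 of the example
-above (counts taken from the table; the variable names are illustrative)::
-
- matches = {'A': 1, 'G': 3}       # match/mismatch reads at position 4
- deletions = {1: 1, 2: 1}         # deletion width -> read count
- total = float(sum(matches.values()) + sum(deletions.values()))  # 6.0
- del_prop = deletions[1] / total  # 1/6 = 0.17, compared to the threshold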
-
-**Output format**
-
-The output varies for deletions and insertions, although for both, the first three columns are chromosome, start position, and end position.
-
-Columns in the deletions file::
-
-                        Column  Description
- -----------------------------  ---------------------------------------------------------------------------------------------------
-  1                      Chrom  Chromosome
-  2                      Start  Starting position
-  3                        End  Ending position
-  4                   Coverage  Number of reads containing this exact deletion
-  5       Frequency Percentage  Frequency of this exact deletion (a 2-base and a 1-base deletion at the same position are counted separately), as percentage (%)
-
-Columns in the insertions file::
-
-                   Column  Description
- ------------------------  -----------------------------------------------------------------------------------------------------------------
-  1                 Chrom  Chromosome
-  2                 Start  Starting position
-  3                   End  Ending position (always Start + 1 for insertions)
-  4      Inserted Base(s)  The exact base(s) inserted at Start position
-  5              Coverage  Number of reads containing this exact insertion
-  6  Freq. Perc. at Start  Frequency of this exact insertion given Start position ("GG" and "G" are considered distinct), as percentage (%)
-  7    Freq. Perc. at End  Frequency of this exact insertion given End position ("GG" and "G" are considered distinct), as percentage (%)
-
-Before using this tool, you may want to use the Filter SAM for indels tool to filter out indels on bases with insufficient quality scores, but this is not required.
-
-
------
-
-**Example**
-
-If you set the threshold to 0.0 and have the following SAM file::
-
- r327     16   chrM   11   37      8M1D10M   *   0   0             CTTACCAGATAGTCATCA   -+&lt;2;?@BA@?-,.+4=4             XT:A:U  NM:i:1  X0:i:1  X1:i:0  XM:i:0  XO:i:1  XG:i:1  MD:Z:41^C35
- r457      0   chr1   14   37          14M   *   0   0                 ACCTGACAGATATC   =/DF;?@1A@?-,.                 XT:A:U  NM:i:0  X0:i:1  X1:i:0  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
- r501     16   chrM    6   23      7M1I13M   *   0   0          TCTGTGCCTACCAGACATTCA   +=$2;?@BA@?-,.+4=4=4A          XT:A:U  NM:i:3  X0:i:1  X1:i:1  XM:i:2  XO:i:1  XG:i:1  MD:Z:28C36G9        XA:Z:chrM,+134263658,14M1I61M,4;
- r1288    16   chrM    8   37      11M1I7M   *   0   0            TCACTTACCTGTACACACA   /*F2;?@%A@?-,.+4=4=            XT:A:U  NM:i:4  X0:i:1  X1:i:0  XM:i:3  XO:i:1  XG:i:1  MD:Z:2T0T1A69
- r1902     0   chr1    4   37      7M2D18M   *   0   0        AGTCTCTTACCTGACGGTTATGA   &lt;2;?@BA@?-,.+4=4=4AA663        XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:17^CA58A0
- r2204    16   chrM    9    0          19M   *   0   0            CTGGTACCTGACAGGTATC   2;?@BA@?-,.+4=4=4AA            XT:A:R  NM:i:1  X0:i:2  X1:i:0  XM:i:1  XO:i:0  XG:i:0  MD:Z:0T75           XA:Z:chrM,-564927,76M,1;
- r2314    16   chrM    6   37      10M2D8M   *   0   0               TCACTCTTACGTCTGA   &lt;2;?@BA@?-,.+4=4               XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:25A5^CA45
- r3001     0   chrM   13   37   3M1D5M2I7M   *   0   0              TACAGTCACCCTCATCA   &lt;2;?@BA/(@?-,$&amp;                XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:17^CA58A0
- r3218     0   chr1   13   37       8M1D7M   *   0   0                TACAGTCACTCATCA   &lt;2;?@BA/(@?-,$&amp;                XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:17^CA58A0
- r4767    16   chr2    3   37      15M2I7M   *   0   0       CAGACTCTCTTACCAAAGACAGAC   &lt;2;?@BA/(@?-,.+4=4=4AA66       XT:A:U  NM:i:4  X0:i:1  X1:i:0  XM:i:3  XO:i:1  XG:i:1  MD:Z:2T1A4T65
- r5333     0   chrM    5   37      17M1D8M   *   0   0       GTCTCTCATACCAGACAACGGCAT   FB3$@BA/(@?-,.+4=4=4AA66       XT:A:U  NM:i:4  X0:i:1  X1:i:0  XM:i:3  XO:i:1  XG:i:1  MD:Z:45C10^C0C5C13
- r6690    16   chrM    7   23          20M   *   0   0           CTCTCTTACCAGACAGACAT   2;?@BA/(@?-,.+4=4=4A           XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76             XA:Z:chrM,-568532,76M,1;
- r7211     0   chrM    7   37          24M   *   0   0       CGACAGAGACAAAATAACATTTAA   //&lt;2;?@BA@?-,.+4=442;;6:       XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:2  XO:i:1  XG:i:1  MD:Z:73G0G0
- r9922    16   chrM    4    0       7M3I9M   *   0   0            CCAGACATTTGAAATCAGG   F/D4=44^D++26632;;6            XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
- r9987    16   chrM    4    0      9M1I18M   *   0   0   AGGTTCTCATTACCTGACACTCATCTTG   G/AD6"/+4=4426632;;6:&lt;2;?@BA   XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
- r10145   16   chr1   16    0       5M2D7M   *   0   0                   CACATTGTTGTA   G//+4=44=4AA                   XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
- r10324   16   chrM   15    0       6M1D5M   *   0   0                   CCGTTCTACTTG   A@??8.G//+4=                   XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
- r12331   16   chrM   17    0       4M2I6M   *   0   0                  AGTCGAATACGTG   632;;6:&lt;2;?@B                  XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
- r12914   16   chr2   24    0       4M3I3M   *   0   0                     ACTACCCCAA   G//+4=42,.                     XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
-
-The following will be produced (deletions file followed by insertions file)::
-
- chr1   11   13   1   100.00
- chr1   21   22   1    25.00
- chr1   21   23   1    25.00
- chrM   16   18   1     9.09
- chrM   19   20   1     8.33
- chrM   21   22   1     9.09
- chrM   22   23   1     9.09
-
- chr2   18   19    AA   1   50.00   50.00
- chr2   28   29   CCC   1   50.00   50.00
- chrM   11   12   TTT   1    9.09    9.09
- chrM   13   14     C   1    9.09    9.09
- chrM   13   14     T   1    9.09    9.09
- chrM   19   20     T   1    7.69    8.33
- chrM   21   22    GA   1    8.33    8.33
-
-
-  </help>
-</tool>
--- a/tools/indels/indel_sam2interval.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,161 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Allows user to filter out non-indels from SAM.
-
-usage: %prog [options]
-   -i, --input=i: The input SAM file
-   -u, --include_base=u: Whether or not to include the base for insertions
-   -c, --collapse=c: Whether to collapse multiple occurrences of a location with counts shown
-   -o, --int_out=o: The interval output file for the converted SAM file
-   -b, --bed_ins_out=b: The bed output file with insertions only for the converted SAM file
-   -d, --bed_del_out=d: The bed output file with deletions only for the converted SAM file
-"""
-
-import re, sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-def numeric_sort( text1, text2 ):
-    """
-    For two items containing space-separated text, compares corresponding
-    pieces numerically when both are numeric, and as text otherwise
-    """
-    pieces1 = text1.split()
-    pieces2 = text2.split()
-    if len( pieces1 ) == 0:
-        return 1
-    if len( pieces2 ) == 0:
-        return -1
-    for i, pc1 in enumerate( pieces1 ):
-        if i == len( pieces2 ):
-            return 1
-        if not pieces2[i].isdigit():
-            if pc1.isdigit():
-                return -1
-            else:
-                if pc1 > pieces2[i]:
-                    return 1
-                elif pc1 < pieces2[i]:
-                    return -1
-        else:
-            if not pc1.isdigit():
-                return 1
-            else:
-                if int( pc1 ) > int( pieces2[i] ):
-                    return 1
-                elif int( pc1 ) < int( pieces2[i] ):
-                    return -1
-    if i < len( pieces2 ) - 1:
-        return -1
-    return 0
-
-def __main__():
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-
-    # open up output files
-    output = open( options.int_out, 'wb' )
-    if options.bed_ins_out != 'None':
-        output_bed_ins = open( options.bed_ins_out, 'wb' )
-    else:
-        output_bed_ins = None
-    if options.bed_del_out != 'None':
-        output_bed_del = open( options.bed_del_out, 'wb' )
-    else:
-        output_bed_del = None
-
-    # the pattern to match, assuming just one indel per cigar string
-    pat_indel = re.compile( '^(?P<lmatch>\d+)M(?P<ins_del_width>\d+)(?P<ins_del>[ID])(?P<rmatch>\d+)M$' )
-    pat_multi = re.compile( '(\d+[MIDNSHP])(\d+[MIDNSHP])(\d+[MIDNSHP])+' )
-
-    # go through all lines in input file
-    out_data = {}
-    multi_indel_lines = 0
-    for line in open( options.input, 'rb' ):
-        if line and not line.startswith( '#' ) and not line.startswith( '@' ) :
-            split_line = line.split( '\t' )
-            if len( split_line ) < 12:
-                continue
-            # grab relevant pieces
-            cigar = split_line[5].strip()
-            pos = int( split_line[3] )
-            chr = split_line[2]
-            base_string = split_line[9]
-            # parse cigar string
-            m = pat_indel.match( cigar )
-            if not m:
-                m = pat_multi.match( cigar )
-                # skip this line if no match
-                if not m:
-                    continue
-                # account for multiple indels or operations we don't process
-                else:
-                    multi_indel_lines += 1
-                continue
-            else:
-                match = m.groupdict()
-            left = int( match[ 'lmatch' ] )
-            middle = int( match[ 'ins_del_width' ] )
-            middle_type = match[ 'ins_del' ]
-            bases = base_string[ left : left + middle ]
-            # calculate start and end positions, and output to insertion or deletion file
-            start = left + pos
-            if middle_type == 'D':
-                end = start + middle
-                data = [ chr, start, end, 'D' ]
-                if options.include_base == "true":
-                    data.append( '-' )
-            else:
-                end = start + 1
-                data = [ chr, start, end, 'I' ]
-                if options.include_base == "true":
-                    data.append( bases )
-            location = '\t'.join( [ '%s' % d for d in data ] )
-            try:
-                out_data[ location ] += 1
-            except KeyError:
-                out_data[ location ] = 1
-    # output to interval file
-    # get all locations and sort
-    locations = out_data.keys()
-    locations.sort( numeric_sort )
-    last_line = ''
-    # output each location, either with counts or each occurrence
-    for loc in locations:
-        sp_loc = loc.split( '\t' )
-        cur_line = '\t'.join( sp_loc[:3] )
-        if options.collapse == 'true':
-            output.write( '%s\t%s\n' % ( loc, out_data[ loc ] ) )
-            if output_bed_del and sp_loc[3] == 'D':
-                output_bed_del.write( '%s\n' % cur_line )
-            if output_bed_ins and sp_loc[3] == 'I' and last_line != cur_line:
-                output_bed_ins.write( '%s\n' % cur_line )
-                last_line = cur_line
-        else:
-            for i in range( out_data[ loc ] ):
-                output.write( '%s\n' % loc )
-                if output_bed_del or output_bed_ins:
-                    if output_bed_del and sp_loc[3] == 'D':
-                        output_bed_del.write( '%s\n' % cur_line )
-                    if output_bed_ins and sp_loc[3] == 'I':
-                        output_bed_ins.write( '%s\n' % cur_line )
-
-    # cleanup, close files
-    if output_bed_ins:
-        output_bed_ins.close()
-    if output_bed_del:
-        output_bed_del.close()
-    output.close()
-
-    # if skipped lines because of more than one indel, output message
-    if multi_indel_lines > 0:
-        sys.stdout.write( '%s alignments were skipped because they contained more than one indel.' % multi_indel_lines )
-
-if __name__=="__main__": __main__()
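
As a quick illustration of the conversion logic above, here is a minimal sketch that applies the same single-indel pattern to read r327 from the tool's help example (8M1D10M at position 11 on chrM); it reproduces the ``chrM 19 20 D`` row shown there::

  import re

  # the single-indel pattern from indel_sam2interval.py, as a raw string
  pat_indel = re.compile( r'^(?P<lmatch>\d+)M(?P<ins_del_width>\d+)(?P<ins_del>[ID])(?P<rmatch>\d+)M$' )

  cigar, pos = '8M1D10M', 11
  match = pat_indel.match( cigar ).groupdict()
  left = int( match[ 'lmatch' ] )           # 8 aligned bases before the indel
  width = int( match[ 'ins_del_width' ] )   # 1 deleted base
  start = left + pos                        # 19
  end = start + width                       # 20 (for an insertion it would be start + 1)
  print( 'chrM\t%s\t%s\t%s' % ( start, end, match[ 'ins_del' ] ) )   # chrM  19  20  D
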
--- a/tools/indels/indel_sam2interval.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,139 +0,0 @@
-<tool id="indel_sam2interval" name="Extract indels" version="1.0.0">
-  <description>from SAM</description>
-  <command interpreter="python">
-    indel_sam2interval.py
-      --input=$input1
-      --include_base=$include_base
-      --collapse=$collapse
-      --int_out=$output1
-      #if $ins_out.include_ins_out == "true"
-        --bed_ins_out=$output2
-      #else
-        --bed_ins_out="None"
-      #end if
-      #if $del_out.include_del_out == "true"
-        --bed_del_out=$output3
-      #else
-        --bed_del_out="None"
-      #end if
-  </command>
-  <inputs>
-    <param format="sam" name="input1" type="data" label="Select dataset to convert" />
-    <param name="include_base" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Include the relevant base(s) for each insertion (and a dash (-) for deletions)" />
-    <param name="collapse" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Collapse repeated locations onto single line with counts" />
-    <conditional name="ins_out">
-      <param name="include_ins_out" type="select" label="Include insertions output bed file?">
-        <option value="true">Yes</option>
-        <option value="false">No</option>
-      </param>
-      <when value="true" />
-      <when value="false" />
-    </conditional>
-    <conditional name="del_out">
-      <param name="include_del_out" type="select" label="Include deletions output bed file?">
-        <option value="true">Yes</option>
-        <option value="false">No</option>
-      </param>
-      <when value="true" />
-      <when value="false" />
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="interval" name="output1" />
-    <data format="bed" name="output2">
-      <filter>ins_out[ "include_ins_out" ] == "true"</filter>
-    </data>
-    <data format="bed" name="output3">
-      <filter>del_out[ "include_del_out" ] == "true"</filter>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="indel_sam2interval_in1.sam" ftype="sam"/>
-      <param name="include_base" value="true"/>
-      <param name="collapse" value="true"/>
-      <param name="include_ins_out" value="true" />
-      <param name="include_del_out" value="true" />
-      <output name="output1" file="indel_sam2interval_out1.interval" ftype="interval"/>
-      <output name="output2" file="indel_sam2interval_out2.bed" ftype="bed"/>
-      <output name="output3" file="indel_sam2interval_out3.bed" ftype="bed"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-Given a SAM file containing indels, this tool converts them to an interval file with a column indicating whether each is an insertion or a deletion, and can also create a BED file for each type (one for insertions, one for deletions). The interval file can be combined with other such files to create a table for analysis with the Indel Analysis Table tool. The BED files can be useful for visualizing the reads.
-
------
-
-**Example**
-
-Suppose you have the following mapping results::
-
- r327     16   chrM   11   37      8M1D10M   *   0   0             CTTACCAGATAGTCATCA   -+&lt;2;?@BA@?-,.+4=4             XT:A:U  NM:i:1  X0:i:1  X1:i:0  XM:i:0  XO:i:1  XG:i:1  MD:Z:41^C35
- r457      0   chr1   14   37          14M   *   0   0                 ACCTGACAGATATC   =/DF;?@1A@?-,.                 XT:A:U  NM:i:0  X0:i:1  X1:i:0  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
- r501     16   chrM    6   23      7M1I13M   *   0   0          TCTGTGCCTACCAGACATTCA   +=$2;?@BA@?-,.+4=4=4A          XT:A:U  NM:i:3  X0:i:1  X1:i:1  XM:i:2  XO:i:1  XG:i:1  MD:Z:28C36G9        XA:Z:chrM,+134263658,14M1I61M,4;
- r1288    16   chrM    8   37      11M1I7M   *   0   0            TCACTTACCTGTACACACA   /*F2;?@%A@?-,.+4=4=            XT:A:U  NM:i:4  X0:i:1  X1:i:0  XM:i:3  XO:i:1  XG:i:1  MD:Z:2T0T1A69
- r1902     0   chr1    4   37      7M2D18M   *   0   0        AGTCTCTTACCTGACGGTTATGA   &lt;2;?@BA@?-,.+4=4=4AA663        XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:17^CA58A0
- r2204    16   chrM    9    0          19M   *   0   0            CTGGTACCTGACAGGTATC   2;?@BA@?-,.+4=4=4AA            XT:A:R  NM:i:1  X0:i:2  X1:i:0  XM:i:1  XO:i:0  XG:i:0  MD:Z:0T75           XA:Z:chrM,-564927,76M,1;
- r2314    16   chrM    6   37      10M2D8M   *   0   0               TCACTCTTACGTCTGA   &lt;2;?@BA@?-,.+4=4               XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:25A5^CA45
- r3001     0   chrM   13   37   3M1D5M2I7M   *   0   0              TACAGTCACCCTCATCA   &lt;2;?@BA/(@?-,$&amp;                XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:17^CA58A0
- r3218     0   chr1   13   37       8M1D7M   *   0   0                TACAGTCACTCATCA   &lt;2;?@BA/(@?-,$&amp;                XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:17^CA58A0
- r4767    16   chr2    3   37      15M2I7M   *   0   0       CAGACTCTCTTACCAAAGACAGAC   &lt;2;?@BA/(@?-,.+4=4=4AA66       XT:A:U  NM:i:4  X0:i:1  X1:i:0  XM:i:3  XO:i:1  XG:i:1  MD:Z:2T1A4T65
- r5333     0   chrM    5   37      17M1D8M   *   0   0       GTCTCTCATACCAGACAACGGCAT   FB3$@BA/(@?-,.+4=4=4AA66       XT:A:U  NM:i:4  X0:i:1  X1:i:0  XM:i:3  XO:i:1  XG:i:1  MD:Z:45C10^C0C5C13
- r6690    16   chrM    7   23          20M   *   0   0           CTCTCTTACCAGACAGACAT   2;?@BA/(@?-,.+4=4=4A           XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76             XA:Z:chrM,-568532,76M,1;
- r7211     0   chrM    7   37          24M   *   0   0       CGACAGAGACAAAATAACATTTAA   //&lt;2;?@BA@?-,.+4=442;;6:       XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:2  XO:i:1  XG:i:1  MD:Z:73G0G0
- r7899    69      *    0    0            *   *   0   0       CTGCGTGTTGGTGTCTACTGGGGT   #%#'##$#$##&amp;%#%$$$%#%#'#
- r9192   133      *    0    0            *   *   0   0       GTGCGTCGGGGAGGGTGCTGTCGG   ######%#$%#$$###($###&amp;&amp;%
- r9922    16   chrM    4    0       7M3I9M   *   0   0            CCAGACATTTGAAATCAGG   F/D4=44^D++26632;;6            XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
- r9987    16   chrM    4    0      9M1I18M   *   0   0   AGGTTCTCATTACCTGACACTCATCTTG   G/AD6"/+4=4426632;;6:&lt;2;?@BA   XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
- r10145   16   chr1   16    0       5M2D7M   *   0   0                   CACATTGTTGTA   G//+4=44=4AA                   XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
- r10324   16   chrM   15    0       6M1D5M   *   0   0                   CCGTTCTACTTG   A@??8.G//+4=                   XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
- r12331   16   chrM   17    0       4M2I6M   *   0   0                  AGTCGAATACGTG   632;;6:&lt;2;?@B                  XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
- r12914   16   chr2   24    0       4M3I3M   *   0   0                     ACTACCCCAA   G//+4=42,.                     XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
- r13452   16   chrM   13    0      3M1D11M   *   0   0                 TACGTCACTCATCA   IIIABCCCICCCCI                 XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
-
-
-The following three files will be produced (Interval, Insertions BED and Deletions BED)::
-
- chr1   11   13   D     -   1
- chr1   21   22   D     -   1
- chr1   21   23   D     -   1
- chr2   18   19   I    AA   1
- chr2   28   29   I   CCC   1
- chrM   11   12   I   TTT   1
- chrM   13   14   I     C   1
- chrM   13   14   I     T   1
- chrM   16   17   D     -   1
- chrM   16   18   D     -   1
- chrM   19   20   D     -   1
- chrM   19   20   I     T   1
- chrM   21   22   D     -   1
- chrM   21   22   I    GA   1
- chrM   22   23   D     -   1
-
- chr2   18   19
- chr2   28   29
- chrM   11   12
- chrM   13   14
- chrM   13   14
- chrM   19   20
- chrM   21   22
-
- chr1   11   13
- chr1   21   22
- chr1   21   23
- chrM   16   17
- chrM   16   18
- chrM   19   20
- chrM   21   22
- chrM   22   23
-
-For more information on SAM, please consult the `SAM format description`__.
-
-.. __: http://www.ncbi.nlm.nih.gov/pubmed/19505943
-
-
-  </help>
-</tool>
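
For reference, the Cheetah ``<command>`` template above renders to an invocation along these lines when both optional BED outputs are enabled (the dataset paths are hypothetical)::

  python indel_sam2interval.py --input=/galaxy/datasets/dataset_1.dat \
      --include_base=true --collapse=true \
      --int_out=/galaxy/datasets/dataset_2.dat \
      --bed_ins_out=/galaxy/datasets/dataset_3.dat \
      --bed_del_out=/galaxy/datasets/dataset_4.dat
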
--- a/tools/indels/indel_table.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,113 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Combines several interval files containing indels with counts. All input files need to have the same number of columns.
-
-usage: %prog [options] [input3 sum3[ input4 sum4[ input5 sum5[...]]]]
-   -1, --input1=1: The first input file
-   -s, --sum1=s: Whether or not to include the totals from first file in overall total
-   -2, --input2=2: The second input file
-   -S, --sum2=S: Whether or not to include the totals from second file in overall total
-   -o, --output=o: The interval output file for the combined files
-"""
-
-import re, sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit( 1 )
-
-def numeric_sort( text1, text2 ):
-    """
-    Compares two strings of space-separated pieces, piece by piece: numerically
-    when both pieces are numeric, as text otherwise
-    """
-    pieces1 = text1.split()
-    pieces2 = text2.split()
-    if len( pieces1 ) == 0:
-        return 1
-    if len( pieces2 ) == 0:
-        return -1
-    for i, pc1 in enumerate( pieces1 ):
-        if i == len( pieces2 ):
-            return 1
-        if not pieces2[i].isdigit():
-            if pc1.isdigit():
-                return -1
-            else:
-                if pc1 > pieces2[i]:
-                    return 1
-                elif pc1 < pieces2[i]:
-                    return -1
-        else:
-            if not pc1.isdigit():
-                return 1
-            else:
-                if int( pc1 ) > int( pieces2[i] ):
-                    return 1
-                elif int( pc1 ) < int( pieces2[i] ):
-                    return -1
-    if i < len( pieces2 ) - 1:
-        return -1
-    return 0
-
-def __main__():
-    # Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    inputs = [ options.input1, options.input2 ]
-    includes = [ options.sum1, options.sum2 ]
-    inputs.extend( [ a for i, a in enumerate( args ) if i % 2 == 0 ] )
-    includes.extend( [ a for i, a in enumerate( args ) if i % 2 == 1 ] )
-    num_cols = 0
-    counts = {}
-    # read in data from all files and get total counts
-    try:
-        for i, input in enumerate( inputs ):
-            for line in open( input, 'rb' ):
-                sp_line = line.strip().split( '\t' )
-                # set num_cols on first pass
-                if num_cols == 0:
-                    if len( sp_line ) < 4:
-                        raise Exception, 'There need to be at least 4 columns in the file: Chrom, Start, End, and Count'
-                    num_cols = len( sp_line )
-                # deal with differing number of columns
-                elif len( sp_line ) != num_cols:
-                    raise Exception, 'All of the files need to have the same number of columns (current %s != %s of first line)' % ( len( sp_line ), num_cols )
-                # get actual counts for each indel
-                indel = '\t'.join( sp_line[:-1] )
-                try:
-                    count = int( sp_line[-1] )
-                except ValueError, e:
-                    raise Exception, 'The last column of each file must be numeric, with the count of the number of instances of that indel: %s' % str( e )
-                # total across all included files
-                if includes[i] == "true":
-                    try:
-                        counts[ indel ]['tot'] += count
-                    except ( IndexError, KeyError ):
-                        counts[ indel ] = { 'tot': count }
-                # counts for ith file; create the entry if an excluded file sees this indel first
-                counts.setdefault( indel, { 'tot': 0 } )[i] = count
-    except Exception, e:
-        stop_err( 'Failed to read all input files:\n%s' % str( e ) )
-    # output combined results to table file
-    try:
-        output = open( options.output, 'wb' )
-        count_keys = counts.keys()
-        count_keys.sort( numeric_sort )
-        for indel in count_keys:
-            count_out = [ str( counts[ indel ][ 'tot' ] ) ]
-            for i in range( len( inputs ) ):
-                try:
-                    count_out.append( str( counts[ indel ][i] ) )
-                except KeyError:
-                    count_out.append( '0' )
-            output.write( '%s\t%s\n' % ( indel, '\t'.join( count_out ) ) )
-        output.close()
-    except Exception, e:
-        stop_err( 'Failed to output data: %s' % str( e ) )
-
-if __name__=="__main__": __main__()
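
A minimal sketch of the count accumulation above, with two tiny in-memory inputs standing in for files (the rows are invented); it uses ``setdefault`` where the script uses try/except, but the effect is the same::

  lines_per_file = [ [ 'chrM\t300\t301\tD\t-\t6' ],    # file 1, included in the total
                     [ 'chrM\t300\t301\tD\t-\t8' ] ]   # file 2, included in the total
  counts = {}
  for i, lines in enumerate( lines_per_file ):
      for line in lines:
          sp_line = line.strip().split( '\t' )
          indel = '\t'.join( sp_line[:-1] )             # key: every column but the count
          count = int( sp_line[-1] )
          counts.setdefault( indel, { 'tot': 0 } )
          counts[ indel ][ 'tot' ] += count             # overall total
          counts[ indel ][i] = count                    # per-file count
  # counts == { 'chrM\t300\t301\tD\t-': { 'tot': 14, 0: 6, 1: 8 } }
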
--- a/tools/indels/indel_table.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,122 +0,0 @@
-<tool id="indel_table" name="Indel Analysis Table" version="1.0.0">
-  <description>for combining indel interval data</description>
-  <command interpreter="python">
-    indel_table.py
-      --input1=$input1
-      --sum1=$sum1
-      --input2=$input2
-      --sum2=$sum2
-      --output=$output1
-      #for $i in $inputs
-        ${i.input}
-        ${i.sum}
-      #end for
-  </command>
-  <inputs>
-    <param format="interval" name="input1" type="data" label="Select first file to add" />
-    <param name="sum1" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Include first file's totals in overall total" />
-    <param format="interval" name="input2" type="data" label="Select second file to add" />
-    <param name="sum2" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Include second file's totals in overall total" />
-    <repeat name="inputs" title="Input Files">
-      <param name="input" label="Add file" type="data" format="interval" />
-      <param name="sum" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Include file's totals in overall total" />
-    </repeat>
-  </inputs>
-  <outputs>
-    <data format="interval" name="output1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="indel_table_in1.interval" ftype="interval" />
-      <param name="sum1" value="true"/>
-      <param name="input2" value="indel_table_in2.interval" ftype="interval" />
-      <param name="sum2" value="true" />
-      <param name="input" value="indel_table_in3.interval" ftype="interval" />
-      <param name="sum" value="true" />
-      <output name="output1" file="indel_table_out1.interval" ftype="interval" />
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-Creates a table for analyzing and comparing indel data. It combines any number of interval files produced by the tool that converts indel SAM data to interval format, and adds an overall total count across all (or a chosen subset of) the files. Excluding a given file's counts from the total column can be useful for combined data, where the counts for certain indels might otherwise be included more than once.
-
-The exact columns of the output will depend on the columns of the input. Here is the detailed specification of the output columns::
-
-                          Column  Description
- -------------------------------  ----------------------------------------------------------------------------------
-  1 ... m                "Indel"  All the "indel" columns, which contain the info that will be checked for equality
-  m + 1        Total Occurrences  Total number of occurrences of this indel across all (included) files
-  m + 2   Occurrences for File 1  Number of occurrences of this indel for first file
-  m + 3   Occurrences for File 2  Number of occurrences of this indel for second file
-  [m + ...]                [...]  [Number of occurrences of this indel for ... file]
-
-The most likely columns would be from the output of the Convert SAM to Interval/BED tool, so: Chromosome, Start position, End position, I/D (Insertion/Deletion), -/&lt;base(s)&gt; (Deletion/Inserted base(s)), Total Occurrences (across files), Occurrences for File 1, Occurrences for File 2, etc. See below for an example.
-
-
------
-
-**Example**
-
-Suppose you have the following 4 files::
-
- chrM    300    301   D   -    6
- chrM    303    304   D   -   19
- chrM    359    360   D   -    1
- chrM    410    411   D   -    1
- chrM    435    436   D   -    1
-
- chrM    410    411   D   -    1
- chrM    714    715   D   -    1
- chrM    995    997   D   -    1
- chrM   1168   1169   I   A    1
- chrM   1296   1297   D   -    1
-
- chrM    300    301   D   -    8
- chrM    525    526   D   -    1
- chrM    958    959   D   -    1
- chrM    995    996   D   -    3
- chrM   1168   1169   I   C    1
- chrM   1296   1297   D   -    1
-
- chrM    303    304   D   -   22
- chrM    410    411   D   -    1
- chrM    435    436   D   -    1
- chrM    714    715   D   -    1
- chrM    753    754   I   A    1
- chrM   1168   1169   I   A    1
-
-and the fifth file::
-
- chrM    303    304   D   -   22
- chrM    410    411   D   -    2
- chrM    435    436   D   -    1
- chrM    714    715   D   -    2
- chrM    753    754   I   A    1
- chrM    995    997   D   -    1
- chrM   1168   1169   I   A    2
- chrM   1296   1297   D   -    1
-
-The following will be produced if you include the first four files in the sum, but not the fifth::
-
- chrM    300    301   D   -   14    6   0   8    0    0
- chrM    303    304   D   -   41   19   0   0   22   22
- chrM    359    360   D   -    1    1   0   0    0    0
- chrM    410    411   D   -    3    1   1   0    1    2
- chrM    435    436   D   -    2    1   0   0    1    2
- chrM    525    526   D   -    1    0   0   1    0    0
- chrM    714    715   D   -    2    0   1   0    1    2
- chrM    753    754   I   A    1    0   0   0    1    1
- chrM    958    959   D   -    1    0   0   1    0    0
- chrM    995    996   D   -    3    0   0   3    0    0
- chrM    995    997   D   -    1    0   1   0    0    1
- chrM   1168   1169   I   A    2    0   1   0    1    2
- chrM   1168   1169   I   C    1    0   0   1    0    0
- chrM   1296   1297   D   -    2    0   1   1    0    1
-
-The first numeric column is the total of the next four columns; the fifth file's counts are excluded from it.
-
-
-  </help>
-</tool>
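
A sketch of how each output row is assembled from the accumulated counts; the dict below is filled in by hand to mimic three input files of which only the first and third contain the indel::

  counts = { 'chrM\t300\t301\tD\t-': { 'tot': 14, 0: 6, 2: 8 } }
  num_files = 3
  for indel in sorted( counts ):
      row = [ str( counts[ indel ][ 'tot' ] ) ]
      for i in range( num_files ):
          row.append( str( counts[ indel ].get( i, 0 ) ) )   # 0 when a file lacks the indel
      print( '%s\t%s' % ( indel, '\t'.join( row ) ) )
  # prints: chrM  300  301  D  -  14  6  0  8
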
--- a/tools/indels/sam_indel_filter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,91 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Allows user to filter out non-indels from SAM.
-
-usage: %prog [options]
-   -i, --input=i: Input SAM file to be filtered
-   -q, --quality_threshold=q: Minimum quality value for adjacent bases
-   -a, --adjacent_bases=a: Number of adjacent bases on each side to check qualities
-   -o, --output=o: Filtered output SAM file
-"""
-
-import re, sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit( 1 )
-
-def __main__():
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    # prep output file
-    output = open( options.output, 'wb' )
-    # patterns
-    pat = re.compile( '^(?P<lmatch>\d+)M(?P<ins_del_width>\d+)(?P<ins_del>[ID])(?P<rmatch>\d+)M$' )
-    pat_multi = re.compile( '(\d+[MIDNSHP])(\d+[MIDNSHP])(\d+[MIDNSHP])+' )
-    try:
-        qual_thresh = int( options.quality_threshold )
-        if qual_thresh < 0 or qual_thresh > 93:
-            raise ValueError
-    except ValueError:
-        stop_err( 'Your quality threshold should be an integer between 0 and 93, inclusive.' )
-    try:
-        adj_bases = int( options.adjacent_bases )
-        if adj_bases < 1:
-            raise ValueError
-    except ValueError:
-        stop_err( 'The number of adjacent bases should be an integer of at least 1.' )
-    # record lines skipped because of more than one indel
-    multi_indel_lines = 0
-    # go through all lines in input file
-    for i,line in enumerate(open( options.input, 'rb' )):
-        if line and not line.startswith( '#' ) and not line.startswith( '@' ) :
-            split_line = line.split( '\t' )
-            cigar = split_line[5].strip()
-            # find matches like 3M2D7M or 7M3I10M
-            match = {}
-            m = pat.match( cigar )
-            # if unprocessable CIGAR
-            if not m:
-                m = pat_multi.match( cigar )
-                # skip this line if no match
-                if not m:
-                    continue
-                # account for multiple indels or operations we don't process
-                else:
-                    multi_indel_lines += 1
-            # otherwise get matching parts
-            else:
-                match = m.groupdict()
-            # process for indels
-            if match:
-                left = int( match[ 'lmatch' ] )
-                right = int( match[ 'rmatch' ] )
-                if match[ 'ins_del' ] == 'I':
-                    middle = int( match[ 'ins_del_width' ] )
-                else:
-                    middle = 0
-                # if there are enough adjacent bases to check, then do so
-                if left >= adj_bases and right >= adj_bases:
-                    quals = split_line[10]
-                    eligible_quals = quals[ left - adj_bases : left + middle + adj_bases ]
-                    qual_thresh_met = True
-                    for q in eligible_quals:
-                        if ord( q ) - 33 < qual_thresh:
-                            qual_thresh_met = False
-                            break
-                    # if filter reqs met, output line
-                    if qual_thresh_met:
-                        output.write( line )
-    # close out file
-    output.close()
-    # if skipped lines because of more than one indel, output message
-    if multi_indel_lines > 0:
-        sys.stdout.write( '%s alignments were skipped because they contained more than one indel.' % multi_indel_lines )
-
-if __name__=="__main__": __main__()
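
A minimal sketch of the quality-window test above, assuming Sanger (Phred+33) qualities; the quality string and CIGAR pieces are invented::

  quals = 'IIIII!IIII'                  # ord('I') - 33 == 40, ord('!') - 33 == 0
  left, middle, adj_bases = 5, 0, 2     # e.g. 5M1D5M; middle is 0 for a deletion
  qual_thresh = 30
  window = quals[ left - adj_bases : left + middle + adj_bases ]   # 'II!I'
  ok = all( ord( q ) - 33 >= qual_thresh for q in window )
  # ok is False: the base just right of the deletion has Phred 0, so the read is dropped
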
--- a/tools/indels/sam_indel_filter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,77 +0,0 @@
-<tool id="sam_indel_filter" name="Filter Indels" version="1.0.0">
-  <description>for SAM</description>
-  <command interpreter="python">
-    sam_indel_filter.py
-      --input=$input1
-      --quality_threshold=$quality_threshold
-      --adjacent_bases=$adjacent_bases
-      --output=$out_file1
-  </command>
-  <inputs>
-    <param format="sam" name="input1" type="data" label="Select dataset to filter" />
-    <param name="quality_threshold" type="integer" value="40" label="Quality threshold for adjacent bases" help="Takes Phred value assuming Sanger scale; usually between 0 and 40, but up to 93" />
-    <param name="adjacent_bases" type="integer" value="1" label="The number of adjacent bases to match on either side of the indel" help="If one side is shorter than this width, the read will be excluded" />
-  </inputs>
-  <outputs>
-    <data format="sam" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="sam_indel_filter_in1.sam" ftype="sam"/>
-      <param name="quality_threshold" value="14"/>
-      <param name="adjacent_bases" value="2"/>
-      <output name="out_file1" file="sam_indel_filter_out1.sam" ftype="sam"/>
-    </test>
-    <test>
-      <param name="input1" value="sam_indel_filter_in1.sam" ftype="sam"/>
-      <param name="quality_threshold" value="29"/>
-      <param name="adjacent_bases" value="5"/>
-      <output name="out_file1" file="sam_indel_filter_out2.sam" ftype="sam"/>
-    </test>
-    <test>
-      <param name="input1" value="sam_indel_filter_in2.sam" ftype="sam"/>
-      <param name="quality_threshold" value="7"/>
-      <param name="adjacent_bases" value="1"/>
-      <output name="out_file1" file="sam_indel_filter_out3.sam" ftype="sam"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-Filters SAM produced by BWA, keeping only alignments that contain indels. Currently it handles alignments with exactly one insertion or one deletion, and skips any alignment containing more than one indel. It matches CIGAR strings (column 6 in the SAM file) like 5M3I5M or 4M2D10M, so there must be a match or mismatch of sufficient length on either side of the indel.
-
------
-
-**Example**
-
-Suppose you have the following::
-
- r770    89  ref        116   37  17M1I5M          =   72131356   0   CACACTGTGACAGACAGCGCAGC   00/02!!0//1200210AA44/1  XT:A:U  CM:i:2  SM:i:37  AM:i:0  X0:i:1    X1:i:0  XM:i:1  XO:i:1  XG:i:1  MD:Z:22
- r770   181  ref        116    0      24M          =   72131356   0  TTGGTGCGCGCGGTTGAGGGTTGG  $$(#%%#$%#%####$%%##$###
- r1945  177  ref   41710908    0      23M  190342418  181247988   0   AGAGAGAGAGAGAGAGAGAGAGA   SQQWZYURVYWX]]YXTSY]]ZM  XT:A:R  CM:i:0  SM:i:0   AM:i:0  X0:i:163148            XM:i:0  XO:i:0  XG:i:0  MD:Z:23
- r3671  117  ref  190342418    0      24M          =  190342418   0  CTGGCGTTCTCGGCGTGGATGGGT  #####$$##$#%#%%###%$#$##
- r3671  153  ref  190342418   37  16M1I6M          =  190342418   0   TCTAACTTAGCCTCATAATAGCT   /&lt;&lt;!"0///////00/!!0121/  XT:A:U  CM:i:2  SM:i:37  AM:i:0  X0:i:1    X1:i:0  XM:i:1  XO:i:1  XG:i:1  MD:Z:22
- r3824  117  ref   80324999    0      24M          =   80324999   0  TCCAGTCGCGTTGTTAGGTTCGGA  #$#$$$#####%##%%###**#+/
- r3824  153  ref   80324999   37  8M1I14M          =   80324999   0   TTTAGCCCGAAATGCCTAGAGCA   4;6//11!"11100110////00  XT:A:U  CM:i:2  SM:i:37  AM:i:0  X0:i:1    X1:i:0  XM:i:1  XO:i:1  XG:i:1  MD:Z:22
- r4795   81  ref   26739130    0      23M   57401793   57401793   0   TGGCATTCCTGTAGGCAGAGAGG   AZWWZS]!"QNXZ]VQ]]]/2]]  XT:A:R  CM:i:2  SM:i:0   AM:i:0  X0:i:3    X1:i:0  XM:i:2  XO:i:0  XG:i:0  MD:Z:23
- r4795  161  ref   57401793   37      23M   26739130   26739130   0   GATCACCCAGGTGATGTAACTCC   ]WV]]]]WW]]]]]]]]]]PU]]  XT:A:U  CM:i:0  SM:i:37  AM:i:0  X0:i:1    X1:i:0  XM:i:0  XO:i:0  XG:i:0  MD:Z:23
- r4800   16  ref        241  255  15M1D8M          =          0   0   CGTGGCCGGCGGGCCGAAGGCAT   IIIIIIIIIICCCCIII?IIIII  XT:A:U  CM:i:2  SM:i:37  AM:i:0  X0:i:1    X1:i:0  XM:i:1  XO:i:1  XG:i:1  MD:Z:22
- r5377  170  ref   59090793   37      23M   26739130   26739130   0   TATCAATAAGGTGATGTAACTCG   ]WV]ABAWW]]]]]P]P//GU]]  XT:A:U  CM:i:0  SM:i:37  AM:i:0  X0:i:1    X1:i:0  XM:i:0  XO:i:0  XG:i:0  MD:Z:23
- r5612  151  ref  190342418   37  19M1I3M          =  190342418   0   TCTAACTTAGCCTCATAATAGCT   /&lt;&lt;!"0/4//7//00/BC0121/  XT:A:U  CM:i:2  SM:i:37  AM:i:0  X0:i:1    X1:i:0  XM:i:1  XO:i:1  XG:i:1  MD:Z:22
-
-
-To select only alignments with indels, you need to determine the minimum quality you want the adjacent bases to have, as well as the number of adjacent bases to check. If you set the quality threshold to 47 and the number of bases to check to 2, you will get the following output::
-
- r770    89  ref        116   37  17M1I5M          =   72131356   0   CACACTGTGACAGACAGCGCAGC   00/02!!0//1200210AA44/1  XT:A:U  CM:i:2  SM:i:37  AM:i:0       X0:i:1    X1:i:0  XM:i:1  XO:i:1  XG:i:1  MD:Z:22
- r4800   16  ref        241  255  15M1D8M          =          0   0   CGTGGCCGGCGGGCCGAAGGCAT   IIIIIIIIIICCCCIII?IIIII  XT:A:U  CM:i:2  SM:i:37  AM:i:0  X0:i:1    X1:i:0  XM:i:1  XO:i:1  XG:i:1  MD:Z:22
- r5612  151  ref  190342418   37  19M1I3M          =  190342418   0   TCTAACTTAGCCTCATAATAGCT   /&lt;&lt;!"0/4//7//00/BC0121/  XT:A:U  CM:i:2  SM:i:37  AM:i:0       X0:i:1    X1:i:0  XM:i:1  XO:i:1  XG:i:1  MD:Z:22
-
-
-For more information on SAM, please consult the `SAM format description`__.
-
-.. __: http://www.ncbi.nlm.nih.gov/pubmed/19505943
-
-
-  </help>
-</tool>
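
The ``<command>`` template above renders to an invocation like the following (paths hypothetical), matching the threshold-47, 2-adjacent-bases example in the help text::

  python sam_indel_filter.py --input=/galaxy/datasets/dataset_10.dat \
      --quality_threshold=47 --adjacent_bases=2 \
      --output=/galaxy/datasets/dataset_11.dat
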
--- a/tools/maf/genebed_maf_to_fasta.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,98 +0,0 @@
-<tool id="GeneBed_Maf_Fasta2" name="Stitch Gene blocks" version="1.0.1">
-  <description>given a set of coding exon intervals</description>
-  <command interpreter="python">
-    #if $maf_source_type.maf_source == "user" #interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --mafIndex=$maf_source_type.maf_file.metadata.maf_index --interval_file=$input1 --output_file=$out_file1 --mafSourceType=$maf_source_type.maf_source --geneBED --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
-    #else                                     #interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_identifier --interval_file=$input1 --output_file=$out_file1 --mafSourceType=$maf_source_type.maf_source  --geneBED --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
-    #end if# --overwrite_with_gaps=$overwrite_with_gaps
-  </command>
-  <inputs>
-    <param name="input1" type="data" format="bed" label="Gene BED File">
-      <validator type="unspecified_build" />
-      <validator type="expression" message="Input must be in BED12 format.">value.metadata.columns &gt;= 12</validator> <!-- allow 12+ columns, not as strict as possible. TODO: only list bed files with 12+ columns -->
-    </param>
-    <conditional name="maf_source_type">
-      <param name="maf_source" type="select" label="MAF Source">
-        <option value="cached" selected="true">Locally Cached Alignments</option>
-        <option value="user">Alignments in Your History</option>
-      </param>
-      <when value="user">
-        <param name="maf_file" type="data" format="maf" label="MAF File">
-          <validator type="dataset_ok_validator" />
-          <options>
-            <filter type="data_meta" ref="input1" key="dbkey" />
-          </options>
-        </param>
-        <param name="species" type="select" display="checkboxes" multiple="true" label="Choose species" help="Select species to be included in the final alignment">
-          <options>
-            <filter type="data_meta" ref="maf_file" key="species" />
-          </options>
-        </param>
-      </when>
-      <when value="cached">
-        <param name="maf_identifier" type="select" label="MAF Type" >
-          <options from_file="maf_index.loc">
-            <column name="name" index="0"/>
-            <column name="value" index="1"/>
-            <column name="dbkey" index="2"/>
-            <column name="species" index="3"/>
-            <filter type="data_meta" ref="input1" key="dbkey" column="2" multiple="True" separator=","/>
-            <validator type="no_options" message="No alignments are available for the build associated with the selected interval file"/>
-          </options>
-        </param> 
-        <param name="species" type="select" display="checkboxes" multiple="true" label="Choose species" help="Select species to be included in the final alignment">
-          <options from_file="maf_index.loc">
-            <column name="uid" index="1"/>
-            <column name="value" index="3"/>
-            <column name="name" index="3"/>
-            <filter type="param_value" ref="maf_identifier" name="uid" column="1"/>
-            <filter type="multiple_splitter" column="3" separator=","/>
-          </options>
-        </param>
-      </when>
-    </conditional>
-    <param name="overwrite_with_gaps" type="select" label="Split into Gapless MAF blocks" help="When set to Yes, blocks are divided around gaps appearing in any species. This will prevent gaps occurring in the interior of the sequence for an aligning species from overwriting a nucleotide found for the same position in a lower-scoring block.">
-      <option value="True" selected="true">No</option>
-      <option value="False">Yes</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="8.bed"/>
-      <param name="maf_source" value="cached"/>in aligning species
-      <param name="maf_identifier" value="8_WAY_MULTIZ_hg17"/>
-      <param name="species" value="canFam1,hg17,mm5,panTro1,rn3"/>
-      <param name="overwrite_with_gaps" value="True"/>
-      <output name="out_file1" file="gene_bed_maf_to_fasta_out.fasta" />
-    </test>
-    <test>
-      <param name="input1" value="8.bed"/>
-      <param name="maf_source" value="user"/>
-      <param name="maf_file" value="4.maf"/>
-      <param name="species" value="hg17,panTro1"/>
-      <param name="overwrite_with_gaps" value="True"/>
-      <output name="out_file1" file="gene_bed_maf_to_fasta_user_out.fasta" />
-    </test>
-  </tests> 
-  <help>
-
-**What it does**
-
-The coding sequences of genes are usually composed of several coding exons. Each coding exon is an individual genomic region; when concatenated, these regions constitute the coding sequence. A single genomic region can be covered by multiple alignment blocks, and in many cases it is desirable to stitch these alignment blocks together. This tool accepts a list of gene-based intervals in the Gene BED format. For every interval it performs the following (a toy sketch of the stitching step follows this tool definition):
-
-  * finds all MAF blocks that overlap the coding regions;
-  * sorts MAF blocks by alignment score;
-  * stitches blocks together and resolves overlaps based on alignment score;
-  * outputs alignments in FASTA format.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-  </help>
-</tool>
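
A toy sketch of the score-based overlap resolution described in the help above (the real logic lives in interval_maf_to_merged_fasta.py; the scores, coordinates, and bases here are invented)::

  # (score, start, bases) for one species over a region starting at 100
  blocks = [ ( 10.0, 100, 'ACGT' ), ( 50.0, 102, 'GGGG' ) ]
  region_start, region_len = 100, 6
  stitched = [ '-' ] * region_len
  for score, start, bases in sorted( blocks, reverse=True ):   # highest score first
      for offset, base in enumerate( bases ):
          i = start + offset - region_start
          if 0 <= i < region_len and stitched[i] == '-':
              stitched[i] = base                # lower-scoring blocks never overwrite
  print( ''.join( stitched ) )                  # ACGGGG
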
--- a/tools/maf/interval2maf.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,139 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Reads a list of intervals and a maf. Produces a new maf containing the
-blocks or parts of blocks in the original that overlapped the intervals.
-
-If a MAF file, rather than a UID, is provided, the MAF file is indexed before being processed.
-
-NOTE: If two intervals overlap the same block it will be written twice.
-
-usage: %prog maf_file [options]
-   -d, --dbkey=d: Database key, ie hg17
-   -c, --chromCol=c: Column of Chr
-   -s, --startCol=s: Column of Start
-   -e, --endCol=e: Column of End
-   -S, --strandCol=S: Column of Strand
-   -t, --mafType=t: Type of MAF source to use
-   -m, --mafFile=m: Path of source MAF file, if not using cached version
-   -I, --mafIndex=I: Path of precomputed source MAF file index, if not using cached version
-   -i, --interval_file=i:       Input interval file
-   -o, --output_file=o:      Output MAF file
-   -p, --species=p: Species to include in output
-   -P, --split_blocks_by_species=P: Split blocks by species
-   -r, --remove_all_gap_columns=r: Remove all Gap columns
-   -l, --indexLocation=l: Override default maf_index.loc file
-   -z, --mafIndexFile=z: Directory of local maf index file ( maf_index.loc or maf_pairwise.loc )
-"""
-
-#Dan Blankenberg
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-import bx.align.maf
-import bx.intervals.io
-from galaxy.tools.util import maf_utilities
-import sys
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def __main__():
-    index = index_filename = None
-    mincols = 0
-    
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    
-    if options.dbkey: dbkey = options.dbkey
-    else: dbkey = None
-    if dbkey in [None, "?"]:
-        maf_utilities.tool_fail( "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file." )
-    
-    species = maf_utilities.parse_species_option( options.species )
-    
-    if options.chromCol: chromCol = int( options.chromCol ) - 1
-    else: 
-        maf_utilities.tool_fail( "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes." )
-    
-    if options.startCol: startCol = int( options.startCol ) - 1
-    else: 
-        maf_utilities.tool_fail( "Start column not set, click the pencil icon in the history item to set the metadata attributes." )
-    
-    if options.endCol: endCol = int( options.endCol ) - 1
-    else: 
-        maf_utilities.tool_fail( "End column not set, click the pencil icon in the history item to set the metadata attributes." )
-    
-    if options.strandCol: strandCol = int( options.strandCol ) - 1
-    else: 
-        strandCol = -1
-    
-    if options.interval_file: interval_file = options.interval_file
-    else: 
-        maf_utilities.tool_fail( "Input interval file has not been specified." )
-    
-    if options.output_file: output_file = options.output_file
-    else: 
-        maf_utilities.tool_fail( "Output file has not been specified." )
-    
-    split_blocks_by_species = remove_all_gap_columns = False
-    if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species':
-        split_blocks_by_species = True
-        if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns':
-            remove_all_gap_columns = True
-    else:
-        remove_all_gap_columns = True
-    #Finish parsing command line
-    
-    #Open indexed access to MAFs
-    if options.mafType:
-        if options.indexLocation:
-            index = maf_utilities.maf_index_by_uid( options.mafType, options.indexLocation )
-        else:
-            index = maf_utilities.maf_index_by_uid( options.mafType, options.mafIndexFile )
-        if index is None:
-            maf_utilities.tool_fail( "The MAF source specified (%s) appears to be invalid." % ( options.mafType ) )
-    elif options.mafFile:
-        index, index_filename = maf_utilities.open_or_build_maf_index( options.mafFile, options.mafIndex, species = [dbkey] )
-        if index is None:
-            maf_utilities.tool_fail( "Your MAF file appears to be malformed." )
-    else:
-        maf_utilities.tool_fail( "Desired source MAF type has not been specified." )
-    
-    #Create MAF writer
-    out = bx.align.maf.Writer( open(output_file, "w") )
-    
-    #Iterate over input regions 
-    num_blocks = 0
-    num_regions = None
-    for num_regions, region in enumerate( bx.intervals.io.NiceReaderWrapper( open( interval_file, 'r' ), chrom_col = chromCol, start_col = startCol, end_col = endCol, strand_col = strandCol, fix_strand = True, return_header = False, return_comments = False ) ):
-        src = maf_utilities.src_merge( dbkey, region.chrom )
-        for block in index.get_as_iterator( src, region.start, region.end ):
-            if split_blocks_by_species:
-                blocks = [ new_block for new_block in maf_utilities.iter_blocks_split_by_species( block ) if maf_utilities.component_overlaps_region( new_block.get_component_by_src_start( dbkey ), region ) ]
-            else:
-                blocks = [ block ]
-            for block in blocks:
-                block = maf_utilities.chop_block_by_region( block, src, region )
-                if block is not None:
-                    if species is not None:
-                        block = block.limit_to_species( species )
-                    block = maf_utilities.orient_block_by_region( block, src, region )
-                    if remove_all_gap_columns:
-                        block.remove_all_gap_columns()
-                    out.write( block )
-                    num_blocks += 1
-    
-    #Close output MAF
-    out.close()
-    
-    #remove index file if created during run
-    maf_utilities.remove_temp_index_file( index_filename )
-    
-    if num_blocks:
-        print "%i MAF blocks extracted for %i regions." % ( num_blocks, ( num_regions + 1 ) )
-    elif num_regions is not None:
-        print "No MAF blocks could be extracted for %i regions." % ( num_regions + 1 )
-    else:
-        print "No valid regions have been provided."
-    
-if __name__ == "__main__": __main__()
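
A condensed sketch of the per-region extraction loop above, with the Galaxy-specific helpers reduced to comments (the dbkey and coordinates are invented)::

  dbkey = 'hg17'
  regions = [ ( 'chr1', 100, 200 ) ]        # (chrom, start, end)
  for chrom, start, end in regions:
      # MAF components are keyed by 'species.chrom'; src_merge builds this string
      src = '%s.%s' % ( dbkey, chrom )      # 'hg17.chr1'
      # index.get_as_iterator( src, start, end ) yields the blocks overlapping the
      # region; each is chopped to the region, optionally limited to the chosen
      # species, oriented to the region's strand, and written to the output MAF.
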
--- a/tools/maf/interval2maf.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,294 +0,0 @@
-<tool id="Interval2Maf1" name="Extract MAF blocks" version="1.0.1">
-  <description>given a set of genomic intervals</description>
-  <command interpreter="python">
-    #if $maf_source_type.maf_source == "user" #interval2maf.py --dbkey=${input1.dbkey} --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafFile=$maf_source_type.mafFile --mafIndex=$maf_source_type.mafFile.metadata.maf_index --interval_file=$input1 --output_file=$out_file1 --mafIndexFile=${GALAXY_DATA_INDEX_DIR}/maf_index.loc --species=$maf_source_type.species
-    #else                                     #interval2maf.py --dbkey=${input1.dbkey} --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafType=$maf_source_type.mafType --interval_file=$input1 --output_file=$out_file1 --mafIndexFile=${GALAXY_DATA_INDEX_DIR}/maf_index.loc --species=$maf_source_type.species
-    #end if# --split_blocks_by_species=$split_blocks_by_species_selector.split_blocks_by_species
-    #if $split_blocks_by_species_selector.split_blocks_by_species == "split_blocks_by_species"#
-        --remove_all_gap_columns=$split_blocks_by_species_selector.remove_all_gap_columns
-    #end if
-  </command>
-  <inputs>
-    <param format="interval" name="input1" type="data" label="Choose intervals">
-      <validator type="unspecified_build" />
-    </param>
-    <conditional name="maf_source_type">
-      <param name="maf_source" type="select" label="MAF Source">
-        <option value="cached" selected="true">Locally Cached Alignments</option>
-        <option value="user">Alignments in Your History</option>
-      </param>
-      <when value="user">
-        <param format="maf" name="mafFile" label="Choose alignments" type="data">
-          <options>
-            <filter type="data_meta" ref="input1" key="dbkey" />
-          </options>
-          <validator type="dataset_ok_validator" />
-        </param>
-        <param name="species" type="select" display="checkboxes" multiple="true" label="Choose species" help="Select species to be included in the final alignment">
-          <options>
-            <filter type="data_meta" ref="mafFile" key="species" />
-          </options>
-        </param>
-      </when>
-      <when value="cached">
-        <param name="mafType" type="select" label="Choose alignments">
-          <options from_data_table="indexed_maf_files">
-            <!--
-            <column name="name" index="0"/>
-            <column name="value" index="1"/>
-            <column name="dbkey" index="2"/>
-            <column name="species" index="3"/>
-            -->
-            <filter type="data_meta" ref="input1" key="dbkey" column="dbkey" multiple="True" separator=","/>
-            <validator type="no_options" message="No alignments are available for the build associated with the selected interval file"/>
-          </options>
-        </param>
-        <param name="species" type="select" display="checkboxes" multiple="true" label="Choose species" help="Select species to be included in the final alignment">
-          <options from_data_table="indexed_maf_files">
-            <column name="uid" index="1"/>
-            <column name="value" index="3"/>
-            <column name="name" index="3"/>
-            <filter type="param_value" ref="mafType" column="uid"/>
-            <filter type="multiple_splitter" column="name" separator=","/>
-          </options>
-        </param>
-      </when>
-    </conditional>
-    <conditional name="split_blocks_by_species_selector">
-      <param name="split_blocks_by_species" type="select" label="Split blocks by species" help="Not usually applicable. See help below for more information.">
-        <option value="split_blocks_by_species">Split by species</option>
-        <option value="dont_split_blocks_by_species" selected="true">Do not split</option>
-      </param>
-      <when value="dont_split_blocks_by_species">
-        <!-- do nothing here -->
-      </when>
-      <when value="split_blocks_by_species">
-        <param name="remove_all_gap_columns" type="select" label="Collapse empty alignment columns">
-          <option value="remove_all_gap_columns" selected="true">Collapse empty columns</option>
-          <option value="do_not_remove_all_gap_columns">Do not collapse</option>
-        </param>
-      </when>
-    </conditional>
-   </inputs>
-   <outputs>
-     <data format="maf" name="out_file1"/>
-   </outputs>
-   <tests>
-     <test>
-       <param name="input1" value="1.bed"/>
-       <param name="maf_source" value="cached"/>
-       <param name="mafType" value="ENCODE_TBA_hg17"/>
-       <param name="species" value="hg17,panTro1,baboon,marmoset,galago,rn3,mm6,rabbit,cow,canFam1,rfbat,shrew,armadillo,tenrec,monDom1,tetNig1,fr1,rheMac1,galGal2,xenTro1,danRer2,elephant,platypus,hedgehog,colobus_monkey,dusky_titi,owl_monkey,mouse_lemur"/>
-       <param name="split_blocks_by_species" value="dont_split_blocks_by_species"/>
-       <output name="out_file1" file="fsa_interval2maf.dat" />
-     </test>
-     <test>
-       <param name="input1" value="1.bed"/>
-       <param name="maf_source" value="user"/>
-       <param name="mafFile" value="fsa_interval2maf.dat"/>
-       <param name="species" value="hg17,panTro1,baboon,marmoset,galago,rn3,mm6,rabbit,cow,canFam1,rfbat,shrew,armadillo,tenrec,monDom1,tetNig1,fr1,rheMac1,galGal2,xenTro1,danRer2,elephant,platypus,hedgehog,colobus_monkey,dusky_titi,owl_monkey,mouse_lemur"/>
-       <param name="split_blocks_by_species" value="dont_split_blocks_by_species"/>
-       <output name="out_file1" file="fsa_interval2maf.dat" />
-     </test>
-     <test>
-       <param name="input1" value="1.bed" dbkey="hg18" ftype="bed"/>
-       <param name="maf_source" value="cached"/>
-       <param name="mafType" value="28_WAY_MULTIZ_hg18"/>
-       <param name="species" value="hg18,panTro2,mm8"/>
-       <param name="split_blocks_by_species" value="dont_split_blocks_by_species"/>
-       <output name="out_file1" file="interval2maf_3from28way.maf" />
-     </test>
-   </tests>
-   <help>
-**What it does**
-
-This tool takes genomic coordinates, superimposes them on multiple alignments (in MAF format) stored on the Galaxy site or from your history, and excises alignment blocks corresponding to each set of coordinates. Alignment blocks that extend past START and/or END positions of an interval are trimmed. Note that a single genomic interval may correspond to two or more alignment blocks. 
-
------
-
-**Example** 
-
-Here a single interval is superimposed on three MAF blocks. Blocks 1 and 3 are trimmed because they extend beyond boundaries of the interval:
-
-.. image:: ./static/images/maf_icons/interval2maf.png
-
--------
-
-**Split blocks by species**
-
-This option examines each MAF block for multiple occurrences of a species in a single block. When this occurs, a block is split into multiple blocks where every combination of one sequence per species per block is represented.
-
-The interface for this option has two inputs: 
-
- * **MAF file to split**. Choose multiple alignments from history to be split by species.
- * **Collapse empty alignment columns**. Whether alignment columns containing only gaps in the new blocks should be removed.
-
-
-
-**Example 1**: **Collapse empty alignment columns is Yes**:
-
-For the following alignment::
-
-  ##maf version=1
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-
-the tool will create **a single** history item containing 12 alignment blocks (notice that no columns contain only gaps)::
-
-  ##maf version=1
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT-GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT-GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC--GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
- 
-  a score=2047408.0
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC-GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC-GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGCAG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC---AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC---AG 
-
-
-
-**Example 2**: **Collapse empty alignment columns is No**:
-
-For the following alignment::
-
-  ##maf version=1
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-
-the tool will create **a single** history item containing 12 alignment blocks (notice that some columns contain only gaps)::
-
-  ##maf version=1
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/maf/interval2maf_pairwise.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-<tool id="Interval2Maf_pairwise1" name="Extract Pairwise MAF blocks" version="1.0.1">
-  <description>given a set of genomic intervals</description>
-  <command interpreter="python">interval2maf.py --dbkey=${input1.dbkey} --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafType=$mafType --interval_file=$input1 --output_file=$out_file1 --indexLocation=${GALAXY_DATA_INDEX_DIR}/maf_pairwise.loc</command>
-  <inputs>
-    <param name="input1" type="data" format="interval" label="Interval File">
-      <validator type="unspecified_build" />
-    </param>
-    <param name="mafType" type="select" label="Choose MAF source">
-      <options from_file="maf_pairwise.loc">
-        <column name="name" index="0"/>
-        <column name="value" index="1"/>
-        <column name="dbkey" index="2"/>
-        <column name="species" index="3"/>
-        <filter type="data_meta" ref="input1" key="dbkey" column="2" multiple="True" separator=","/>
-        <validator type="no_options" message="No alignments are available for the build associated with the selected interval file"/>
-      </options>
-    </param> 
-   </inputs>
-  <outputs>
-    <data format="maf" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="8.bed" dbkey="hg17" format="bed"/>
-      <param name="mafType" value="PAIRWISE_hg17_fr1"/>
-      <output name="out_file1" file="Interval2Maf_pairwise_out.maf"/>
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-This tool takes genomic coordinates, superimposes them on pairwise alignments (in MAF format) stored on the Galaxy site, and excises alignment blocks corresponding to each set of coordinates. Alignment blocks that extend past START and/or END positions of an interval are trimmed. Note that a single genomic interval may correspond to two or more alignment blocks. 
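-
-The core of the excision step can be sketched with bx-python. The snippet below is a minimal illustration only, not the tool's actual implementation (which lives in interval2maf.py); the file names, source name, and interval coordinates are hypothetical, and reverse-strand bookkeeping is ignored::
-
-  import bx.align.maf
-
-  interval_start, interval_end = 127471530, 127471560 #hypothetical interval
-  reader = bx.align.maf.Reader( open( "pairwise.maf" ) )
-  writer = bx.align.maf.Writer( open( "trimmed.maf", "w" ) )
-  for block in reader:
-      component = block.get_component_by_src( "hg17.chr7" )
-      if component is None:
-          continue
-      start = max( component.start, interval_start )
-      end = min( component.end, interval_end )
-      if start >= end:
-          continue #block does not overlap the interval
-      #trim the block to the interval boundaries
-      writer.write( block.slice_by_component( component, start, end ) )
-  writer.close()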
-
------
-
-**Example** 
-
-Here a single interval is superimposed on three MAF blocks. Blocks 1 and 3 are trimmed because they extend beyond the boundaries of the interval:
-
-.. image:: ./static/images/maf_icons/interval2maf.png
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/maf/interval_maf_to_merged_fasta.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,196 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Reads an interval or gene BED and a MAF Source.
-Produces a FASTA file containing the aligned intervals/gene sequences, based upon the provided coordinates.
-
-Alignment blocks are layered on top of each other based upon score.
-
-usage: %prog maf_file [options]
-   -d, --dbkey=d: Database key, ie hg17
-   -c, --chromCol=c: Column of Chr
-   -s, --startCol=s: Column of Start
-   -e, --endCol=e: Column of End
-   -S, --strandCol=S: Column of Strand
-   -G, --geneBED: Input is a Gene BED file, process and join exons as one region
-   -t, --mafSourceType=t: Type of MAF source to use
-   -m, --mafSource=m: Path of source MAF file, if not using cached version
-   -I, --mafIndex=I: Path of precomputed source MAF file index, if not using cached version
-   -i, --interval_file=i:       Input interval file
-   -o, --output_file=o:      Output MAF file
-   -p, --species=p: Species to include in output
-   -O, --overwrite_with_gaps=O: Allow gaps interior to a species' sequence in a higher-scoring block to overwrite bases found for the same positions in a lower-scoring block.
-   -z, --mafIndexFileDir=z: Directory of local maf_index.loc file
-
-usage: %prog dbkey_of_BED comma_separated_list_of_additional_dbkeys_to_extract comma_separated_list_of_indexed_maf_files input_gene_bed_file output_fasta_file cached|user GALAXY_DATA_INDEX_DIR
-"""
-
-#Dan Blankenberg
-from galaxy import eggs
-from galaxy.tools.util import maf_utilities
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-import bx.intervals.io
-import sys
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def __main__():
-    
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    mincols = 0
-    strand_col = -1
-    
-    if options.dbkey:
-        primary_species = options.dbkey
-    else:
-        primary_species = None
-    if primary_species in [None, "?", "None"]:
-        stop_err( "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file." )
-    
-    include_primary = True
-    secondary_species = maf_utilities.parse_species_option( options.species )
-    if secondary_species:
-        species = list( secondary_species ) # make copy of species list
-        if primary_species in secondary_species:
-            secondary_species.remove( primary_species )
-        else:
-            include_primary = False
-    else:
-        species = None
-    
-    if options.interval_file:
-        interval_file = options.interval_file
-    else: 
-        stop_err( "Input interval file has not been specified." )
-    
-    if options.output_file:
-        output_file = options.output_file
-    else: 
-        stop_err( "Output file has not been specified." )
-    
-    if not options.geneBED:
-        if options.chromCol:
-            chr_col = int( options.chromCol ) - 1
-        else: 
-            stop_err( "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes." )
-        
-        if options.startCol:
-            start_col = int( options.startCol ) - 1
-        else: 
-            stop_err( "Start column not set, click the pencil icon in the history item to set the metadata attributes." )
-        
-        if options.endCol:
-            end_col = int( options.endCol ) - 1
-        else: 
-            stop_err( "End column not set, click the pencil icon in the history item to set the metadata attributes." )
-        
-        if options.strandCol:
-            strand_col = int( options.strandCol ) - 1
-    
-    mafIndexFile = "%s/maf_index.loc" % options.mafIndexFileDir
-    
-    overwrite_with_gaps = True
-    if options.overwrite_with_gaps and options.overwrite_with_gaps.lower() == 'false':
-        overwrite_with_gaps = False
-    
-    #Finish parsing command line
-        
-    #get index for mafs based on type 
-    index = index_filename = None
-    #using specified uid for locally cached
-    if options.mafSourceType.lower() in ["cached"]:
-        index = maf_utilities.maf_index_by_uid( options.mafSource, mafIndexFile )
-        if index is None:
-            stop_err( "The MAF source specified (%s) appears to be invalid." % ( options.mafSource ) )
-    elif options.mafSourceType.lower() in ["user"]:
-        #index maf for use here, need to remove index_file when finished
-        index, index_filename = maf_utilities.open_or_build_maf_index( options.mafSource, options.mafIndex, species = [primary_species] )
-        if index is None:
-            stop_err( "Your MAF file appears to be malformed." )
-    else:
-        stop_err( "Invalid MAF source type specified." )
-    
-    #open output file
-    output = open( output_file, "w" )
-    
-    if options.geneBED:
-        region_enumerator = maf_utilities.line_enumerator( open( interval_file, "r" ).readlines() )
-    else:
-        region_enumerator = enumerate( bx.intervals.io.NiceReaderWrapper( open( interval_file, 'r' ), chrom_col = chr_col, start_col = start_col, end_col = end_col, strand_col = strand_col, fix_strand = True, return_header = False, return_comments = False ) )
-    
-    #Step through intervals
-    regions_extracted = 0
-    line_count = 0
-    for line_count, line in region_enumerator:
-        try:
-            if options.geneBED: #Process as Gene BED
-                try:
-                    starts, ends, fields = maf_utilities.get_starts_ends_fields_from_gene_bed( line )
-                    #create spliced alignment object
-                    alignment = maf_utilities.get_spliced_region_alignment( index, primary_species, fields[0], starts, ends, strand = '+', species = species, mincols = mincols, overwrite_with_gaps = overwrite_with_gaps )
-                    primary_name = secondary_name = fields[3]
-                    alignment_strand = fields[5]
-                except Exception, e:
-                    print "Error loading exon positions from input line %i: %s" % ( line_count, e )
-                    continue
-            else: #Process as standard intervals
-                try:
-                    #create stitched region alignment object
-                    alignment = maf_utilities.get_region_alignment( index, primary_species, line.chrom, line.start, line.end, strand = '+', species = species, mincols = mincols, overwrite_with_gaps = overwrite_with_gaps )
-                    primary_name = "%s(%s):%s-%s" % ( line.chrom, line.strand, line.start, line.end )
-                    secondary_name = ""
-                    alignment_strand = line.strand
-                except Exception, e:
-                    print "Error loading region positions from input line %i: %s" % ( line_count, e )
-                    continue
-            
-            #Write alignment to output file
-            #Output primary species first, if requested
-            if include_primary:
-                output.write( ">%s.%s\n" %( primary_species, primary_name ) )
-                if alignment_strand == "-":
-                    output.write( alignment.get_sequence_reverse_complement( primary_species ) )
-                else:
-                    output.write( alignment.get_sequence( primary_species ) )
-                output.write( "\n" )
-            #Output all remaining species
-            for spec in secondary_species or alignment.get_species_names( skip = primary_species ):
-                if secondary_name:
-                    output.write( ">%s.%s\n" % ( spec, secondary_name ) )
-                else:
-                    output.write( ">%s\n" % ( spec ) )
-                if alignment_strand == "-":
-                    output.write( alignment.get_sequence_reverse_complement( spec ) )
-                else:
-                    output.write( alignment.get_sequence( spec ) )
-                output.write( "\n" )
-            
-            output.write( "\n" )
-            
-            regions_extracted += 1
-        
-        except Exception, e:
-            print "Unexpected error from input line %i: %s" % ( line_count, e )
-            continue
-    
-    #close output file
-    output.close()
-    
-    #remove index file if created during run
-    maf_utilities.remove_temp_index_file( index_filename )
-    
-    #Print message about success for user
-    if regions_extracted > 0:
-        print "%i regions were processed successfully." % ( regions_extracted )
-    else:
-        print "No regions were processed successfully."
-        if line_count > 0 and options.geneBED:
-            print "This tool requires your input file to conform to the 12 column BED standard."
-
-if __name__ == "__main__": __main__()
--- a/tools/maf/interval_maf_to_merged_fasta.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,114 +0,0 @@
-<tool id="Interval_Maf_Merged_Fasta2" name="Stitch MAF blocks" version="1.0.1">
-  <description>given a set of genomic intervals</description>
-  <command interpreter="python">
-    #if $maf_source_type.maf_source == "user" #interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --mafIndex=$maf_source_type.maf_file.metadata.maf_index --interval_file=$input1 --output_file=$out_file1 --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafSourceType=$maf_source_type.maf_source --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
-    #else                                     #interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_identifier --interval_file=$input1 --output_file=$out_file1 --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafSourceType=$maf_source_type.maf_source --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
-    #end if# --overwrite_with_gaps=$overwrite_with_gaps
-  </command>
-  <inputs>
-    <page>
-        <param format="interval" name="input1" type="data" label="Choose intervals">
-          <validator type="unspecified_build" />
-        </param>
-        <conditional name="maf_source_type">
-            <param name="maf_source" type="select" label="MAF Source">
-              <option value="cached" selected="true">Locally Cached Alignments</option>
-              <option value="user">Alignments in Your History</option>
-            </param>
-            <when value="user">
-              <param name="maf_file" type="data" format="maf" label="MAF File">
-                <options>
-                  <filter type="data_meta" ref="input1" key="dbkey" />
-                </options>
-                <validator type="dataset_ok_validator" />
-              </param>
-              <param name="species" type="select" display="checkboxes" multiple="true" label="Choose species" help="Select species to be included in the final alignment">
-                <options>
-                  <filter type="data_meta" ref="maf_file" key="species" />
-                </options>
-              </param>
-            </when>
-            <when value="cached">
-              <param name="maf_identifier" type="select" label="MAF Type" >
-                <options from_file="maf_index.loc">
-                  <column name="name" index="0"/>
-                  <column name="value" index="1"/>
-                  <column name="dbkey" index="2"/>
-                  <column name="species" index="3"/>
-                  <filter type="data_meta" ref="input1" key="dbkey" column="2" multiple="True" separator=","/>
-                  <validator type="no_options" message="No alignments are available for the build associated with the selected interval file"/>
-                </options>
-              </param> 
-              <param name="species" type="select" display="checkboxes" multiple="true" label="Choose species" help="Select species to be included in the final alignment">
-                <options from_file="maf_index.loc">
-                  <column name="uid" index="1"/>
-                  <column name="value" index="3"/>
-                  <column name="name" index="3"/>
-                  <filter type="param_value" ref="maf_identifier" name="uid" column="1"/>
-                  <filter type="multiple_splitter" column="3" separator=","/>
-                </options>
-              </param>
-            </when>
-        </conditional>
-        <param name="overwrite_with_gaps" type="select" label="Split into Gapless MAF blocks" help="When set to Yes, blocks are divided around gaps appearing in any species. This will prevent gaps occurring in the interior of the sequence for an aligning species from overwriting a nucleotide found for the same position in a lower-scoring block.">
-          <option value="True" selected="true">No</option>
-          <option value="False">Yes</option>
-        </param>
-    </page>
-   </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="13.bed" dbkey="hg18" ftype="bed"/>
-      <param name="maf_source" value="cached"/>
-      <param name="maf_identifier" value="17_WAY_MULTIZ_hg18"/>
-      <param name="species" value="hg18,mm8"/>
-      <param name="overwrite_with_gaps" value="True"/>
-      <output name="out_file1" file="interval_maf_to_merged_fasta_out3.fasta" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" dbkey="hg17" ftype="bed"/>
-      <param name="maf_source" value="cached"/>
-      <param name="maf_identifier" value="8_WAY_MULTIZ_hg17"/>
-      <param name="species" value="canFam1,hg17,mm5,panTro1,rn3"/>
-      <param name="overwrite_with_gaps" value="True"/>
-      <output name="out_file1" file="interval_maf_to_merged_fasta_out.dat" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" dbkey="hg17" ftype="bed"/>
-      <param name="maf_source" value="user"/>
-      <param name="maf_file" value="5.maf"/>
-      <param name="species" value="canFam1,hg17,mm5,panTro1,rn3"/>
-      <param name="overwrite_with_gaps" value="True"/>
-      <output name="out_file1" file="interval_maf_to_merged_fasta_user_out.dat" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-A single genomic region can be covered by multiple alignment blocks. In many cases it is desirable to stitch these alignment blocks together. This tool accepts a list of genomic intervals and, for every interval, performs the following (a sketch of the core call appears after this list):
-
-  * finds all MAF blocks that overlap the interval;
-  * sorts MAF blocks by alignment score;
-  * stitches blocks together and resolves overlaps based on alignment score;
-  * outputs alignments in FASTA format.
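-
-A minimal sketch of the core stitching call, built on the helpers used by interval_maf_to_merged_fasta.py above (the MAF identifier, location file path, coordinates, and species list are illustrative)::
-
-  from galaxy.tools.util import maf_utilities
-
-  #look up a locally cached, indexed MAF by its uid
-  index = maf_utilities.maf_index_by_uid( "8_WAY_MULTIZ_hg17", "maf_index.loc" )
-  #stitch all overlapping blocks into a single gapped alignment for the region
-  alignment = maf_utilities.get_region_alignment( index, "hg17", "chr7", 127471195, 127495474, strand = '+', species = [ "hg17", "mm5" ], mincols = 0, overwrite_with_gaps = True )
-  print alignment.get_sequence( "hg17" )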
-
-------
-
-**Example**
-
-Here three MAF blocks overlapping a single interval are stitched together. Space between blocks 2 and 3 is filled with gaps:
-
-.. image:: ./static/images/maf_icons/stitchMaf.png
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/maf/maf_by_block_number.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-"""
-Reads a list of block numbers and a maf. Produces a new maf containing the
-blocks specified by number.
-"""
-
-import sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from galaxy.tools.util import maf_utilities
-import bx.align.maf
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def __main__():
-    input_block_filename = sys.argv[1].strip()
-    input_maf_filename = sys.argv[2].strip()
-    output_filename1 = sys.argv[3].strip()
-    block_col = int( sys.argv[4].strip() ) - 1
-    if block_col < 0:
-        print >> sys.stderr, "Invalid column specified"
-        sys.exit(0)
-    species = maf_utilities.parse_species_option( sys.argv[5].strip() )
-    
-    maf_writer = bx.align.maf.Writer( open( output_filename1, 'w' ) )
-    #we want to maintain order of block file and write blocks as many times as they are listed
-    failed_lines = []
-    for ctr, line in enumerate( open( input_block_filename, 'r' ) ):
-        try:
-            block_wanted = int( line.split( "\t" )[block_col].strip() )
-        except:
-            failed_lines.append( str( ctr ) )
-            continue
-        try:
-            for count, block in enumerate( bx.align.maf.Reader( open( input_maf_filename, 'r' ) ) ):
-                if count == block_wanted:
-                    if species:
-                        block = block.limit_to_species( species )
-                    maf_writer.write( block )
-                    break
-        except:
-            print >>sys.stderr, "Your MAF file appears to be malformed."
-            sys.exit()
-    if len( failed_lines ) > 0: print "Failed to extract from %i lines (%s)." % ( len( failed_lines ), ",".join( failed_lines ) )
-if __name__ == "__main__": __main__()
--- a/tools/maf/maf_by_block_number.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-<tool id="maf_by_block_number1" name="Extract MAF by block number" version="1.0.1">
-  <description>given a set of block numbers and a MAF file</description>
-  <command interpreter="python">maf_by_block_number.py $input1 $input2 $out_file1 $block_col $species</command>
-  <inputs>
-    <param format="txt" name="input1" type="data" label="Block Numbers"/>
-    <param format="maf" name="input2" label="MAF File" type="data"/>
-    <param name="block_col" type="data_column" label="Column containing Block number" data_ref="input1" accept_default="True" />
-    <param name="species" type="select" display="checkboxes" multiple="true" label="Choose species" help="Select species to be included in the final alignment">
-      <options>
-        <filter type="data_meta" ref="input2" key="species" />
-      </options>
-    </param>
-   </inputs>
-  <outputs>
-    <data format="maf" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="maf_by_block_numbers.dat"/>
-      <param name="input2" value="3.maf"/>
-      <param name="block_col" value="1"/>
-      <param name="species" value="hg17,panTro1,mm5,rn3,canFam1"/>
-      <output name="out_file1" file="maf_by_block_number_out.dat" />
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool takes a list of block numbers, one per line, and extracts the corresponding MAF blocks from the provided file. Block numbers start at 0.
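-
-A hypothetical invocation, following the command template above (file names are illustrative; the fourth argument is the 1-based column that holds the block numbers)::
-
-  python maf_by_block_number.py block_numbers.txt input.maf output.maf 1 hg17,mm5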
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/maf/maf_filter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-#Dan Blankenberg
-#Filters a MAF file according to the provided code file, which is generated in maf_filter.xml <configfiles>
-#Also allows filtering by number of columns in a block, and limiting output species
-import sys, os, shutil
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-import bx.align.maf
-from galaxy.tools.util import maf_utilities
-
-def main():
-    #Read command line arguments
-    try:
-        script_file = sys.argv.pop( 1 )
-        maf_file = sys.argv.pop( 1 )
-        out_file = sys.argv.pop( 1 )
-        additional_files_path = sys.argv.pop( 1 )
-        species = maf_utilities.parse_species_option( sys.argv.pop( 1 ) )
-        min_size = int( sys.argv.pop( 1 ) )
-        max_size = int( sys.argv.pop( 1 ) )
-        if max_size < 1: max_size = sys.maxint
-        min_species_per_block = int( sys.argv.pop( 1 ) )
-        exclude_incomplete_blocks = int( sys.argv.pop( 1 ) )
-        if species:
-            num_species = len( species )
-        else:
-            num_species = len( sys.argv.pop( 1 ).split( ',') )
-    except:
-        print >>sys.stderr, "One or more arguments is missing.\nUsage: maf_filter.py maf_filter_file input_maf output_maf path_to_save_debug species_to_keep"
-        sys.exit()
-    
-    #Open input and output MAF files
-    try:
-        maf_reader = bx.align.maf.Reader( open( maf_file,'r' ) )
-        maf_writer = bx.align.maf.Writer( open( out_file,'w' ) )
-    except:
-        print >>sys.stderr, "Your MAF file appears to be malformed."
-        sys.exit()
-    
-    #Save script file for debugging/verification info later
-    os.mkdir( additional_files_path )
-    shutil.copy( script_file, os.path.join( additional_files_path, 'debug.txt' ) )
-    
-    #Loop through blocks, running filter on each
-    #'maf_block' and 'ret_val' are used/shared in the provided code file
-    #'ret_val' should be set to True if the block is to be kept
-    i = -1
-    blocks_kept = 0
-    for i, maf_block in enumerate( maf_reader ):
-        if min_size <= maf_block.text_size <= max_size:
-            local = {'maf_block':maf_block, 'ret_val':False}
-            execfile( script_file, {}, local )
-            if local['ret_val']:
-                #Species limiting must be done after filters as filters could be run on non-requested output species
-                if species:
-                    maf_block = maf_block.limit_to_species( species )
-                if len( maf_block.components ) >= min_species_per_block and ( not exclude_incomplete_blocks or len( maf_block.components ) >= num_species ):
-                    maf_writer.write( maf_block )
-                    blocks_kept += 1
-    maf_writer.close()
-    maf_reader.close()
-    if i < 0: print "Your file contains no valid maf_blocks."
-    else: print 'Kept %s of %s blocks (%.2f%%).' % ( blocks_kept, i + 1, float( blocks_kept ) / float( i + 1 ) * 100.0 )
-
-if __name__ == "__main__":
-    main()
--- a/tools/maf/maf_filter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,202 +0,0 @@
-<tool id="MAF_filter" name="Filter MAF" version="1.0.1">
-  <description>by specified attributes</description>
-  <command interpreter="python">maf_filter.py $maf_filter_file $input1 $out_file1 $out_file1.files_path $species $min_size $max_size $min_species_per_block $exclude_incomplete_blocks ${input1.metadata.species}</command>
-  <inputs>
-    <page>
-      <param name="input1" type="data" format="maf" label="MAF File"/>
-      <param name="min_size" label="Minimum Size" value="0" type="integer"/>
-      <param name="max_size" label="Maximum Size" value="0" type="integer" help="A maximum size less than 1 indicates no limit"/>
-      <param name="species" type="select" display="checkboxes" multiple="true" label="Choose species" help="Select species to be included in the final alignment">
-        <options>
-          <filter type="data_meta" ref="input1" key="species" />
-        </options>
-      </param>
-      <param name="min_species_per_block" type="select" label="Exclude blocks which have only one species" >
-        <option value="2">Yes</option>
-        <option value="1" selected="True">No</option>
-      </param>
-      <param name="exclude_incomplete_blocks" type="select" label="Exclude blocks which have missing species" >
-        <option value="1">Yes</option>
-        <option value="0" selected="True">No</option>
-      </param>
-      <repeat name="maf_filters" title="Filter">
-        <param name="species1" type="select" label="When Species" multiple="false">
-          <options>
-            <filter type="data_meta" ref="input1" key="species" />
-          </options>
-        </param>
-        <conditional name="species1_attributes">
-          <param name="species1_attribute_type" type="select" label="Species Attribute">
-            <option value="attribute_strand">Strand</option>
-            <option value="attribute_chr" selected="true">Chromosome</option>
-          </param>
-          <when value="attribute_strand">
-            <param name="species1_is_isnot" type="select" label="Conditional">
-              <option value="==">Is</option>
-              <option value="!=">Is Not</option>
-            </param>
-            <param name="species1_attribute" type="select" label="Strand">
-              <option value="+" selected="true">+</option>
-              <option value="-">-</option>
-            </param>
-            <repeat name="filter_condition" title="Filter Condition">
-              <param name="species2" type="select" label="Species" multiple="false">
-                <options>
-                  <filter type="data_meta" ref="input1" key="species" />
-                </options>
-              </param>
-              <conditional name="species2_attributes">
-                <param name="species2_attribute_type" type="select" label="Species Attribute">
-                  <option value="attribute_strand" selected="true">Strand</option>
-                  <option value="attribute_chr">Chromosome</option>
-                </param>
-                <when value="attribute_strand">
-                  <param name="species2_is_isnot" type="select" label="Conditional">
-                    <option value="==">Is</option>
-                    <option value="!=">Is Not</option>
-                  </param>
-                  <param name="species2_attribute" type="select" label="Strand">
-                    <option value="+" selected="true">+</option>
-                    <option value="-">-</option>
-                  </param>
-                </when>
-                <when value="attribute_chr">
-                  <param name="species2_is_isnot" type="select" label="Conditional">
-                    <option value="in">Is</option>
-                    <option value="not in">Is Not</option>
-                  </param>
-                  <param name="species2_attribute" type="text" label="Chromosome" value="chr1"/>
-                </when>
-              </conditional>
-            </repeat>
-          </when>
-          <when value="attribute_chr">
-            <param name="species1_is_isnot" type="select" label="Conditional">
-              <option value="in">Is</option>
-              <option value="not in">Is Not</option>
-            </param>
-            <param name="species1_attribute" type="text" label="Chromosome" value="chr1"/>
-            <repeat name="filter_condition" title="Filter Condition">
-              <param name="species2" type="select" label="Species" multiple="false">
-                <options>
-                  <filter type="data_meta" ref="input1" key="species" />
-                </options>
-              </param>
-              <conditional name="species2_attributes">
-                <param name="species2_attribute_type" type="select" label="Species Attribute">
-                  <option value="attribute_strand">Strand</option>
-                  <option value="attribute_chr" selected="true">Chromosome</option>
-                </param>
-                <when value="attribute_strand">
-                  <param name="species2_is_isnot" type="select" label="Conditional">
-                    <option value="==">Is</option>
-                    <option value="!=">Is Not</option>
-                  </param>
-                  <param name="species2_attribute" type="select" label="Strand">
-                    <option value="+" selected="true">+</option>
-                    <option value="-">-</option>
-                  </param>
-                </when>
-                <when value="attribute_chr">
-                  <param name="species2_is_isnot" type="select" label="Conditional">
-                    <option value="in">Is</option>
-                    <option value="not in">Is Not</option>
-                  </param>
-                  <param name="species2_attribute" type="text" label="Chromosome" value="chr1"/>
-                </when>
-              </conditional>
-            </repeat>
-          </when>
-        </conditional>
-      </repeat>
-    </page>
-  </inputs>
-  <configfiles>
-    <configfile name="maf_filter_file">
-#set $is_isnot_valid = {"==":"==", "!=":"!=", "in":"in", "not in":"not in"}
-def maf_block_pass_filter( maf_block ):
-#for $maf_filter in $maf_filters:
-#if $len( $maf_filter['species1_attributes']['filter_condition'] ) == 0:
-#continue
-#end if
-    primary_component = maf_block.get_component_by_src_start( """$maf_filter['species1'].value.encode( 'string_escape' )""".decode( 'string_escape' ) )
-    if primary_component is not None:
-#if $maf_filter['species1_attributes']['species1_attribute_type'] == 'attribute_chr':
-        if primary_component.src.split( "." )[-1] $is_isnot_valid.get( $maf_filter['species1_attributes']['species1_is_isnot'].value.strip(), 'is in' ) """$maf_filter['species1_attributes']['species1_attribute'].value.encode( 'string_escape' )""".decode( 'string_escape' ).split( "," ):
-#else
-        if primary_component.strand $is_isnot_valid.get( $maf_filter['species1_attributes']['species1_is_isnot'].value.strip(), '==' ) """$maf_filter['species1_attributes']['species1_attribute'].value.encode( 'string_escape' )""".decode( 'string_escape' ):
-#end if
-#for $filter_condition in $maf_filter['species1_attributes']['filter_condition']:
-            secondary_component = maf_block.get_component_by_src_start( """$filter_condition['species2'].value.encode( 'string_escape' )""".decode( 'string_escape' ) )
-#if $filter_condition['species2_attributes']['species2_attribute_type'] == 'attribute_chr':
-            if secondary_component is not None:
-                if not ( secondary_component.src.split( "." )[-1] $is_isnot_valid.get( $filter_condition['species2_attributes']['species2_is_isnot'].value.strip(), 'is in' ) """$filter_condition['species2_attributes']['species2_attribute'].value.encode( 'string_escape' )""".decode( 'string_escape' ).split( "," ) ):
-                    return False
-#else:
-            if secondary_component is not None:
-                if not ( secondary_component.strand $is_isnot_valid.get( $filter_condition['species2_attributes']['species2_is_isnot'].value.strip(), '==' ) """$filter_condition['species2_attributes']['species2_attribute'].value.encode( 'string_escape' )""".decode( 'string_escape' ) ):
-                    return False
-#end if
-#end for
-#end for
-    return True
-ret_val = maf_block_pass_filter( maf_block )
-</configfile>
-  </configfiles>
-  <outputs>
-    <data format="maf" name="out_file1" />
-  </outputs>
-<!--
-  <tests>
-    <test>
-      <param name="input1" value="4.maf"/>
-      <param name="species" value="bosTau2,canFam2,hg17,panTro1,rheMac2,rn3"/>
-      <param name="exclude_incomplete_blocks" value="0"/>
-      <param name="min_species_per_block" value="1"/>
-      <param name="min_size" value="0"/>
-      <param name="max_size" value="0"/>
-      <param name="species1" value="hg17"/>
-      <param name="species2" value="hg17"/>
-      <param name="species1_attribute_type" value="attribute_chr"/>
-      <param name="species1_is_isnot" value="in"/>
-      <param name="species1_attribute" value="chr1"/>
-      <param name="filter_condition"/> Test will ERROR when this is set or when it is not set.
-      
-      <output name="out_file1" file="cf_maf_limit_to_species.dat"/>
-    </test>
-  </tests>
--->
-<help>
-This tool allows you to build complex filters to be applied to each alignment block of a MAF file. You can define constraints on species based upon chromosome and strand, and you can specify comma-separated lists of chromosomes where appropriate.
-
-.. class:: infomark
-
-For example, this tool is useful for restricting a set of alignments to only those blocks that contain alignments between chromosomes considered homologous.
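-
-A sketch of the kind of Python predicate this tool generates from the filter settings (species and chromosome names are illustrative; the real code is assembled in the tool's configfile above)::
-
-  def maf_block_pass_filter( maf_block ):
-      #drop a block if hg17 is on chr1 while mm5 is not on the + strand
-      primary = maf_block.get_component_by_src_start( "hg17" )
-      if primary is not None and primary.src.split( "." )[-1] in "chr1".split( "," ):
-          secondary = maf_block.get_component_by_src_start( "mm5" )
-          if secondary is not None and secondary.strand != "+":
-              return False
-      return True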
-
------
-
-.. class:: warningmark
-
-If a species is not found in a particular block, all filters on that species are ignored.
-
------
-
-This tool allows the user to remove any undesired species from a MAF file. If no species are specified then all species will be kept. If species are specified, columns which contain only gaps are removed. The options for this are:
-
- * **Exclude blocks which have missing species** - suppose you want to restrict an 8-way alignment to human, mouse, and rat.  The tool will first remove all other species. Next, if this option is set to **YES** the tool WILL NOT return MAF blocks that do not include human, mouse, or rat. This means that all alignment blocks returned by the tool will have exactly three sequences in this example.
-
- * **Exclude blocks which have only one species** - if this option is set to **YES**, all single-sequence alignment blocks WILL NOT be returned.
-
------
-
-You can also provide a size range and limit your output to the MAF blocks which fall within the specified range.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-</help>
-</tool>
--- a/tools/maf/maf_limit_size.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-"""
-Removes blocks that fall outside of the specified size range.
-"""
-
-import sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-import bx.align.maf
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def __main__():
-
-    input_maf_filename = sys.argv[1].strip()
-    output_filename1 = sys.argv[2].strip()
-    min_size = int( sys.argv[3].strip() )
-    max_size = int( sys.argv[4].strip() )
-    if max_size < 1: max_size = sys.maxint
-    maf_writer = bx.align.maf.Writer( open( output_filename1, 'w' ) )
-    try:
-        maf_reader = bx.align.maf.Reader( open( input_maf_filename, 'r' ) )
-    except:
-        print >>sys.stderr, "Your MAF file appears to be malformed."
-        sys.exit()
-    
-    blocks_kept = 0
-    i = -1
-    for i, m in enumerate( maf_reader ):
-        if min_size <= m.text_size <= max_size:
-            maf_writer.write( m )
-            blocks_kept += 1
-    if i < 0: print "Your file contains no MAF blocks."
-    else: print 'Kept %s of %s blocks (%.2f%%).' % ( blocks_kept, i + 1, float( blocks_kept ) / float( i + 1 ) * 100.0 )
-
-if __name__ == "__main__": __main__()
--- a/tools/maf/maf_limit_size.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-<tool id="maf_limit_size1" name="Filter MAF blocks" version="1.0.1">
-  <description>by Size</description>
-  <command interpreter="python">maf_limit_size.py $input1 $out_file1 $min_size $max_size</command>
-  <inputs>
-    <page>
-        <param format="maf" name="input1" label="MAF File" type="data"/>
-        <param name="min_size" label="Minimum Size" value="0" type="integer"/>
-        <param name="max_size" label="Maximum Size" value="0" type="integer" help="A maximum size less than 1 indicates no limit"/>
-    </page>
-   </inputs>
-  <outputs>
-    <data format="maf" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="3.maf" ftype="maf" />
-      <param name="min_size" value="0"/>
-      <param name="max_size" value="0"/>
-      <output name="out_file1" file="maf_limit_size1_out.maf" />
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool takes a MAF file and a size range and extracts the MAF blocks which fall within the specified range.
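-
-A hypothetical invocation, following the command template above (file names are illustrative; a maximum size below 1 means no upper limit)::
-
-  python maf_limit_size.py input.maf output.maf 50 200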
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/maf/maf_limit_to_species.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Read a maf file and write out a new maf with only blocks having the 
-required species, after dropping any other species and removing
-columns containing only gaps.
-
-usage: %prog species,species2,... input_maf output_maf allow_partial min_species_per_block
-"""
-#Dan Blankenberg
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-import bx.align.maf
-from galaxy.tools.util import maf_utilities
-import sys
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():
-
-    species = maf_utilities.parse_species_option( sys.argv[1] )
-    if species:
-        spec_len = len( species )
-    else:
-        spec_len = 0
-    try:
-        maf_reader = bx.align.maf.Reader( open( sys.argv[2],'r' ) )
-        maf_writer = bx.align.maf.Writer( open( sys.argv[3],'w' ) )
-    except:
-        print >>sys.stderr, "Your MAF file appears to be malformed."
-        sys.exit()
-    allow_partial = False
-    if int( sys.argv[4] ): allow_partial = True
-    min_species_per_block = int( sys.argv[5] )
-    
-    maf_blocks_kept = 0
-    for m in maf_reader:
-        if species:
-            m = m.limit_to_species( species )
-        m.remove_all_gap_columns()
-        spec_in_block_len = len( maf_utilities.get_species_in_block( m ) )
-        if ( not species or allow_partial or spec_in_block_len == spec_len ) and spec_in_block_len > min_species_per_block:
-            maf_writer.write( m )
-            maf_blocks_kept += 1
-    
-    maf_reader.close()
-    maf_writer.close()
-    
-    print "Restricted to species: %s." % ", ".join( species )
-    print "%i MAF blocks have been kept." % maf_blocks_kept
-
-if __name__ == "__main__": 
-    main()
--- a/tools/maf/maf_limit_to_species.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-<tool id="MAF_Limit_To_Species1" name="Filter MAF blocks">
-  <description>by Species</description>
-  <command interpreter="python">maf_limit_to_species.py $species $input1 $out_file1 $allow_partial $min_species</command>
-  <inputs>
-    <param name="input1" type="data" format="maf" label="MAF file"/>
-    <param name="allow_partial" type="select" label="Exclude blocks which have missing species" >
-      <option value="1">No</option>
-      <option value="0">Yes</option>
-    </param>
-    <param name="min_species" type="select" label="Exclude blocks which have only one species" >
-      <option value="1">Yes</option>
-      <option value="0">No</option>
-    </param>
-    <param name="species" type="select" label="Species to keep" display="checkboxes" multiple="true">
-      <options>
-        <filter type="data_meta" ref="input1" key="species" />
-      </options>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="maf" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="4.maf"/>
-      <param name="species" value="bosTau2,canFam2,hg17,panTro1,rheMac2,rn3"/>
-      <param name="allow_partial" value="0"/>
-      <param name="min_species" value="0"/>
-      <output name="out_file1" file="cf_maf_limit_to_species.dat"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool allows the user to remove any undesired species from a MAF file. Columns which contain only gaps are removed. The options for this tool are listed below (a usage sketch follows the list):
-
- * **Exclude blocks which have missing species** - suppose you want to restrict an 8-way alignment to human, mouse, and rat.  The tool will first remove all other species. Next, if this option is set to **YES** the tool WILL NOT return MAF blocks that do not include human, mouse, or rat. This means that all alignment blocks returned by the tool will have exactly three sequences in this example.
-
- * **Exclude blocks which have only one species** - if this option is set to **YES**, all single-sequence alignment blocks WILL NOT be returned.
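-
-A hypothetical invocation, following the command template above (file names are illustrative): keep hg17, mm5, and rn3, exclude blocks missing any of them (fourth argument 0), and exclude single-species blocks (fifth argument 1)::
-
-  python maf_limit_to_species.py hg17,mm5,rn3 input.maf output.maf 0 1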
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-  </help>
-</tool>
-
--- a/tools/maf/maf_reverse_complement.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Reads a MAF file. Produces a MAF file containing
-the reverse complement for each block in the source file.
-
-usage: %prog input_maf_file output_maf_file
-"""
-#Dan Blankenberg
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-import bx.align.maf
-from galaxy.tools.util import maf_utilities
-import sys
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def __main__():
-    #Parse Command Line
-    input_file = sys.argv.pop( 1 )
-    output_file = sys.argv.pop( 1 )
-    species = maf_utilities.parse_species_option( sys.argv.pop( 1 ) )
-    
-    try:
-        maf_writer = bx.align.maf.Writer( open( output_file, 'w' ) )
-    except:
-        print >>sys.stderr, "Unable to open output file"
-        sys.exit()
-    try:
-        count = 0
-        for maf in bx.align.maf.Reader( open( input_file ) ):
-            maf = maf.reverse_complement()
-            if species:
-                maf = maf.limit_to_species( species )
-            maf_writer.write( maf )
-            count += 1
-    except:
-        print >>sys.stderr, "Your MAF file appears to be malformed."
-        sys.exit()
-    print "%i regions were reverse complemented." % count
-    maf_writer.close()
-
-if __name__ == "__main__": __main__()
--- a/tools/maf/maf_reverse_complement.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-<tool id="MAF_Reverse_Complement_1" name="Reverse Complement" version="1.0.1">
-  <description>a MAF file</description>
-  <command interpreter="python">maf_reverse_complement.py $input1 $out_file1 $species</command>
-  <inputs>
-    <page>
-        <param format="maf" name="input1" label="Alignment File" type="data"/>
-        <param name="species" type="select" display="checkboxes" multiple="true" label="Choose species" help="Select species to be included in the final alignment">
-          <options>
-            <filter type="data_meta" ref="input1" key="species" />
-          </options>
-        </param>
-    </page>
-   </inputs>
-  <outputs>
-    <data format="maf" name="out_file1" metadata_source="input1"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="3.maf" dbkey="hg17" format="maf"/>
-      <param name="species" value="hg17,panTro1,mm5,rn3,canFam1"/>
-      <output name="out_file1" file="maf_reverse_complement_out.dat"/>
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-This tool takes a MAF file and creates a new MAF file in which each block has been reverse complemented.
-
-**Example**
-  
-This MAF Block::
-
-  a score=8157.000000
-  s hg17.chr7    127471526 58 + 158628139 AATTTGTGGTTTATTCATTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGG
-  s panTro1.chr6 129885407 58 + 161576975 AATTTGTGGTTTATTCGTTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGG
-  s mm5.chr6      28904928 54 + 149721531 AA----CGTTTCATTGATTGCTCATCATTTAAAAAAAGAAATTCCTCAGTGGAAGAGG
-
-becomes::
-
-  a score=8157.000000
-  s hg17.chr7     31156555 58 - 158628139 CCTCTTCCACTATAGACCTCCTTAAACAAAATAATGAAAAATGAATAAACCACAAATT
-  s panTro1.chr6  31691510 58 - 161576975 CCTCTTCCACTATAGACCTCCTTAAACAAAATAATGAAAAACGAATAAACCACAAATT
-  s mm5.chr6     120816549 54 - 149721531 CCTCTTCCACTGAGGAATTTCTTTTTTTAAATGATGAGCAATCAATGAAACG----TT
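-
-A minimal sketch of the core loop (cf. maf_reverse_complement.py above; file names are hypothetical)::
-
-  import bx.align.maf
-
-  reader = bx.align.maf.Reader( open( "input.maf" ) )
-  writer = bx.align.maf.Writer( open( "output.maf", "w" ) )
-  for block in reader:
-      #reverse complement every component of the block
-      writer.write( block.reverse_complement() )
-  writer.close()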
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/maf/maf_split_by_species.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Read a maf and split blocks by unique species combinations 
-"""
-import sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.align import maf
-from galaxy.tools.util import maf_utilities
-from galaxy.util import string_as_bool
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def __main__():    
-    try:
-        maf_reader = maf.Reader( open( sys.argv[1] ) )
-    except Exception, e:
-        maf_utilities.tool_fail( "Error opening MAF: %s" % e )
-    try:
-        out = maf.Writer( open( sys.argv[2], "w") )
-    except Exception, e:
-        maf_utilities.tool_fail( "Error opening file for output: %s" % e )
-    try:
-        collapse_columns = string_as_bool( sys.argv[3] )
-    except Exception, e:
-        maf_utilities.tool_fail( "Error determining collapse columns value: %s" % e )
-    
-    start_count = 0
-    end_count = 0
-    for start_count, start_block in enumerate( maf_reader ):
-        for block in maf_utilities.iter_blocks_split_by_species( start_block ):
-            if collapse_columns:
-                block.remove_all_gap_columns()
-            out.write( block )
-            end_count += 1
-    out.close()
-    
-    if end_count:
-        print "%i alignment blocks created from %i original blocks." % ( end_count, start_count + 1 )
-    else:
-        print "No alignment blocks were created."
-
-if __name__ == "__main__": __main__()
--- a/tools/maf/maf_split_by_species.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,223 +0,0 @@
-<tool id="MAF_split_blocks_by_species1" name="Split MAF blocks" version="1.0.0">
-  <description>by Species</description>
-  <command interpreter="python">maf_split_by_species.py $input1 $out_file1 $collapse_columns</command>
-  <inputs>
-    <param format="maf" name="input1" type="data" label="MAF file to split"/>
-    <param name="collapse_columns" type="select" label="Collapse empty alignment columns" help="Removes columns that are gaps in all sequences">
-      <option value="True" selected="true">Yes</option>
-      <option value="False">No</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="maf" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="maf_split_by_species_in.maf"/>
-      <param name="collapse_columns" value="True"/>
-      <output name="out_file1" file="maf_split_by_species_collapsed_out.maf"/>
-    </test>
-    <test>
-      <param name="input1" value="maf_split_by_species_in.maf"/>
-      <param name="collapse_columns" value="False"/>
-      <output name="out_file1" file="maf_split_by_species_not_collapsed_out.maf"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool examines each MAF block for multiple occurrences of a species within a single block. When this occurs, the block is split into multiple blocks in which every combination of one sequence per species is represented.
-
-The interface for this tool has two inputs: 
-
- * **MAF file to split**. Choose multiple alignments from history to be split by species.
- * **Collapse empty alignment columns**. Whether alignment columns containing only gaps in the new blocks should be removed (a sketch of the core loop follows this list).
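-
-A minimal sketch of the core loop (cf. maf_split_by_species.py above; file names are hypothetical)::
-
-  import bx.align.maf
-  from galaxy.tools.util import maf_utilities
-
-  reader = bx.align.maf.Reader( open( "input.maf" ) )
-  writer = bx.align.maf.Writer( open( "split.maf", "w" ) )
-  for start_block in reader:
-      #one new block per combination of one sequence per species
-      for block in maf_utilities.iter_blocks_split_by_species( start_block ):
-          block.remove_all_gap_columns() #"Collapse empty alignment columns" = Yes
-          writer.write( block )
-  writer.close()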
-
------
-
-**Example 1**: **Collapse empty alignment columns is Yes**:
-
-For the following alignment::
-
-  ##maf version=1
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-
-the tool will create **a single** history item containing 12 alignment blocks (notice that no columns contain only gaps)::
-
-  ##maf version=1
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT-GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT-GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC--GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
- 
-  a score=2047408.0
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC-GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC-GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGCAG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC---AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC---AG 
-
------
-
-**Example 2**: **Collapse empty alignment columns is No**:
-
-For the following alignment::
-
-  ##maf version=1
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-
-the tool will create **a single** history item containing 12 alignment blocks (notice that some columns contain only gaps)::
-
-  ##maf version=1
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
-  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
-  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-  
-  a score=2047408.0
-  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
-  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
-  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
-
--------
-
-.. class:: infomark
-
-**About formats**
-
-**MAF format** multiple alignment format file. This format stores multiple alignments at the DNA level between entire genomes. 
-
- - The .maf format is line-oriented. Each multiple alignment ends with a blank line.
- - Each sequence in an alignment is on a single line.
- - Lines starting with # are considered to be comments.
- - Each multiple alignment is in a separate paragraph that begins with an "a" line and contains an "s" line for each sequence in the multiple alignment.
- - Some MAF files may contain two optional line types: 
-
-   - An "i" line containing information about what is in the aligned species DNA before and after the immediately preceding "s" line; 
-   - An "e" line containing information about the size of the gap between the alignments that span the current block.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-    </help>
-</tool>
-
--- a/tools/maf/maf_stats.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-"""
-Reads a list of intervals and a maf. Outputs a new set of intervals with statistics appended.
-"""
-
-import sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-import bx.intervals.io
-from bx.bitset import BitSet
-from galaxy.tools.util import maf_utilities
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def __main__():
-    maf_source_type = sys.argv.pop( 1 )
-    input_maf_filename = sys.argv[1].strip()
-    input_interval_filename = sys.argv[2].strip()
-    output_filename = sys.argv[3].strip()
-    dbkey = sys.argv[4].strip()
-    try:
-        chr_col  = int( sys.argv[5].strip() ) - 1
-        start_col = int( sys.argv[6].strip() ) - 1
-        end_col = int( sys.argv[7].strip() ) - 1
-    except:
-        print >>sys.stderr, "You appear to be missing metadata. You can specify your metadata by clicking on the pencil icon associated with your interval file."
-        sys.exit()
-    summary = sys.argv[8].strip()
-    if summary.lower() == "true": summary = True
-    else: summary = False
-
-    mafIndexFile = "%s/maf_index.loc" % sys.argv[9]
-    try:
-        maf_index_filename = sys.argv[10].strip()
-    except:
-        maf_index_filename = None
-    index = index_filename = None
-    if maf_source_type == "user":
-        #index maf for use here
-        index, index_filename = maf_utilities.open_or_build_maf_index( input_maf_filename, maf_index_filename, species = [dbkey] )
-        if index is None:
-            print >>sys.stderr, "Your MAF file appears to be malformed."
-            sys.exit()
-    elif maf_source_type == "cached":
-        #access existing indexes
-        index = maf_utilities.maf_index_by_uid( input_maf_filename, mafIndexFile )
-        if index is None:
-            print >> sys.stderr, "The MAF source specified (%s) appears to be invalid." % ( input_maf_filename )
-            sys.exit()
-    else:
-        print >>sys.stderr, 'Invalid source type specified: %s' % maf_source_type
-        sys.exit()
-        
-    out = open(output_filename, 'w')
-    
-    num_region = None
-    species_summary = {}
-    total_length = 0
-    #loop through interval file
-    for num_region, region in enumerate( bx.intervals.io.NiceReaderWrapper( open( input_interval_filename, 'r' ), chrom_col = chr_col, start_col = start_col, end_col = end_col, fix_strand = True, return_header = False, return_comments = False ) ):
-        src = "%s.%s" % ( dbkey, region.chrom )
-        region_length = region.end - region.start
-        total_length += region_length
-        coverage = { dbkey: BitSet( region_length ) }
-        
-        
-        for block in index.get_as_iterator( src, region.start, region.end ):
-            for spec in maf_utilities.get_species_in_block( block ):
-                if spec not in coverage: coverage[spec] = BitSet( region_length )
-            for block in maf_utilities.iter_blocks_split_by_species( block ):
-                if maf_utilities.component_overlaps_region( block.get_component_by_src( src ), region ):
-                    #need to chop and orient the block
-                    block = maf_utilities.orient_block_by_region( maf_utilities.chop_block_by_region( block, src, region ), src, region, force_strand = '+' )
-                    start_offset, alignment = maf_utilities.reduce_block_by_primary_genome( block, dbkey, region.chrom, region.start )
-                    for i in range( len( alignment[dbkey] ) ):
-                        for spec, text in alignment.items():
-                            if text[i] != '-':
-                                coverage[spec].set( start_offset + i )
-        if summary:
-            #record summary
-            for key in coverage.keys():
-                if key not in species_summary: species_summary[key] = 0
-                species_summary[key] = species_summary[key] + coverage[key].count_range()
-        else:
-            #print coverage for interval
-            coverage_sum = coverage[dbkey].count_range()
-            out.write( "%s\t%s\t%s\t%s\n" % ( "\t".join( region.fields ), dbkey, coverage_sum, region_length - coverage_sum ) )
-            keys = coverage.keys()
-            keys.remove( dbkey )
-            keys.sort()
-            for key in keys:
-                coverage_sum = coverage[key].count_range()
-                out.write( "%s\t%s\t%s\t%s\n" % ( "\t".join( region.fields ), key, coverage_sum, region_length - coverage_sum ) )
-    if summary:
-        out.write( "#species\tnucleotides\tcoverage\n" )
-        for spec in species_summary:
-            out.write( "%s\t%s\t%.4f\n" % ( spec, species_summary[spec], float( species_summary[spec] ) / total_length ) )
-    out.close()
-    if num_region is not None:
-        print "%i regions were processed with a total length of %i." % ( num_region + 1, total_length )
-    maf_utilities.remove_temp_index_file( index_filename )
-
-if __name__ == "__main__": __main__()
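
The per-interval bookkeeping above relies on bx-python's BitSet, one per species. A minimal sketch of the same count, assuming plain Python sets and rows that have already been chopped and oriented to the region; coverage_by_species is a hypothetical helper, not part of the tool::

  def coverage_by_species(rows, region_length, start_offset=0):
      # rows: (species, aligned_text) pairs of equal length, already
      # chopped/oriented so position 0 maps to region start + start_offset.
      covered = {}
      for spec, text in rows:
          positions = covered.setdefault(spec, set())
          for i, base in enumerate(text):
              if base != '-':
                  positions.add(start_offset + i)
      # Per species: (covered, uncovered) within the region, mirroring
      # the two trailing columns the tool appends to each interval line.
      return dict((spec, (len(pos), region_length - len(pos)))
                  for spec, pos in covered.items())

  rows = [("hg18", "ACGT--A"), ("mm8", "AC--TTA")]
  print(coverage_by_species(rows, 10))
  # hg18 and mm8 each cover 5 of the 10 positions in the region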
--- a/tools/maf/maf_stats.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,120 +0,0 @@
-<tool id="maf_stats1" name="MAF Coverage Stats" version="1.0.1">
-  <description>Alignment coverage information</description>
-  <command interpreter="python">
-    maf_stats.py
-    #if $maf_source_type.maf_source == "user":
-      $maf_source_type.maf_source $input2 $input1 $out_file1 $dbkey ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} $summary
-    #else:
-      $maf_source_type.maf_source $maf_source_type.mafType $input1 $out_file1 $dbkey ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} $summary
-    #end if
-    ${GALAXY_DATA_INDEX_DIR}
-    #if $maf_source_type.maf_source == "user":
-    $input2.metadata.maf_index
-    #end if
-  </command>
-  <inputs>
-    <param format="interval" name="input1" label="Interval File" type="data">
-      <validator type="unspecified_build" />
-    </param>
-    <conditional name="maf_source_type">
-      <param name="maf_source" type="select" label="MAF Source">
-        <option value="cached" selected="true">Locally Cached Alignments</option>
-        <option value="user">Alignments in Your History</option>
-      </param>
-      <when value="user">
-        <param format="maf" name="input2" label="MAF File" type="data">
-          <options>
-            <filter type="data_meta" ref="input1" key="dbkey" />
-          </options>
-          <validator type="dataset_ok_validator" />
-        </param>
-      </when>
-      <when value="cached">
-        <param name="mafType" type="select" label="MAF Type">
-          <options from_file="maf_index.loc">
-            <column name="name" index="0"/>
-            <column name="value" index="1"/>
-            <column name="dbkey" index="2"/>
-            <filter type="data_meta" ref="input1" key="dbkey" column="2" multiple="True" separator=","/>
-            <validator type="no_options" message="No alignments are available for the build associated with the selected interval file"/>
-          </options>
-        </param> 
-      </when>
-    </conditional>
-    <param name="summary" type="select" label="Type of Output">
-      <option value="false" selected="true">Coverage by Region</option>
-      <option value="true">Summarize Coverage</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="interval" name="out_file1" metadata_source="input1">
-      <change_format>
-        <when input="summary" value="true" format="tabular" />
-      </change_format>
-    </data>
-  </outputs>
-  <requirements>
-    <requirement type="python-module">numpy</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed" dbkey="hg17" format="bed"/>
-      <param name="maf_source" value="cached"/>
-      <param name="mafType" value="8_WAY_MULTIZ_hg17"/>
-      <output name="out_file1" file="maf_stats_interval_out.dat"/>
-      <param name="summary" value="false"/>
-    </test>
-    <test>
-      <param name="input1" value="1.bed" dbkey="hg17" format="bed"/>
-      <param name="maf_source" value="cached"/>
-      <param name="mafType" value="8_WAY_MULTIZ_hg17"/>
-      <output name="out_file1" file="maf_stats_summary_out.dat"/>
-      <param name="summary" value="true"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool takes a MAF file and an interval file and reports coverage information by interval for each species.
-If a column does not exist in the reference genome, it is not included in the output.
-
-Consider the interval: "chrX 1000 1100 myInterval"
-  Let's suppose we want to do stats on three-way alignments for H, M, and R. The results look like this:
-
-    chrX 1000 1100 myInterval H XXX YYY 
-    
-    chrX 1000 1100 myInterval M XXX YYY 
-    
-    chrX 1000 1100 myInterval R XXX YYY 
-    
-
-  where XXX and YYY are:
-
-    XXX = number of nucleotides
-    
-    YYY = number of gaps
-
-----
-
-Alternatively, you can request only summary information for a set of intervals:
-  
-  ========  ===========  ========
-  #species  nucleotides  coverage
-  ========  ===========  ========
-  hg18         30639      0.2372
-  rheMac2      7524       0.0582
-  panTro2      30390      0.2353
-  ========  ===========  ========
-
-  where **coverage** is the number of nucleotides divided by the total length of the provided intervals.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/maf/maf_thread_for_species.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Read a maf file and write out a new maf with only blocks having all of
-the passed in species, after dropping any other species and removing columns 
-containing only gaps. This will attempt to fuse together any blocks
-which are adjacent after the unwanted species have been dropped. 
-
-usage: %prog input_maf output_maf species1,species2
-"""
-#Dan Blankenberg
-import sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-import bx.align.maf
-
-from bx.align.tools.thread import *
-from bx.align.tools.fuse import *
-
-def main():
-    input_file = sys.argv.pop( 1 )
-    output_file = sys.argv.pop( 1 )
-    species = sys.argv.pop( 1 ).split( ',' )
-    
-    try:
-        maf_reader = bx.align.maf.Reader( open( input_file ) )
-    except:
-        print >> sys.stderr, "Unable to open source MAF file"
-        sys.exit()
-    try:
-        maf_writer = FusingAlignmentWriter( bx.align.maf.Writer( open( output_file, 'w' ) ) )
-    except:
-        print >> sys.stderr, "Unable to open output file"
-        sys.exit()
-    try:
-        for m in maf_reader:            
-            new_components = m.components
-            if species != ['None']:
-                new_components = get_components_for_species( m, species )
-            if new_components: 
-                remove_all_gap_columns( new_components )
-                m.components = new_components
-                m.score = 0.0 
-                maf_writer.write( m )
-    except Exception, e:
-        print >> sys.stderr, "Error steping through MAF File: %s" % e
-        sys.exit()
-    maf_reader.close()
-    maf_writer.close()
-    
-    print "Restricted to species: %s." % ", ".join( species )
-    
-if __name__ == "__main__": main()
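
FusingAlignmentWriter comes from bx.align.tools.fuse and its internals are not shown here. Roughly, two consecutive blocks can be fused when, for every remaining species, the second block starts exactly where the first one ends on the same chromosome and strand. A sketch of that adjacency test under simplified assumptions, using plain dicts rather than alignment objects; can_fuse is a hypothetical name::

  def can_fuse(block1, block2):
      # block: dict mapping species -> (chrom, start, end, strand).
      # Fusable only if both blocks cover the same species and every
      # component of block2 starts exactly where block1's component ends.
      if set(block1) != set(block2):
          return False
      for spec, (chrom, start, end, strand) in block1.items():
          chrom2, start2, end2, strand2 = block2[spec]
          if (chrom, strand) != (chrom2, strand2) or start2 != end:
              return False
      return True

  b1 = {"hg17": ("chr7", 127471195, 127471526, "+"),
        "panTro1": ("chr6", 129885076, 129885407, "+")}
  b2 = {"hg17": ("chr7", 127471526, 127471584, "+"),
        "panTro1": ("chr6", 129885407, 129885465, "+")}
  print(can_fuse(b1, b2))  # True

The coordinates in this example come from the two hg17/panTro1 blocks in the tool help below, which fuse into the single 389-column block shown there.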
--- a/tools/maf/maf_thread_for_species.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-<tool id="MAF_Thread_For_Species1" name="Join MAF blocks">
-  <description>by Species</description>
-  <command interpreter="python">maf_thread_for_species.py $input1 $out_file1 $species</command>
-  <inputs>
-    <param format="maf" name="input1" type="data" label="MAF file"/>
-    <param name="species" type="select" label="Species to keep" display="checkboxes" multiple="true">
-      <options>
-        <filter type="data_meta" ref="input1" key="species" />
-      </options>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="maf" name="out_file1"  metadata_source="input1"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="3.maf" format="maf"/>
-      <param name="species" value="hg17,panTro1"/>
-      <output name="out_file1" file="maf_thread_for_species.dat"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool merges MAF blocks that are adjacent in each of the specified species. Columns that contain only gaps are removed, and species that are not selected are dropped from the output.
-
-**Example**
-
-Specifying the desired species as hg17 and panTro1 with this MAF file::
-
-  ##maf version=1
-  a score=60426.000000
-  s hg17.chr7    127471195 331 + 158628139 gtttgccatcttttgctgctctagggaatccagcagctgtcaccatgtaaacaagcccaggctagaccaGTTACCCTCATCATCTTAGCTGATAGCCAGCCAGCCACCACAGGCAtgagtcaggccatattgctggacccacagaattatgagctaaataaatagtcttgggttaagccactaagttttaggcatagtgtgttatgtaTCTCACAAACATATAAGACTGTGTGTTTGTTGACTGGAGGAAGAGATGCTATAAAGACCACCTTTTAAAACTTCCC-------------------------------AAATACT-GCCACTGATGTCCTG-----ATGGAGGTA-------TGAA-------------------AACATCCACTAA
-  s panTro1.chr6 129885076 331 + 161576975 gtttgccatcttttgctgctcttgggaatccagcagctgtcaccatgtaaacaagcccaggctagaccaGTTACCCTCATCATCTTAGCTGATAGCCAGCCAGCCACCACAGGCAtgagtcaggccatattgctggacccacagaattatgagctaaataaatagtcttgggttaagccactaagttttaggcatagtgtgttatgtaTCTCACAAACATATAAGACTGTGTGTTTGTTGACTGGAGGAAGAGATGCTATAAAGACCACCTTTTGAAACTTCCC-------------------------------AAATACT-GCCACTGATGTCCTG-----ATGGAGGTA-------TGAA-------------------AACATCCACTAA
-  s mm5.chr6      28904571 357 + 149721531 CTCCACTCTCGTTTGCTGTT----------------CTGTCACCATGGAAACAAA-CGAGGGTGGTCCAGTTACTATCTTGACTGCAGCTGGCAGTCAGTT-GCCACT-----CAGGAATAAGGCTATGCCATT-GATCCACTGAACCGTGATCTGGAAACCTGGCTGTTGTTT-------CAAGCCTTGGGGCCAGTTTGCGGTGTTACTCATGA--CTCTAAGATCGTGTGCTTG----CTGCAGGAAGAGACAGCAAGGGGGTTACATTTAAAAAGCCCCCAGTTTAGCTATAGGCAGGCCAACAGGTGTAAAAATACTCACTAGTAATGGGCTGAACTCATGGAGGTAGCATTAGTGAGACACTGTAACTGTTTTTTTAAAAATCACTAA
-  s rn3.chr4      56178191 282 + 187371129 CTTCACTCTCATTTGCTGTT----------------CTGTCACTATGGAGACAAACACAGGCTAGCCCAGTTACTATCTTGATCACAGCAGCT-GTCAGCTAGCTGCCACTCACAGGAATAAGGCCATACCATT-GATCCACTGAACCTTGATCTAGGAATTTGGC----------------------TGGGGCCAGTTTGCGGTGTCACTCATGA--CTCTAAGATTGTGTGTTTG----CTCCAGGAAGAGACGGCAAGAGGATTACCTTTAAAAGGTTC---------------------------------GGAGTCTAGCTGTAGACAGCCCA-----ATG--GGTA-------TAAC-------------------AATACTCACTAA
-
-  a score=8157.000000
-  s hg17.chr7    127471526 58 + 158628139 AATTTGTGGTTTATTCATTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGG
-  s panTro1.chr6 129885407 58 + 161576975 AATTTGTGGTTTATTCGTTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGG
-  s mm5.chr6      28904928 54 + 149721531 AA----CGTTTCATTGATTGCTCATCATTTAAAAAAAGAAATTCCTCAGTGGAAGAGG
-
-results in::
-
-  ##maf version=1
-  a score=0.0
-  s hg17.chr7    127471195 389 + 158628139 gtttgccatcttttgctgctctagggaatccagcagctgtcaccatgtaaacaagcccaggctagaccaGTTACCCTCATCATCTTAGCTGATAGCCAGCCAGCCACCACAGGCAtgagtcaggccatattgctggacccacagaattatgagctaaataaatagtcttgggttaagccactaagttttaggcatagtgtgttatgtaTCTCACAAACATATAAGACTGTGTGTTTGTTGACTGGAGGAAGAGATGCTATAAAGACCACCTTTTAAAACTTCCCAAATACTGCCACTGATGTCCTGATGGAGGTATGAAAACATCCACTAAAATTTGTGGTTTATTCATTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGG 
-  s panTro1.chr6 129885076 389 + 161576975 gtttgccatcttttgctgctcttgggaatccagcagctgtcaccatgtaaacaagcccaggctagaccaGTTACCCTCATCATCTTAGCTGATAGCCAGCCAGCCACCACAGGCAtgagtcaggccatattgctggacccacagaattatgagctaaataaatagtcttgggttaagccactaagttttaggcatagtgtgttatgtaTCTCACAAACATATAAGACTGTGTGTTTGTTGACTGGAGGAAGAGATGCTATAAAGACCACCTTTTGAAACTTCCCAAATACTGCCACTGATGTCCTGATGGAGGTATGAAAACATCCACTAAAATTTGTGGTTTATTCGTTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGG 
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-  </help>
-</tool>
-
--- a/tools/maf/maf_to_bed.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Read a maf and output intervals for specified list of species.
-"""
-import sys, os, tempfile
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.align import maf
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def __main__():
-        
-    input_filename = sys.argv[1]
-    output_filename = sys.argv[2]
-    #where to store files that become additional output
-    database_tmp_dir = sys.argv[5]
-    
-    species = sys.argv[3].split(',')
-    partial = sys.argv[4]
-    out_files = {}
-    primary_spec = None
-    
-    if "None" in species:
-        species = {}
-        try:
-            for i, m in enumerate( maf.Reader( open( input_filename, 'r' ) ) ):
-                for c in m.components:
-                    spec,chrom = maf.src_split( c.src )
-                    if not spec or not chrom:
-                        spec = chrom = c.src
-                    species[spec] = ""
-            species = species.keys()
-        except:
-            print >>sys.stderr, "Invalid MAF file specified"
-            return
-        
-    if "?" in species:
-        print >>sys.stderr, "Invalid dbkey specified"
-        return
-        
-    
-    for i in range( 0, len( species ) ):
-        spec = species[i]
-        if i == 0:
-            out_files[spec] = open( output_filename, 'w' )
-            primary_spec = spec
-        else:
-            out_files[spec] = tempfile.NamedTemporaryFile( mode = 'w', dir = database_tmp_dir, suffix = '.maf_to_bed' )
-            filename = out_files[spec].name
-            out_files[spec].close()
-            out_files[spec] = open( filename, 'w' )
-    num_species = len( species )
-    
-    print "Restricted to species:", ",".join( species )
-    
-    file_in = open( input_filename, 'r' )
-    maf_reader = maf.Reader( file_in )
-    
-    block_num = -1
-    
-    for i, m in enumerate( maf_reader ):
-        block_num += 1
-        if "None" not in species:
-            m = m.limit_to_species( species )
-        l = m.components
-        if len(l) < num_species and partial == "partial_disallowed": continue
-        for c in l:
-            spec,chrom = maf.src_split( c.src )
-            if not spec or not chrom:
-                    spec = chrom = c.src
-            if spec not in out_files.keys():
-                out_files[spec] = tempfile.NamedTemporaryFile( mode='w', dir = database_tmp_dir, suffix = '.maf_to_bed' )
-                filename = out_files[spec].name
-                out_files[spec].close()
-                out_files[spec] = open( filename, 'w' )
-            
-            if c.strand == "-":
-                out_files[spec].write( chrom + "\t" + str( c.src_size - c.end ) + "\t" + str( c.src_size - c.start ) + "\t" + spec + "_" + str( block_num ) + "\t" + "0\t" + c.strand + "\n" )
-            else:
-                out_files[spec].write( chrom + "\t" + str( c.start ) + "\t" + str( c.end ) + "\t" + spec + "_" + str( block_num ) + "\t" + "0\t" + c.strand + "\n" )
-            
-    file_in.close()
-    for file_out in out_files.keys():
-        out_files[file_out].close()
-
-    for spec in out_files.keys():
-        if spec != primary_spec:
-            print "#FILE\t" + spec + "\t" + os.path.join( database_tmp_dir, os.path.split( out_files[spec].name )[1] )
-        else:
-            print "#FILE1\t" + spec + "\t" + out_files[spec].name
-
-if __name__ == "__main__": __main__()
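
The one subtle step above is the strand handling: MAF component coordinates count from the 5' end of the component's own strand, while BED coordinates always count from the 5' end of the forward strand, so '-' components are flipped through src_size. The same arithmetic as a standalone function; maf_component_to_bed is a hypothetical name::

  def maf_component_to_bed(chrom, start, size, strand, src_size):
      # MAF start/size count from the 5' end of the given strand;
      # BED chromStart/chromEnd count from the 5' end of the '+' strand.
      end = start + size
      if strand == '-':
          return chrom, src_size - end, src_size - start, strand
      return chrom, start, end, strand

  # rheMac2.chr10 89144112 69 - 94855758, from the tool help below
  print(maf_component_to_bed("chr10", 89144112, 69, "-", 94855758))
  # ('chr10', 5711577, 5711646, '-')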
--- a/tools/maf/maf_to_bed.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,136 +0,0 @@
-<tool id="MAF_To_BED1" name="Maf to BED" force_history_refresh="True">
-  <description>Converts a MAF formatted file to the BED format</description>
-  <command interpreter="python">maf_to_bed.py $input1 $out_file1 $species $complete_blocks $__new_file_path__</command>
-  <inputs>
-    <param format="maf" name="input1" type="data" label="MAF file to convert"/>
-    <param name="species" type="select" label="Select species" display="checkboxes" multiple="true" help="a separate history item will be created for each checked species">
-      <options>
-        <filter type="data_meta" ref="input1" key="species" />
-      </options>
-    </param>
-    <param name="complete_blocks" type="select" label="Exclude blocks which have a requested species missing">
-      <option value="partial_allowed">include blocks with missing species</option>
-      <option value="partial_disallowed">exclude blocks with missing species</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="bed" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="4.maf"/>
-      <param name="species" value="hg17"/>
-      <param name="complete_blocks" value="partial_disallowed"/>
-      <output name="out_file1" file="cf_maf_to_bed.dat"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool converts every MAF block to an interval line (in BED format; scroll down for a description of the MAF and BED formats) describing the position of that alignment block within the corresponding genome. 
-
-The interface for this tool contains two pages (steps): 
-
- * **Step 1 of 2**. Choose multiple alignments from history to be converted to BED format.
- * **Step 2 of 2**. Choose species from the alignment to be included in the output and specify how to deal with alignment blocks that lack one or more species:
-
-   *  **Choose species** - the tool reads the alignment provided during Step 1 and generates a list of species contained within that alignment. Using checkboxes you can specify taxa to be included in the output (only the reference genome, shown in **bold**, is selected by default). If you select more than one species, then more than one history item will be created.
-   *  **Choose to include/exclude blocks with missing species** - if an alignment block does not contain any one of the species you selected within the **Choose species** menu and this option is set to **exclude blocks with missing species**, then the coordinates of such a block **will not** be included in the output (see **Example 2** below).  
-
-
------
-
-**Example 1**: **Include only reference genome** (hg18 in this case) and **include blocks with missing species**:
-
-For the following alignment::
-
-  ##maf version=1
-  a score=68686.000000
-  s hg18.chr20     56827368 75 +  62435964 GACAGGGTGCATCTGGGAGGG---CCTGCCGGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC- 
-  s panTro2.chr20  56528685 75 +  62293572 GACAGGGTGCATCTGAGAGGG---CCTGCCAGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC- 
-  s rheMac2.chr10  89144112 69 -  94855758 GACAGGGTGCATCTGAGAGGG---CCTGCTGGGCCTTTG-TTCAAAACTAGATATGCCCCAACTCCAATTCTA------- 
-  s mm8.chr2      173910832 61 + 181976762 AGAAGGATCCACCT------------TGCTGGGCCTCTGCTCCAGCAAGACCCACCTCCCAACTCAAATGCCC------- 
-  s canFam2.chr24  46551822 67 +  50763139 CG------GCGTCTGTAAGGGGCCACCGCCCGGCCTGTG-CTCAAAGCTACAAATGACTCAACTCCCAACCGA------C 
-
-  a score=10289.000000
-  s hg18.chr20    56827443 37 + 62435964 ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG 
-  s panTro2.chr20 56528760 37 + 62293572 ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG 
-  s rheMac2.chr10 89144181 37 - 94855758 ATGTGCGGAAAATGTGATACAGAAACCTGCAGAGCAG 
-
-the tool will create **a single** history item containing the following (**note** that field 4 is added to the output and is numbered iteratively: hg18_0, hg18_1 etc.)::
-
-  chr20    56827368    56827443   hg18_0   0   +
-  chr20    56827443    56827480   hg18_1   0   +
-
------
-
-**Example 2**: **Include hg18 and mm8** and **exclude blocks with missing species**:
-
-For the following alignment::
-
-  ##maf version=1
-  a score=68686.000000
-  s hg18.chr20     56827368 75 +  62435964 GACAGGGTGCATCTGGGAGGG---CCTGCCGGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC- 
-  s panTro2.chr20  56528685 75 +  62293572 GACAGGGTGCATCTGAGAGGG---CCTGCCAGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC- 
-  s rheMac2.chr10  89144112 69 -  94855758 GACAGGGTGCATCTGAGAGGG---CCTGCTGGGCCTTTG-TTCAAAACTAGATATGCCCCAACTCCAATTCTA------- 
-  s mm8.chr2      173910832 61 + 181976762 AGAAGGATCCACCT------------TGCTGGGCCTCTGCTCCAGCAAGACCCACCTCCCAACTCAAATGCCC------- 
-  s canFam2.chr24  46551822 67 +  50763139 CG------GCGTCTGTAAGGGGCCACCGCCCGGCCTGTG-CTCAAAGCTACAAATGACTCAACTCCCAACCGA------C 
-
-  a score=10289.000000
-  s hg18.chr20    56827443 37 + 62435964 ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG 
-  s panTro2.chr20 56528760 37 + 62293572 ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG 
-  s rheMac2.chr10 89144181 37 - 94855758 ATGTGCGGAAAATGTGATACAGAAACCTGCAGAGCAG 
-
-the tool will create **two** history items (one for hg18 and one for mm8) containing the following (**note** that both history items contain only one line describing the first alignment block. The second MAF block is not included in the output because it does not contain mm8):
-
-History item **1** (for hg18)::
-
-   chr20    56827368    56827443   hg18_0   0   +
-
-History item **2** (for mm8)::
-
-   chr2    173910832   173910893    mm8_0   0   +
-
--------
-
-.. class:: infomark
-
-**About formats**
-
-**MAF format** multiple alignment format file. This format stores multiple alignments at the DNA level between entire genomes. 
-
- - The .maf format is line-oriented. Each multiple alignment ends with a blank line.
- - Each sequence in an alignment is on a single line.
- - Lines starting with # are considered to be comments.
- - Each multiple alignment is in a separate paragraph that begins with an "a" line and contains an "s" line for each sequence in the multiple alignment.
- - Some MAF files may contain two optional line types: 
-
-   - An "i" line containing information about what is in the aligned species DNA before and after the immediately preceding "s" line; 
-   - An "e" line containing information about the size of the gap between the alignments that span the current block.
-
-**BED format** Browser Extensible Data format was designed at UCSC for displaying data tracks in the Genome Browser. It has three required fields and a number of additional optional ones:
-
-The first three BED fields (required) are::
-
-    1. chrom - The name of the chromosome (e.g. chr1, chrY_random).
-    2. chromStart - The starting position in the chromosome. (The first base in a chromosome is numbered 0.)
-    3. chromEnd - The ending position in the chromosome, plus 1 (i.e., a half-open interval).
-
-Additional (optional) fields are::
-
-    4. name - The name of the BED line.
-    5. score - A score between 0 and 1000.
-    6. strand - Defines the strand - either '+' or '-'.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-    </help>
-    <code file="maf_to_bed_code.py"/>
-</tool>
-
--- a/tools/maf/maf_to_bed_code.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.align import maf
-from galaxy import datatypes, config, jobs 
-from shutil import move
-
-def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
-    output_data = out_data.items()[0][1]
-    history = output_data.history
-    if history == None:
-        print "unknown history!"
-        return
-    new_stdout = ""
-    split_stdout = stdout.split("\n")
-    basic_name = output_data.name
-    output_data_list = []
-    for line in split_stdout:
-        if line.startswith("#FILE1"):
-            fields = line.split("\t")
-            dbkey = fields[1]
-            filepath = fields[2]
-            output_data.dbkey = dbkey
-            output_data.name = basic_name + " (" + dbkey + ")"
-            app.model.context.add( output_data )
-            app.model.context.flush()
-            output_data_list.append(output_data)
-        elif line.startswith("#FILE"):
-            fields = line.split("\t")
-            dbkey = fields[1]
-            filepath = fields[2]
-            newdata = app.model.HistoryDatasetAssociation( create_dataset = True, sa_session = app.model.context )
-            newdata.set_size()
-            newdata.extension = "bed"
-            newdata.name = basic_name + " (" + dbkey + ")"
-            app.model.context.add( newdata )
-            app.model.context.flush()
-            history.add_dataset( newdata )
-            app.security_agent.copy_dataset_permissions( output_data.dataset, newdata.dataset )
-            app.model.context.add( history )
-            app.model.context.flush()
-            try:
-                move(filepath,newdata.file_name)
-                newdata.info = newdata.name
-                newdata.state = newdata.states.OK
-            except:
-                newdata.info = "The requested file is missing from the system."
-                newdata.state = newdata.states.ERROR
-            newdata.dbkey = dbkey
-            newdata.init_meta()
-            newdata.set_meta()
-            newdata.set_peek()
-            app.model.context.flush()
-            output_data_list.append(newdata)
-        else:
-            new_stdout = new_stdout + line
-        for data in output_data_list:
-            if data.state == data.states.OK:
-                data.info = new_stdout
-                app.model.context.add( data )
-                app.model.context.flush()
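
maf_to_bed.py and this hook communicate through a small line protocol on stdout: a '#FILE1' line names the primary output dataset, and each '#FILE' line names an additional per-species file, with dbkey and path as tab-separated fields. A minimal, Galaxy-independent sketch of a parser for that protocol; parse_file_protocol is a hypothetical name::

  def parse_file_protocol(stdout_text):
      # Returns (primary, extras, remaining_stdout), where primary is a
      # (dbkey, path) pair and extras is a list of (dbkey, path) pairs.
      primary, extras, other_lines = None, [], []
      for line in stdout_text.splitlines():
          if line.startswith("#FILE1"):
              _, dbkey, path = line.split("\t")
              primary = (dbkey, path)
          elif line.startswith("#FILE"):
              _, dbkey, path = line.split("\t")
              extras.append((dbkey, path))
          else:
              other_lines.append(line)
      # Like the hook above, non-protocol lines are joined without separators.
      return primary, extras, "".join(other_lines)

  out = "Restricted to species: hg17,mm5\n#FILE1\thg17\t/tmp/out.bed\n#FILE\tmm5\t/tmp/x.maf_to_bed\n"
  print(parse_file_protocol(out))
  # (('hg17', '/tmp/out.bed'), [('mm5', '/tmp/x.maf_to_bed')], 'Restricted to species: hg17,mm5')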
--- a/tools/maf/maf_to_fasta.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,199 +0,0 @@
-<tool id="MAF_To_Fasta1" name="MAF to FASTA" version="1.0.1">
-  <description>Converts a MAF formatted file to FASTA format</description>
-  <command interpreter="python">
-    #if $fasta_target_type.fasta_type == "multiple" #maf_to_fasta_multiple_sets.py $input1 $out_file1 $fasta_target_type.species $fasta_target_type.complete_blocks
-    #else                                           #maf_to_fasta_concat.py $fasta_target_type.species $input1 $out_file1
-    #end if#
-  </command>
-  <inputs>
-    <param format="maf" name="input1" type="data" label="MAF file to convert"/>
-    <conditional name="fasta_target_type">
-      <param name="fasta_type" type="select" label="Type of FASTA Output">
-        <option value="multiple" selected="true">Multiple Blocks</option>
-        <option value="concatenated">One Sequence per Species</option>
-      </param>
-      <when value="multiple">
-        <param name="species" type="select" label="Select species" display="checkboxes" multiple="true" help="checked taxa will be included in the output">
-          <options>
-            <filter type="data_meta" ref="input1" key="species" />
-          </options>
-        </param>
-	    <param name="complete_blocks" type="select" label="Choose to">
-	      <option value="partial_allowed">include blocks with missing species</option>
-	      <option value="partial_disallowed">exclude blocks with missing species</option>
-	    </param>
-      </when>
-      <when value="concatenated">
-        <param name="species" type="select" label="Species to extract" display="checkboxes" multiple="true">
-          <options>
-            <filter type="data_meta" ref="input1" key="species" />
-          </options>
-        </param>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="3.maf" ftype="maf"/>
-      <param name="fasta_type" value="concatenated"/>
-      <param name="species" value="canFam1"/>
-      <output name="out_file1" file="cf_maf2fasta_concat.dat" ftype="fasta"/>
-    </test>
-    <test>
-      <param name="input1" value="4.maf" ftype="maf"/>
-      <param name="fasta_type" value="multiple"/>
-      <param name="species" value="hg17,panTro1,rheMac2,rn3,mm7,canFam2,bosTau2,dasNov1"/>
-      <param name="complete_blocks" value="partial_allowed"/>
-      <output name="out_file1" file="cf_maf2fasta_new.dat" ftype="fasta"/>
-    </test>
-  </tests>
-  <help>
-
-**Types of MAF to FASTA conversion**
-
- * **Multiple Blocks** converts a single MAF block to a single FASTA block. For example, if you have 6 MAF blocks, they will be converted to 6 FASTA blocks.
- * **One Sequence per Species** converts MAF blocks to a single aggregated FASTA block. For example, if you have 6 MAF blocks, they will be converted and concatenated into a single FASTA block.
-
--------
-
-**What it does**
-
-This tool converts MAF blocks to FASTA format and concatenates them into a single FASTA block or outputs multiple FASTA blocks separated by empty lines.
-
-The interface for this tool contains two pages (steps): 
-
- * **Step 1 of 2**. Choose multiple alignments from history to be converted to FASTA format.
- * **Step 2 of 2**. Choose the type of output as well as the species from the alignment to be included in the output.
- 
-   Multiple Block output has additional options:
-   
-   *  **Choose species** - the tool reads the alignment provided during Step 1 and generates a list of species contained within that alignment. Using checkboxes you can specify taxa to be included in the output (all species are selected by default). 
-   *  **Choose to include/exclude blocks with missing species** - if an alignment block does not contain any one of the species you selected within the **Choose species** menu and this option is set to **exclude blocks with missing species**, then such a block **will not** be included in the output (see **Example 2** below). For example, if you want to extract human, mouse, and rat from a series of alignments and one of the blocks does not contain mouse sequence, then this block will not be converted to FASTA and will not be returned.
-
-
------
-
-**Example 1**:
-
-In the concatenated approach, the following alignment::
-
-  ##maf version=1
-  a score=68686.000000
-  s hg18.chr20     56827368 75 +  62435964 GACAGGGTGCATCTGGGAGGG---CCTGCCGGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC- 
-  s panTro2.chr20  56528685 75 +  62293572 GACAGGGTGCATCTGAGAGGG---CCTGCCAGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC- 
-  s rheMac2.chr10  89144112 69 -  94855758 GACAGGGTGCATCTGAGAGGG---CCTGCTGGGCCTTTG-TTCAAAACTAGATATGCCCCAACTCCAATTCTA------- 
-  s mm8.chr2      173910832 61 + 181976762 AGAAGGATCCACCT------------TGCTGGGCCTCTGCTCCAGCAAGACCCACCTCCCAACTCAAATGCCC------- 
-  s canFam2.chr24  46551822 67 +  50763139 CG------GCGTCTGTAAGGGGCCACCGCCCGGCCTGTG-CTCAAAGCTACAAATGACTCAACTCCCAACCGA------C 
-
-  a score=10289.000000
-  s hg18.chr20    56827443 37 + 62435964 ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG 
-  s panTro2.chr20 56528760 37 + 62293572 ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG 
-  s rheMac2.chr10 89144181 37 - 94855758 ATGTGCGGAAAATGTGATACAGAAACCTGCAGAGCAG 
-
-will be converted to (**note** that because mm8 (mouse) and canFam2 (dog) are absent from the second block, they are replaced with gaps after concatenation)::
-
-  &gt;canFam2
-  CG------GCGTCTGTAAGGGGCCACCGCCCGGCCTGTG-CTCAAAGCTACAAATGACTCAACTCCCAACCGA------C-------------------------------------
-  &gt;hg18
-  GACAGGGTGCATCTGGGAGGG---CCTGCCGGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC-ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG
-  &gt;mm8
-  AGAAGGATCCACCT------------TGCTGGGCCTCTGCTCCAGCAAGACCCACCTCCCAACTCAAATGCCC--------------------------------------------
-  &gt;panTro2
-  GACAGGGTGCATCTGAGAGGG---CCTGCCAGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC-ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG
-  &gt;rheMac2
-  GACAGGGTGCATCTGAGAGGG---CCTGCTGGGCCTTTG-TTCAAAACTAGATATGCCCCAACTCCAATTCTA-------ATGTGCGGAAAATGTGATACAGAAACCTGCAGAGCAG
-
-------
-
-**Example 2a**: Multiple Block Approach **Include all species** and **include blocks with missing species**:
-
-The following alignment::
-
-  ##maf version=1
-  a score=68686.000000
-  s hg18.chr20     56827368 75 +  62435964 GACAGGGTGCATCTGGGAGGG---CCTGCCGGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC- 
-  s panTro2.chr20  56528685 75 +  62293572 GACAGGGTGCATCTGAGAGGG---CCTGCCAGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC- 
-  s rheMac2.chr10  89144112 69 -  94855758 GACAGGGTGCATCTGAGAGGG---CCTGCTGGGCCTTTG-TTCAAAACTAGATATGCCCCAACTCCAATTCTA------- 
-  s mm8.chr2      173910832 61 + 181976762 AGAAGGATCCACCT------------TGCTGGGCCTCTGCTCCAGCAAGACCCACCTCCCAACTCAAATGCCC------- 
-  s canFam2.chr24  46551822 67 +  50763139 CG------GCGTCTGTAAGGGGCCACCGCCCGGCCTGTG-CTCAAAGCTACAAATGACTCAACTCCCAACCGA------C 
-
-  a score=10289.000000
-  s hg18.chr20    56827443 37 + 62435964 ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG 
-  s panTro2.chr20 56528760 37 + 62293572 ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG 
-  s rheMac2.chr10 89144181 37 - 94855758 ATGTGCGGAAAATGTGATACAGAAACCTGCAGAGCAG 
-
-will be converted to::
-
-  &gt;hg18.chr20(+):56827368-56827443|hg18_0
-  GACAGGGTGCATCTGGGAGGG---CCTGCCGGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC-
-  &gt;panTro2.chr20(+):56528685-56528760|panTro2_0
-  GACAGGGTGCATCTGAGAGGG---CCTGCCAGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC-
-  &gt;rheMac2.chr10(-):89144112-89144181|rheMac2_0
-  GACAGGGTGCATCTGAGAGGG---CCTGCTGGGCCTTTG-TTCAAAACTAGATATGCCCCAACTCCAATTCTA-------
-  &gt;mm8.chr2(+):173910832-173910893|mm8_0
-  AGAAGGATCCACCT------------TGCTGGGCCTCTGCTCCAGCAAGACCCACCTCCCAACTCAAATGCCC-------
-  &gt;canFam2.chr24(+):46551822-46551889|canFam2_0
-  CG------GCGTCTGTAAGGGGCCACCGCCCGGCCTGTG-CTCAAAGCTACAAATGACTCAACTCCCAACCGA------C
-
-  &gt;hg18.chr20(+):56827443-56827480|hg18_1
-  ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG
-  &gt;panTro2.chr20(+):56528760-56528797|panTro2_1
-  ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG
-  &gt;rheMac2.chr10(-):89144181-89144218|rheMac2_1
-  ATGTGCGGAAAATGTGATACAGAAACCTGCAGAGCAG
-
------
-
-**Example 2b**: Multiple Block Approach **Include hg18 and mm8** and **exclude blocks with missing species**:
-
-The following alignment::
-
-  ##maf version=1
-  a score=68686.000000
-  s hg18.chr20     56827368 75 +  62435964 GACAGGGTGCATCTGGGAGGG---CCTGCCGGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC- 
-  s panTro2.chr20  56528685 75 +  62293572 GACAGGGTGCATCTGAGAGGG---CCTGCCAGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC- 
-  s rheMac2.chr10  89144112 69 -  94855758 GACAGGGTGCATCTGAGAGGG---CCTGCTGGGCCTTTG-TTCAAAACTAGATATGCCCCAACTCCAATTCTA------- 
-  s mm8.chr2      173910832 61 + 181976762 AGAAGGATCCACCT------------TGCTGGGCCTCTGCTCCAGCAAGACCCACCTCCCAACTCAAATGCCC------- 
-  s canFam2.chr24  46551822 67 +  50763139 CG------GCGTCTGTAAGGGGCCACCGCCCGGCCTGTG-CTCAAAGCTACAAATGACTCAACTCCCAACCGA------C 
-
-  a score=10289.000000
-  s hg18.chr20    56827443 37 + 62435964 ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG 
-  s panTro2.chr20 56528760 37 + 62293572 ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG 
-  s rheMac2.chr10 89144181 37 - 94855758 ATGTGCGGAAAATGTGATACAGAAACCTGCAGAGCAG 
-
-will be converted to (**note** that the second MAF block, which does not have mm8, is not included in the output)::
-
-  &gt;hg18.chr20(+):56827368-56827443|hg18_0
-  GACAGGGTGCATCTGGGAGGGCCTGCCGGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC
-  &gt;mm8.chr2(+):173910832-173910893|mm8_0
-  AGAAGGATCCACCT---------TGCTGGGCCTCTGCTCCAGCAAGACCCACCTCCCAACTCAAATGCCC------
-
-------
-
-.. class:: infomark
-
-**About formats**
-
- **MAF format** multiple alignment format file. This format stores multiple alignments at the DNA level between entire genomes. 
-
- - The .maf format is line-oriented. Each multiple alignment ends with a blank line.
- - Each sequence in an alignment is on a single line.
- - Lines starting with # are considered to be comments.
- - Each multiple alignment is in a separate paragraph that begins with an "a" line and contains an "s" line for each sequence in the multiple alignment.
- - Some MAF files may contain two optional line types: 
-
-   - An "i" line containing information about what is in the aligned species DNA before and after the immediately preceding "s" line; 
-   - An "e" line containing information about the size of the gap between the alignments that span the current block.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-  </help>
-</tool>
--- a/tools/maf/maf_to_fasta_concat.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Read a maf and output a single block fasta file, concatenating blocks
-
-usage: %prog species1,species2 maf_file out_file
-"""
-#Dan Blankenberg
-import sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.align import maf
-from galaxy.tools.util import maf_utilities
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def __main__():
-    try:
-        species = maf_utilities.parse_species_option( sys.argv[1] )
-    except Exception, e:
-        maf_utilities.tool_fail( "Error determining species value: %s" % e )
-    try:
-        input_filename = sys.argv[2]
-    except Exception, e:
-        maf_utilities.tool_fail( "Error reading MAF filename: %s" % e )
-    try:
-        file_out = open( sys.argv[3], 'w' )
-    except Exception, e:
-        maf_utilities.tool_fail( "Error opening file for output: %s" % e )
-    
-    if species:
-        print "Restricted to species: %s" % ', '.join( species )
-    else:
-        print "Not restricted to species."
-    
-    if not species:
-        try:
-            species = maf_utilities.get_species_in_maf( input_filename )
-        except Exception, e:
-            maf_utilities.tool_fail( "Error determining species in input MAF: %s" % e )
-    
-    for spec in species:
-        file_out.write( ">" + spec + "\n" )
-        try:
-            for start_block in maf.Reader( open( input_filename, 'r' ) ):
-                for block in maf_utilities.iter_blocks_split_by_species( start_block ):
-                    block.remove_all_gap_columns() #remove extra gaps
-                    component = block.get_component_by_src_start( spec ) #blocks only have one occurrence of a particular species, so this is safe
-                    if component:
-                        file_out.write( component.text )
-                    else:
-                        file_out.write( "-" * block.text_size )
-        except Exception, e:
-            maf_utilities.tool_fail( "Your MAF file appears to be malformed: %s" % e )
-        file_out.write( "\n" )
-    file_out.close()
-
-
-if __name__ == "__main__": __main__()
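
The concatenation above keeps every species row the same length by writing a run of '-' for each block a species is absent from. A condensed sketch of that padding, assuming each block is a dict from species to aligned text of uniform width; concat_by_species is a hypothetical name::

  def concat_by_species(blocks, species):
      # blocks: list of blocks, each a dict species -> aligned text,
      # where every text within one block has the same length.
      rows = dict((spec, []) for spec in species)
      for block in blocks:
          block_width = len(next(iter(block.values())))
          for spec in species:
              # Absent species get a run of gaps the width of the block.
              rows[spec].append(block.get(spec, '-' * block_width))
      return dict((spec, "".join(parts)) for spec, parts in rows.items())

  blocks = [{"hg18": "ACGT-", "mm8": "AC-TA"}, {"hg18": "GGTT"}]
  print(concat_by_species(blocks, ["hg18", "mm8"]))
  # hg18 -> 'ACGT-GGTT', mm8 -> 'AC-TA----'

This is the behavior illustrated by Example 1 in the maf_to_fasta.xml help above, where mm8 and canFam2 are padded with gaps for the second block.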
--- a/tools/maf/maf_to_fasta_multiple_sets.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Read a maf and output a multiple block fasta file.
-"""
-#Dan Blankenberg
-import sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.align import maf
-from galaxy.tools.util import maf_utilities
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def __main__():
-    try:
-        maf_reader = maf.Reader( open( sys.argv[1] ) )
-    except Exception, e:
-        maf_utilities.tool_fail( "Error opening input MAF: %s" % e )
-    try:
-        file_out = open( sys.argv[2], 'w' )
-    except Exception, e:
-        maf_utilities.tool_fail( "Error opening file for output: %s" % e )
-    try:
-        species = maf_utilities.parse_species_option( sys.argv[3] )
-        if species:
-            num_species = len( species )
-        else:
-            num_species = 0
-    except Exception, e:
-        maf_utilities.tool_fail( "Error determining species value: %s" % e )
-    try:
-        partial = sys.argv[4]
-    except Exception, e:
-        maf_utilities.tool_fail( "Error determining keep partial value: %s" % e )
-    
-    if species:
-        print "Restricted to species: %s" % ', '.join( species )
-    else:
-        print "Not restricted to species."
-    
-    for block_num, block in enumerate( maf_reader ):
-        if species:
-            block = block.limit_to_species( species )
-            if len( maf_utilities.get_species_in_block( block ) ) < num_species and partial == "partial_disallowed": continue
-        spec_counts = {}
-        for component in block.components:
-            spec, chrom = maf_utilities.src_split( component.src )
-            if spec not in spec_counts:
-                spec_counts[ spec ] = 0
-            else:
-                spec_counts[ spec ] += 1
-            file_out.write( "%s\n" % maf_utilities.get_fasta_header( component, { 'block_index' : block_num, 'species' : spec, 'sequence_index' : spec_counts[ spec ] }, suffix = "%s_%i_%i" % ( spec, block_num, spec_counts[ spec ] ) ) )
-            file_out.write( "%s\n" % component.text )
-        file_out.write( "\n" )
-    file_out.close()
-
-if __name__ == "__main__": __main__()
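-
-# A minimal usage sketch (file names are hypothetical). Argument order is MAF
-# in, FASTA out, species list, then the keep-partial flag tested above:
-#
-#   python maf_to_fasta_multiple_sets.py alignment.maf blocks.fasta hg18,mm8 partial_disallowed
-#
-# Each block emits one FASTA record per component, named with the
-# species_blocknumber_occurrence suffix built in the loop above.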
--- a/tools/maf/maf_to_interval.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Read a MAF and output intervals for a specified list of species.
-"""
-import sys, os
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.align import maf
-from galaxy.tools.util import maf_utilities
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def __main__():    
-    input_filename = sys.argv[1]
-    output_filename = sys.argv[2]
-    output_id = sys.argv[3]
-    #where to store files that become additional output
-    database_tmp_dir =  sys.argv[4]
-    primary_spec = sys.argv[5]
-    species = sys.argv[6].split( ',' )
-    all_species = sys.argv[7].split( ',' )
-    partial = sys.argv[8]
-    keep_gaps = sys.argv[9]
-    out_files = {}
-    
-    if "None" in species:
-        species = []
-    
-    if primary_spec not in species:
-        species.append( primary_spec )
-    if primary_spec not in all_species:
-        all_species.append( primary_spec )
-    
-    all_species.sort()
-    for spec in species:
-        if spec == primary_spec:
-            out_files[ spec ] = open( output_filename, 'wb+' )
-        else:
-            out_files[ spec ] = open( os.path.join( database_tmp_dir, 'primary_%s_%s_visible_interval_%s' % ( output_id, spec, spec ) ), 'wb+' )
-        out_files[ spec ].write( '#chrom\tstart\tend\tstrand\tscore\tname\t%s\n' % ( '\t'.join( all_species ) ) )
-    num_species = len( all_species )
-    
-    file_in = open( input_filename, 'r' )
-    maf_reader = maf.Reader( file_in )
-    
-    for i, m in enumerate( maf_reader ):
-        for j, block in enumerate( maf_utilities.iter_blocks_split_by_species( m ) ):
-            if len( block.components ) < num_species and partial == "partial_disallowed": continue
-            sequences = {}
-            for c in block.components:
-                spec, chrom = maf_utilities.src_split( c.src )
-                if keep_gaps == 'remove_gaps':
-                    sequences[ spec ] = c.text.replace( '-', '' )
-                else:
-                    sequences[ spec ] = c.text
-            sequences = '\t'.join( [ sequences.get( spec, '' ) for spec in all_species ] )
-            for spec in species:
-                c = block.get_component_by_src_start( spec )
-                if c is not None:
-                    spec2, chrom = maf_utilities.src_split( c.src )
-                    assert spec2 == spec, Exception( 'Species name inconsistency found in component: %s != %s' % ( spec, spec2 ) )
-                    out_files[ spec ].write( "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ( chrom, c.forward_strand_start, c.forward_strand_end, c.strand, m.score, "%s_%s_%s" % (spec, i, j), sequences ) )
-    file_in.close()
-    for file_out in out_files.values():
-        file_out.close()
-
-if __name__ == "__main__": __main__()
--- a/tools/maf/maf_to_interval.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,133 +0,0 @@
-<tool id="MAF_To_Interval1" name="MAF to Interval" force_history_refresh="True">
-  <description>Converts a MAF formatted file to the Interval format</description>
-  <command interpreter="python">maf_to_interval.py $input1 $out_file1 $out_file1.id $__new_file_path__ $input1.dbkey $species $input1.metadata.species $complete_blocks $remove_gaps</command>
-  <inputs>
-    <param format="maf" name="input1" type="data" label="MAF file to convert"/>
-    <param name="species" type="select" label="Select additional species" display="checkboxes" multiple="true" help="The species matching the dbkey of the alignment is always included. A separate history item will be created for each species.">
-      <options>
-        <filter type="data_meta" ref="input1" key="species" />
-        <filter type="remove_value" meta_ref="input1" key="dbkey" />
-      </options>
-    </param>
-    <param name="complete_blocks" type="select" label="Exclude blocks which have a species missing">
-      <option value="partial_allowed">include blocks with missing species</option>
-      <option value="partial_disallowed">exclude blocks with missing species</option>
-    </param>
-    <param name="remove_gaps" type="select" label="Remove Gap characters from sequences">
-      <option value="keep_gaps">keep gaps</option>
-      <option value="remove_gaps">remove gaps</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="interval" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="4.maf" dbkey="hg17"/>
-      <param name="complete_blocks" value="partial_disallowed"/>
-      <param name="remove_gaps" value="keep_gaps"/>
-      <param name="species" value="panTro1" />
-      <output name="out_file1" file="maf_to_interval_out_hg17.interval"/>
-      <output name="out_file1" file="maf_to_interval_out_panTro1.interval"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool converts every MAF block to a set of genomic intervals describing the position of that alignment block within a corresponding genome. Sequences from aligning species are also included in the output.
-
-The interface for this tool contains several options: 
-
- * **MAF file to convert**. Choose multiple alignments from history to be converted to Interval format.
- * **Choose species**. Choose additional species from the alignment to be included in the output.
- * **Exclude blocks which have a species missing**. If an alignment block is missing any of the species found in the alignment set and this option is set to **exclude blocks with missing species**, then the coordinates of that block **will not** be included in the output (see **Example 2** below).
- * **Remove Gap characters from sequences**. Gaps can be removed from sequences before they are output.
-
-
------
-
-**Example 1**: **Include only reference genome** (hg18 in this case) and **include blocks with missing species**:
-
-For the following alignment::
-
-  ##maf version=1
-  a score=68686.000000
-  s hg18.chr20     56827368 75 +  62435964 GACAGGGTGCATCTGGGAGGG---CCTGCCGGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC- 
-  s panTro2.chr20  56528685 75 +  62293572 GACAGGGTGCATCTGAGAGGG---CCTGCCAGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC- 
-  s rheMac2.chr10  89144112 69 -  94855758 GACAGGGTGCATCTGAGAGGG---CCTGCTGGGCCTTTG-TTCAAAACTAGATATGCCCCAACTCCAATTCTA------- 
-  s mm8.chr2      173910832 61 + 181976762 AGAAGGATCCACCT------------TGCTGGGCCTCTGCTCCAGCAAGACCCACCTCCCAACTCAAATGCCC------- 
-  s canFam2.chr24  46551822 67 +  50763139 CG------GCGTCTGTAAGGGGCCACCGCCCGGCCTGTG-CTCAAAGCTACAAATGACTCAACTCCCAACCGA------C 
-
-  a score=10289.000000
-  s hg18.chr20    56827443 37 + 62435964 ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG 
-  s panTro2.chr20 56528760 37 + 62293572 ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG 
-  s rheMac2.chr10 89144181 37 - 94855758 ATGTGCGGAAAATGTGATACAGAAACCTGCAGAGCAG 
-
-the tool will create **a single** history item containing the following (**note** that the name field is numbered iteratively: hg18_0_0, hg18_1_0, etc., where the first number is the block number and the second is the occurrence of that species within the block, so an interval is repeated if a species appears twice in a block; sequences for each species are included in the order specified in the header, and the field is left empty when no sequence is available for that species)::
-
-  #chrom	start	end	strand	score	name	canFam2	hg18	mm8	panTro2	rheMac2
-  chr20	56827368	56827443	+	68686.0	hg18_0_0	CG------GCGTCTGTAAGGGGCCACCGCCCGGCCTGTG-CTCAAAGCTACAAATGACTCAACTCCCAACCGA------C	GACAGGGTGCATCTGGGAGGG---CCTGCCGGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC-	AGAAGGATCCACCT------------TGCTGGGCCTCTGCTCCAGCAAGACCCACCTCCCAACTCAAATGCCC-------	GACAGGGTGCATCTGAGAGGG---CCTGCCAGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC-	GACAGGGTGCATCTGAGAGGG---CCTGCTGGGCCTTTG-TTCAAAACTAGATATGCCCCAACTCCAATTCTA-------
-  chr20	56827443	56827480	+	10289.0	hg18_1_0		ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG		ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG	ATGTGCGGAAAATGTGATACAGAAACCTGCAGAGCAG
-
-
------
-
-**Example 2**: **Include hg18 and mm8** and **exclude blocks with missing species**:
-
-For the following alignment::
-
-  ##maf version=1
-  a score=68686.000000
-  s hg18.chr20     56827368 75 +  62435964 GACAGGGTGCATCTGGGAGGG---CCTGCCGGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC- 
-  s panTro2.chr20  56528685 75 +  62293572 GACAGGGTGCATCTGAGAGGG---CCTGCCAGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC- 
-  s rheMac2.chr10  89144112 69 -  94855758 GACAGGGTGCATCTGAGAGGG---CCTGCTGGGCCTTTG-TTCAAAACTAGATATGCCCCAACTCCAATTCTA------- 
-  s mm8.chr2      173910832 61 + 181976762 AGAAGGATCCACCT------------TGCTGGGCCTCTGCTCCAGCAAGACCCACCTCCCAACTCAAATGCCC------- 
-  s canFam2.chr24  46551822 67 +  50763139 CG------GCGTCTGTAAGGGGCCACCGCCCGGCCTGTG-CTCAAAGCTACAAATGACTCAACTCCCAACCGA------C 
-
-  a score=10289.000000
-  s hg18.chr20    56827443 37 + 62435964 ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG 
-  s panTro2.chr20 56528760 37 + 62293572 ATGTGCAGAAAATGTGATACAGAAACCTGCAGAGCAG 
-  s rheMac2.chr10 89144181 37 - 94855758 ATGTGCGGAAAATGTGATACAGAAACCTGCAGAGCAG 
-
-the tool will create **two** history items (one for hg18 and one for mm8) containing the following (**note** that both history items contain only one line describing the first alignment block. The second MAF block is not included in the output because it does not contain mm8):
-
-History item **1** (for hg18)::
-
-   #chrom	start	end	strand	score	name	canFam2	hg18	mm8	panTro2	rheMac2
-   chr20	56827368	56827443	+	68686.0	hg18_0_0	CG------GCGTCTGTAAGGGGCCACCGCCCGGCCTGTG-CTCAAAGCTACAAATGACTCAACTCCCAACCGA------C	GACAGGGTGCATCTGGGAGGG---CCTGCCGGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC-	AGAAGGATCCACCT------------TGCTGGGCCTCTGCTCCAGCAAGACCCACCTCCCAACTCAAATGCCC-------	GACAGGGTGCATCTGAGAGGG---CCTGCCAGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC-	GACAGGGTGCATCTGAGAGGG---CCTGCTGGGCCTTTG-TTCAAAACTAGATATGCCCCAACTCCAATTCTA-------
-
-
-History item **2** (for mm8)::
-
-   #chrom	start	end	strand	score	name	canFam2	hg18	mm8	panTro2	rheMac2
-   chr2	173910832	173910893	+	68686.0	mm8_0_0	CG------GCGTCTGTAAGGGGCCACCGCCCGGCCTGTG-CTCAAAGCTACAAATGACTCAACTCCCAACCGA------C	GACAGGGTGCATCTGGGAGGG---CCTGCCGGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC-	AGAAGGATCCACCT------------TGCTGGGCCTCTGCTCCAGCAAGACCCACCTCCCAACTCAAATGCCC-------	GACAGGGTGCATCTGAGAGGG---CCTGCCAGGCCTTTA-TTCAACACTAGATACGCCCCATCTCCAATTCTAATGGAC-	GACAGGGTGCATCTGAGAGGG---CCTGCTGGGCCTTTG-TTCAAAACTAGATATGCCCCAACTCCAATTCTA-------
-
-
--------
-
-.. class:: infomark
-
-**About formats**
-
-**MAF format** multiple alignment format file. This format stores multiple alignments at the DNA level between entire genomes. 
-
- - The .maf format is line-oriented. Each multiple alignment ends with a blank line.
- - Each sequence in an alignment is on a single line.
- - Lines starting with # are considered to be comments.
- - Each multiple alignment is in a separate paragraph that begins with an "a" line and contains an "s" line for each sequence in the multiple alignment.
- - Some MAF files may contain two optional line types: 
-
-   - An "i" line containing information about what is in the aligned species DNA before and after the immediately preceding "s" line; 
-   - An "e" line containing information about the size of the gap between the alignments that span the current block.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_
-
-
-    </help>
-</tool>
-
--- a/tools/maf/vcf_to_maf_customtrack.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,151 +0,0 @@
-#Dan Blankenberg
-from optparse import OptionParser
-import sys
-import galaxy_utils.sequence.vcf
-
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-import bx.align.maf
-
-UNKNOWN_NUCLEOTIDE = '*'
-
-class PopulationVCFParser( object ):
-    def __init__( self, reader, name ):
-        self.reader = reader
-        self.name = name
-        self.counter = 0
-    def next( self ):
-        rval = []
-        vc = self.reader.next()
-        for i, allele in enumerate( vc.alt ):
-            rval.append( ( '%s_%i.%i' % ( self.name, i + 1, self.counter + 1 ), allele ) )
-        self.counter += 1
-        return ( vc, rval )
-    def __iter__( self ):
-        while True:
-            yield self.next()
-
-class SampleVCFParser( object ):
-    def __init__( self, reader ):
-        self.reader = reader
-        self.counter = 0
-    def next( self ):
-        rval = []
-        vc = self.reader.next()
-        alleles = [ vc.ref ] + vc.alt
-        
-        if 'GT' in vc.format:
-            gt_index = vc.format.index( 'GT' )
-            for sample_name, sample_value in zip( vc.sample_names, vc.sample_values ):
-                gt_indexes = []
-                for i in sample_value[ gt_index ].replace( '|', '/' ).replace( '\\', '/' ).split( '/' ): #Do we need to consider phase here?
-                    try:
-                        gt_indexes.append( int( i ) )
-                    except:
-                        gt_indexes.append( None )
-                for i, allele_i in enumerate( gt_indexes ):
-                    if allele_i is not None:
-                        rval.append( ( '%s_%i.%i' % ( sample_name, i + 1, self.counter + 1 ), alleles[ allele_i ] ) )
-        self.counter += 1
-        return ( vc, rval )
-    def __iter__( self ):
-        while True:
-            yield self.next()
-
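-# Both parsers yield (name, allele) pairs whose names encode their origin; a
-# sketch of the "%s_%i.%i" convention used above (names are hypothetical):
-#
-#   CHB+JPT_2.3   population "CHB+JPT", alternate allele 2, VCF record 3
-#   NA00001_1.4   sample "NA00001", first allele of the genotype, VCF record 4
-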
-def main():
-    usage = "usage: %prog [options] output_file dbkey inputfile pop_name"
-    parser = OptionParser( usage=usage )
-    parser.add_option( "-p", "--population", action="store_true", dest="population", default=False, help="Create MAF on a per population basis")
-    parser.add_option( "-s", "--sample", action="store_true", dest="sample", default=False, help="Create MAF on a per sample basis")
-    parser.add_option( "-n", "--name", dest="name", default='Unknown Custom Track', help="Name for Custom Track")
-    parser.add_option( "-g", "--galaxy", action="store_true", dest="galaxy", default=False, help="Tool is being executed by Galaxy (adds extra error messaging).")
-    
-
-    ( options, args ) = parser.parse_args()
-    
-    if len ( args ) < 3:
-        if options.galaxy:
-            print >>sys.stderr, "It appears that you forgot to specify an input VCF file; click 'Add new VCF...' to add at least one input.\n"
-        parser.error( "Need to specify an output file, a dbkey and at least one input file" )
-    
-    if not ( options.population ^ options.sample ):
-        parser.error( 'You must specify either a per population conversion or a per sample conversion, but not both' )
-    
-    out = open( args.pop(0), 'wb' )
-    out.write( 'track name="%s" visibility=pack\n' %  options.name.replace( "\"", "'" ) )
-    
-    maf_writer = bx.align.maf.Writer( out )
-    
-    dbkey = args.pop(0)
-    
-    vcf_files = []
-    if options.population:
-        i = 0
-        while args:
-            filename = args.pop( 0 )
-            pop_name = args.pop( 0 ).replace( ' ', '_' )
-            if not pop_name:
-                pop_name = 'population_%i' % ( i + 1 )
-            vcf_files.append( PopulationVCFParser( galaxy_utils.sequence.vcf.Reader( open( filename ) ), pop_name  ) )
-            i += 1
-    else:
-        while args:
-            filename = args.pop( 0 )
-            vcf_files.append( SampleVCFParser( galaxy_utils.sequence.vcf.Reader( open( filename ) ) ) )
-    
-    non_spec_skipped = 0
-    for vcf_file in vcf_files:
-        for vc, variants in vcf_file:
-            num_ins = 0
-            num_dels = 0
-            for variant_name, variant_text in variants:
-                if 'D' in variant_text:
-                    num_dels = max( num_dels, int( variant_text[1:] ) )
-                elif 'I' in variant_text:
-                    num_ins = max( num_ins, len( variant_text ) - 1 )
-            
-            alignment = bx.align.maf.Alignment()
-            ref_text = vc.ref + '-' * num_ins + UNKNOWN_NUCLEOTIDE * ( num_dels - len( vc.ref ) )
-            start_pos = vc.pos - 1
-            if num_dels and start_pos:
-                ref_text = UNKNOWN_NUCLEOTIDE + ref_text
-                start_pos -= 1
-            alignment.add_component( bx.align.maf.Component( src='%s.%s%s' % (
-                 dbkey, ("chr" if not vc.chrom.startswith("chr") else ""), vc.chrom ),
-                 start = start_pos, size = len( ref_text.replace( '-', '' ) ),
-                 strand = '+', src_size = start_pos + len( ref_text ),
-                 text = ref_text ) )
-            for variant_name, variant_text in variants:
-                #FIXME:
-                ## skip non-spec. compliant data, see: http://1000genomes.org/wiki/doku.php?id=1000_genomes:analysis:vcf3.3 for format spec
-                ## this check is due to data having indels not represented in the published format spec, 
-                ## e.g. 1000 genomes pilot 1 indel data: ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/release/2010_03/pilot1/indels/CEU.SRP000031.2010_03.indels.sites.vcf.gz
-                if variant_text and variant_text[0] in [ '-', '+' ]:
-                    non_spec_skipped += 1
-                    continue
-                
-                #do we need a left padding unknown nucleotide (do we have deletions)?
-                if num_dels and start_pos:
-                    var_text = UNKNOWN_NUCLEOTIDE
-                else:
-                    var_text = ''
-                if 'D' in variant_text:
-                    cur_num_del = int( variant_text[1:] )
-                    pre_del = min( len( vc.ref ), cur_num_del )
-                    post_del = cur_num_del - pre_del
-                    var_text = var_text + '-' * pre_del + '-' * num_ins + '-' * post_del
-                    var_text = var_text + UNKNOWN_NUCLEOTIDE * ( len( ref_text ) - len( var_text ) )
-                elif 'I' in variant_text:
-                    cur_num_ins = len( variant_text ) - 1
-                    var_text = var_text + vc.ref + variant_text[1:] + '-' * ( num_ins - cur_num_ins ) + UNKNOWN_NUCLEOTIDE * max( 0, ( num_dels - 1 ) )
-                else:
-                    var_text = var_text + variant_text + '-' * num_ins + UNKNOWN_NUCLEOTIDE * ( num_dels - len( vc.ref ) )    
-                alignment.add_component( bx.align.maf.Component( src=variant_name, start = 0, size = len( var_text.replace( '-', '' ) ), strand = '+', src_size = len( var_text.replace( '-', '' ) ), text = var_text ) )
-            maf_writer.write( alignment )
-
-    maf_writer.close()
-    
-    if non_spec_skipped:
-        print 'Skipped %i non-specification compliant indels.' % non_spec_skipped
-
-if __name__ == "__main__": main()
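-
-# Worked sketch of the indel padding above, using the microsat1 record from the
-# tool help (REF=G, ALT=D4,IGA at pos 1234567): num_dels=4 and num_ins=2, so
-# ref_text becomes "G--***", gains a left-padding "*" (a deletion with nonzero
-# start), and is written as "*G--***" with start 1234565, size 5, src_size
-# 1234572; the D4 variant renders as "*------" (size 1) and the IGA variant as
-# "*GGA***" (size 7), matching the example MAF block in the help text.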
--- a/tools/maf/vcf_to_maf_customtrack.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,127 +0,0 @@
-<tool id="vcf_to_maf_customtrack1" name="VCF to MAF Custom Track">
-  <description>for display at UCSC</description>
-  <command interpreter="python">vcf_to_maf_customtrack.py '$out_file1'
-    #if $vcf_source_type.vcf_file
-    '${vcf_source_type.vcf_file[0].vcf_input.dbkey}'
-    #else
-    '?'
-    #end if
-    ${vcf_source_type.vcf_source} -n '$track_name'
-    #for $vcf_repeat in $vcf_source_type.vcf_file
-    '${vcf_repeat.vcf_input}'
-    #if $vcf_source_type.vcf_source == '-p'
-      '${vcf_repeat.population_name}'
-    #end if
-    #end for
-    -g
-  </command>
-  <inputs>
-    <param name="track_name" type="text" label="Custom Track Name" value="Galaxy Custom Track" size="30" />
-    <conditional name="vcf_source_type">
-      <param name="vcf_source" type="select" label="VCF Source Type">
-        <option value="-p" selected="true">Per Population (file)</option>
-        <option value="-s">Per Sample</option>
-      </param>
-      <when value="-p">
-        <repeat name="vcf_file" title="VCF population file" min="1">
-          <param format="tabular" name="vcf_input" type="data" label="VCF file"/>
-          <param name="population_name" type="text" label="Name for this population" value=""/>
-        </repeat>
-      </when>
-      <when value="-s">
-        <repeat name="vcf_file" title="VCF sample file" min="1">
-          <param format="tabular" name="vcf_input" type="data" label="VCF file"/>
-          <!-- add column count validator >= 8? -->
-        </repeat>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="mafcustomtrack" name="out_file1" />
-  </outputs>
-<!--  <tests>
-    <test>
-      <param name="track_name" value="Galaxy Custom Track"/>
-      <param name="vcf_source" value="Per Population"/>
-      <param name="vcf_input" value="vcf_to_maf_in.vcf" ftype="tabular"/>
-      <param name="population_name" value=""/>
-      <output name="out_file1" file="vcf_to_maf_population_out.mafcustomtrack"/>
-    </test>
-    <test>
-      <param name="track_name" value="Galaxy Custom Track"/>
-      <param name="vcf_source" value="Per Sample"/>
-      <param name="vcf_input" value="vcf_to_maf_in.vcf" ftype="tabular"/>
-      <output name="out_file1" file="vcf_to_maf_sample_out.mafcustomtrack"/>
-    </test>
-  </tests> -->
-  <help>
-**What it does**
-
-This tool converts a Variant Call Format (VCF) file into a Multiple Alignment Format (MAF) custom track file suitable for display at genome browsers. 
-
-This file should be used for display purposes only (e.g. as a UCSC Custom Track). Performing an analysis on the output of this tool is not recommended; use the source VCF file for analysis instead.
-
-*Unknown nucleotides* are represented as '*', as required for the display to draw properly; these include, for example, reference bases that appear before a deletion and are not available without querying the original reference sequence.
-
-**Example**
-
-Starting with a VCF::
-
-  ##fileformat=VCFv3.3
-  ##fileDate=20090805
-  ##source=myImputationProgramV3.1
-  ##reference=1000GenomesPilot-NCBI36
-  ##phasing=partial
-  ##INFO=NS,1,Integer,"Number of Samples With Data"
-  ##INFO=DP,1,Integer,"Total Depth"
-  ##INFO=AF,-1,Float,"Allele Frequency"
-  ##INFO=AA,1,String,"Ancestral Allele"
-  ##INFO=DB,0,Flag,"dbSNP membership, build 129"
-  ##INFO=H2,0,Flag,"HapMap2 membership"
-  ##FILTER=q10,"Quality below 10"
-  ##FILTER=s50,"Less than 50% of samples have data"
-  ##FORMAT=GT,1,String,"Genotype"
-  ##FORMAT=GQ,1,Integer,"Genotype Quality"
-  ##FORMAT=DP,1,Integer,"Read Depth"
-  ##FORMAT=HQ,2,Integer,"Haplotype Quality"
-  #CHROM  POS ID  REF ALT QUAL    FILTER  INFO    FORMAT  NA00001 NA00002 NA00003
-  20  14370   rs6054257   G   A   29  0   NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51  1|0:48:8:51,51  1/1:43:5:-1,-1
-  20  17330   .   T   A   3   q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50  0|1:3:5:65,3    0/0:41:3:-1,-1
-  20  1110696 rs6040355   A   G,T 67  0   NS=2;DP=10;AF=0.333,0.667;AA=T;DB   GT:GQ:DP:HQ 1|2:21:6:23,27  2|1:2:0:18,2    2/2:35:4:-1,-1
-  20  1230237 .   T   .   47  0   NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60  0|0:48:4:51,51  0/0:61:2:-1,-1
-  20  1234567 microsat1   G   D4,IGA  50  0   NS=3;DP=9;AA=G  GT:GQ:DP    0/1:35:4    0/2:17:2    1/1:40:3
-  
-
-
-
-Under the following conditions (**VCF Source type:** *Per Population (file)*; **Name for this population:** *CHB+JPT*),
-the result is the following MAF custom track::
-
-  track name="Galaxy Custom Track" visibility=pack
-  ##maf version=1
-  a score=0
-  s hg18.chr20  14369 1 + 14370 G 
-  s CHB+JPT_1.1     0 1 +     1 A 
-  
-  a score=0
-  s hg18.chr20  17329 1 + 17330 T 
-  s CHB+JPT_1.2     0 1 +     1 A 
-  
-  a score=0
-  s hg18.chr20  1110695 1 + 1110696 A 
-  s CHB+JPT_1.3       0 1 +       1 G 
-  s CHB+JPT_2.3       0 1 +       1 T 
-  
-  a score=0
-  s hg18.chr20  1230236 1 + 1230237 T 
-  s CHB+JPT_1.4       0 1 +       1 . 
-  
-  a score=0
-  s hg18.chr20  1234565 5 + 1234572 *G--*** 
-  s CHB+JPT_1.5       0 1 +       1 *------ 
-  s CHB+JPT_2.5       0 7 +       7 *GGA*** 
-  
-
-    </help>
-</tool>
-
Binary file tools/meme/._meme.xml has changed
--- a/tools/meme/fimo.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,230 +0,0 @@
-<tool id="meme_fimo" name="FIMO" version="0.0.1">
-  <description>- Find Individual Motif Occurrences</description>
-  <command interpreter="python">fimo_wrapper.py 'fimo --o "${html_outfile.files_path}" --verbosity "1"
-  
-  #if str( $options_type.options_type_selector ) == 'advanced':
-  --max-seq-length "${options_type.max_seq_length}" 
-  --max-stored-scores "${options_type.max_stored_scores}" 
-  --motif-pseudo "${options_type.motif_pseudo}" 
-  ${options_type.norc} 
-  --output-pthresh "${options_type.output_pthresh}" 
-
-  
-  #for $motif in $options_type.motifs:
-    --motif "${motif.motif}"
-  #end for
-  
-  #if str( $options_type.bgfile_type.bgfile_type_selector ) == 'motif-file':
-    --bgfile "motif-file"
-  #elif str( $options_type.bgfile_type.bgfile_type_selector ) == 'bgfile':
-    --bgfile "${options_type.bgfile_type.bgfile}"
-  #end if
-  
-  #if str( $options_type.qvalue_type.qvalue_type_selector ) == 'no-qvalue':
-    --no-qvalue
-  #else:
-    --output-qthresh "${options_type.qvalue_type.output_qthresh}"
-  #end if
-  #end if
-  
-  "${input_motifs}" 
-  
-  #if str( $fasta_type.fasta_type_selector ) == 'history':
-    "${fasta_type.input_database}"
-  #else:
-    "${ filter( lambda x: str( x[0] ) == str( $fasta_type.input_database ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][3] }"
-  #end if
-
-  '
-  
-  '${html_outfile.files_path}'
-  
-  '${html_outfile}'
-  
-  '${interval_outfile}'
-  
-  '${txt_outfile}'
-  
-  '${xml_outfile}'
-  
-  '${gff_outfile}'
-    
-  </command>
-  <inputs>
-    <param format="memexml" name="input_motifs" type="data" label="'MEME output' formatted file"/>
-    
-    <conditional name="fasta_type">
-      <param name="fasta_type_selector" type="select" label="Source for sequence to search">
-        <option value="cached">Locally Cached sequences</option>
-        <option value="history" selected="true">Sequences from your history</option>
-      </param>
-      <when value="cached">
-        <param name="input_database" type="select" label="Genome to search">
-          <options from_data_table="all_fasta">
-          </options>
-        </param>
-      </when>
-      <when value="history">
-         <param format="fasta" name="input_database" type="data" label="Sequences"/>
-      </when>
-    </conditional>
-    
-      <conditional name="options_type">
-        <param name="options_type_selector" type="select" label="Options Configuration">
-          <option value="basic" selected="true">Basic</option>
-          <option value="advanced">Advanced</option>
-        </param>
-        <when value="basic">
-          <!-- do nothing here -->
-        </when>
-        <when value="advanced">
-    
-    <conditional name="bgfile_type">
-      <param name="bgfile_type_selector" type="select" label="Background file type">
-        <option value="motif-file">Use Frequencies from Motif File</option>
-        <option value="default" selected="true">Use frequencies from non-redundant database (default)</option>
-        <option value="bgfile">Use Frequencies from Background File</option>
-      </param>
-      <when value="motif-file">
-      <!-- do nothing here -->
-      </when>
-      <when value="default">
-      <!-- do nothing here -->
-      </when>
-      <when value="bgfile">
-        <param name="bgfile" type="data" format="txt" optional="True" label="Background Model" />
-      </when>
-    </conditional>
-    
-    <repeat name="motifs" title="Limit to specified motif">
-      <param name="motif" type="text" value="" label="Specify motif by id" />
-    </repeat>
-    
-    <param name="max_seq_length" type="integer" value="250000000" label="Maximum input sequence length" />
-    <param name="max_stored_scores" type="integer" value="100000" label="Maximum score count to store" />
-    <param name="motif_pseudo" type="float" value="0.1" label="Pseudocount to add to counts in motif matrix" />
-    <param name="norc" label="Do not check reverse complement" type="boolean" truevalue="--norc" falsevalue="" checked="False"/>
-    <param name="output_pthresh" type="float" value="1e-4" label="p-value threshold" />
-    
-    <conditional name="qvalue_type">
-      <param name="qvalue_type_selector" type="select" label="q-value options">
-        <option value="no-qvalue">Do not compute q-value</option>
-        <option value="q-value" selected="true">Compute q-value</option>
-      </param>
-      <when value="no-qvalue">
-      <!-- do nothing here -->
-      </when>
-      <when value="q-value">
-        <param name="output_qthresh" type="float" value="1.0" label="q-value threshold" />
-      </when>
-    </conditional>
-    
-      </when>
-    </conditional>
-    
-    <param name="non_commercial_use" label="I certify that I am not using this tool for commercial purposes." type="boolean" truevalue="NON_COMMERCIAL_USE" falsevalue="COMMERCIAL_USE" checked="False">
-      <validator type="expression" message="This tool is only available for non-commercial use.">value == True</validator>
-    </param>
-  
-  </inputs>
-  <outputs>
-    <data format="html" name="html_outfile" label="${tool.name} on ${on_string} (html)">
-      <actions>
-        <conditional name="fasta_type.fasta_type_selector">
-          <when value="cached">
-            <action type="metadata" name="dbkey">
-              <option type="from_data_table" name="all_fasta" column="1" offset="0">
-                <filter type="param_value" column="0" value="seq" keep="True"/>
-                <filter type="param_value" ref="fasta_type.input_database" column="1"/>
-              </option>
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-    <data format="tabular" name="txt_outfile" label="${tool.name} on ${on_string} (text)">
-      <actions>
-        <conditional name="fasta_type.fasta_type_selector">
-          <when value="cached">
-            <action type="metadata" name="dbkey">
-              <option type="from_data_table" name="all_fasta" column="1" offset="0">
-                <filter type="param_value" ref="fasta_type.input_database" column="0"/>
-              </option>
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-    <data format="tabular" name="gff_outfile" label="${tool.name} on ${on_string} (almost-gff)">
-      <actions>
-        <conditional name="fasta_type.fasta_type_selector">
-          <when value="cached">
-            <action type="metadata" name="dbkey">
-              <option type="from_data_table" name="all_fasta" column="1" offset="0">
-                <filter type="param_value" ref="fasta_type.input_database" column="0"/>
-              </option>
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-    <data format="cisml" name="xml_outfile" label="${tool.name} on ${on_string} (xml)">
-      <actions>
-        <conditional name="fasta_type.fasta_type_selector">
-          <when value="cached">
-            <action type="metadata" name="dbkey">
-              <option type="from_data_table" name="all_fasta" column="1" offset="0">
-                <filter type="param_value" ref="fasta_type.input_database" column="0"/>
-              </option>
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-    <data format="interval" name="interval_outfile" label="${tool.name} on ${on_string} (interval)">
-      <actions>
-        <conditional name="fasta_type.fasta_type_selector">
-          <when value="cached">
-            <action type="metadata" name="dbkey">
-              <option type="from_data_table" name="all_fasta" column="1" offset="0">
-                <filter type="param_value" ref="fasta_type.input_database" column="0"/>
-              </option>
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input_motifs" value="meme/meme/meme_output_xml_1.xml" ftype="memexml"/>
-      <param name="fasta_type_selector" value="history"/>
-      <param name="input_database" value="phiX.fasta" ftype="fasta"/>
-      <param name="options_type_selector" value="basic"/>
-      <param name="non_commercial_use" value="True"/>
-      <output name="html_outfile" file="meme/fimo/fimo_output_html_1.html" lines_diff="12"/>
-      <output name="txt_outfile" file="meme/fimo/fimo_output_txt_1.txt" lines_diff="0"/>
-      <output name="gff_outfile" file="meme/fimo/fimo_output_almost-gff_1.txt" lines_diff="0"/>
-      <output name="xml_outfile" file="meme/fimo/fimo_output_xml_1.xml" lines_diff="8"/>
-      <output name="interval_outfile" file="meme/fimo/fimo_output_interval_1.txt" lines_diff="0"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-**WARNING: This tool is only available for non-commercial use. Use for educational, research and non-profit purposes is permitted. Before using, be sure to review, agree, and comply with the license.**
-
-.. class:: infomark
-
-**To cite FIMO:**
-`Grant CE, Bailey TL, Noble WS. FIMO: scanning for occurrences of a given motif. Bioinformatics. 2011 Apr 1;27(7):1017-8. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21330290&gt;`_
-
-
-For detailed information on FIMO, click here_. To view the license, click license_.
-
-.. _here: http://meme.nbcr.net/meme/fimo-intro.html
-.. _license: http://meme.nbcr.net/meme/COPYRIGHT.html
-
-  </help>
-</tool>
--- a/tools/meme/fimo_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-
-"""
-Read text output from FIMO and create an interval file.
-"""
-import sys, tempfile, subprocess, shutil, os
-from galaxy_utils.sequence.transform import DNA_reverse_complement
-
-buffsize = 1048576
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def main():
-    assert len( sys.argv ) == 8, "Wrong number of arguments"
-    sys.argv.pop(0)
-    fimo_cmd = sys.argv.pop(0)
-    html_path = sys.argv.pop(0)
-    html_out = sys.argv.pop(0)
-    interval_out = sys.argv.pop(0)
-    txt_out = sys.argv.pop(0)
-    xml_out = sys.argv.pop(0)
-    gff_out = sys.argv.pop(0)
-    
-    #run fimo
-    try:
-        tmp_stderr = tempfile.NamedTemporaryFile()
-        #tmp_stderr = open( tmp_filename, 'wb' )
-        proc = subprocess.Popen( args=fimo_cmd, shell=True, stderr=tmp_stderr )
-        returncode = proc.wait()
-        #tmp_stderr.close()
-        # get stderr, allowing for case where it's very large
-        #tmp_stderr = open( tmp, 'rb' )
-        tmp_stderr.seek(0)
-        stderr = ''
-        try:
-            while True:
-                stderr += tmp_stderr.read( buffsize )
-                if not stderr or len( stderr ) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-        
-        if returncode != 0:
-            raise Exception, stderr
-    except Exception, e:
-        raise Exception, 'Error running FIMO:\n' + str( e )
-
-    shutil.move( os.path.join( html_path, 'fimo.txt' ), txt_out )
-    shutil.move( os.path.join( html_path, 'fimo.gff' ), gff_out )
-    shutil.move( os.path.join( html_path, 'fimo.xml' ), xml_out )
-    shutil.move( os.path.join( html_path, 'fimo.html' ), html_out )
-    
-    out_file = open( interval_out, 'wb' )
-    out_file.write( "#%s\n" % "\t".join( ( "chr", "start", "end", "pattern name", "score", "strand", "matched sequence", "p-value", "q-value" ) ) )
-    for line in open( txt_out ):
-        if line.startswith( '#' ): continue
-        fields = line.rstrip( "\n\r" ).split( "\t" )
-        start, end = int( fields[2] ), int( fields[3] )
-        sequence = fields[7]
-        if start > end:
-            start, end = end, start #flip start and end, and set strand
-            strand = "-"
-            sequence = DNA_reverse_complement( sequence ) #we want sequences relative to strand; FIMO always provides + stranded sequence
-        else:
-            strand = "+"
-        start -= 1 #make 0-based start position
-        out_file.write( "%s\n" % "\t".join( [ fields[1], str( start ), str( end ), fields[0], fields[4], strand, sequence, fields[5], fields[6] ] ) )
-    out_file.close()
-
-if __name__ == "__main__": main()
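-
-# Worked sketch of the conversion above (values are hypothetical). A
-# minus-strand FIMO text line such as
-#
-#   m1  chr1  15  10  12.3  4.1e-05  0.02  ACGTAC
-#
-# has start > stop, so the coordinates are swapped, the strand becomes "-",
-# the matched sequence is reverse-complemented, and the start is shifted to
-# 0-based, giving the interval row:
-#
-#   chr1  9  15  m1  12.3  -  GTACGT  4.1e-05  0.02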
--- a/tools/meme/meme.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,349 +0,0 @@
-<tool id="meme_meme" name="MEME" version="1.0.0">
-  <requirements><requirement type='package'>meme</requirement></requirements>
-  <description>- Multiple Em for Motif Elicitation</description>
-  <command>meme "$input1" -o "${html_outfile.files_path}" 
-  -nostatus
-  
-  ##-p 8 ##number of processors
-  
-  #if str( $options_type.options_type_selector ) == 'advanced':
-  -sf "${ str( $options_type.sf ).replace( ' ', '_' ) }"
-  -${options_type.alphabet_type.alphabet_type_selector} 
-  -mod "${options_type.mod_type.mod_type_selector}" 
-  -nmotifs "${options_type.nmotifs}" 
-  -wnsites "${options_type.wnsites}"
-  -maxsize "${options_type.maxsize}"
-  
-  #if $options_type.evt &lt; float('inf'):
-    -evt "${options_type.evt}" 
-  #end if
-  
-  #if str( $options_type.mod_type.mod_type_selector ) != 'oops':
-    #if str( $options_type.mod_type.motif_occurrence_type.motif_occurrence_type_selector ) == 'nsites':
-      -nsites "${options_type.mod_type.motif_occurrence_type.nsites}"
-    #elif str( $options_type.mod_type.motif_occurrence_type.motif_occurrence_type_selector ) == 'min_max_sites':
-      -minsites "${options_type.mod_type.motif_occurrence_type.minsites}" -maxsites "${options_type.mod_type.motif_occurrence_type.maxsites}"
-    #end if
-  #end if
-  
-  #if str( $options_type.motif_width_type.motif_width_type_selector ) == 'exact':
-    -w "${options_type.motif_width_type.width}"
-  #else
-    -minw "${options_type.motif_width_type.minw}" -maxw "${options_type.motif_width_type.maxw}"
-  #end if
-  
-  #if str( $options_type.motif_trim_type.motif_trim_type_selector ) == 'nomatrim':
-    -nomatrim
-  #else
-    -wg "${options_type.motif_trim_type.wg}" -ws "${options_type.motif_trim_type.ws}" ${options_type.motif_trim_type.noendgaps}
-  #end if
-  
-  #if str( $options_type.bfile ) != 'None':
-    -bfile "${options_type.bfile}"
-  #end if
-  
-  #if str( $options_type.pspfile ) != 'None':
-    -psp "${options_type.pspfile}"
-  #end if
-  
-  #if str( $options_type.alphabet_type.alphabet_type_selector ) == "dna":
-    ${options_type.alphabet_type.revcomp} ${options_type.alphabet_type.pal}
-  #end if
-  
-  -maxiter "${options_type.maxiter}" -distance "${options_type.distance}"
-  
-  -prior "${options_type.alphabet_type.prior_type.prior_type_selector}"
-  #if str( $options_type.alphabet_type.prior_type.prior_type_selector ) != 'addone':
-    -b "${options_type.alphabet_type.prior_type.prior_b}" 
-    #if str( $options_type.alphabet_type.prior_type.plib ) != 'None':
-      -plib "${options_type.alphabet_type.prior_type.plib}"
-    #end if
-  #end if
-  
-  #if str( $options_type.alphabet_type.spmap_type.spmap_type_selector ) == 'cons':
-    -cons "${options_type.alphabet_type.spmap_type.cons}" 
-  #else
-    -spmap "${options_type.alphabet_type.spmap_type.spmap_type_selector}"
-    -spfuzz "${options_type.alphabet_type.spmap_type.spfuzz}" 
-  #end if
-  
-  #if str( $options_type.branching_type.branching_type_selector ) == 'x_branch':
-    -x_branch -bfactor "${options_type.branching_type.bfactor}" -heapsize "${options_type.branching_type.heapsize}"
-  #end if
-  
-  ##-maxsize "1000000" ##remove hardcoded maxsize? should increase number of processors instead
-  
-  #end if
-  
-  2&gt;&amp;1 || echo "Error running MEME."
-  
-  
-  &amp;&amp; mv ${html_outfile.files_path}/meme.html ${html_outfile}
-  
-  &amp;&amp; mv ${html_outfile.files_path}/meme.txt ${txt_outfile}
-  
-  &amp;&amp; mv ${html_outfile.files_path}/meme.xml ${xml_outfile}
-  
-  </command>
-  <inputs>
-    <param format="fasta" name="input1" type="data" label="Sequences"/>
-      
-      <conditional name="options_type">
-        <param name="options_type_selector" type="select" label="Options Configuration">
-          <option value="basic" selected="true">Basic</option>
-          <option value="advanced">Advanced</option>
-        </param>
-        <when value="basic">
-          <!-- do nothing here -->
-        </when>
-        <when value="advanced">
-      
-      <param name="sf" type="text" value="Galaxy FASTA Input" label="Name of sequence set" />
-      
-      <conditional name="alphabet_type">
-        <param name="alphabet_type_selector" type="select" label="Sequence Alphabet">
-          <option value="protein">Protein</option>
-          <option value="dna" selected="true">DNA</option>
-        </param>
-        <when value="protein">
-          <conditional name="prior_type">
-            <param name="prior_type_selector" type="select" label="Choice of prior">
-              <option value="dirichlet">simple Dirichlet prior</option>
-              <option value="dmix" selected="true">mixture of Dirichlets prior</option>
-              <option value="mega">extremely low variance dmix</option>
-              <option value="megap">mega for all but last iteration of EM; dmix on last iteration</option>
-              <option value="addone">add +1 to each observed count</option>
-            </param>
-            <when value="dirichlet">
-              <param name="prior_b" type="float" value="0.01" label="strength of prior on model parameters" />
-              <param name="plib" type="data" format="txt" optional="True" label="Dirichlet prior file" />
-            </when>
-            <when value="dmix">
-              <param name="prior_b" type="float" value="0" label="strength of prior on model parameters" />
-              <param name="plib" type="data" format="txt" optional="True" label="Dirichlet prior file" />
-            </when>
-            <when value="mega">
-              <param name="prior_b" type="float" value="0" label="strength of prior on model parameters" />
-              <param name="plib" type="data" format="txt" optional="True" label="Dirichlet prior file" />
-            </when>
-            <when value="megap">
-              <param name="prior_b" type="float" value="0" label="strength of prior on model parameters" />
-              <param name="plib" type="data" format="txt" optional="True" label="Dirichlet prior file" />
-            </when>
-            <when value="addone">
-              <!-- no values here? -->
-            </when>
-          </conditional>
-          <conditional name="spmap_type">
-            <param name="spmap_type_selector" type="select" label="EM starting points">
-              <option value="uni">uni</option>
-              <option value="pam" selected="true">pam</option>
-              <option value="cons">Use starting point from string</option>
-            </param>
-            <when value="uni">
-              <param name="spfuzz" type="float" value="0.5" label="Fuzziness of the mapping" />
-            </when>
-            <when value="pam">
-              <param name="spfuzz" type="integer" value="120" label="Fuzziness of the mapping" />
-            </when>
-            <when value="cons">
-              <param name="cons" type="text" value="" label="Starting point from string" />
-            </when>
-          </conditional>
-        </when>
-        <when value="dna">
-          <param name="revcomp" label="Check reverse complement" type="boolean" truevalue="-revcomp" falsevalue="" checked="False"/>
-          <param name="pal" label="Check for palindromes" type="boolean" truevalue="-pal" falsevalue="" checked="False"/>
-          <conditional name="prior_type">
-            <param name="prior_type_selector" type="select" label="Choice of prior">
-              <option value="dirichlet" selected="true">simple Dirichlet prior</option>
-              <option value="dmix">mixture of Dirichlets prior</option>
-              <option value="mega">extremely low variance dmix</option>
-              <option value="megap">mega for all but last iteration of EM; dmix on last iteration</option>
-              <option value="addone">add +1 to each observed count</option>
-            </param>
-            <when value="dirichlet">
-              <param name="prior_b" type="float" value="0.01" label="strength of prior on model parameters" />
-              <param name="plib" type="data" format="txt" optional="True" label="Dirichlet prior file" />
-            </when>
-            <when value="dmix">
-              <param name="prior_b" type="float" value="0" label="strength of prior on model parameters" />
-              <param name="plib" type="data" format="txt" optional="True" label="Dirichlet prior file" />
-            </when>
-            <when value="mega">
-              <param name="prior_b" type="float" value="0" label="strength of prior on model parameters" />
-              <param name="plib" type="data" format="txt" optional="True" label="Dirichlet prior file" />
-            </when>
-            <when value="megap">
-              <param name="prior_b" type="float" value="0" label="strength of prior on model parameters" />
-              <param name="plib" type="data" format="txt" optional="True" label="Dirichlet prior file" />
-            </when>
-            <when value="addone">
-              <!-- no values here? -->
-            </when>
-          </conditional>
-          <conditional name="spmap_type">
-            <param name="spmap_type_selector" type="select" label="EM starting points">
-              <option value="uni" selected="true">uni</option>
-              <option value="pam">pam</option>
-              <option value="cons">Use starting point from string</option>
-            </param>
-            <when value="uni">
-              <param name="spfuzz" type="float" value="0.5" label="Fuzziness of the mapping" />
-            </when>
-            <when value="pam">
-              <param name="spfuzz" type="integer" value="120" label="Fuzziness of the mapping" />
-            </when>
-            <when value="cons">
-              <param name="cons" type="text" value="" label="Starting point from string" />
-            </when>
-          </conditional>
-        </when>
-      </conditional>
-      
-      <param name="nmotifs" type="integer" value="1" label="Number of different motifs to search" />
-      <param name="maxsize" type="integer" value="1000000" label="Max number of characters in the sequence file"/>
-      <param name="evt" type="float" value="inf" label="E-value to stop looking for motifs" />
-      <conditional name="mod_type">
-        <param name="mod_type_selector" type="select" label="Expected motif distribution">
-          <option value="oops">One Occurrence Per Sequence</option>
-          <option value="zoops" selected="true">Zero or One Occurrence Per Sequence</option>
-          <option value="anr">Any Number of Repetitions</option>
-        </param>
-        <when value="oops">
-          <!-- no values here -->
-        </when>
-        <when value="zoops">
-          <conditional name="motif_occurrence_type">
-            <param name="motif_occurrence_type_selector" type="select" label="Number of motif occurrences">
-              <option value="default" selected="true">Use defaults</option>
-              <option value="nsites">nsites</option>
-              <option value="min_max_sites">min and max sites</option>
-            </param>
-            <when value="default">
-              <!-- no values here -->
-            </when>
-            <when value="nsites">
-              <param name="nsites" type="integer" value="1" label="Search nsites number of occurrences" />
-            </when>
-            <when value="min_max_sites">
-              <param name="minsites" type="integer" value="1" label="minsites" />
-              <param name="maxsites" type="integer" value="50" label="maxsites" />
-            </when>
-          </conditional>
-        </when>
-        <when value="anr">
-          <conditional name="motif_occurrence_type">
-            <param name="motif_occurrence_type_selector" type="select" label="Number of motif occurrences">
-              <option value="default" selected="true">Use defaults</option>
-              <option value="nsites">nsites</option>
-              <option value="min_max_sites">min and max sites</option>
-            </param>
-            <when value="default">
-              <!-- no values here -->
-            </when>
-            <when value="nsites">
-              <param name="nsites" type="integer" value="1" label="Search nsites number of occurrences" />
-            </when>
-            <when value="min_max_sites">
-              <param name="minsites" type="integer" value="1" label="minsites" />
-              <param name="maxsites" type="integer" value="50" label="maxsites" />
-            </when>
-          </conditional>
-        </when>
-      </conditional>
-      <param name="wnsites" type="float" value="0.8" label="Weight on the prior on nsites" />
-      
-      <conditional name="motif_width_type">
-        <param name="motif_width_type_selector" type="select" label="Motif width type">
-          <option value="exact">Exact width</option>
-          <option value="range" selected="true">Specify a range</option>
-        </param>
-        <when value="exact">
-          <param name="width" type="integer" value="10" label="Width of motif to search" />
-        </when>
-        <when value="range">
-          <param name="minw" type="integer" value="8" label="Min width of motif to search" />
-          <param name="maxw" type="integer" value="50" label="Max width of motif to search" />
-        </when>
-      </conditional>
-    
-      <conditional name="motif_trim_type">
-        <param name="motif_trim_type_selector" type="select" label="Motif trim type">
-          <option value="nomatrim">No motif trim</option>
-          <option value="trim" selected="true">Trim motif</option>
-        </param>
-        <when value="nomatrim">
-          <!-- no values here -->
-        </when>
-        <when value="trim">
-          <param name="wg" type="integer" value="11" label="Gap cost" />
-          <param name="ws" type="integer" value="1" label="Space cost" />
-          <param name="noendgaps" label="Do not penalize endgaps" type="boolean" truevalue="-noendgaps" falsevalue="" checked="False"/>
-        </when>
-      </conditional>
-    
-    <param name="bfile" type="data" format="txt" optional="True" label="Background Model" />
-    <param name="pspfile" type="data" format="txt" optional="True" label="Position-Specific Prior" />
-    
-    <param name="maxiter" type="integer" value="50" label="Number of iterations of EM to run" />
-    <param name="distance" type="float" value="0.001" label="Convergence criterion" />
-    
-      <conditional name="branching_type">
-        <param name="branching_type_selector" type="select" label="x-branching type">
-          <option value="x_branch">Perform x-branching</option>
-          <option value="no_x_branch" selected="true">No x-branching</option>
-        </param>
-        <when value="no_x_branch">
-          <!-- no values here -->
-        </when>
-        <when value="x_branch">
-          <param name="bfactor" type="integer" value="3" label="Number of iterations of branching" />
-          <param name="heapsize" type="integer" value="64" label="Maximum number of heaps to use" />
-        </when>
-      </conditional>
-  
-    </when>
-  </conditional>
-  
-  <param name="non_commercial_use" label="I certify that I am not using this tool for commercial purposes." type="boolean" truevalue="NON_COMMERCIAL_USE" falsevalue="COMMERCIAL_USE" checked="False">
-    <validator type="expression" message="This tool is only available for non-commercial use.">value == True</validator>
-  </param>
-  
-  </inputs>
-  <outputs>
-    <data format="html" name="html_outfile" label="${tool.name} on ${on_string} (html)"/>
-    <data format="txt" name="txt_outfile" label="${tool.name} on ${on_string} (text)"/>
-    <data format="memexml" name="xml_outfile" label="${tool.name} on ${on_string} (xml)"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="meme/meme/meme_input_1.fasta" ftype="fasta" dbkey="hg19"/>
-      <param name="options_type_selector" value="basic"/>
-      <param name="non_commercial_use" value="True"/>
-      <output name="html_outfile" file="meme/meme/meme_output_html_1.html" lines_diff="12"/>
-      <output name="txt_outfile" file="meme/meme/meme_output_txt_1.txt" lines_diff="12"/>
-      <output name="xml_outfile" file="meme/meme/meme_output_xml_1.xml" lines_diff="8"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-**WARNING: This tool is only available for non-commercial use. Use for educational, research and non-profit purposes is permitted. Before using, be sure to review, agree, and comply with the license.**
-
-If you want to specify sequence weights, you must include them at the top of your input FASTA file.
-
-.. class:: infomark
-
-**To cite MEME:**
-Timothy L. Bailey and Charles Elkan, "Fitting a mixture model by expectation maximization to discover motifs in biopolymers", Proceedings of the Second International Conference on Intelligent Systems for Molecular Biology, pp. 28-36, AAAI Press, Menlo Park, California, 1994. 
-
-
-For detailed information on MEME, click here_. To view the license, click license_.
-
-.. _here: http://meme.nbcr.net/meme/meme-intro.html
-.. _license: http://meme.nbcr.net/meme/COPYRIGHT.html
-
-  </help>
-</tool>
--- a/tools/metag_tools/blat_coverage_report.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,107 +0,0 @@
-#!/usr/bin/env python
-
-import os, sys
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-
-def reverse_complement(s):
-    complement_dna = {"A":"T", "T":"A", "C":"G", "G":"C", "a":"t", "t":"a", "c":"g", "g":"c", "N":"N", "n":"n" , ".":"."}
-    reversed_s = []
-    for i in s:
-        reversed_s.append(complement_dna[i])
-    reversed_s.reverse()
-    return "".join(reversed_s)
-
-def __main__():
-    nuc_index = {'a':0,'t':1,'c':2,'g':3}
-    diff_hash = {}    # key = (chrom, index)
-    infile = sys.argv[1]
-    outfile = sys.argv[2]
-    invalid_lines = 0
-    invalid_chars = 0
-    data_id = ''
-    data_seq = ''
-
-    for i, line in enumerate( open( infile ) ):
-        line = line.rstrip( '\r\n' )
-        if not line or line.startswith( '#' ):
-            continue
-        fields = line.split()
-        if len(fields) != 23:    # standard number of pslx columns
-            invalid_lines += 1
-            continue
-        if not fields[0].isdigit():
-            invalid_lines += 1
-            continue
-        read_id = fields[9]
-        chrom = fields[13]
-        try:
-            block_count = int(fields[17])
-        except:
-            invalid_lines += 1
-            continue
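-        # pslx columns 19-23 are comma-separated per-block lists: sizes, read starts, chrom starts, read seqs, chrom seqs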
-        block_size = fields[18].split(',')
-        read_start = fields[19].split(',')
-        chrom_start = fields[20].split(',')
-        read_seq = fields[21].split(',')
-        chrom_seq = fields[22].split(',')
-
-        for j in range(block_count):
-            try:
-                this_block_size = int(block_size[j])
-                this_read_start = int(read_start[j])
-                this_chrom_start = int(chrom_start[j])
-            except:
-                invalid_lines += 1
-                break
-            this_read_seq = read_seq[j]
-            this_chrom_seq = chrom_seq[j]
-            
-            if not this_read_seq.isalpha():
-                continue
-            if not this_chrom_seq.isalpha():
-                continue
-            
-            # brute force check of per-base coverage
-            for k in range(this_block_size):
-                cur_index = this_chrom_start+k
-                sub_a = this_read_seq[k:(k+1)].lower()
-                sub_b = this_chrom_seq[k:(k+1)].lower()
-                if not diff_hash.has_key((chrom, cur_index)):
-                    try:
-                        diff_hash[(chrom, cur_index)] = [0,0,0,0,sub_b.upper()]    # a, t, c, g, ref. nuc.
-                    except Exception, e:
-                        stop_err( str( e ) )
-                if sub_a in ['a','t','c','g']:
-                    diff_hash[(chrom, cur_index)][nuc_index[(sub_a)]] += 1
-                else:
-                    invalid_chars += 1
-                        
-    outputfh = open(outfile, 'w')
-    outputfh.write( "##title\tlocation\tref.\tcov.\tA\tT\tC\tG\n" )
-    keys = diff_hash.keys()
-    keys.sort()
-    for i in keys:
-        (chrom, location) = i
-        counts = diff_hash[i]
-        sum = counts[0] + counts[1] + counts[2] + counts[3]    # N's not included
-        if sum == 0:
-            continue
-        ratio_A = counts[0] * 100.0 / sum
-        ratio_T = counts[1] * 100.0 / sum
-        ratio_C = counts[2] * 100.0 / sum
-        ratio_G = counts[3] * 100.0 / sum
-        (title_head, title_tail) = os.path.split(chrom)
-        result = "%s\t%s\t%s\t%d\tA(%0.0f)\tT(%0.0f)\tC(%0.0f)\tG(%0.0f)\n" % ( title_tail, location, counts[4], sum, ratio_A, ratio_T, ratio_C, ratio_G )
-        outputfh.write(result)
-    outputfh.close()
-
-    if invalid_lines:
-        print 'Skipped %d invalid lines. ' % ( invalid_lines )
-    if invalid_chars:
-        print 'Skipped %d invalid characters in the alignment. ' % (invalid_chars)
-        
-if __name__ == '__main__': __main__()
\ No newline at end of file
--- a/tools/metag_tools/blat_coverage_report.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-<tool id="generate_coverage_report" name="Polymorphism of the Reads">
-	<description>the percentage of reads supporting each nucleotide at each location</description>
-	<command interpreter="python">blat_coverage_report.py $input1 $output1</command>
-	<inputs>	
-		<param name="input1" type="data" format="tabular" label="Alignment result"/>
-	</inputs>
-	<outputs>
-		<data name="output1" format="tabular"/>
-	</outputs> 
-	<tests>
-		<test>
-		<param name="input1" value="blat_coverage_report_test1.txt" ftype="tabular" />
-		<output name="output1" file="blat_coverage_report_test1.out" />
-		</test>
-	</tests>
-	<help>
-
-.. class:: warningmark
-
-**IMPORTANT**. This tool only works with BLAT **standard** or **pslx** output formats (hint: to produce pslx output, add **-out=pslx** to the command).
-
------
-	
-**What it does**
- 
- The tool generates a table of 8 columns, as follows (the percentage calculation is sketched after the list):
- 
-- 1st column: chromosome id.
-
-- 2nd column: chromosome location.
-
-- 3rd column: the nucleotide from reference genome at the chromosome location (2nd column).
-
-- 4th column: total coverage of the reads (number of reads that were mapped to the chromosome location).
-
-- 5th column: percentage of reads that support nucleotide **A** at this location.
-
-- 6th column: percentage of reads that support nucleotide **T** at this location.
-
-- 7th column: percentage of reads that support nucleotide **C** at this location.
-
-- 8th column: percentage of reads that support nucleotide **G** at this location.
- 
- 
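-The percentages in columns 5-8 are computed from the raw base counts at each position; positions with zero coverage are skipped. A minimal sketch of the calculation (hypothetical counts, mirroring the script's arithmetic)::
-
-	counts = {'A': 1, 'T': 0, 'C': 0, 'G': 0}   # reads observed at one position, N's excluded
-	total = sum(counts.values())                # the "cov." column
-	if total > 0:
-	    ratios = dict((nuc, n * 100.0 / total) for nuc, n in counts.items())
-	    # ratios['A'] == 100.0, reported as A(100) in the example below
-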
------
-
-**Example**
-
-- The BLAT pslx results look like the following (tab separated with sequence at the end)::
-
-	30	0	0	0	0	0	0	0	+	seq0	30	0	30	chr	4639675	4549207	4549237	1	30,	0,	4549207,	cggacagcgccgccaccaacaaagccacca,	cggacagcgccgccaccaacaaagccacca,
-	30	0	0	0	0	0	0	0	+	seq1	30	0	30	chr	4639675	614777	614807	1	30,	0,	614777,		aaaacaccggatgctccggcgctggcagat,	aaaacaccggatgctccggcgctggcagat,
-	28	1	0	0	0	0	0	0	+	seq2	30	0	29	chr	4639675	3289283	3289312	1	29,	0,	3289283,	tttgcttttagtacaccggattcagaacc,	tttgctttcagtacaccggattcagaacc,
-	30	0	0	0	0	0	0	0	+	seq4	30	0	30	chr	4639675	2665584	2665614	1	30,	0,	2665584,	cacgctacgtgcgcccccgcccagaaggcg,	cacgctacgtgcgcccccgcccagaaggcg,
-
-	The 14th column is the chromosome id, and the 16th and 17th columns show the chromosome start and end locations to which the reads were mapped.
-
-- The report shows the overall coverage of reads at each chromosome location (partial result)::
- 
-   +-------+----------+------+------+--------+------+--------+------+
-   | title | location | ref. | cov. |   A    |  T   |   C    |  G   |
-   +-------+----------+------+------+--------+------+--------+------+
-   |  chr  |  614777  |  A   |  1   | A(100) | T(0) |  C(0)  | G(0) |
-   |  chr  |  614778  |  A   |  1   | A(100) | T(0) |  C(0)  | G(0) |
-   |  chr  |  614779  |  A   |  1   | A(100) | T(0) |  C(0)  | G(0) |
-   +-------+----------+------+------+--------+------+--------+------+
-	
------
-
-**Reference**
- 
- **BLAT**: Kent, W James, BLAT--the BLAST-like alignment tool. (2002) Genome Research:12(4) 656-664.
-	
-	</help>
-</tool>
--- a/tools/metag_tools/blat_mapping.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-#!/usr/bin/env python
-
-import os, sys
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def reverse_complement(s):
-    complement_dna = {"A":"T", "T":"A", "C":"G", "G":"C", "a":"t", "t":"a", "c":"g", "g":"c", "N":"N", "n":"n" , ".":"."}
-    reversed_s = []
-    for i in s:
-        reversed_s.append(complement_dna[i])
-    reversed_s.reverse()
-    return "".join(reversed_s)
-    
-def __main__():
-    nuc_index = {'a':0,'t':1,'c':2,'g':3,'n':4}
-    coverage = {}        # key = (chrom, index)
-    invalid_lines = 0
-    invalid_chrom = 0
-    infile = sys.argv[1]
-    outfile = sys.argv[2]
-
-    for i, line in enumerate( open( infile ) ):
-        line = line.rstrip('\r\n')
-        if not line or line.startswith('#'):
-            continue
-        fields = line.split()
-        if len(fields) < 21:                # standard number of pslx columns
-            invalid_lines += 1
-            continue 
-        if not fields[0].isdigit():
-            invalid_lines += 1
-            continue
-        chrom = fields[13]
-        if not chrom.startswith( 'chr' ):
-            invalid_lines += 1
-            invalid_chrom += 1
-            continue
-        try:
-            block_count = int(fields[17])
-        except:
-            invalid_lines += 1
-            continue
-        block_size = fields[18].split(',')
-        chrom_start = fields[20].split(',')
-
-        for j in range( block_count ):
-            try:
-                this_block_size = int(block_size[j])
-                this_chrom_start = int(chrom_start[j])
-            except:
-                invalid_lines += 1
-                break
-            # brute force per-base coverage
-            for k in range( this_block_size ):
-                cur_index = this_chrom_start + k
-                if coverage.has_key( ( chrom, cur_index ) ):
-                    coverage[(chrom, cur_index)] += 1
-                else:
-                    coverage[(chrom, cur_index)] = 1
-                
-    # write the coverage as a wiggle (variableStep) file
-    outputfh = open(outfile, 'w')
-    keys = coverage.keys()
-    keys.sort()
-    previous_chrom = ''
-    for i in keys:
-        (chrom, location) = i
-        sum = coverage[(i)]
-        if chrom != previous_chrom:
-            outputfh.write( 'variableStep chrom=%s\n' % ( chrom ) )
-            previous_chrom = chrom
-        outputfh.write( "%s\t%s\n" % ( location, sum ) )
-    outputfh.close()
-    
-    if invalid_lines:
-        invalid_msg = "Skipped %d invalid lines" % invalid_lines
-        if invalid_chrom:
-            invalid_msg += ", including %d lines whose chrom ids do not begin with 'chr' (required to map correctly to the UCSC Genome Browser)." % invalid_chrom
-        print invalid_msg
-
-if __name__ == '__main__': __main__()
\ No newline at end of file
--- a/tools/metag_tools/blat_mapping.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-<tool id="blat2wig" name="Coverage of the Reads">
-  <description>in wiggle format</description>
-  <command interpreter="python">blat_mapping.py $input1 $output1</command>
-  <inputs>	
-    <param name="input1" type="data" format="tabular" label="Alignment result"/>
-  </inputs>
-  <outputs>
-    <data name="output1" format="wig"/>
-  </outputs> 
-  <tests>
-    <test>
-      <param name="input1" value="blat_mapping_test1.txt" ftype="tabular" />
-      <output name="output1" file="blat_mapping_test1.out" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
- To generate acceptable files, please run the alignment program **BLAT** with the option **-out=pslx**.
-
-.. class:: warningmark
-
- Please edit the database information by clicking on the pencil icon next to your dataset, and select the corresponding genome build.
-
------
-	
-**What it does**
- 
- This tool takes **BLAT pslx** output and returns a wig-like file showing the number of reads (coverage) mapped at each chromosome location. Use the **Graph/Display Data --> Build custom track** tool to show the coverage mapping in the UCSC Genome Browser.
-
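-The output follows the UCSC wiggle **variableStep** layout: one header line per chromosome, then one line per covered position giving the coordinate and the read count. An illustrative excerpt (the coordinates are made up)::
-
-   variableStep chrom=chr22
-   14430001	3
-   14430002	5
-   14430003	5
-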
------
-
-**Example**
-
- Read coverage on human chromosome 22 (partial result), shown as a UCSC Genome Browser custom track:
- 
- .. image:: ./static/images/blat_mapping_example.png
- 	:width: 600
- 	
-  </help>
-</tool>
--- a/tools/metag_tools/blat_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-#!/usr/bin/env python
-
-import os, sys, tempfile
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-    
-def check_nib_file( dbkey, GALAXY_DATA_INDEX_DIR ):
-    nib_file = "%s/alignseq.loc" % GALAXY_DATA_INDEX_DIR
-    nib_path = ''
-    nibs = {}
-    for i, line in enumerate( file( nib_file ) ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( "#" ):
-            fields = line.split( '\t' )
-            if len( fields ) < 3:
-                continue
-            if fields[0] == 'seq':
-                nibs[( fields[1] )] = fields[2]
-    if nibs.has_key( dbkey ):
-        nib_path = nibs[( dbkey )]
-    return nib_path
-
-def check_twobit_file( dbkey, GALAXY_DATA_INDEX_DIR ):
-    twobit_file = "%s/twobit.loc" % GALAXY_DATA_INDEX_DIR
-    twobit_path = ''
-    twobits = {}
-    for i, line in enumerate( file( twobit_file ) ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( "#" ): 
-            fields = line.split( '\t' )
-            if len( fields ) < 2:
-                continue
-            twobits[( fields[0] )] = fields[1]
-    if twobits.has_key( dbkey ):
-        twobit_path = twobits[( dbkey )]
-    return twobit_path
-
-def __main__():
-    # I/O
-    source_format = sys.argv[1]        # 0: dbkey; 1: upload file
-    target_file = sys.argv[2]
-    query_file = sys.argv[3]
-    output_file = sys.argv[4]
-    min_iden = sys.argv[5]
-    tile_size = sys.argv[6]
-    one_off = sys.argv[7]
-    
-    try:
-        float(min_iden)    
-    except:
-        stop_err('Invalid value for minimal identity.')
-    
-    try:  
-        test = int(tile_size)
-        assert test >= 6 and test <= 18
-    except:
-        stop_err('Invalid value for tile size. DNA word size must be between 6 and 18.')
-        
-    try:
-        test = int(one_off)
-        assert test >= 0 and test <= int(tile_size)
-    except:
-        stop_err('Invalid value for the number of mismatches in the word.')
-        
-    GALAXY_DATA_INDEX_DIR = sys.argv[8]
-
-    all_files = []
-    if source_format == '0':
-        # check target genome
-        dbkey = target_file
-        nib_path = check_nib_file( dbkey, GALAXY_DATA_INDEX_DIR )
-        twobit_path = check_twobit_file( dbkey, GALAXY_DATA_INDEX_DIR )
-        if not os.path.exists( nib_path ) and not os.path.exists( twobit_path ):
-            stop_err("No sequences are available for %s, request them by reporting this error." % dbkey)
-    
-        # check the query file, see whether all of them are legitimate sequence
-        if nib_path and os.path.isdir( nib_path ):
-            compress_files = os.listdir(nib_path)
-            target_path = nib_path
-        elif twobit_path:
-            compress_files = [twobit_path]
-            target_path = ""
-        else:
-            stop_err("Requested genome build has no available sequence.")
-            
-        for file in compress_files:
-            file = "%s/%s" % ( target_path, file )
-            file = os.path.normpath(file)
-            all_files.append(file)
-    else:
-        all_files = [target_file]
-        
-    for detail_file_path in all_files:
-        output_tempfile = tempfile.NamedTemporaryFile().name
-        command = "blat %s %s %s -oneOff=%s -tileSize=%s -minIdentity=%s -mask=lower -noHead -out=pslx 2>&1" % ( detail_file_path, query_file, output_tempfile, one_off, tile_size, min_iden )
-        os.system( command )
-        os.system( 'cat %s >> %s' % ( output_tempfile, output_file ) )
-        os.remove( output_tempfile )
-        
-if __name__ == '__main__': __main__()
--- a/tools/metag_tools/blat_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-<tool id="blat_wrapper" name="BLAT" version="1.0.0">
-  <description> compare sequencing reads against UCSC genome builds</description>
-  <command interpreter="python">
-    #if $source.source_select=="database" #blat_wrapper.py 0 $source.dbkey $input_query $output1 $iden $tile_size $one_off
-    #else                                 #blat_wrapper.py 1 $source.input_target $input_query $output1 $iden $tile_size $one_off
-    #end if# ${GALAXY_DATA_INDEX_DIR}
-  </command>
-	<inputs>
-	<conditional name="source">
-		<param name="source_select" type="select" label="Target source">
-				<option value="database">Genome Build</option>
-				<option value="input_ref">Your Upload File</option>
-		</param>
-		<when value="database">
-			<param name="dbkey" type="genomebuild" label="Genome" />
-		</when>
-		<when value="input_ref">
-			<param name="input_target" type="data" format="fasta" label="Reference sequence" />
- 		</when>
-	</conditional>	
-		<param name="input_query" type="data" format="fasta" label="Sequence file"/>
-		<param name="iden" type="float" size="15" value="90.0" label="Minimal identity (-minIdentity)" />
-		<param name="tile_size" type="integer" size="15" value="11" label="Minimal size of exact match (-tileSize)" help="Must be between 6 and 18."/>
-		<param name="one_off" type="integer" size="15" value="0" label="Number of mismatch in the word (-oneOff)" help="Must be between 0 and 2." />
-	</inputs>
-	<outputs>
-		<data name="output1" format="tabular"/>
-	</outputs>
-	<requirements>
-	  <requirement type="binary">blat</requirement>
-	</requirements>
-	<tests>
-		<test>
-		<param name="source_select" value="database" />
-		<param name="dbkey" value="eschColi_K12" />
-		<param name="input_query" value="blat_wrapper_test1.fa" ftype="fasta"/>
-		<param name="iden" value="90.0" />
-		<param name="tile_size" value="11" />
-		<param name="one_off" value="0" />
-		<output name="output1" file="blat_wrapper_test1.out" />
-		</test>
-	</tests>
-	<help>
-	
-.. class:: warningmark 
-
-Using a smaller word size (*Minimal Size of Exact Match*) will increase the computational time.
-
-.. class:: warningmark 
-
-Using a larger mismatch number (*Number of Mismatch in the Word*) will increase the computational time.
-
------
-	
-**What it does**
- 
-This tool currently uses the **BLAT** alignment program. Your short reads file is searched against a genome build or another uploaded file. 
- 
------
- 
-**Example**
- 
-- Input a multiple fasta file::
-
-	&gt;seq1
-	TGGTAATGGTGGTTTTTTTTTTTTTTTTTTATTTTT
-
-- Use the default settings:
-
-  - alignment identity must be higher than or equal to 90%.
-  
-  - minimal size of exact match to trigger an alignment is 11.
-  
-  - allow 0 mismatches in the above exact match size.
-  
-- Search against ce2 (C. elegans March 2004), partial result::
-
-	25 1 0 0 0 0 0 0 + seq1 36 10 36 chrI 15080483 9704438 9704464 1 26, 10, 9704438, ggttttttttttttttttttattttt, ggtttttttttttttttttttttttt,
-	27 0 0 0 0 0 1 32 + seq1 36 9 36 chrI 15080483 1302536 1302595 2 21,6, 9,30, 1302536,1302589, tggtttttttttttttttttt,attttt, tggtttttttttttttttttt,attttt,
-
------
-
-**Parameters**
-
-- *Minimal Identity* (**-minIdentity**) : In percent, the minimum sequence identity between the query and target alignment. Default is 90.
-
-- *Minimal Size of Exact Match* (**-tileSize**) : The size of a match that will trigger an alignment. Default is 11. Usually between 8 and 12. Must be between 6 and 18.
-
-- *Number of Mismatch in the Word* (**-oneOff**) : The number of mismatches allowed in the word (tile size) that still triggers an alignment. Default is 0. The full command assembled by the wrapper is shown below.
-
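-Internally the wrapper assembles one BLAT call per target sequence file. With the default settings above it is equivalent to the following (the file names are placeholders)::
-
-	blat target.fa reads.fa output.pslx -oneOff=0 -tileSize=11 -minIdentity=90.0 -mask=lower -noHead -out=pslx
-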
------
-
-**Reference**
- 
- **BLAT**: Kent, W James, BLAT--the BLAST-like alignment tool. (2002) Genome Research:12(4) 656-664.
-
-
-	</help>
-</tool>
--- a/tools/metag_tools/convert_SOLiD_color2nuc.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-#!/usr/bin/env python
-"""
-convert SOLiD color-space data to a nucleotide sequence
-example: T011213122200221123032111221021210131332222101
-         TTGTCATGAGAAAGACAGCCGACACTCAAGTCAACGTATCTCTGGT
-"""
-
-import sys, os
-
-def stop_err(msg):
-    
-    sys.stderr.write(msg)
-    sys.stderr.write('\n')
-    sys.exit()
-    
-def color2base(color_seq):
-
-    first_nuc = ['A','C','G','T']
-    code_matrix = {}
-    code_matrix['0'] = ['A','C','G','T']
-    code_matrix['1'] = ['C','A','T','G']
-    code_matrix['2'] = ['G','T','A','C']
-    code_matrix['3'] = ['T','G','C','A']
-
-    overlap_nuc = ''
-    nuc_seq = ''
-    
-    seq_prefix = prefix = color_seq[0].upper()
-    color_seq = color_seq[1:]
-                
-    if not (seq_prefix in first_nuc):
-        stop_err('The leading nucleotide is invalid. Must be one of the four nucleotides: A, T, C, G.\nThe file contains a %s' %seq_prefix )
-
-    for code in color_seq:
-        
-        if not (code in ['0','1','2','3']):
-            stop_err('Expected only digits (0, 1, 2, 3) in the color-coding data.\nThe file contains a %s' %code)
-        
-        second_nuc = code_matrix[code]
-        overlap_nuc = second_nuc[first_nuc.index(prefix)]
-        nuc_seq += overlap_nuc
-        prefix = overlap_nuc
-
-    return seq_prefix, nuc_seq
-
-def __main__():
-
-    infilename = sys.argv[1]
-    keep_prefix = sys.argv[2].lower()
-    outfilename = sys.argv[3]
-
-    outfile = open(outfilename,'w')
-
-    prefix = ''
-    color_seq = ''
-    for i, line in enumerate(file(infilename)):
-        line = line.rstrip('\r\n')
-
-        if not line: continue
-        if line.startswith("#"): continue
-    
-        if line.startswith(">"):
-            
-            if color_seq:
-                prefix, nuc_seq = color2base(color_seq)
-                
-                if keep_prefix == 'yes':
-                    nuc_seq = prefix + nuc_seq
-                
-                outfile.write(title+'\n')
-                outfile.write(nuc_seq+'\n')
-                
-            title = line
-            color_seq = ''
-        else:
-            color_seq += line
-            
-    if color_seq:
-        prefix, nuc_seq = color2base(color_seq)
-                
-        if keep_prefix == 'yes':
-            nuc_seq = prefix + nuc_seq
-
-        outfile.write(title+'\n')
-        outfile.write(nuc_seq+'\n')
-            
-    outfile.close()
-    
-if __name__=='__main__': __main__()
--- a/tools/metag_tools/convert_SOLiD_color2nuc.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-<tool id="color2nuc" name="Convert Color Space" version="1.0.0">
-<description> to Nucleotides </description>
-<command interpreter="python">convert_SOLiD_color2nuc.py $input1 $input2 $output1 </command>
-
-<inputs>
-    <param name="input1" type="data" format="txt" label="SOLiD color coding file" />
-    <param name="input2" type="select" label="Keep prefix nucleotide">
-    	<option value="yes">Yes</option>
-    	<option value="no">No</option>
-    </param>
-</inputs>
-<outputs>
-  	<data name="output1" format="fasta" />
-</outputs>
-<!-- 
-<tests>
-	<test>
-		<param name="input1" value="convert_SOLiD_color2nuc_test1.txt" ftype="txt" />
-		<param name="input2" value="no" />
-		<output name="output1" file="convert_SOLiD_color2nuc_test1.out" />
-	</test>
-</tests>
--->
-<help>
-
-.. class:: warningmark
-
-The tool was designed for color space files generated from an ABI SOLiD sequencer. The file format must be fasta-like: the title starts with a ">" character, and each color space sequence starts with a leading nucleotide.
- 
------
-
-**What it does**
-
-This tool converts a color space sequence to nucleotides. The leading character must be a nucleotide: A, C, G, or T. 
- 
------
-
-**Example**
-
-- If the color space file looks like this::
-
-	&gt;seq1
-	A013
-	&gt;seq2
-	T011213122200221123032111221021210131332222101
-	
-- If you would like to **keep** the leading nucleotide::
-
-	&gt;seq1
-	AACG
-	&gt;seq2
-	TTGTCATGAGAAAGACAGCCGACACTCAAGTCAACGTATCTCTGGT
-	
-- If you **do not want to keep** the leading nucleotide (the length of nucleotide sequence will be one less than the color-space sequence)::
- 
-	&gt;seq1
-	ACG
-	&gt;seq2
-	TGTCATGAGAAAGACAGCCGACACTCAAGTCAACGTATCTCTGGT
-
------
-
-**ABI SOLiD Color Coding Alignment matrix**
-
- Each di-nucleotide is represented by a single digit (0 to 3). Because the encoding is degenerate, the leading nucleotide is required to determine the sequence; without it, four different decodings are possible.
-
- 
- .. image:: ./static/images/dualcolorcode.png
-
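-Decoding is a simple table lookup: each color digit selects the next base, given the base before it. A minimal sketch of the transition table the script uses, decoding the first few digits of seq2 above::
-
-	first_nuc = ['A', 'C', 'G', 'T']
-	code_matrix = {'0': ['A', 'C', 'G', 'T'],   # 0 means both bases are identical
-	               '1': ['C', 'A', 'T', 'G'],
-	               '2': ['G', 'T', 'A', 'C'],
-	               '3': ['T', 'G', 'C', 'A']}
-	prev = 'T'                                  # the leading nucleotide
-	seq = ''
-	for code in '011213':
-	    prev = code_matrix[code][first_nuc.index(prev)]
-	    seq += prev
-	# seq == 'TGTCAT', the first bases of the decoded seq2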
-
-</help>
-</tool>
--- a/tools/metag_tools/fastqsolexa_to_fasta_qual.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,109 +0,0 @@
-#!/usr/bin/env python
-
-"""
-convert a fastqsolexa file into separate sequence and quality files.
-
-assume each sequence and quality score are contained in one line
-the order should be:
-1st line: @title_of_seq
-2nd line: nucleotides
-3rd line: +title_of_qualityscore (might be skipped)
-4th line: quality scores 
-(in three forms: a. space-separated integers; b. ASCII codes with the first character giving the coding base; c. ASCII codes without the leading character)
-
-Usage:
-%python fastqsolexa_to_fasta_qual.py <your_fastqsolexa_filename> <output_seq_filename> <output_score_filename>
-"""
-
-import sys, os
-from math import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( "%s" % msg )
-    sys.exit()
-
-def __main__():
-    infile_name = sys.argv[1]
-    outfile_seq = open( sys.argv[2], 'w' )
-    outfile_score = open( sys.argv[3], 'w' )
-    datatype = sys.argv[4]
-    seq_title_startswith = ''
-    qual_title_startswith = ''
-    default_coding_value = 64
-    fastq_block_lines = 0
-    
-    for i, line in enumerate( file( infile_name ) ):
-        line = line.rstrip()
-        if not line or line.startswith( '#' ):
-            continue
-        fastq_block_lines = ( fastq_block_lines + 1 ) % 4
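-        # cycles 1 (@title), 2 (sequence), 3 (+title), 0 (quality scores) over each FASTQ record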
-        line_startswith = line[0:1]
-        if fastq_block_lines == 1:
-            # first line is @title_of_seq
-            if not seq_title_startswith:
-                seq_title_startswith = line_startswith
-            if line_startswith != seq_title_startswith:
-                outfile_seq.close()
-                outfile_score.close()
-                stop_err( 'Invalid fastqsolexa format at line %d: %s.' % ( i + 1, line ) )
-            read_title = line[1:]
-            outfile_seq.write( '>%s\n' % line[1:] )
-        elif fastq_block_lines == 2:
-            # second line is nucleotides
-            read_length = len( line )
-            outfile_seq.write( '%s\n' % line )
-        elif fastq_block_lines == 3:
-            # third line is +title_of_qualityscore ( might be skipped )
-            if not qual_title_startswith:
-                qual_title_startswith = line_startswith
-            if line_startswith != qual_title_startswith:
-                outfile_seq.close()
-                outfile_score.close()
-                stop_err( 'Invalid fastqsolexa format at line %d: %s.' % ( i + 1, line ) )    
-            quality_title = line[1:]
-            if quality_title and read_title != quality_title:
-                outfile_seq.close()
-                outfile_score.close()
-                stop_err( 'Invalid fastqsolexa format at line %d: sequence title "%s" differs from score title "%s".' % ( i + 1, read_title, quality_title ) )
-            if not quality_title:
-                outfile_score.write( '>%s\n' % read_title )
-            else:
-                outfile_score.write( '>%s\n' % line[1:] )
-        else:
-            # fourth line is quality scores
-            qual = ''
-            fastq_integer = True
-            # peek: ascii or digits?
-            val = line.split()[0]
-            try: 
-                check = int( val )
-                fastq_integer = True
-            except:
-                fastq_integer = False
-                
-            if fastq_integer:
-                # digits
-                qual = line
-            else:
-                # ascii
-                quality_score_length = len( line )
-                if quality_score_length == read_length + 1:
-                    # first char is qual_score_startswith
-                    qual_score_startswith = ord( line[0:1] )
-                    line = line[1:]
-                elif quality_score_length == read_length:
-                    qual_score_startswith = default_coding_value
-                else:
-                    stop_err( 'Invalid fastqsolexa format at line %d: the number of quality scores ( %d ) is not the same as bases ( %d ).' % ( i + 1, quality_score_length, read_length ) )
-                for j, char in enumerate( line ):
-                    score = ord( char ) - qual_score_startswith    # 64
-                    qual = "%s%s " % ( qual, str( score ) )
-            outfile_score.write( '%s\n' % qual )
-              
-    outfile_seq.close()
-    outfile_score.close()
-
-if __name__ == "__main__": __main__() 
-    
\ No newline at end of file
--- a/tools/metag_tools/fastqsolexa_to_fasta_qual.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,91 +0,0 @@
-<tool id="fastqsolexa_to_fasta_qual" name="FASTQSOLEXA-to-FASTA-QUAL" version="1.0.0">
-  <description>extracts sequences and quality scores from FASTQSOLEXA data</description>
-  <command interpreter="python">fastqsolexa_to_fasta_qual.py $input1 $output1 $output2 $input1.extension</command>
-  <inputs>
-    <param name="input1" type="data" format="fastqsolexa" label="Fastqsolexa file"/>
-  </inputs>
-  <outputs>
-    <data name="output1" format="fasta"/>
-    <data name="output2" format="qualsolexa"/>
-  </outputs>
-  <tests>
-    <!-- NOTE: this tool generates 2 output files, but our functional tests currently only handle the last one generated -->
-    <test>
-      <param name="input1" value="1.fastqsolexa" ftype="fastqsolexa" />
-      <output name="output1" file="fastqsolexa_to_fasta_qual_out4.fasta" />
-    </test>
-    <test>
-      <param name="input1" value="2.fastqsolexa" ftype="fastqsolexa" />
-      <output name="output1" file="fastqsolexa_to_fasta_qual_out2.fasta" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-IMPORTANT: This tool currently only supports data where the quality scores are either integers or ASCII characters encoded with base 64.
-
------
-
-**What it does**
-
-This tool extracts sequences and quality scores from FASTQ data ( Solexa variant ), producing a FASTA dataset and a QUAL dataset.
-
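-For ASCII-encoded qualities the script subtracts the coding base (64, unless the line carries one extra leading character that sets it) from each character's code. For instance, mirroring Example1 below::
-
-    >>> ord('h') - 64
-    40
-    >>> ord('@') - 64
-    0
-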
------
-
-**Example1**
-
-- Converting the following Solexa fastq data::
-
-    @seq1  
-    GACAGCTTGGTTTTTAGTGAGTTGTTCCTTTCTTT  
-    +seq1  
-    hhhhhhhhhhhhhhhhhhhhhhhhhhPW@hhhhhh  
-    @seq2  
-    GCAATGACGGCAGCAATAAACTCAACAGGTGCTGG  
-    +seq2  
-    hhhhhhhhhhhhhhYhhahhhhWhAhFhSIJGChO
-
-- will extract the following sequences::
-
-    >seq1
-    GACAGCTTGGTTTTTAGTGAGTTGTTCCTTTCTTT
-    >seq2
-    GCAATGACGGCAGCAATAAACTCAACAGGTGCTGG
-    
-- and quality scores::
-
-    >seq1
-    40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 23 0 40 40 40 40 40 40 
-    >seq2
-    40 40 40 40 40 40 40 40 40 40 40 40 40 40 25 40 40 33 40 40 40 40 23 40 1 40 6 40 19 9 10 7 3 40 15 
-
-**Example2**
-
-- Converting the following Solexa fastq data::
-
-    @HANNIBAL_1_FC302VTAAXX:2:1:228:167
-    GAATTGATCAGGACATAGGACAACTGTAGGCACCAT
-    +HANNIBAL_1_FC302VTAAXX:2:1:228:167
-    40 40 40 40 35 40 40 40 25 40 40 26 40 9 33 11 40 35 17 40 40 33 40 7 9 15 3 22 15 30 11 17 9 4 9 4
-    @HANNIBAL_1_FC302VTAAXX:2:1:156:340
-    GAGTTCTCGTCGCCTGTAGGCACCATCAATCGTATG
-    +HANNIBAL_1_FC302VTAAXX:2:1:156:340
-    40 15 40 17 6 36 40 40 40 25 40 9 35 33 40 14 14 18 15 17 19 28 31 4 24 18 27 14 15 18 2 8 12 8 11 9
-
-- will extract the following sequences::
-
-    >HANNIBAL_1_FC302VTAAXX:2:1:228:167
-    GAATTGATCAGGACATAGGACAACTGTAGGCACCAT
-    >HANNIBAL_1_FC302VTAAXX:2:1:156:340
-    GAGTTCTCGTCGCCTGTAGGCACCATCAATCGTATG
-
-- and quality scores::
-
-    >HANNIBAL_1_FC302VTAAXX:2:1:228:167
-    40 40 40 40 35 40 40 40 25 40 40 26 40 9 33 11 40 35 17 40 40 33 40 7 9 15 3 22 15 30 11 17 9 4 9 4
-    >HANNIBAL_1_FC302VTAAXX:2:1:156:340
-    40 15 40 17 6 36 40 40 40 25 40 9 35 33 40 14 14 18 15 17 19 28 31 4 24 18 27 14 15 18 2 8 12 8 11 9
-
-    </help>
-</tool>
--- a/tools/metag_tools/mapping_to_ucsc.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,204 +0,0 @@
-#!/usr/bin/env python
-
-from galaxy import eggs
-import sys, tempfile, os
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-    
-def main():
-
-    out_fname = sys.argv[1]
-    in_fname = sys.argv[2]
-    chr_col = int(sys.argv[3])-1
-    coord_col = int(sys.argv[4])-1
-    track_type = sys.argv[5]
-    if track_type == 'coverage' or track_type == 'both': 
-        coverage_col = int(sys.argv[6])-1
-        cname = sys.argv[7]
-        cdescription = sys.argv[8]
-        ccolor = sys.argv[9].replace('-',',')
-        cvisibility = sys.argv[10]
-    if track_type == 'snp' or track_type == 'both':
-        if track_type == 'both':
-            j = 5
-        else:
-            j = 0 
-        #sname = sys.argv[7+j]
-        sdescription = sys.argv[6+j]
-        svisibility = sys.argv[7+j]
-        #ref_col = int(sys.argv[10+j])-1
-        read_col = int(sys.argv[8+j])-1
-    
-
-    # Sort the input file based on chromosome (alphabetically) and start co-ordinates (numerically)
-    sorted_infile = tempfile.NamedTemporaryFile()
-    try:
-        os.system("sort -k %d,%d -k %dn -o %s %s" %(chr_col+1,chr_col+1,coord_col+1,sorted_infile.name,in_fname))
-    except Exception, exc:
-        stop_err( 'Initialization error -> %s' %str(exc) )
-
-    #generate chr list
-    sorted_infile.seek(0)
-    chr_vals = []
-    for line in file( sorted_infile.name ):
-        line = line.strip()
-        if not(line):
-            continue
-        try:
-            fields = line.split('\t')
-            chr = fields[chr_col]
-            if chr not in chr_vals:
-                chr_vals.append(chr)
-        except:
-            pass
-    if not(chr_vals):   
-        stop_err("Skipped all lines as invalid.")
-        
-    if track_type == 'coverage' or track_type == 'both':
-        if track_type == 'coverage':
-            fout = open( out_fname, "w" )
-        else:
-            fout = tempfile.NamedTemporaryFile()
-        fout.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \
-                      % ( cname, cdescription, ccolor, cvisibility ))
-    if track_type == 'snp' or track_type == 'both':
-        fout_a = tempfile.NamedTemporaryFile()
-        fout_t = tempfile.NamedTemporaryFile()
-        fout_g = tempfile.NamedTemporaryFile()
-        fout_c = tempfile.NamedTemporaryFile()
-        fout_ref = tempfile.NamedTemporaryFile()
-        
-        fout_a.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \
-                      % ( "Track A", sdescription, '255,0,0', svisibility ))
-        fout_t.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \
-                      % ( "Track T", sdescription, '0,255,0', svisibility ))
-        fout_g.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \
-                      % ( "Track G", sdescription, '0,0,255', svisibility ))
-        fout_c.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \
-                      % ( "Track C", sdescription, '255,0,255', svisibility ))
-        
-        
-    sorted_infile.seek(0)
-    for line in file( sorted_infile.name ):
-        line = line.strip()
-        if not(line):
-            continue
-        try:
-            fields = line.split('\t')
-            chr = fields[chr_col]
-            start = int(fields[coord_col])
-            assert start > 0
-        except:
-            continue
-        try:
-            ind = chr_vals.index(chr)    #encountered chr for the 1st time
-            del chr_vals[ind]
-            prev_start = ''
-            header = "variableStep chrom=%s\n" %(chr)
-            if track_type == 'coverage' or track_type == 'both':
-                coverage = int(fields[coverage_col])
-                line1 = "%s\t%s\n" %(start,coverage)
-                fout.write("%s%s" %(header,line1))
-            if track_type == 'snp' or track_type == 'both':
-                a = t = g = c = 0
-                fout_a.write("%s" %(header))
-                fout_t.write("%s" %(header))
-                fout_g.write("%s" %(header))
-                fout_c.write("%s" %(header))
-                try:
-                    #ref_nt = fields[ref_col].capitalize()
-                    read_nt = fields[read_col].capitalize()
-                    try:
-                        nt_ind = ['A','T','G','C'].index(read_nt)
-                        if nt_ind == 0:
-                            a+=1
-                        elif nt_ind == 1:
-                            t+=1
-                        elif nt_ind == 2:
-                            g+=1
-                        else:
-                            c+=1
-                    except ValueError:
-                        pass
-                except:
-                    pass
-            prev_start = start
-        except ValueError:
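-            # chromosome seen before: flush the accumulated counts once the coordinate changes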
-            if start != prev_start:
-                if track_type == 'coverage' or track_type == 'both':
-                    coverage = int(fields[coverage_col])
-                    fout.write("%s\t%s\n" %(start,coverage)) 
-                if track_type == 'snp' or track_type == 'both':
-                    if a:
-                        fout_a.write("%s\t%s\n" %(prev_start,a))
-                    if t:
-                        fout_t.write("%s\t%s\n" %(prev_start,t))
-                    if g:
-                        fout_g.write("%s\t%s\n" %(prev_start,g))
-                    if c:
-                        fout_c.write("%s\t%s\n" %(prev_start,c))
-                    a = t = g = c = 0
-                    try:
-                        #ref_nt = fields[ref_col].capitalize()
-                        read_nt = fields[read_col].capitalize()
-                        try:
-                            nt_ind = ['A','T','G','C'].index(read_nt)
-                            if nt_ind == 0:
-                                a+=1
-                            elif nt_ind == 1:
-                                t+=1
-                            elif nt_ind == 2:
-                                g+=1
-                            else:
-                                c+=1
-                        except ValueError:
-                            pass
-                    except:
-                        pass
-                prev_start = start
-            else:
-                if track_type == 'snp' or track_type == 'both':
-                    try:
-                        #ref_nt = fields[ref_col].capitalize()
-                        read_nt = fields[read_col].capitalize()
-                        try:
-                            nt_ind = ['A','T','G','C'].index(read_nt)
-                            if nt_ind == 0:
-                                a+=1
-                            elif nt_ind == 1:
-                                t+=1
-                            elif nt_ind == 2:
-                                g+=1
-                            else:
-                                c+=1
-                        except ValueError:
-                            pass
-                    except:
-                        pass
-    
-    if track_type == 'snp' or track_type == 'both':
-        if a:
-            fout_a.write("%s\t%s\n" %(prev_start,a))
-        if t:
-            fout_t.write("%s\t%s\n" %(prev_start,t))
-        if g:
-            fout_g.write("%s\t%s\n" %(prev_start,g))
-        if c:
-            fout_c.write("%s\t%s\n" %(prev_start,c))
-            
-        fout_a.seek(0)
-        fout_g.seek(0)
-        fout_t.seek(0)
-        fout_c.seek(0)    
-    
-    if track_type == 'snp':
-        os.system("cat %s %s %s %s >> %s" %(fout_a.name,fout_t.name,fout_g.name,fout_c.name,out_fname))
-    elif track_type == 'both':
-        fout.seek(0)
-        os.system("cat %s %s %s %s %s | cat > %s" %(fout.name,fout_a.name,fout_t.name,fout_g.name,fout_c.name,out_fname))
-if __name__ == "__main__":
-    main()
\ No newline at end of file
--- a/tools/metag_tools/mapping_to_ucsc.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,202 +0,0 @@
-<tool id="mapToUCSC" name="Format mapping data" version="1.0.0">
-  <description> as UCSC custom track</description>
-  <command interpreter="python">
-  	mapping_to_ucsc.py 
-  	$out_file1
-  	$input
-  	$chr_col
-  	$coord_col
-  	$track.track_type
-  	#if $track.track_type == "coverage" or $track.track_type == "both"
-  	$track.coverage_col
-    "${track.cname}"
-    "${track.cdescription}"
-    "${track.ccolor}"
-    "${track.cvisibility}"
-    #end if
-    #if $track.track_type == "snp" or $track.track_type == "both"
-    "${track.sdescription}"
-    "${track.svisibility}"
-     $track.col2
-    #end if
-  </command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Select mapping data"/>
-    <param name="chr_col" type="data_column" data_ref="input" label="Column for reference chromosome" />
-    <param name="coord_col" type="data_column" data_ref="input" numerical="True" label="Numerical column for reference co-ordinate" />
-    <conditional name="track">
-      <param name="track_type" type="select" label="Display">
-    	<option value="snp" selected="true">SNPs</option>
-        <option value="coverage">Read coverage</option>
-    	<option value="both">Both</option>
-      </param>
-      <when value = "coverage">
-      <param name="coverage_col" type="data_column" data_ref="input" numerical="True" label="Numerical column for read coverage" />
-      <param name="cname" type="text" size="15" value="User Track" label="Coverage track name">
-        <validator type="length" max="15"/>
-      </param>
-      <param name="cdescription" type="text" value="User Supplied Coverage Track (from Galaxy)" label="Coverage track description">
-        <validator type="length" max="60" size="15"/>
-      </param>
-      <param label="Coverage track Color" name="ccolor" type="select">
-            <option selected="yes" value="0-0-0">Black</option>
-            <option value="255-0-0">Red</option>
-            <option value="0-255-0">Green</option>
-            <option value="0-0-255">Blue</option>
-            <option value="255-0-255">Magenta</option>
-            <option value="0-255-255">Cyan</option>
-            <option value="255-215-0">Gold</option>
-            <option value="160-32-240">Purple</option>
-            <option value="255-140-0">Orange</option>
-            <option value="255-20-147">Pink</option>
-            <option value="92-51-23">Dark Chocolate</option>
-            <option value="85-107-47">Olive green</option>
-      </param>
-      <param label="Coverage track Visibility" name="cvisibility" type="select">
-            <option selected="yes" value="1">Dense</option>
-            <option value="2">Full</option>
-            <option value="3">Pack</option>
-            <option value="4">Squish</option>
-            <option value="0">Hide</option>
-      </param>
-      </when>
-      
-      <when value = "snp">
-      <!-- 
-      <param name="col1" type="data_column" data_ref="input" label="Column containing the reference nucleotide" />
-       -->
-      <param name="col2" type="data_column" data_ref="input" label="Column containing the read nucleotide" />
-      <!-- 
-      <param name="sname" type="text" size="15" value="User Track-2" label="SNP track name">
-        <validator type="length" max="15"/>
-      </param>
-       -->
-      <param name="sdescription" type="text" value="User Supplied Track (from Galaxy)" label="SNP track description">
-        <validator type="length" max="60" size="15"/>
-      </param>
-      <param label="SNP track Visibility" name="svisibility" type="select">
-            <option selected="yes" value="1">Dense</option>
-            <option value="2">Full</option>
-            <option value="3">Pack</option>
-            <option value="4">Squish</option>
-            <option value="0">Hide</option>
-      </param>
-      </when>
-      
-      <when value = "both">
-      <param name="coverage_col" type="data_column" data_ref="input" numerical="True" label="Numerical column for read coverage" />
-      <param name="cname" type="text" size="15" value="User Track" label="Coverage track name">
-        <validator type="length" max="15"/>
-      </param>
-      <param name="cdescription" type="text" size="15" value="User Supplied Track (from Galaxy)" label="Coverage track description">
-        <validator type="length" max="60"/>
-      </param>
-      <param label="Coverage track Color" name="ccolor" type="select">
-            <option selected="yes" value="0-0-0">Black</option>
-            <option value="255-0-0">Red</option>
-            <option value="0-255-0">Green</option>
-            <option value="0-0-255">Blue</option>
-            <option value="255-0-255">Magenta</option>
-            <option value="0-255-255">Cyan</option>
-            <option value="255-215-0">Gold</option>
-            <option value="160-32-240">Purple</option>
-            <option value="255-140-0">Orange</option>
-            <option value="255-20-147">Pink</option>
-            <option value="92-51-23">Dark Chocolate</option>
-            <option value="85-107-47">Olive green</option>
-      </param>
-      <param label="Coverage track Visibility" name="cvisibility" type="select">
-            <option selected="yes" value="1">Dense</option>
-            <option value="2">Full</option>
-            <option value="3">Pack</option>
-            <option value="4">Squish</option>
-            <option value="0">Hide</option>
-      </param>
-      <!-- 
-      <param name="col1" type="data_column" data_ref="input" label="Column containing the reference nucleotide" />
-       -->
-      <param name="col2" type="data_column" data_ref="input" label="Column containing the read nucleotide" />
-      <!-- 
-      <param name="sname" type="text" size="15" value="User Track-2" label="SNP track name">
-        <validator type="length" max="15"/>
-      </param>
-       -->
-      <param name="sdescription" type="text" size="15" value="User Supplied Track (from Galaxy)" label="SNP track description">
-        <validator type="length" max="60"/>
-      </param>
-      <param label="SNP track Visibility" name="svisibility" type="select">
-            <option selected="yes" value="1">Dense</option>
-            <option value="2">Full</option>
-            <option value="3">Pack</option>
-            <option value="4">Squish</option>
-            <option value="0">Hide</option>
-      </param>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="customtrack" name="out_file1"/>
-  </outputs>
-
-  
- <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This tool turns mapping data generated by short read mappers into a format that can be displayed in the UCSC genome browser as a custom track. 
-
------
-
-.. class:: warningmark
-
-**Note**
-
-This tool requires the mapping data to contain at least the following information: 
-
-chromosome, genome coordinate, read nucleotide (when displaying SNPs), and read coverage (when displaying read coverage).
-
------
-
-**Example**
-
-For the following Mapping data::
-
-   #chr g_start read_id          read_coord g_nt read_nt qual read_coverage
-   chrM    1   1:29:1672:1127/1    11        G    G       40  134
-   chrM    1   1:32:93:933/1       4         G    A       40  134
-   chrM    1   1:34:116:2032/1     11        G    A       40  134
-   chrM    1   1:39:207:964/1      1         G    G       40  134
-   chrM    2   1:3:359:848/1       1         G    C       40  234
-   chrM    2   1:40:1435:1013/1    1         G    G       40  234
-   chrM    3   1:40:730:972/1      9         G    G       40  334
-   chrM    4   1:42:1712:921/2     31        G    T       35  434
-   chrM    4   1:44:1649:493/1     4         G    G       40  434
-
-running this tool to display both SNPs and Read coverage will return the following tracks, containing aggregated data per genome co-ordinate::
-
-   track type=wiggle_0 name="Coverage Track" description="User Supplied Track (from Galaxy)" color=0,0,0 visibility=1
-   variableStep chrom=chrM
-   1   134
-   2   234
-   3   334
-   4   434
-   track type=wiggle_0 name="Track A" description="User Supplied SNP Track (from Galaxy)" color=255,0,0 visibility=1
-   variableStep chrom=chrM
-   1   2
-   track type=wiggle_0 name="Track T" description="User Supplied SNP Track (from Galaxy)" color=0,255,0 visibility=1
-   variableStep chrom=chrM
-   4   1
-   track type=wiggle_0 name="Track G" description="User Supplied SNP Track (from Galaxy)" color=0,0,255 visibility=1
-   variableStep chrom=chrM
-   1   2
-   2   1
-   3   1
-   4   1
-   track type=wiggle_0 name="Track C" description="User Supplied SNP Track (from Galaxy)" color=255,0,255 visibility=1
-   variableStep chrom=chrM
-   2   1
-   
-  </help>  
-</tool>
--- a/tools/metag_tools/megablast_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
-#!/usr/bin/env python
-"""
-run megablast for metagenomics data
-
-usage: %prog [options]
-   -d, --db_build=d: The database to use
-   -i, --input=i: Input FASTQ candidate file
-   -w, --word_size=w: Size of best perfect match
-   -c, --identity_cutoff=c: Report hits at or above this identity
-   -e, --eval_cutoff=e: Expectation value cutoff
-   -f, --filter_query=f: Filter out low complexity regions
-   -x, --index_dir=x: Data index directory
-   -o, --output=o: Output file
-   
-usage: %prog db_build input_file word_size identity_cutoff eval_cutoff filter_query index_dir output_file
-"""
-
-import os, subprocess, sys, tempfile
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-
-def __main__():
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    query_filename = options.input.strip()
-    output_filename = options.output.strip()
-    mega_word_size = options.word_size        # -W
-    mega_iden_cutoff = options.identity_cutoff      # -p
-    mega_evalue_cutoff = options.eval_cutoff      # -e
-    mega_temp_output = tempfile.NamedTemporaryFile().name
-    GALAXY_DATA_INDEX_DIR = options.index_dir
-    DB_LOC = "%s/blastdb.loc" % GALAXY_DATA_INDEX_DIR
-
-    # megablast parameters
-    try:
-        int( mega_word_size )    
-    except:
-        stop_err( 'Invalid value for word size' )
-    try:
-        float( mega_iden_cutoff )
-    except:
-        stop_err( 'Invalid value for identity cut-off' )
-    try:
-        float( mega_evalue_cutoff )
-    except:
-        stop_err( 'Invalid value for Expectation value' )
-
-    if not os.path.exists( os.path.split( options.db_build )[0] ):
-        stop_err( 'Cannot locate the target database directory. Please check your location file.' )
-
-    # arguments for megablast
-    megablast_command = "megablast -d %s -i %s -o %s -m 8 -a 8 -W %s -p %s -e %s -F %s > /dev/null" \
-        % ( options.db_build, query_filename, mega_temp_output, mega_word_size, mega_iden_cutoff, mega_evalue_cutoff, options.filter_query ) 
-
-    print megablast_command
-
-    tmp = tempfile.NamedTemporaryFile().name
-    try:
-        tmp_stderr = open( tmp, 'wb' )
-        proc = subprocess.Popen( args=megablast_command, shell=True, stderr=tmp_stderr.fileno() )
-        returncode = proc.wait()
-        tmp_stderr.close()
-        # get stderr, allowing for case where it's very large
-        tmp_stderr = open( tmp, 'rb' )
-        stderr = ''
-        buffsize = 1048576
-        try:
-            while True:
-                stderr += tmp_stderr.read( buffsize )
-                if not stderr or len( stderr ) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-        tmp_stderr.close()
-        if returncode != 0:
-            raise Exception, stderr
-        if os.path.exists( tmp ):
-            os.unlink( tmp )
-    except Exception, e:
-        if os.path.exists( mega_temp_output ):
-            os.unlink( mega_temp_output )
-        if os.path.exists( tmp ):
-            os.unlink( tmp )
-        stop_err( 'Error running megablast. ' + str( e ) )
-
-    output = open( output_filename, 'w' )
-    invalid_lines = 0
-    for i, line in enumerate( file( mega_temp_output ) ):
-        line = line.rstrip( '\r\n' )
-        fields = line.split()
-        try:
-            # get gi and length of that gi seq
-            gi, gi_len = fields[1].split( '_' )
-            # convert the last column (causing problem in filter tool) to float
-            fields[-1] = float( fields[-1] )
-            new_line = "%s\t%s\t%s\t%s\t%0.1f" % ( fields[0], gi, gi_len, '\t'.join( fields[2:-1] ), fields[-1] )
-        except:
-            new_line = line
-            invalid_lines += 1
-        output.write( "%s\n" % new_line )
-    output.close()
-
-    if os.path.exists( mega_temp_output ):
-        os.unlink( mega_temp_output ) #remove the tempfile that we just reformatted the contents of
-
-    if invalid_lines:
-        print "Unable to parse %d lines. Keep the default format." % invalid_lines
-
-    # megablast generates a file called error.log; print its contents (if any), then delete it
-    if os.path.exists( './error.log' ):
-        for i, line in enumerate( file( './error.log' ) ):
-            line = line.rstrip( '\r\n' )
-            print line
-        os.remove( './error.log' )
-
-if __name__ == "__main__" : __main__()
--- a/tools/metag_tools/megablast_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-<tool id="megablast_wrapper" name="Megablast" version="1.1.0">
-    <description> compare short reads against htgs, nt, and wgs databases</description>
-    <command interpreter="python">
-      megablast_wrapper.py
-        --db_build="${ filter( lambda x: str( x[0] ) == str( $source_select ), $__app__.tool_data_tables[ 'blastdb' ].get_fields() )[0][-1] }"
-        --input=$input_query
-        --word_size=$word_size
-        --identity_cutoff=$iden_cutoff
-        --eval_cutoff=$evalue_cutoff 
-        --filter_query=$filter_query
-        --index_dir=${GALAXY_DATA_INDEX_DIR}
-        --output=$output1
-    </command>
-    <inputs>
-        <param name="input_query" type="data" format="fasta" label="Compare these sequences"/> 
-        <param name="source_select" type="select" display="radio" label="against target database">
-            <options from_data_table="blastdb" />
-        </param>
-        <param name="word_size" type="select" label="using word size" help="Size of best perfect match (-W)">
-            <option value="28">28</option>
-            <option value="16">16</option>
-        </param>
-        <param name="iden_cutoff" type="float" size="15" value="90.0" label="report hits above this identity (-p)" help="no cutoff if 0" />
-        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="set expectation value cutoff (-e)" />
-        <param name="filter_query" type="select" label="Filter out low complexity regions? (-F)">
-            <option value="T">Yes</option>
-            <option value="F">No</option>
-        </param>
-    </inputs>
-    <outputs>
-        <data name="output1" format="tabular"/>
-    </outputs>
-    <requirements>
-        <requirement type="package">megablast</requirement>
-    </requirements>
-    <tests>
-        <test>
-            <param name="input_query" value="megablast_wrapper_test1.fa" ftype="fasta"/>
-            <!-- source_select needs to match the entry in the blastdb.loc file, which includes the last update date if appropriate --> 
-            <param name="source_select" value="/galaxy/data/blastdb/phiX/phiX" />
-            <param name="word_size" value="28" />
-            <param name="iden_cutoff" value="99.0" />
-            <param name="evalue_cutoff" value="10.0" />
-            <param name="filter_query" value="T" />
-            <output name="output1" file="megablast_wrapper_test1.out"/> 
-        </test>
-    </tests>
-    <help>
-    
-.. class:: warningmark
-
-**Note**. Database searches may take a substantial amount of time. For large input datasets it is advisable to allow overnight processing.
-
------
-
-**What it does**
-
-This tool runs **megablast** (for information about megablast, please see the reference below), a high-performance nucleotide local aligner developed by Webb Miller and colleagues.
-
------
-
-**Output format**
-
-Output of this tool contains 13 columns delimited by Tabs (a filtering sketch follows the list):
-
-1. Id of your sequence 
-2. GI of the database hit 
-3. Length of the database hit
-4. % identity
-5. Alignment length
-6. # mismatches
-7. # gaps
-8. Start position in your sequence
-9. End position in your sequence
-10. Start position in database hit
-11. End position in database hit
-12. E-value
-13. Bit score
-
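-Because the output is plain tab-delimited text, it is easy to post-process outside Galaxy. A small sketch (not part of the tool; the file name and thresholds are arbitrary) that keeps hits with at least 99% identity and an e-value of at most 1e-5::
-
-    for line in open('megablast_output.tabular'):
-        fields = line.rstrip('\r\n').split('\t')
-        if float(fields[3]) >= 99.0 and float(fields[11]) <= 1e-5:
-            print line.rstrip('\r\n')
-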
--------
-
-**Reference**
-
-Zhang et al. A Greedy Algorithm for Aligning DNA Sequences. 2000. JCB: 203-214.
-
-    </help>
-</tool>
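The 13-column tabular output documented above is straightforward to post-process outside Galaxy. A minimal sketch (the file names and the 97%/1e-5 cutoffs are illustrative assumptions, not part of the deleted tool)::

    # Keep megablast tabular hits above an identity cutoff and below an
    # E-value cutoff; the column layout follows the help text above.
    with open('megablast_hits.tabular') as hits, open('filtered.tabular', 'w') as out:
        for line in hits:
            fields = line.rstrip('\n').split('\t')
            if len(fields) < 13:
                continue                    # skip malformed rows
            identity = float(fields[3])     # column 4: % identity
            evalue = float(fields[11])      # column 12: E-value
            if identity >= 97.0 and evalue <= 1e-5:
                out.write(line)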
--- a/tools/metag_tools/megablast_xml_parser.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-#!/usr/bin/env python
-    
-import sys, os, re
-
-if sys.version_info[:2] >= ( 2, 5 ):
-    import xml.etree.cElementTree as ElementTree
-else:
-    from galaxy import eggs
-    import pkg_resources; pkg_resources.require( "elementtree" )
-    from elementtree import ElementTree
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-
-def __main__():
-    source  = sys.argv[1]
-    hspTags = [
-           "Hsp_bit-score",
-           "Hsp_evalue",
-           "Hsp_query-from",
-           "Hsp_query-to",
-           "Hsp_hit-from",
-           "Hsp_hit-to",
-           "Hsp_query-frame",
-           "Hsp_hit-frame",
-           "Hsp_identity",
-           "Hsp_align-len",
-           "Hsp_qseq",
-           "Hsp_hseq",
-           "Hsp_midline"
-          ]
-    hspData = []
-
-    # get an iterable
-    try: 
-        context = ElementTree.iterparse( source, events=( "start", "end" ) )
-    except:
-        stop_err( "Invalid data format." )
-    # turn it into an iterator
-    context = iter( context )
-    # get the root element
-    try:
-        event, root = context.next()
-    except:
-        stop_err( "Invalid data format." )
-
-    outfile = open( sys.argv[2], 'w' )
-    try:
-        for event, elem in context:
-           # for every <Iteration> tag
-           if event == "end" and elem.tag == "Iteration":
-               query = elem.findtext( "Iteration_query-def" )
-               qLen = elem.findtext( "Iteration_query-len" )
-               # for every <Hit> within <Iteration>
-               for hit in elem.findall( "Iteration_hits/Hit" ):
-                   subject = hit.findtext( "Hit_id" )
-                   if re.search( '^gi', subject ):
-                       subject = subject.split('|')[1]
-                   sLen = hit.findtext( "Hit_len" )
-                   # for every <Hsp> within <Hit>
-                   for hsp in hit.findall( "Hit_hsps/Hsp" ):
-                        outfile.write( "%s\t%s\t%s\t%s" % ( query, qLen, subject, sLen ) )
-                        for tag in hspTags:
-                            outfile.write("\t%s" %(hsp.findtext( tag )))
-                            #hspData.append( hsp.findtext( tag ) )
-                        #hspData = []
-                        outfile.write('\n')
-               # prevents ElementTree from growing large datastructure
-               root.clear()
-               elem.clear()
-    except:
-        outfile.close()
-        stop_err( "The input data is malformed, or there is more than one dataset in the input file. Error: %s" % sys.exc_info()[1] )
-
-    outfile.close()
-
-if __name__ == "__main__": __main__()
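The parser above stays memory-bounded by clearing processed elements while iterating, which is what lets it stream arbitrarily large BLAST XML files. A minimal sketch of the same pattern in modern Python (the generator name is ours)::

    import xml.etree.ElementTree as ET

    def iterations(source):
        # Stream <Iteration> elements without holding the whole tree in memory.
        context = ET.iterparse(source, events=('start', 'end'))
        _, root = next(context)             # first event is the root's start
        for event, elem in context:
            if event == 'end' and elem.tag == 'Iteration':
                yield elem
                root.clear()                # drop already-processed children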
--- a/tools/metag_tools/megablast_xml_parser.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-<tool id="megablast_xml_parser" name="Parse blast XML output">
-<description></description>
-<command interpreter="python">megablast_xml_parser.py $input1 $output1</command>
-<inputs>
-  <param name="input1" type="data" format="blastxml" label="Megablast XML output" />
-</inputs>
-<outputs>
-  <data name="output1" format="tabular"/>
-</outputs>
-<tests>
-  <test>
-    <param name="input1" value="megablast_xml_parser_test1.gz" ftype="blastxml" />
-    <output name="output1" file="megablast_xml_parser_test1_out.tabular" ftype="tabular" />
-  </test>
-</tests>
-<help>
-
-**What it does**
-
-This tool processes the XML output of any NCBI blast tool (if you run your own blast jobs, the XML output can be generated with the **-m 7** option).
-
------
-
-**Output fields**
-
-This tool returns tab-delimited output with the following fields::
-
-    Description                               Example
-    ----------------------------------------- ----------------- 
-
-    1. Name of the query sequence             Seq1
-    2. Length of the query sequence           30
-    3. Name of target sequence                gnl|BL_ORD_ID|0
-    4. Length of target sequence              5528445
-    5. Alignment bit score                    59.96
-    6. E-value                                8.38112e-11
-    7. Start of alignment within query        1
-    8. End of alignment within query          30
-    9. Start of alignment within target       5436010
-   10. End of alignment within target         5436039
-   11. Query frame                            1
-   12. Target frame                           1
-   13. Number of identical bases within       29 
-       the alignment
-   14. Alignment length                       30 
-   15. Aligned portion (sequence) of query    CGGACAGCGCCGCCACCAACAAAGCCACCA
-   16. Aligned portion (sequence) of target   CGGACAGCGCCGCCACCAACAAAGCCATCA
-   17. Midline indicating positions of        ||||||||||||||||||||||||||| || 
-       matches within the alignment
-
-------
-       
-.. class:: infomark
-
-Note that this form of output does not contain an alignment identity value. However, it can be computed by dividing the number of identical bases within the alignment (Field 13) by the alignment length (Field 14) using the *Text Manipulation->Compute* tool.
-
-
-
-</help>
-</tool>
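As the note above says, percent identity is not emitted directly but follows from Fields 13 and 14. A minimal sketch of that computation (file names are illustrative)::

    # Append a % identity column: identical bases (field 13, 0-based index 12)
    # divided by alignment length (field 14, 0-based index 13).
    with open('parsed_blast.tabular') as src, open('with_identity.tabular', 'w') as out:
        for line in src:
            fields = line.rstrip('\n').split('\t')
            identity = 100.0 * int(fields[12]) / int(fields[13])
            out.write('%s\t%.2f\n' % (line.rstrip('\n'), identity))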
--- a/tools/metag_tools/rmap_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,88 +0,0 @@
-#!/usr/bin/env python
-
-import os, sys, tempfile
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-    
-
-def __main__():
-    
-    # I/O
-    target_path = sys.argv[1]
-    infile = sys.argv[2]
-    read_len = sys.argv[3]              # -w
-    align_len = sys.argv[4]             # -h
-    mismatch = sys.argv[5]              # -m
-    output_file = sys.argv[6]
-    
-    # first guess the read length
-    guess_read_len = 0
-    seq = ''
-    for i, line in enumerate(open(infile)):
-        line = line.rstrip('\r\n')
-        if line.startswith('>'):
-            if seq:
-                guess_read_len = len(seq)
-                break
-        else:
-            seq += line
-            
-    try: 
-        test = int(read_len)
-        if test == 0:
-            read_len = str(guess_read_len)
-        else:
-            assert test >= 20 and test <= 64
-    except:
-        stop_err('Invalid value for read length. Must be between 20 and 64.')
-    
-    try:
-        int(align_len)    
-    except:
-        stop_err('Invalid value for minimal length of a hit.')
-    
-    try:
-        int(mismatch)
-        #assert test >= 0 and test <= int(0.1*int(read_len))
-    except:
-        stop_err('Invalid value for mismatch numbers in an alignment.')
-    
-    all_files = []
-    if os.path.isdir(target_path):
-        
-        # check target genome
-        fa_files = os.listdir(target_path)
-            
-        for file in fa_files:
-            file = "%s/%s" % ( target_path, file )
-            file = os.path.normpath(file)
-            all_files.append(file)
-    else:
-        stop_err("No sequences for %s are available for search, please report this error." %(target_path))
-   
-    for detail_file_path in all_files:
-        output_tempfile = tempfile.NamedTemporaryFile().name
-        command = "rmap -h %s -w %s -m %s -c %s %s -o %s 2>&1" % ( align_len, read_len, mismatch, detail_file_path, infile, output_tempfile )
-        #print command
-        try:
-            os.system( command )
-        except Exception, e:
-            stop_err( str( e ) )
-
-        try:
-            os.system( 'cat %s >> %s' % ( output_tempfile, output_file ) )
-        except Exception, e:
-            stop_err( str( e ) )
-        
-        try:
-            os.remove( output_tempfile )
-        except:
-            pass
-        
-        
-if __name__ == '__main__': __main__()
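The wrapper above shells out with os.system(), which hides rmap's exit status and is sensitive to shell quoting. A minimal sketch of a safer invocation under the same flags (the function name and error handling are assumptions)::

    import subprocess

    def run_rmap(align_len, read_len, mismatch, chrom_fasta, reads_fasta, out_path):
        # Argument-list form avoids shell quoting issues and surfaces the
        # real exit status, which os.system() above effectively ignores.
        cmd = ['rmap', '-h', str(align_len), '-w', str(read_len),
               '-m', str(mismatch), '-c', chrom_fasta, reads_fasta, '-o', out_path]
        status = subprocess.call(cmd)
        if status != 0:
            raise RuntimeError('rmap exited with status %d' % status)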
--- a/tools/metag_tools/rmap_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,84 +0,0 @@
-<tool id="rmap_wrapper" name="RMAP" version="1.0.0">
-    <description>for Solexa Short Reads Alignment</description>
-    <command interpreter="python">
-    #if $trim.choice=="No": #rmap_wrapper.py $database $input_seq 0 $align_len $mismatch $output1
-    #else: #rmap_wrapper.py $database $input_seq $trim.read_len $align_len $mismatch $output1
-    #end if
-    </command>
-    <inputs>
-        <param name="database" type="select" display="radio" label="Target database">
-			<options from_file="faseq.loc">
-			  <column name="name" index="0"/>
-			  <column name="value" index="0"/>
-			</options>
-        </param>
-        <param name="input_seq" type="data" format="fasta" label="Sequence file"/>
-        <param name="align_len" type="integer" size="15" value="11" label="Minimal length of a hit (-h)" help="seed" />
-        <param name="mismatch" type="select" label="Number of mismatches allowed (-m)">
-            <option value="0">0</option>
-            <option value="1">1</option>
-            <option value="3">3</option>
-            <option value="5">5</option>
-        </param>
-        <conditional name="trim">
-            <param name="choice" type="select" label="To trim the reads">
-                <option value="No">No</option>
-                <option value="Yes">Yes</option>
-            </param>
-            <when value="No">
-            </when>
-            <when value="Yes">
-                <param name="read_len" type="integer" size="15" value="36" label="Read length (-w)"/> 
-            </when>
-        </conditional>
-    </inputs>
-    <outputs>
-        <data name="output1" format="bed"/>
-    </outputs>
-    <requirements>
-      <requirement type="binary">rmap</requirement>
-    </requirements>
-    <!--     
-    <tests>
-        <test>
-            <param name="database" value="/galaxy/data/faseq/test" />
-            <param name="input_seq" value="rmap_wrapper_test1.fasta" ftype="fasta"/>
-            <param name="read_len" value="36" />
-            <param name="align_len" value="36" />
-            <param name="mismatch" value="3" />
-            <output name="output1" file="rmap_wrapper_test1.bed"/> 
-        </test>
-    </tests>
-     -->
-    <help>
-    
-.. class:: warningmark
-
- RMAP was developed for **Solexa** reads. 
-
-.. class:: infomark
-
-**TIP**. The tool will guess the length of the reads; however, if you choose to trim the reads, the *Read length* must be between 20 and 64. Reads longer than the specified value will be trimmed at the 3' end.
-
------
-
-**What it does**
-
-This tool runs **rmap** (for more information, please see the reference below), mapping Solexa reads onto a genome build.   
-
------
-
-**Parameters**
-
-- *Minimal Length of a Hit* (**-h**) : this is the seed length or the minimal exact match length   
-- *Number of Mismatches Allowed* (**-m**) : the maximal number of mismatches allowed in an alignment 
-- *Read Length* (**-w**) : maximal length of the reads; reads longer than the threshold will be truncated at the 3' end.
-
------
-
-**Reference**
-
- **RMAP** was developed by Dr. Andrew D. Smith and Dr. Zhenyu Xuan at the Cold Spring Harbor Laboratory. Please see http://rulai.cshl.edu/rmap/
-
-    </help>
-</tool>
--- a/tools/metag_tools/rmapq_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,100 +0,0 @@
-#!/usr/bin/env python
-
-import os, sys, tempfile
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-    
-
-def __main__():
-    
-    # I/O
-    target_path = sys.argv[1]
-    infile = sys.argv[2]
-    scorefile = sys.argv[3]
-    high_score = sys.argv[4]            # -q
-    high_len = sys.argv[5]              # -M
-    read_len = sys.argv[6]              # -w
-    align_len = sys.argv[7]             # -h
-    mismatch = sys.argv[8]              # -m
-    output_file = sys.argv[9]
-    
-    try: 
-        float(high_score)
-    except:
-        stop_err('Invalid value for minimal quality score.')
-
-    try:
-        int(high_len)
-    except:
-        stop_err('Invalid value for minimal high quality bases.')
-            
-    # first guess the read length
-    guess_read_len = 0
-    seq = ''
-    for i, line in enumerate(open(infile)):
-        line = line.rstrip('\r\n')
-        if line.startswith('>'):
-            if seq:
-                guess_read_len = len(seq)
-                break
-        else:
-            seq += line
-            
-    try: 
-        test = int(read_len)
-        if test == 0:
-            read_len = str(guess_read_len)
-        else:
-            assert test >= 20 and test <= 64
-    except:
-        stop_err('Invalid value for read length. Must be between 20 and 64.')
-
-    
-    try:
-        int(align_len)    
-    except:
-        stop_err('Invalid value for minimal length of a hit.')
-    
-    try:
-        int(mismatch)
-    except:
-        stop_err('Invalid value for mismatch numbers in an alignment.')
-    
-    all_files = []
-    if os.path.isdir(target_path):
-        # check target genome
-        fa_files = os.listdir(target_path)
-            
-        for file in fa_files:
-            file = "%s/%s" % ( target_path, file )
-            file = os.path.normpath(file)
-            all_files.append(file)
-    else:
-        stop_err("No sequences for %s are available for search, please report this error." %(target_path))
-   
-    for detail_file_path in all_files:
-        output_tempfile = tempfile.NamedTemporaryFile().name
-        command = "rmapq -q %s -M %s -h %s -w %s -m %s -Q %s -c %s %s -o %s 2>&1" % ( high_score, high_len, align_len, read_len, mismatch, scorefile, detail_file_path, infile, output_tempfile )
-        #print command
-        try:
-            os.system( command )
-        except Exception, e:
-            stop_err( str( e ) )
-
-        try:
-            assert os.system( 'cat %s >> %s' % ( output_tempfile, output_file ) ) == 0
-        except Exception, e:
-            stop_err( str( e ) )
-        
-        try:
-            os.remove( output_tempfile )
-        except:
-            pass
-
-            
-if __name__ == '__main__': __main__()
--- a/tools/metag_tools/rmapq_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-<tool id="rmapq_wrapper" name="RMAPQ" version="1.0.0">
-    <description>for Solexa Short Reads Alignment with Quality Scores</description>
-    <command interpreter="python">
-    #if $trim.choice=="No": #rmapq_wrapper.py $database $input_seq $input_score $high_score $high_len 0 $align_len $mismatch $output1
-    #else: #rmapq_wrapper.py $database $input_seq $input_score $high_score $high_len $trim.read_len $align_len $mismatch $output1
-    #end if
-    </command>
-    <inputs>
-        <param name="database" type="select" display="radio" label="Target database">
-			<options from_file="faseq.loc">
-			  <column name="name" index="0"/>
-			  <column name="value" index="0"/>
-			</options>
-        </param>
-        <param name="input_seq" type="data" format="fasta" label="Sequence file"/>
-        <param name="input_score" type="data" format="qualsolexa" label="Quality score file"/>
-        <param name="high_score" type="float" size="15" value="40" label="Minimum score for high-quality base (-q)"/>
-        <param name="high_len" type="integer" size="15" value="36" label="Minimal high-quality bases (-M)"/>
-        <param name="align_len" type="integer" size="15" value="11" label="Minimal length of a hit (-h)" help="seed"/>
-        <param name="mismatch" type="select" label="Number of mismatches allowed (-m)">
-            <option value="0">0</option>
-            <option value="1">1</option>
-            <option value="3">3</option>
-            <option value="5">5</option>
-        </param>
-        <conditional name="trim">
-            <param name="choice" type="select" label="To trim the reads">
-                <option value="No">No</option>
-                <option value="Yes">Yes</option>
-            </param>
-            <when value="No">
-            </when>
-            <when value="Yes">
-                <param name="read_len" type="integer" size="15" value="36" label="Read length (-w)" /> 
-            </when>
-        </conditional>
-    </inputs>
-    <outputs>
-        <data name="output1" format="bed"/>
-    </outputs>
-    <requirements>
-      <requirement type="binary">rmapq</requirement>
-    </requirements>
-    <!-- 
-    <tests>
-        <test>
-            <param name="database" value="/galaxy/data/faseq/test" />
-            <param name="input_seq" value="rmapq_wrapper_test1.fasta" ftype="fasta"/>
-            <param name="input_score" value="rmapq_wrapper_test1.qual" ftype="qualsolexa" />
-            <param name="high_score" value="40" />
-            <param name="high_len" value="36" />
-            <param name="read_len" value="36" />
-            <param name="align_len" value="36" />
-            <param name="mismatch" value="3" />
-            <output name="output1" file="rmapq_wrapper_test1.bed"/> 
-        </test>
-    </tests>
-    -->
-    <help>
-    
-.. class:: warningmark
-
- RMAPQ was developed for **Solexa** reads. 
-
-.. class:: infomark
-
-**TIP**. The tool will guess the length of the reads; however, if you choose to trim the reads, the *Maximal Length of the Reads* must be between 20 and 64. Reads longer than the specified value will be trimmed at the 3' end.
-
------
-
-**What it does**
-
-This tool runs **rmapq** (for more information, please see the reference below), mapping Solexa reads onto a genome build while taking base quality scores into account.
-
------
-
-**Parameters**
-
-- *Minimal High-quality Bases* (**-M**): the minimal number of high-quality bases
-- *Minimum Score for High-quality Base* (**-q**) : the minimal quality score 
-- *Minimal Length of a Hit* (**-h**) : the minimal length of an exact match or seed  
-- *Number of Mismatches Allowed* (**-m**) : the maximal number of mismatches allowed in an alignment
-- *Read Length* (**-w**) : maximal length of the reads; reads longer than the threshold will be truncated at the 3' end.
-
------
-
-**Reference**
-
- **RMAP** was developed by Dr. Andrew D. Smith and Dr. Zhenyu Xuan at the Cold Spring Harbor Laboratory. Please see http://rulai.cshl.edu/rmap/
-
-    </help>
-</tool>
--- a/tools/metag_tools/short_reads_figure_high_quality_length.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,165 +0,0 @@
-#!/usr/bin/env python
-
-import os, sys, math, tempfile, zipfile, re
-from rpy import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-
-def unzip( filename ):
-    zip_file = zipfile.ZipFile( filename, 'r' )
-    tmpfilename = tempfile.NamedTemporaryFile().name
-    # open the temp file once instead of reopening it for every archive member
-    tmpfile = open( tmpfilename, 'a' )
-    for name in zip_file.namelist():
-        tmpfile.write( zip_file.read( name ) )
-    tmpfile.close()
-    zip_file.close()
-    return tmpfilename
-
-def __main__():
-    infile_score_name = sys.argv[1].strip()
-    outfile_R_name = sys.argv[2].strip()
-    
-    try:
-        score_threshold = int( sys.argv[3].strip() )
-    except:
-        stop_err( 'Threshold for quality score must be numerical.' )
-
-    infile_is_zipped = False
-    if zipfile.is_zipfile( infile_score_name ):
-        infile_is_zipped = True
-        infile_name = unzip( infile_score_name )
-    else:
-        infile_name = infile_score_name
-
-    # detect whether it's tabular or fasta format
-    seq_method = None
-    data_type = None
-    for i, line in enumerate( file( infile_name ) ):
-        line = line.rstrip( '\r\n' )
-        if not line or line.startswith( '#' ):
-            continue
-        if data_type == None:
-            if line.startswith( '>' ):
-                data_type = 'fasta'
-                continue
-            elif len( line.split( '\t' ) ) > 0:
-                fields = line.split()
-                for score in fields:
-                    try:
-                        int( score )
-                        data_type = 'tabular'
-                        seq_method = 'solexa'
-                        break
-                    except:
-                        break
-        elif data_type == 'fasta':
-            fields = line.split()
-            for score in fields:
-                try: 
-                    int( score )
-                    seq_method = '454'
-                    break
-                except:
-                    break
-        if i == 100:
-            break
-
-    if data_type is None:
-        stop_err( 'This tool can only use fasta data or tabular data.' ) 
-    if seq_method is None:
-        stop_err( 'Invalid data for fasta format.')
- 
-    cont_high_quality = []
-    invalid_lines = 0
-    invalid_scores = 0                       
-    if seq_method == 'solexa':
-        for i, line in enumerate( open( infile_name ) ):
-            line = line.rstrip( '\r\n' )
-            if not line or line.startswith( '#' ):
-                continue
-            locs = line.split( '\t' )
-            for j, base in enumerate( locs ):
-                nuc_errors = base.split()
-                try:
-                    nuc_errors[0] = int( nuc_errors[0] )
-                    nuc_errors[1] = int( nuc_errors[1] )
-                    nuc_errors[2] = int( nuc_errors[2] )
-                    nuc_errors[3] = int( nuc_errors[3] )
-                    big = max( nuc_errors )
-                except:
-                    invalid_scores += 1
-                    big = 0
-                if j == 0:
-                    cont_high_quality.append(1)
-                else:
-                    if big >= score_threshold:
-                        cont_high_quality[ len( cont_high_quality ) - 1 ] += 1
-                    else:
-                        cont_high_quality.append(1)
-    else: # seq_method == '454'
-        tmp_score = ''
-        for i, line in enumerate( open( infile_name ) ):
-            line = line.rstrip( '\r\n' )
-            if not line or line.startswith( '#' ):
-                continue
-            if line.startswith( '>' ):
-                if len( tmp_score ) > 0:
-                    locs = tmp_score.split()
-                    for j, base in enumerate( locs ):
-                        try:
-                            base = int( base )
-                        except:
-                            invalid_scores += 1
-                            base = 0
-                        if j == 0:
-                            cont_high_quality.append(1)
-                        else:
-                            if base >= score_threshold:
-                                cont_high_quality[ len( cont_high_quality ) - 1 ] += 1
-                            else:
-                                cont_high_quality.append(1)
-                tmp_score = ''
-            else:
-                tmp_score = "%s %s" % ( tmp_score, line )
-        if len( tmp_score ) > 0:
-            locs = tmp_score.split()
-            for j, base in enumerate( locs ):
-                try:
-                    base = int( base )
-                except:
-                    invalid_scores += 1
-                    base = 0
-                if j == 0:
-                    cont_high_quality.append(1)
-                else:
-                    if base >= score_threshold:
-                        cont_high_quality[ len( cont_high_quality ) - 1 ] += 1
-                    else:
-                        cont_high_quality.append(1)
-
-    # generate pdf figures
-    cont_high_quality = array ( cont_high_quality )
-    outfile_R_pdf = outfile_R_name 
-    r.pdf( outfile_R_pdf )
-    title = "Histogram of continuous high quality scores"
-    xlim_range = [ 1, max( cont_high_quality ) ]
-    nclass = max( cont_high_quality )
-    if nclass > 100:
-        nclass = 100
-    r.hist( cont_high_quality, probability=True, xlab="Continuous High Quality Score length (bp)", ylab="Frequency (%)", xlim=xlim_range, main=title, nclass=nclass)
-    r.dev_off()    
-
-    if infile_is_zipped and os.path.exists( infile_name ):
-        # Need to delete temporary file created when we unzipped the infile archive
-        os.remove( infile_name )
-
-    if invalid_lines > 0: 
-        print 'Skipped %d invalid lines. ' % invalid_lines
-    if invalid_scores > 0:
-        print 'Skipped %d invalid scores. ' % invalid_scores
-
-    r.quit( save="no" )
-
-if __name__=="__main__":__main__()
\ No newline at end of file
--- a/tools/metag_tools/short_reads_figure_high_quality_length.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-<tool id="hist_high_quality_score" name="Histogram">
-<description> of high quality score reads </description>
-
-<command interpreter="python">short_reads_figure_high_quality_length.py $input1 $output1 $input2</command>
-
-<inputs>
-<page>
-    <param name="input1" type="data" format="qualsolexa,qual454,txtseq.zip" label="Quality score file" help="No dataset? Read tip below"/>
-    <param name="input2" type="integer" size="5" value="20" label="Quality score threshold" />
-</page>
-</inputs>
-<outputs>
-  	<data name="output1" format="pdf" />
-</outputs>
-<requirements>
-	<requirement type="python-module">rpy</requirement>
-</requirements>
-<tests>
-	<test>
-		<param name="input1" value="solexa.qual" ftype="qualsolexa" />
-		<param name="input2" value="5" />
-  		<output name="output1" file="solexa_high_quality_hist.pdf" ftype="pdf"/>
-	</test>
-	<test>
-		<param name="input1" value="454.qual" ftype="qual454" />
-		<param name="input2" value="5" />
-		<output name="output1" file="454_high_quality_hist.pdf" ftype="pdf"/>
-	</test>
-</tests>
-
-<help>
-
-.. class:: warningmark
-
-To use this tool, your dataset needs to be in the *Quality Score* format. Click the pencil icon next to your dataset to set the datatype to *Quality Score* (see below for examples).
-
------
-
-**What it does**
-
-This tool takes Quality Files generated by Roche (454), Illumina (Solexa), or ABI SOLiD machines and builds a histogram of the lengths of contiguous high quality segments within reads.
-
------
-
-**Examples of Quality Data**
-
-Roche (454) or ABI SOLiD data::
-
-	&gt;seq1
-	23 33 34 25 28 28 28 32 23 34 27 4 28 28 31 21 28
-
-Illumina (Solexa) data::
-
- 	-40 -40 40 -40	 -40 -40 -40 40	 
- 
------
-
-**Note**
-
-- Quality score data::
-
-	&gt;seq1
-	23 33 34 25 28 28 28 32 23 34 27 4 28 28 31 21 28
-
-- If the threshold is set to 20:
-
-  - the low quality score of 4 in the middle separates two segments of lengths 11 and 5.
-
-  - The histogram will be built based on the numbers (11, 5).
-	 	
-- For Illumina (Solexa) data, only the maximum of the 4 values will be used.
-
-
-</help>
-</tool>
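The note above describes the core computation: split each read's scores on values below the threshold and histogram the lengths of the surviving runs. A minimal self-contained sketch of that rule (not the deleted script itself, which also handles zipped input and Solexa 4-tuples)::

    def high_quality_run_lengths(scores, threshold):
        # Lengths of maximal runs of scores at or above the threshold.
        runs, current = [], 0
        for s in scores:
            if s >= threshold:
                current += 1
            elif current:
                runs.append(current)
                current = 0
        if current:
            runs.append(current)
        return runs

    scores = [23, 33, 34, 25, 28, 28, 28, 32, 23, 34, 27, 4, 28, 28, 31, 21, 28]
    print(high_quality_run_lengths(scores, 20))   # -> [11, 5], as in the note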
--- a/tools/metag_tools/short_reads_figure_score.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,248 +0,0 @@
-#!/usr/bin/env python
-"""
-boxplot:
-- box: first quartile and third quartile
-- line inside the box: median
-- outlier: 1.5 IQR higher than the third quartile or 1.5 IQR lower than the first quartile
-           IQR = third quartile - first quartile
-- The smallest/largest value that is not an outlier is connected to the box with a horizontal line.
-"""
-
-import os, sys, math, tempfile, re
-from rpy import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-
-def merge_to_20_datapoints( score ):
-    number_of_points = 20
-    read_length = len( score )
-    step = int( math.floor( ( read_length - 1 ) * 1.0 / number_of_points ) )
-    scores = []
-    point = 1
-    point_sum = 0
-    step_average = 0
-    score_points = 0
-    
-    for i in xrange( 1, read_length ):
-        if i < ( point * step ):
-            point_sum += int( score[i] )
-            step_average += 1
-        else:
-            point_avg = point_sum * 1.0 / step_average
-            scores.append( point_avg )
-            point += 1
-            point_sum = 0
-            step_average = 0                       
-    if step_average > 0:
-        point_avg = point_sum * 1.0 / step_average
-        scores.append( point_avg )
-    if len( scores ) > number_of_points:
-        last_avg = 0
-        for j in xrange( number_of_points - 1, len( scores ) ):
-            last_avg += scores[j]
-        last_avg = last_avg / ( len(scores) - number_of_points + 1 )
-    else:    
-        last_avg = scores[-1]
-    score_points = []
-    for k in range( number_of_points - 1 ):
-        score_points.append( scores[k] )
-    score_points.append( last_avg )
-    return score_points
-
-def __main__():
-
-    invalid_lines = 0
-
-    infile_score_name = sys.argv[1].strip()
-    outfile_R_name = sys.argv[2].strip()
-
-    infile_name = infile_score_name
-
-    # Determine tabular or fasta format within the first 100 lines
-    seq_method = None
-    data_type = None
-    for i, line in enumerate( file( infile_name ) ):
-        line = line.rstrip( '\r\n' )
-        if not line or line.startswith( '#' ):
-            continue
-        if data_type == None:
-            if line.startswith( '>' ):
-                data_type = 'fasta'
-                continue
-            elif len( line.split( '\t' ) ) > 0:
-                fields = line.split()
-                for score in fields:
-                    try:
-                        int( score )
-                        data_type = 'tabular'
-                        seq_method = 'solexa'
-                        break
-                    except:
-                        break
-        elif data_type == 'fasta':
-            fields = line.split()
-            for score in fields:
-                try: 
-                    int( score )
-                    seq_method = '454'
-                    break
-                except:
-                    break
-        if i == 100:
-            break
-
-    if data_type is None:
-        stop_err( 'This tool can only use fasta data or tabular data.' ) 
-    if seq_method is None:
-        stop_err( 'Invalid data for fasta format.')
-
-    # Determine fixed length or variable length within the first 100 lines
-    read_length = 0
-    variable_length = False
-    if seq_method == 'solexa':
-        for i, line in enumerate( file( infile_name ) ):
-            line = line.rstrip( '\r\n' )
-            if not line or line.startswith( '#' ):
-                continue
-            scores = line.split('\t')
-            if read_length == 0:
-                read_length = len( scores )
-            if read_length != len( scores ):
-                variable_length = True
-                break
-            if i == 100:
-                break
-    elif seq_method == '454':
-        score = ''
-        for i, line in enumerate( file( infile_name ) ):
-            line = line.rstrip( '\r\n' )
-            if not line or line.startswith( '#' ):
-                continue
-            if line.startswith( '>' ):
-                if len( score ) > 0:
-                    score = score.split()
-                    if read_length == 0:
-                        read_length = len( score )
-                    if read_length != len( score ):
-                        variable_length = True
-                        break
-                score = ''
-            else:
-                score = score + ' ' + line
-            if i == 100:
-                break
-
-    if variable_length:
-        number_of_points = 20
-    else:
-        number_of_points = read_length
-    read_length_threshold = 100 # minimal read length for 454 file
-    score_points = []   
-    score_matrix = []
-    invalid_scores = 0   
-
-    if seq_method == 'solexa':
-        for i, line in enumerate( open( infile_name ) ):
-            line = line.rstrip( '\r\n' )
-            if not line or line.startswith( '#' ):
-                continue
-            tmp_array = []
-            scores = line.split( '\t' )
-            for bases in scores:
-                nuc_errors = bases.split()
-                try:
-                    nuc_errors[0] = int( nuc_errors[0] )
-                    nuc_errors[1] = int( nuc_errors[1] )
-                    nuc_errors[2] = int( nuc_errors[2] )
-                    nuc_errors[3] = int( nuc_errors[3] )
-                    big = max( nuc_errors )
-                except:
-                    #print 'Invalid numbers in the file. Skipped.'
-                    invalid_scores += 1
-                    big = 0
-                tmp_array.append( big )                        
-            score_points.append( tmp_array )
-    elif seq_method == '454':
-        # skip the last fasta sequence
-        score = ''
-        for i, line in enumerate( open( infile_name ) ):
-            line = line.rstrip( '\r\n' )
-            if not line or line.startswith( '#' ):
-                continue
-            if line.startswith( '>' ):
-                if len( score ) > 0:
-                    score = ['0'] + score.split()
-                    read_length = len( score )
-                    tmp_array = []
-                    if not variable_length:
-                        score.pop(0)
-                        score_points.append( score )
-                        tmp_array = score
-                    elif read_length > read_length_threshold:
-                        score_points_tmp = merge_to_20_datapoints( score )
-                        score_points.append( score_points_tmp )
-                        tmp_array = score_points_tmp
-                score = ''
-            else:
-                score = "%s %s" % ( score, line )
-        if len( score ) > 0:
-            score = ['0'] + score.split()
-            read_length = len( score )
-            if not variable_length:
-                score.pop(0)
-                score_points.append( score )
-            elif read_length > read_length_threshold:
-                score_points_tmp = merge_to_20_datapoints( score )
-                score_points.append( score_points_tmp )
-                tmp_array = score_points_tmp
-
-    # reverse the matrix, for R
-    for i in range( number_of_points - 1 ):
-        tmp_array = []
-        for j in range( len( score_points ) ):
-            try:
-                tmp_array.append( int( score_points[j][i] ) )
-            except:
-                invalid_lines += 1
-        score_matrix.append( tmp_array )
-
-    # generate pdf figures
-    #outfile_R_pdf = outfile_R_name 
-    #r.pdf( outfile_R_pdf )
-    outfile_R_png = outfile_R_name
-    r.bitmap( outfile_R_png )
-    
-    title = "boxplot of quality scores"
-    empty_score_matrix_columns = 0
-    for i, subset in enumerate( score_matrix ):
-        if not subset:
-            empty_score_matrix_columns += 1
-            score_matrix[i] = [0]
-            
-    if not variable_length:
-        r.boxplot( score_matrix, xlab="location in read length", main=title )
-    else:
-        r.boxplot( score_matrix, xlab="position within read (% of total length)", xaxt="n", main=title )
-        x_old_range = []
-        x_new_range = []
-        step = read_length_threshold / number_of_points 
-        for i in xrange( 0, read_length_threshold, step ):
-            x_old_range.append( ( i / step ) )
-            x_new_range.append( i )
-        r.axis( 1, x_old_range, x_new_range )
-    r.dev_off()
-
-    if invalid_scores > 0:
-        print 'Skipped %d invalid scores. ' % invalid_scores
-    if invalid_lines > 0:
-        print 'Skipped %d invalid lines. ' % invalid_lines
-    if empty_score_matrix_columns > 0:
-        print '%d missing scores in score_matrix. ' % empty_score_matrix_columns
-
-    r.quit(save = "no")
-
-if __name__=="__main__":__main__()
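merge_to_20_datapoints() above reduces a variable-length score list to a fixed number of plot points by averaging windows. A minimal sketch of the same idea (it assumes len(scores) >= n_points, which the script guarantees via its 100 bp read-length threshold)::

    def downsample(scores, n_points=20):
        # Average the scores into n_points roughly equal-sized windows.
        size = len(scores) / float(n_points)
        points = []
        for i in range(n_points):
            chunk = scores[int(i * size):int((i + 1) * size)]
            points.append(sum(chunk) / float(len(chunk)))
        return points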
--- a/tools/metag_tools/short_reads_figure_score.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-<tool id="quality_score_distribution" name="Build base quality distribution" version="1.0.2">
-<description></description>
-
-<command interpreter="python">short_reads_figure_score.py $input1 $output1 </command>
-
-<inputs>
-<page>
-    <param name="input1" type="data" format="qualsolexa, qual454" label="Quality score file" help="No dataset? Read tip below"/>
-</page>
-</inputs>
-
-<outputs>
-  	<data name="output1" format="png" />
-</outputs> 
-<requirements>
-	<requirement type="python-module">rpy</requirement>
-</requirements>
-<tests>
-	<test>
-		<param name="input1" value="solexa.qual" ftype="qualsolexa" />
-  		<output name="output1" file="solexaScore.png" ftype="png" />
-	</test>
-	<test>
-		<param name="input1" value="454.qual" ftype="qual454" />
-		<output name="output1" file="454Score.png" ftype="png" />
-	</test>
-</tests>
-<help>
-
-.. class:: warningmark
-
-To use this tool, your dataset needs to be in the *Quality Score* format. Click the pencil icon next to your dataset to set the datatype to *Quality Score* (see below for examples).
-
------
-
-**What it does**
-
-This tool takes Quality Files generated by Roche (454), Illumina (Solexa), or ABI SOLiD machines and builds a graph showing the score distribution, like the one below. Such a graph allows you to perform an initial evaluation of data quality in a single pass.
-
------
-
-**Examples of Quality Data**
-
-Roche (454) or ABI SOLiD data::
-
-	&gt;seq1
-	23 33 34 25 28 28 28 32 23 34 27 4 28 28 31 21 28
-
-Illumina (Solexa) data::
-
- 	-40 -40 40 -40	 -40 -40 -40 40	 
- 
------
-
-**Output example**
-
-Quality scores are summarized as boxplot (Roche 454 FLX data):
-
-.. image:: ./static/images/short_reads_boxplot.png
-
-where the **X-axis** is the coordinate along the read and the **Y-axis** is the quality score, adjusted to comply with the Phred score metric. Units on the X-axis depend on whether your data comes from Roche (454) or Illumina (Solexa) and ABI SOLiD machines:
-
-  - For Roche (454) the X-axis (shown above) indicates **relative** position (in %) within reads, as this technology produces reads of different lengths;
-  - For Illumina (Solexa) and ABI SOLiD the X-axis shows **absolute** position in nucleotides within reads.
-  
-Every box on the plot shows the following values::
-
-       o     &lt;---- Outliers
-       o
-      -+-    &lt;---- Upper Extreme Value that is no more 
-       |           than box length away from the box   
-       |
-    +--+--+  &lt;---- Upper Quartile
-    |     |
-    +-----+  &lt;---- Median
-    |     |
-    +--+--+  &lt;---- Lower Quartile 
-       |
-       |
-      -+-    &lt;---- Lower Extreme Value that is no more
-                   than box length away from the box
-       o     &lt;---- Outlier
- 
- 
-     
-</help>
-</tool>
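The docstring of the deleted script defines outliers as values more than 1.5 * IQR beyond the quartiles. A minimal sketch of that rule with a crude quartile estimate (R's boxplot uses a more careful quantile method)::

    def outliers(values):
        # Flag values beyond 1.5 * IQR from the first/third quartile.
        s = sorted(values)
        q1, q3 = s[len(s) // 4], s[(3 * len(s)) // 4]
        iqr = q3 - q1
        return [v for v in values if v < q1 - 1.5 * iqr or v > q3 + 1.5 * iqr]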
--- a/tools/metag_tools/short_reads_trim_seq.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,234 +0,0 @@
-#!/usr/bin/env python
-"""
-trim reads based on the quality scores
-input: read file and quality score file
-output: trimmed read file
-"""
-
-import os, sys, math, tempfile, re
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-
-def append_to_outfile( outfile_name, seq_title, segments ):
-    segments = segments.split( ',' )
-    if len( segments ) > 1:
-        outfile = open( outfile_name, 'a' )
-        for i in range( len( segments ) ):
-            outfile.write( "%s_%d\n%s\n" % ( seq_title, i, segments[i] ) )
-        outfile.close()
-    elif segments[0]:
-        outfile = open( outfile_name, 'a' )
-        outfile.write( "%s\n%s\n" % ( seq_title, segments[0] ) )
-        outfile.close()
-
-def trim_seq( seq, score, arg, trim_score, threshold ):
-    seq_method = '454'
-    trim_pos = 0
-    # trim after a certain position
-    if arg.isdigit():
-        keep_homopolymers = False
-        trim_pos = int( arg )    
-        if trim_pos > 0 and trim_pos < len( seq ):
-            seq = seq[0:trim_pos]
-    else:
-        keep_homopolymers = arg=='yes'
-        
-    new_trim_seq = ''
-    max_segment = 0
-
-    for i in range( len( seq ) ):
-        if i >= len( score ):
-            score.append(-1)   
-        if int( score[i] ) >= trim_score:
-            pass_nuc = seq[ i:( i + 1 ) ]
-        else:
-            if keep_homopolymers and ( (i == 0 ) or ( seq[ i:( i + 1 ) ].lower() == seq[ ( i - 1 ):i ].lower() ) ):
-                pass_nuc = seq[ i:( i + 1 ) ]
-            else:
-                pass_nuc = ' '    
-        new_trim_seq = '%s%s' % ( new_trim_seq, pass_nuc )
-        # find the max substrings
-        segments = new_trim_seq.split()
-        max_segment = ''
-        len_max_segment = 0
-        if threshold == 0:
-            for seg in segments:
-                if len_max_segment < len( seg ):
-                    max_segment = '%s,' % seg
-                    len_max_segment = len( seg )
-                elif len_max_segment == len( seg ):
-                    max_segment = '%s%s,' % ( max_segment, seg )
-        else:
-            for seg in segments:
-                if len( seg ) >= threshold:
-                    max_segment = '%s%s,' % ( max_segment, seg )
-    return max_segment[ 0:-1 ]
-
-def __main__():
-    
-    try:
-        threshold_trim = int( sys.argv[1].strip() )
-    except:
-        stop_err( "Minimal quality score must be numeric." )
-    try:
-        threshold_report = int( sys.argv[2].strip() )
-    except:
-        stop_err( "Minimal length of trimmed reads must be numeric." )
-    outfile_seq_name = sys.argv[3].strip()
-    infile_seq_name = sys.argv[4].strip()
-    infile_score_name = sys.argv[5].strip()
-    arg = sys.argv[6].strip()
-
-    seq_infile_name = infile_seq_name
-    score_infile_name = infile_score_name
-    
-
-    # Determine quality score format (tabular or fasta) within the first 100 lines
-    seq_method = None
-    data_type = None
-    for i, line in enumerate( file( score_infile_name ) ):
-        line = line.rstrip( '\r\n' )
-        if not line or line.startswith( '#' ):
-            continue
-        if data_type == None:
-            if line.startswith( '>' ):
-                data_type = 'fasta'
-                continue
-            elif len( line.split( '\t' ) ) > 0:
-                fields = line.split()
-                for score in fields:
-                    try:
-                        int( score )
-                        data_type = 'tabular'
-                        seq_method = 'solexa'
-                        break
-                    except:
-                        break
-        elif data_type == 'fasta':
-            fields = line.split()
-            for score in fields:
-                try: 
-                    int( score )
-                    seq_method = '454'
-                    break
-                except:
-                    break
-        if i == 100:
-            break
-
-    if data_type is None:
-        stop_err( 'This tool can only use fasta data or tabular data.' ) 
-    if seq_method is None:
-        stop_err( 'Invalid data for fasta format.')
-    
-    if os.path.exists( seq_infile_name ) and os.path.exists( score_infile_name ):
-        seq = None
-        score = None
-        score_found = False
-        
-        score_file = open( score_infile_name, 'r' )
-
-        for i, line in enumerate( open( seq_infile_name ) ):
-            line = line.rstrip( '\r\n' )
-            if not line or line.startswith( '#' ):
-                continue
-            if line.startswith( '>' ):
-                if seq:
-                    scores = []
-                    if data_type == 'fasta':
-                        score = None
-                        score_found = False
-                        score_line = 'start'
-                        while not score_found and score_line:
-                            score_line = score_file.readline().rstrip( '\r\n' )
-                            if not score_line or score_line.startswith( '#' ):
-                                continue
-                            if score_line.startswith( '>' ):
-                                if score:
-                                    scores = score.split()
-                                    score_found = True    
-                                score = None
-                            else:
-                                for val in score_line.split():
-                                    try:
-                                        int( val ) 
-                                    except:
-                                        score_file.close()
-                                        stop_err( "Non-numerical value '%s' in score file." % val )
-                                if not score:
-                                    score = score_line
-                                else:
-                                    score = '%s %s' % ( score, score_line )                                        
-                    elif data_type == 'tabular':
-                        score = score_file.readline().rstrip('\r\n')
-                        loc = score.split( '\t' )
-                        for base in loc:
-                            nuc_error = base.split()
-                            try:
-                                nuc_error[0] = int( nuc_error[0] )
-                                nuc_error[1] = int( nuc_error[1] )
-                                nuc_error[2] = int( nuc_error[2] )
-                                nuc_error[3] = int( nuc_error[3] )
-                                big = max( nuc_error )
-                            except:
-                                score_file.close()
-                                stop_err( "Invalid characters in line %d: '%s'" % ( i, line ) )
-                            scores.append( big )
-                    if scores:
-                        new_trim_seq_segments = trim_seq( seq, scores, arg, threshold_trim, threshold_report )
-                        append_to_outfile( outfile_seq_name, seq_title, new_trim_seq_segments )  
-                                
-                seq_title = line
-                seq = None
-            else:
-                if not seq:
-                    seq = line
-                else:
-                    seq = "%s%s" % ( seq, line )
-        if seq:
-            scores = []
-            if data_type == 'fasta':
-                score = None
-                while score_line:
-                    score_line = score_file.readline().rstrip( '\r\n' )
-                    if not score_line or score_line.startswith( '#' ) or score_line.startswith( '>' ):
-                        continue
-                    for val in score_line.split():
-                        try:
-                            int( val )
-                        except:
-                            score_file.close()
-                            stop_err( "Non-numerical value '%s' in score file." % val )
-                    if not score:
-                        score = score_line
-                    else:
-                        score = "%s %s" % ( score, score_line ) 
-                if score: 
-                    scores = score.split()
-            elif data_type == 'tabular':
-                score = score_file.readline().rstrip('\r\n')
-                loc = score.split( '\t' )
-                for base in loc:
-                    nuc_error = base.split()
-                    try:
-                        nuc_error[0] = int( nuc_error[0] )
-                        nuc_error[1] = int( nuc_error[1] )
-                        nuc_error[2] = int( nuc_error[2] )
-                        nuc_error[3] = int( nuc_error[3] )
-                        big = max( nuc_error )
-                    except:
-                        score_file.close()
-                        stop_err( "Invalid characters in line %d: '%s'" % ( i, line ) )
-                    scores.append( big )
-            if scores:
-                new_trim_seq_segments = trim_seq( seq, scores, arg, threshold_trim, threshold_report )
-                append_to_outfile( outfile_seq_name, seq_title, new_trim_seq_segments )  
-        score_file.close()
-    else:
-        stop_err( "Cannot locate sequence file '%s'or score file '%s'." % ( seq_infile_name, score_infile_name ) )    
-
-if __name__ == "__main__": __main__()
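Stripped of its file handling, the trimming rule in trim_seq() masks bases whose score falls under the cutoff and keeps the remaining segments above a minimal length. A minimal sketch of that rule (homopolymer handling and the longest-segment mode are omitted)::

    def high_quality_segments(seq, scores, cutoff=20, min_len=100):
        # Replace low-quality bases with spaces, then split on them.
        masked = ''.join(b if s >= cutoff else ' ' for b, s in zip(seq, scores))
        return [seg for seg in masked.split() if len(seg) >= min_len]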
--- a/tools/metag_tools/short_reads_trim_seq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,93 +0,0 @@
-<tool id="trim_reads" name="Select high quality segments" version="1.0.0">
-<description></description>
-
-<command interpreter="python">
- short_reads_trim_seq.py $trim $length $output1 $input1 $input2 $sequencing_method_choice.input3
-</command>
-<inputs>
-<page>
-    <param name="input1" type="data" format="fasta" label="Reads" />
-    <param name="input2" type="data" format="qualsolexa,qual454" label="Quality scores" />
-	<param name="trim" type="integer" size="5" value="20" label="Minimal quality score" help="bases scoring below this value will trigger splitting"/>
-    <param name="length" type="integer" size="5" value="100" label="Minimal length of contiguous segment" help="report all high quality segments above this length. Setting this option to '0' will cause the program to return a single longest run of high quality bases per read" />
-    <conditional name="sequencing_method_choice">
-        <param name="sequencer" type="select" label="Select technology">
-            <option value="454">Roche (454) or ABI SOLiD</option>
-            <option value="Solexa">Illumina (Solexa)</option>
-        </param>
-        <when value="454">
-            <param name="input3" type="select" label="Low quality bases in homopolymers" help="if set to 'DO NOT trigger splitting' the program will not count low quality bases that are within or adjacent to homonucleotide runs.  This will significantly reduce fragmentation of 454 data">
-                <option value="yes">DO NOT trigger splitting </option>
-                <option value="no">trigger splitting</option>
-            </param>
-        </when>
-        <when value="Solexa">
-            <param name="input3" type="integer" size="5" value="0" label="Restrict length of each read to" help="('0' = do not trim) The quality of Solexa reads drops towards the end. This option allows selecting the specified number of nucleotides from the beginning and then running the tool." />
-        </when> 
-    </conditional>
-</page>
-</inputs>
-
-<outputs>
-    <data name="output1" format="fasta" />
-</outputs>
-
-<tests>
-	<test>
-		<param name="sequencer" value="454" />
-		<param name="input1" value="454.fasta" ftype="fasta" />
-		<param name="input2" value="454.qual" ftype="qual454" />
-		<param name="input3" value="no" />
-		<param name="trim" value="20" />
-		<param name="length" value="0" />
-		<output name="output1" file="short_reads_trim_seq_out1.fasta" />
-	</test>
-	<test>
-		<param name="sequencer" value="Solexa" />
-		<param name="input1" value="solexa.fasta" ftype="fasta" />
-		<param name="input2" value="solexa.qual" ftype="qualsolexa" />
-		<param name="input3" value="0" />
-		<param name="trim" value="20" />
-		<param name="length" value="0" />
-		<output name="output1" file="short_reads_trim_seq_out2.fasta" />
-	</test>
-</tests>
-
-<help>
-  
-.. class:: warningmark
-
-To use this tool, your dataset needs to be in the *Quality Score* format. Click the pencil icon next to your dataset to set the datatype to *Quality Score* (see below for examples).
- 
------
-
-**What it does**
-
-This tool finds high quality segments within sequencing reads generated by Roche (454), Illumina (Solexa), or ABI SOLiD machines.
-
------
-
-**Example**
-
-
-Suppose this is your sequencing read::
-  
-   5'---------*-------------*------**----3'
-   
-where **dashes** (-) are HIGH quality bases (above 20) and **asterisks** (*) are LOW quality bases (below 20). If the **Minimal length of contiguous segment** is set to **5** (of course, only for the purposes of this example), the tool will return::
-
-   5'---------
-               -------------
-                             -------
-
-You can see that the tool simply splits the read on low quality bases and then returns all segments longer than 5. **Note** that the output of this tool will likely contain a higher number of shorter sequences than the original input. If we set the **Minimal length of contiguous segment** to **0**, the tool will only return the single longest segment::
-
-               -------------
-               
-
-               
-
-
-
-</help>
-</tool>
--- a/tools/metag_tools/shrimp_color_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,116 +0,0 @@
-#!/usr/bin/env python
-
-"""
-SHRiMP wrapper : Color space
-"""
-
-import os, sys, tempfile, os.path, re
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-
-
-def __main__():
-    
-    # SHRiMP path
-    shrimp = 'rmapper-cs'
-    
-    # I/O
-    input_target_file = sys.argv[1]                  # fasta
-    input_query_file = sys.argv[2]
-    shrimp_outfile    = sys.argv[3]                # shrimp output
-            
-    # SHRiMP parameters
-    spaced_seed = '1111001111'
-    seed_matches_per_window = '2'
-    seed_hit_taboo_length = '4'
-    seed_generation_taboo_length = '0'
-    seed_window_length = '115.0'
-    max_hits_per_read = '100'
-    max_read_length = '1000'
-    kmer = '-1'
-    sw_match_value = '100'
-    sw_mismatch_value = '-150'
-    sw_gap_open_ref = '-400'
-    sw_gap_open_query = '-400'
-    sw_gap_ext_ref = '-70'
-    sw_gap_ext_query = '-70'
-    sw_crossover_penalty = '-140'
-    sw_full_hit_threshold = '68.0'
-    sw_vector_hit_threshold = '60.0'
-    
-    # TODO: put the threshold on each of these parameters
-    if len(sys.argv) > 4:
-        
-        # validate directly: wrapping stop_err() in a bare try/except would
-        # catch its SystemExit and print a second, misleading error message
-        if re.match('^[01]+$', sys.argv[4]):
-            spaced_seed = sys.argv[4]
-        else:
-            stop_err('Spaced seed must be a combination of 1s and 0s.')
-        
-        seed_matches_per_window = sys.argv[5]
-        seed_hit_taboo_length = sys.argv[6]
-        seed_generation_taboo_length = sys.argv[7]
-        seed_window_length = sys.argv[8]
-        max_hits_per_read = sys.argv[9]
-        max_read_length = sys.argv[10]
-        kmer = sys.argv[11]
-        sw_match_value = sys.argv[12]
-        sw_mismatch_value = sys.argv[13]
-        sw_gap_open_ref = sys.argv[14]
-        sw_gap_open_query = sys.argv[15]
-        sw_gap_ext_ref = sys.argv[16]
-        sw_gap_ext_query = sys.argv[17]
-        sw_crossover_penalty = sys.argv[18]
-        sw_full_hit_threshold = sys.argv[19]
-        sw_vector_hit_threshold = sys.argv[20]
-        
-    # temp file for shrimp log file
-    shrimp_log = tempfile.NamedTemporaryFile().name
-    
-    # SHRiMP command
-    command = ' '.join([shrimp,  '-s', spaced_seed, '-n', seed_matches_per_window, '-t', seed_hit_taboo_length, '-9', seed_generation_taboo_length, '-w', seed_window_length, '-o', max_hits_per_read, '-r', max_read_length, '-d', kmer, '-m', sw_match_value, '-i', sw_mismatch_value, '-g', sw_gap_open_ref, '-q', sw_gap_open_query, '-e', sw_gap_ext_ref, '-f', sw_gap_ext_query, '-x', sw_crossover_penalty, '-h', sw_full_hit_threshold, '-v', sw_vector_hit_threshold, input_query_file, input_target_file, '>', shrimp_outfile, '2>', shrimp_log])
-    
-    try:
-        os.system(command)
-    except Exception, e:
-        # the old cleanup referenced query_fasta/query_qual, which are never
-        # defined in this script, so a failure here raised a NameError
-        stop_err(str(e))
-    
-    # check SHRiMP output: count number of lines
-    num_hits = 0
-    if shrimp_outfile:
-        for i, line in enumerate(file(shrimp_outfile)):
-            line = line.rstrip('\r\n')
-            if not line or line.startswith('#'): continue
-            try:
-                fields = line.split()
-                num_hits += 1
-            except Exception, e:
-                stop_err(str(e))
-                
-    if num_hits == 0:   # no hits generated
-        err_msg = ''
-        if shrimp_log:
-            for i, line in enumerate(file(shrimp_log)):
-                if line.startswith('error'):            # deal with memory error: 
-                    err_msg += line                     # error: realloc failed: Cannot allocate memory
-                if re.search('Reads Matched', line):    # deal with zero hits
-                    if int(line[8:].split()[2]) == 0:
-                        err_msg = 'Zero hits found.\n' 
-        stop_err('SHRiMP Failed due to:\n' + err_msg)
-        
-        
-    # remove temp. files
-    if os.path.exists(shrimp_log): os.remove(shrimp_log)
-
-    
-if __name__ == '__main__': __main__()
-    
--- a/tools/metag_tools/shrimp_color_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,181 +0,0 @@
-<tool id="shrimp_color_wrapper" name="SHRiMP for Color-space" version="1.0.0">
-  <description>maps reads against a reference sequence</description>
-  <command interpreter="python">
-    #if $param.skip_or_full=="skip" #shrimp_color_wrapper.py $input_target $input_query $output1 
-    #else                           #shrimp_color_wrapper.py $input_target $input_query $output1 $param.spaced_seed $param.seed_matches_per_window $param.seed_hit_taboo_length $param.seed_generation_taboo_length $param.seed_window_length $param.max_hits_per_read $param.max_read_length $param.kmer $param.sw_match_value $param.sw_mismatch_value $param.sw_gap_open_ref $param.sw_gap_open_query $param.sw_gap_ext_ref $param.sw_gap_ext_query $param.sw_crossover_penalty $param.sw_full_hit_threshold $param.sw_vector_hit_threshold  
-    #end if#
-  </command>
-    <inputs>
-        <page>
-        <param name="input_query" type="data" format="csfasta" label="Align sequencing reads" help="No dataset? Read tip below"/>
-        <param name="input_target" type="data" format="fasta" label="against reference" />
-        <conditional name="param">
-            <param name="skip_or_full" type="select" label="SHRiMP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full List">
-                <option value="skip">Commonly used</option>
-                <option value="full">Full Parameter List</option>
-            </param>
-            <when value="skip" />
-            <when value="full">
-                <param name="spaced_seed"                   type="text"     size="30"   value="1111001111"    label="Spaced Seed" />
-                <param name="seed_matches_per_window"       type="integer"  size="5"    value="2"               label="Seed Matches per Window" />
-                <param name="seed_hit_taboo_length"         type="integer"  size="5"    value="4"               label="Seed Hit Taboo Length" />
-                <param name="seed_generation_taboo_length"  type="integer"  size="5"    value="0"               label="Seed Generation Taboo Length" />
-                <param name="seed_window_length"            type="float"    size="10"   value="115.0"           label="Seed Window Length"          help="in percentage"/>
-                <param name="max_hits_per_read"             type="integer"  size="10"   value="100"             label="Maximum Hits per Read" />
-                <param name="max_read_length"               type="integer"  size="10"   value="1000"            label="Maximum Read Length" />
-                <param name="kmer"                          type="integer"  size="10"   value="-1"              label="Kmer Std. Deviation Limit"   help="-1 as None"/>
-                <param name="sw_match_value"                type="integer"  size="10"   value="100"             label="S-W Match Value" />
-                <param name="sw_mismatch_value"             type="integer"  size="10"   value="-150"            label="S-W Mismatch Value" />
-                <param name="sw_gap_open_ref"               type="integer"  size="10"   value="-400"            label="S-W Gap Open Penalty (Reference)" />
-                <param name="sw_gap_open_query"             type="integer"  size="10"   value="-400"            label="S-W Gap Open Penalty (Query)" />
-                <param name="sw_gap_ext_ref"                type="integer"  size="10"   value="-70"             label="S-W Gap Extend Penalty (Reference)" />
-                <param name="sw_gap_ext_query"              type="integer"  size="10"   value="-70"             label="S-W Gap Extend Penalty (Query)" />
-                <param name="sw_crossover_penalty"          type="integer"  size="10"   value="-140"            label="S-W Crossover Penalty" />               
-                <param name="sw_full_hit_threshold"         type="float"    size="10"   value="68.0"            label="S-W Full Hit Threshold"      help="in percentage"/>
-                <param name="sw_vector_hit_threshold"       type="float"    size="10"   value="60.0"            label="S-W Vector Hit Threshold"    help="in percentage"/>
-            </when>
-        </conditional>
-        </page>
-    </inputs>
-    <outputs>
-        <data name="output1" format="tabular"/>
-    </outputs>
-    <requirements>
-      <requirement type="binary">rmapper-cs</requirement>
-    </requirements>
-    <tests>
-        <test>
-            <param name="skip_or_full" value="skip" />
-            <param name="input_target" value="Ssuis.fasta" ftype="fasta" />
-            <param name="input_query" value="shrimp_cs_test1.csfasta" ftype="csfasta"/>
-            <output name="output1" file="shrimp_cs_test1.out" />
-        </test>
-    </tests>
-<help>
-    
-.. class:: warningmark 
-
-To use this tool your dataset needs to be in the *csfasta* format (ABI SOLiD color-space sequences). Click the pencil icon next to your dataset to set its datatype to *csfasta*.   
-
-
------
-    
-**What it does**
- 
-SHRiMP (SHort Read Mapping Package) is a software package for aligning genomic reads against a target genome.  
-  
-
------
-
-**Input formats**
-
-A multiple color-space file, for example::
-
-    >2_263_779_F3
-    T132032030200202202003211302222202230022110222
-
-
------
-
-**Outputs**
-
-The tool returns the default SHRiMP output::
-
- 
-     1                      2               3         4        5        6       7      8      9      10
-  --------------------------------------------------------------------------------------------------------------------
-    >2_263_779_F3   Streptococcus_suis      +       814344  814388      1      45      45    3660    8x19x3x2x6x4x3  
-
-where::
-
-  1. (>2_263_779_F3)        - Read id 
-  2. (Streptococcus_suis)   - Reference sequence id
-  3. (+)                    - Strand of the read
-  4. (814344)               - Start position of the alignment in the reference
-  5. (814388)               - End position of the alignment in the reference
-  6. (1)                    - Start position of the alignment in the read
-  7. (45)                   - End position of the alignment in the read
-  8. (45)                   - Length of the read
-  9. (3660)                 - Score 
- 10. (8x19x3x2x6x4x3)       - Edit string
-
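-A hit line can be unpacked into these fields with a few lines of Python
-(the function and field names below are illustrative, not part of SHRiMP)::
-
-    def parse_shrimp_hit(line):
-        fields = line.rstrip('\r\n').split('\t')
-        return {'read_id':     fields[0].lstrip('>'),
-                'ref_id':      fields[1],
-                'strand':      fields[2],
-                'ref_start':   int(fields[3]),
-                'ref_end':     int(fields[4]),
-                'read_start':  int(fields[5]),
-                'read_end':    int(fields[6]),
-                'read_len':    int(fields[7]),
-                'score':       int(fields[8]),
-                'edit_string': fields[9]}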
- 
------
-
-**SHRiMP parameter list**
-
-The commonly used parameters with default value setting::
-
-    -s    Spaced Seed                             (default: 111111011111)
-          The spaced seed is a single contiguous string of 0's and 1's. 
-          0's represent wildcards, or positions which will always be 
-          considered as matching, whereas 1's dictate positions that 
-          must match. A string of all 1's will result in a simple kmer scan.
-    -n    Seed Matches per Window                 (default: 2)
-          The number of seed matches per window dictates how many seeds 
-          must match within some window length of the genome before that 
-          region is considered for Smith-Waterman alignment. A lower 
-          value will increase sensitivity while drastically increasing 
-          running time. Higher values will have the opposite effect.
-    -t    Seed Hit Taboo Length                   (default: 4)
-          The seed taboo length specifies how many target genome bases 
-          or colours must exist prior to a previous seed match in order 
-          to count another seed match as a hit.
-    -9    Seed Generation Taboo Length            (default: 0)
-          
-    -w    Seed Window Length                      (default: 115.00%)
-          This parameter specifies the genomic span in bases (or colours) 
-          in which *seed_matches_per_window* must exist before the read 
-          is given consideration by the Smith-Waterman alignment machinery.
-    -o    Maximum Hits per Read                   (default: 100)
-          This parameter specifies how many hits to remember for each read. 
-          If more hits are encountered, ones with lower scores are dropped 
-          to make room.
-    -r    Maximum Read Length                     (default: 1000)
-          This parameter specifies the maximum length of reads that will 
-          be encountered in the dataset. If larger reads than the default 
-          are used, an appropriate value must be passed to *rmapper*.
-    -d    Kmer Std. Deviation Limit               (default: -1 [None])
-          This option permits pruning read kmers, which occur with 
-          frequencies greater than *kmer_std_dev_limit* standard 
-          deviations above the average. This can shorten running 
-          time at the cost of some sensitivity. 
-          *Note*: A negative value disables this option.            
-    -m    S-W Match Value                         (default: 100)
-          The value applied to matches during the Smith-Waterman score calculation.
-    -i    S-W Mismatch Value                      (default: -150)
-          The value applied to mismatches during the Smith-Waterman 
-          score calculation.
-    -g    S-W Gap Open Penalty (Reference)        (default: -400)
-          The value applied to gap opens along the reference sequence 
-          during the Smith-Waterman score calculation.
-          *Note*: For backward compatibility, if -g is set 
-          and -q is not set, the gap open penalty for the query will 
-          be set to the same value as specified for the reference.
-    -q    S-W Gap Open Penalty (Query)            (default: -400)
-          The value applied to gap opens along the query sequence during 
-          the Smith-Waterman score calculation.        
-    -e    S-W Gap Extend Penalty (Reference)      (default: -70)
-          The value applied to gap extends during the Smith-Waterman score calculation.
-          *Note*: For backward compatibility, if -e is set 
-          and -f is not set, the gap extend penalty for the query will 
-          be set to the same value as specified for the reference. 
-    -f    S-W Gap Extend Penalty (Query)          (default: -70)
-          The value applied to gap extends during the Smith-Waterman score calculation.
-    -x    S-W Crossover Penalty                   (default: -140)
-          The penalty applied to colour-space crossover errors during 
-          the Smith-Waterman score calculation.
-    -h    S-W Full Hit Threshold                  (default: 68.00%)
-          In letter-space, this parameter determines the threshold 
-          score for both vectored and full Smith-Waterman alignments. 
-          Any values less than this quantity will be thrown away.
-          *Note* This option differs slightly in meaning between letter-space and color-space.
-    -v    S-W Vector Hit Threshold                (default: 60.00%)
-          The threshold score for the vectored Smith-Waterman pass in 
-          colour-space. Alignments scoring below it are discarded.
-    
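-For reference, with the full parameter list above at its defaults the 
-wrapper assembles a single *rmapper-cs* call of the following shape 
-(file names here are placeholders)::
-
-    rmapper-cs -s 1111001111 -n 2 -t 4 -9 0 -w 115.0 -o 100 -r 1000 -d -1 \
-        -m 100 -i -150 -g -400 -q -400 -e -70 -f -70 -x -140 -h 68.0 -v 60.0 \
-        reads.csfasta reference.fasta > hits.txt 2> shrimp.log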
-
------
-
-**Reference**
- 
- **SHRiMP**: Stephen M. Rumble, Michael Brudno, Phil Lacroute, Vladimir Yanovsky, Marc Fiume, Adrian Dalca. shrimp at cs dot toronto dot edu. 
-
-</help>
-</tool>
--- a/tools/metag_tools/shrimp_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,624 +0,0 @@
-#!/usr/bin/env python
-
-"""
-TODO
-1. decrease memory usage
-2. multi-fasta fastq file, ex. 454
-3. split reads into small chunks?
-
-SHRiMP wrapper
-
-Inputs: 
-1. reference seq 
-2. reads
-
-Outputs: 
-1. table of 8 columns:
-         chrom   ref_loc     read_id     read_loc    ref_nuc     read_nuc    quality     coverage
-2. SHRiMP output
-         
-Parameters:
-    -s    Spaced Seed                             (default: 111111011111)
-    -n    Seed Matches per Window                 (default: 2)
-    -t    Seed Hit Taboo Length                   (default: 4)
-    -9    Seed Generation Taboo Length            (default: 0)
-    -w    Seed Window Length                      (default: 115.00%)
-    -o    Maximum Hits per Read                   (default: 100)
-    -r    Maximum Read Length                     (default: 1000)
-    -d    Kmer Std. Deviation Limit               (default: -1 [None])
-
-    -m    S-W Match Value                         (default: 100)
-    -i    S-W Mismatch Value                      (default: -150)
-    -g    S-W Gap Open Penalty (Reference)        (default: -400)
-    -q    S-W Gap Open Penalty (Query)            (default: -400)
-    -e    S-W Gap Extend Penalty (Reference)      (default: -70)
-    -f    S-W Gap Extend Penalty (Query)          (default: -70)
-    -h    S-W Hit Threshold                       (default: 68.00%)
-
-Command:
-%rmapper -s spaced_seed -n seed_matches_per_window -t seed_hit_taboo_length -9 seed_generation_taboo_length -w seed_window_length -o max_hits_per_read -r max_read_length -d kmer -m sw_match_value -i sw_mismatch_value -g sw_gap_open_ref -q sw_gap_open_query -e sw_gap_ext_ref -f sw_gap_ext_query -h sw_hit_threshold <query> <target> > <output> 2> <log> 
-
-SHRiMP output:
->7:2:1147:982/1 chr3    +   36586562    36586595    2   35  36  2900    3G16G13
->7:2:1147:982/1 chr3    +   95338194    95338225    4   35  36  2700    9T7C14
->7:2:587:93/1   chr3    +   14913541    14913577    1   35  36  2960    19--16
-
-"""
-
-import os, sys, tempfile, os.path, re
-
-assert sys.version_info[:2] >= (2, 4)
-
-def stop_err( msg ):
-    
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-
-def reverse_complement(s):
-    
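-    # map each base to its complement (N, n, '.' and '-' map to themselves),
-    # then reverse, e.g. reverse_complement('ATGC') -> 'GCAT'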
-    complement_dna = {"A":"T", "T":"A", "C":"G", "G":"C", "a":"t", "t":"a", "c":"g", "g":"c", "N":"N", "n":"n" , ".":".", "-":"-"}
-    reversed_s = []
-    for i in s:
-        reversed_s.append(complement_dna[i])
-    reversed_s.reverse()
-    return "".join(reversed_s)
-
-def generate_sub_table(result_file, ref_file, score_files, table_outfile, hit_per_read, insertion_size):
-    
-    invalid_editstring_char = 0
-    
-    all_score_file = score_files.split(',')
-    
-    if len(all_score_file) != hit_per_read: stop_err('One or more query files are missing. Please check your dataset.')
-        
-    temp_table_name = tempfile.NamedTemporaryFile().name
-    temp_table = open(temp_table_name, 'w')
-    
-    outfile = open(table_outfile,'w')
-    
-    # load the reference sequences (the file may contain multiple FASTA records)
-    refseq = {}
-    chrom_cov = {}
-    seq = ''
-    
-    for i, line in enumerate(file(ref_file)):
-        line = line.rstrip()
-        if not line or line.startswith('#'): continue
-        
-        if line.startswith('>'):
-            if seq:
-                if refseq.has_key(title):
-                    pass
-                else:
-                    refseq[title] = seq
-                    chrom_cov[title] = {}
-                seq = ''
-            title = line[1:]
-        else:
-            seq += line
-    if seq:
-        if not refseq.has_key(title):
-            refseq[title] = seq
-            chrom_cov[title] = {}
-
-    # find hits : one end and/or the other
-    hits = {}
-    for i, line in enumerate(file(result_file)):
-        line = line.rstrip()
-        if not line or line.startswith('#'): continue
-        
-        #FORMAT: readname contigname strand contigstart contigend readstart readend readlength score editstring
-        fields = line.split('\t')
-        readname = fields[0][1:]
-        chrom = fields[1]
-        strand = fields[2]
-        chrom_start = int(fields[3]) - 1
-        chrom_end = int(fields[4])
-        read_start = fields[5]
-        read_end = fields[6]
-        read_len = fields[7]
-        score = fields[8]
-        editstring = fields[9]
-        
-        if hit_per_read == 1:
-            endindex = '1'
-        else:
-            readname, endindex = readname.split('/')
-        
-        if hits.has_key(readname):
-            if hits[readname].has_key(endindex):
-                hits[readname][endindex].append([strand, editstring, chrom_start, chrom_end, read_start, chrom])
-            else:
-                hits[readname][endindex] = [[strand, editstring, chrom_start, chrom_end, read_start, chrom]]
-        else:
-            hits[readname] = {}
-            hits[readname][endindex] = [[strand, editstring, chrom_start, chrom_end, read_start, chrom]]
-    
-    # find score : one end and the other end
-    hits_score = {}
-    readname = ''
-    score = ''
-    for num_score_file in range(len(all_score_file)):
-        score_file = all_score_file[num_score_file]
-        for i, line in enumerate(file(score_file)):
-            line = line.rstrip()
-            if not line or line.startswith('#'): continue
-        
-            if line.startswith('>'):
-                if score:
-                    if hits.has_key(readname):
-                        if len(hits[readname]) == hit_per_read:
-                            if hits_score.has_key(readname):
-                                if hits_score[readname].has_key(endindex):
-                                    pass
-                                else:
-                                    hits_score[readname][endindex] = score
-                            else:
-                                hits_score[readname] = {}
-                                hits_score[readname][endindex] = score
-                    score = ''
-                if hit_per_read == 1:
-                    readname = line[1:]
-                    endindex = '1'
-                else:
-                    readname, endindex = line[1:].split('/')
-            else:
-                score = line
-                
-        if score:   # the last one
-            if hits.has_key(readname):
-                if len(hits[readname]) == hit_per_read:
-                    if hits_score.has_key(readname):
-                        if hits_score[readname].has_key(endindex):
-                            pass
-                        else:
-                            hits_score[readname][endindex] = score
-                    else:
-                        hits_score[readname] = {}
-                        hits_score[readname][endindex] = score
-    
-    # process all mappings: keep only reads (or read pairs) with a single, unique hit
-    for readkey in hits.keys():
-        if len(hits[readkey]) != hit_per_read: continue
-
-        matches = []
-        match_count = 0
-        
-        if hit_per_read == 1:
-            if len(hits[readkey]['1']) == 1:
-                matches = [ hits[readkey]['1'] ]
-                match_count = 1
-        else:
-            end1_data = hits[readkey]['1']
-            end2_data = hits[readkey]['2']
-            
-            for i, end1_hit in enumerate(end1_data):
-                crin_strand = {'+': False, '-': False}
-                crin_insertSize = {'+': False, '-': False}
-        
-                crin_strand[end1_hit[0]] = True
-                crin_insertSize[end1_hit[0]] = int(end1_hit[2])
-            
-                for j, end2_hit in enumerate(end2_data):
-                    crin_strand[end2_hit[0]] = True
-                    crin_insertSize[end2_hit[0]] = int(end2_hit[2])
-                
-                    if end1_hit[-1] != end2_hit[-1] : continue
-
-                    if crin_strand['+'] and crin_strand['-']:
-                        if (crin_insertSize['-'] - crin_insertSize['+']) <= insertion_size:
-                            matches.append([end1_hit, end2_hit])
-                            match_count += 1
-
-        if match_count == 1:
-            
-            for x, end_data in enumerate(matches[0]):
-                
-                end_strand, end_editstring, end_chr_start, end_chr_end, end_read_start, end_chrom = end_data
-                end_read_start = int(end_read_start) - 1
-
-                if end_strand == '-':
-                    refsegment = reverse_complement(refseq[end_chrom][end_chr_start:end_chr_end]) 
-                else:
-                    refsegment = refseq[end_chrom][end_chr_start:end_chr_end]
-                
-                match_len = 0
-                editindex = 0
-                gap_read = 0
-                
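-                # edit string grammar, as handled below:
-                #   digits    a run of exact matches of that length
-                #   letter    a substitution (the read base at that position)
-                #   '-'       a reference base missing from the read (gap in read)
-                #   '(seq)'   bases inserted in the read relative to the reference
-                #   'x'       a crossover, treated as a read error and skipped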
-                while editindex < len(end_editstring):
-                    
-                    editchr = end_editstring[editindex]
-                    chrA = ''
-                    chrB = ''
-                    locIndex = []
-                    
-                    if editchr.isdigit():
-                        editcode = ''
-                        
-                        while editchr.isdigit() and editindex < len(end_editstring):
-                            editcode += editchr
-                            editindex += 1
-                            if editindex < len(end_editstring): editchr = end_editstring[editindex]
-                        
-                        for baseIndex in range(int(editcode)):
-                            chrA += refsegment[match_len+baseIndex]
-                            chrB = chrA
-                        
-                        match_len += int(editcode)
-                        
-                    elif editchr == 'x':
-                        # crossover: inserted between the appropriate two bases
-                        # Two sequencing errors: 4x15x6 (25 matches with 2 crossovers)
-                        # Treated as errors in the reads; Do nothing.
-                        editindex += 1
-                        
-                    elif editchr.isalpha(): 
-                        editcode = editchr
-                        editindex += 1
-                        chrA = refsegment[match_len]
-                        chrB = editcode
-                        match_len += len(editcode)
-                        
-                    elif editchr == '-':
-                        editcode = editchr
-                        editindex += 1
-                        chrA = refsegment[match_len]
-                        chrB = editcode
-                        match_len += len(editcode)
-                        gap_read += 1
-                        
-                    elif editchr == '(':
-                        editcode = ''
-                        
-                        while editchr != ')' and editindex < len(end_editstring):
-                            if editindex < len(end_editstring): editchr = end_editstring[editindex]
-                            editcode += editchr
-                            editindex += 1
-                        
-                        editcode = editcode[1:-1]
-                        chrA = '-'*len(editcode)
-                        chrB = editcode
-                        
-                    else:
-                        invalid_editstring_char += 1
-                        
-                    if end_strand == '-':
-                        
-                        chrA = reverse_complement(chrA)
-                        chrB = reverse_complement(chrB)
-                        
-                    pos_line = ''
-                    rev_line = ''
-                    
-                    for mappingIndex in range(len(chrA)):
-                        # reference
-                        chrAx = chrA[mappingIndex]
-                        # read
-                        chrBx = chrB[mappingIndex]
-
-                        if chrAx and chrBx and chrBx.upper() != 'N':
-                            
-                            if end_strand == '+':
-                            
-                                chrom_loc = end_chr_start+match_len-len(chrA)+mappingIndex         
-                                read_loc = end_read_start+match_len-len(chrA)+mappingIndex-gap_read
-                                
-                                if chrAx == '-': chrom_loc -= 1
-                                
-                                if chrBx == '-': 
-                                    scoreBx = '-1'
-                                else:
-                                    scoreBx = hits_score[readkey][str(x+1)].split()[read_loc]
-                                
-                                # 1-based on chrom_loc and read_loc
-                                pos_line = pos_line + '\t'.join([end_chrom, str(chrom_loc+1), readkey+'/'+str(x+1), str(read_loc+1), chrAx, chrBx, scoreBx]) + '\n'
-                                
-                            else:
-                                
-                                chrom_loc = end_chr_end-match_len+mappingIndex
-                                read_loc = end_read_start+match_len-1-mappingIndex-gap_read
-                                
-                                if chrAx == '-': chrom_loc -= 1
-                                
-                                if chrBx == '-': 
-                                    scoreBx = '-1'     
-                                else:
-                                    scoreBx = hits_score[readkey][str(x+1)].split()[read_loc]
-                                    
-                                # 1-based on chrom_loc and read_loc                                                                       
-                                rev_line = '\t'.join([end_chrom, str(chrom_loc+1), readkey+'/'+str(x+1), str(read_loc+1), chrAx, chrBx, scoreBx]) +'\n' + rev_line
-
-                            if chrom_cov.has_key(end_chrom):
-                                
-                                if chrom_cov[end_chrom].has_key(chrom_loc):
-                                    chrom_cov[end_chrom][chrom_loc] += 1
-                                else:
-                                    chrom_cov[end_chrom][chrom_loc] = 1
-                                    
-                            else:
-                                
-                                chrom_cov[end_chrom] = {}
-                                chrom_cov[end_chrom][chrom_loc] = 1
-                    
-                    if pos_line: temp_table.write('%s\n' %(pos_line.rstrip('\r\n')))
-                    if rev_line: temp_table.write('%s\n' %(rev_line.rstrip('\r\n')))
-    
-    temp_table.close()
-
-    # chrom-wide coverage
-    for i, line in enumerate(open(temp_table_name)):
-        
-        line = line.rstrip()
-        if not line or line.startswith('#'): continue
-        
-        fields = line.split()
-        chrom = fields[0]
-        eachBp = int(fields[1])
-        readname = fields[2]
-        
-        if hit_per_read == 1:
-            fields[2] = readname.split('/')[0]
-            
-        if chrom_cov[chrom].has_key(eachBp):
-            outfile.write('%s\t%d\n' %('\t'.join(fields), chrom_cov[chrom][eachBp]))
-        else:
-            outfile.write('%s\t%d\n' %('\t'.join(fields), 0))
-            
-    outfile.close()
-    
-    if os.path.exists(temp_table_name): os.remove(temp_table_name)
-    
-    if invalid_editstring_char:
-        print 'Skipped %d invalid characters in edit strings' % invalid_editstring_char
-        
-    return True 
-   
-def convert_fastqsolexa_to_fasta_qual(infile_name, query_fasta, query_qual):
-    
-    outfile_seq = open( query_fasta, 'w' )
-    outfile_score = open( query_qual, 'w' )
-
-    seq_title_startswith = ''
-    qual_title_startswith = ''
-    
-    default_coding_value = 64       # Solexa ascii-code
-    fastq_block_lines = 0
-    
-    for i, line in enumerate( file( infile_name ) ):
-        line = line.rstrip()
-        if not line or line.startswith( '#' ): continue
-        
-        fastq_block_lines = ( fastq_block_lines + 1 ) % 4
-        line_startswith = line[0:1]
-        
-        if fastq_block_lines == 1:
-            # first line is @title_of_seq
-            if not seq_title_startswith:
-                seq_title_startswith = line_startswith
-                
-            if line_startswith != seq_title_startswith:
-                outfile_seq.close()
-                outfile_score.close()
-                stop_err( 'Invalid fastqsolexa format at line %d: %s.' % ( i + 1, line ) )
-                
-            read_title = line[1:]
-            outfile_seq.write( '>%s\n' % line[1:] )
-            
-        elif fastq_block_lines == 2:
-            # second line is nucleotides
-            read_length = len( line )
-            outfile_seq.write( '%s\n' % line )
-            
-        elif fastq_block_lines == 3:
-            # third line is +title_of_qualityscore ( might be skipped )
-            if not qual_title_startswith:
-                qual_title_startswith = line_startswith
-                
-            if line_startswith != qual_title_startswith:
-                outfile_seq.close()
-                outfile_score.close()
-                stop_err( 'Invalid fastqsolexa format at line %d: %s.' % ( i + 1, line ) )    
-                
-            quality_title = line[1:]
-            if quality_title and read_title != quality_title:
-                outfile_seq.close()
-                outfile_score.close()
-                stop_err( 'Invalid fastqsolexa format at line %d: sequence title "%s" differs from score title "%s".' % ( i + 1, read_title, quality_title ) )
-                
-            if not quality_title:
-                outfile_score.write( '>%s\n' % read_title )
-            else:
-                outfile_score.write( '>%s\n' % line[1:] )
-                
-        else:
-            # fourth line is quality scores
-            qual = ''
-            fastq_integer = True
-            # peek: ascii or digits?
-            val = line.split()[0]
-            try: 
-                check = int( val )
-                fastq_integer = True
-            except:
-                fastq_integer = False
-                
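-            # integer qualities are copied through unchanged; ascii qualities
-            # are decoded below as ord(char) - offset, where the offset comes
-            # from an extra leading character if present, else defaults to 64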
-            if fastq_integer:
-                # digits
-                qual = line
-            else:
-                # ascii
-                quality_score_length = len( line )
-                if quality_score_length == read_length + 1:
-                    # first char is qual_score_startswith
-                    qual_score_startswith = ord( line[0:1] )
-                    line = line[1:]
-                elif quality_score_length == read_length:
-                    qual_score_startswith = default_coding_value
-                else:
-                    stop_err( 'Invalid fastqsolexa format at line %d: the number of quality scores ( %d ) is not the same as bases ( %d ).' % ( i + 1, quality_score_length, read_length ) )
-                    
-                for j, char in enumerate( line ):
-                    score = ord( char ) - qual_score_startswith    # 64
-                    qual = "%s%s " % ( qual, str( score ) )
-                    
-            outfile_score.write( '%s\n' % qual )
-              
-    outfile_seq.close()
-    outfile_score.close()
-
-    return True
-
-def __main__():
-    
-    # SHRiMP path
-    shrimp = 'rmapper-ls'
-    
-    # I/O
-    input_target_file = sys.argv[1]                  # fasta
-    shrimp_outfile    = sys.argv[2]                # shrimp output
-    table_outfile     = sys.argv[3]                 # table output
-    single_or_paired  = sys.argv[4].split(',')       
-    
-    insertion_size = 600
-    
-    if len(single_or_paired) == 1:                  # single or paired
-        type_of_reads = 'single'
-        hit_per_read  = 1
-        input_query   = single_or_paired[0]
-        query_fasta   = tempfile.NamedTemporaryFile().name
-        query_qual    = tempfile.NamedTemporaryFile().name
-
-    else:                                           # paired-end
-        type_of_reads    = 'paired'
-        hit_per_read     = 2
-        input_query_end1 = single_or_paired[0]  
-        input_query_end2 = single_or_paired[1]
-        insertion_size = int(single_or_paired[2])
-        query_fasta_end1 = tempfile.NamedTemporaryFile().name
-        query_fasta_end2 = tempfile.NamedTemporaryFile().name
-        query_qual_end1  = tempfile.NamedTemporaryFile().name
-        query_qual_end2  = tempfile.NamedTemporaryFile().name
-        
-    # SHRiMP parameters: total = 15, default values
-    spaced_seed = '111111011111'
-    seed_matches_per_window = '2'
-    seed_hit_taboo_length = '4'
-    seed_generation_taboo_length = '0'
-    seed_window_length = '115.0'
-    max_hits_per_read = '100'
-    max_read_length = '1000'
-    kmer = '-1'
-    sw_match_value = '100'
-    sw_mismatch_value = '-150'
-    sw_gap_open_ref = '-400'
-    sw_gap_open_query = '-400'
-    sw_gap_ext_ref = '-70'
-    sw_gap_ext_query = '-70'
-    sw_hit_threshold = '68.0'
-
-    # TODO: put the threshold on each of these parameters
-    if len(sys.argv) > 5:
-        
-        # the spaced seed must consist only of 1s and 0s
-        if sys.argv[5] and not set(sys.argv[5]) - set('01'):
-            spaced_seed = sys.argv[5]
-        else:
-            stop_err('Spaced seed must be a combination of 1s and 0s.')
-        
-        seed_matches_per_window = sys.argv[6]
-        seed_hit_taboo_length = sys.argv[7]
-        seed_generation_taboo_length = sys.argv[8]
-        seed_window_length = sys.argv[9]
-        max_hits_per_read = sys.argv[10]
-        max_read_length = sys.argv[11]
-        kmer = sys.argv[12]
-        sw_match_value = sys.argv[13]
-        sw_mismatch_value = sys.argv[14]
-        sw_gap_open_ref = sys.argv[15]
-        sw_gap_open_query = sys.argv[16]
-        sw_gap_ext_ref = sys.argv[17]
-        sw_gap_ext_query = sys.argv[18]
-        sw_hit_threshold = sys.argv[19]
-    
-    # temp file for shrimp log file
-    shrimp_log = tempfile.NamedTemporaryFile().name
-    
-    # convert fastq to fasta and quality score files
-    if type_of_reads == 'single':
-        return_value = convert_fastqsolexa_to_fasta_qual(input_query, query_fasta, query_qual)
-    else:
-        return_value = convert_fastqsolexa_to_fasta_qual(input_query_end1, query_fasta_end1, query_qual_end1)
-        return_value = convert_fastqsolexa_to_fasta_qual(input_query_end2, query_fasta_end2, query_qual_end2)
-        
-    # SHRiMP command
-    if type_of_reads == 'single':
-        command = ' '.join([shrimp,  '-s', spaced_seed, '-n', seed_matches_per_window, '-t', seed_hit_taboo_length, '-9', seed_generation_taboo_length, '-w', seed_window_length, '-o', max_hits_per_read, '-r', max_read_length, '-d', kmer, '-m', sw_match_value, '-i', sw_mismatch_value, '-g', sw_gap_open_ref, '-q', sw_gap_open_query, '-e', sw_gap_ext_ref, '-f', sw_gap_ext_query, '-h', sw_hit_threshold, query_fasta, input_target_file, '>', shrimp_outfile, '2>', shrimp_log])
-    
-        try:
-            os.system(command)
-        except Exception, e:
-            if os.path.exists(query_fasta): os.remove(query_fasta)
-            if os.path.exists(query_qual): os.remove(query_qual)
-            stop_err(str(e))
-            
-    else: # paired
-        command_end1 = ' '.join([shrimp, '-s', spaced_seed, '-n', seed_matches_per_window, '-t', seed_hit_taboo_length, '-9', seed_generation_taboo_length, '-w', seed_window_length, '-o', max_hits_per_read, '-r', max_read_length, '-d', kmer, '-m', sw_match_value, '-i', sw_mismatch_value, '-g', sw_gap_open_ref, '-q', sw_gap_open_query, '-e', sw_gap_ext_ref, '-f', sw_gap_ext_query, '-h', sw_hit_threshold, query_fasta_end1, input_target_file, '>', shrimp_outfile, '2>', shrimp_log])
-        command_end2 = ' '.join([shrimp, '-s', spaced_seed, '-n', seed_matches_per_window, '-t', seed_hit_taboo_length, '-9', seed_generation_taboo_length, '-w', seed_window_length, '-o', max_hits_per_read, '-r', max_read_length, '-d', kmer, '-m', sw_match_value, '-i', sw_mismatch_value, '-g', sw_gap_open_ref, '-q', sw_gap_open_query, '-e', sw_gap_ext_ref, '-f', sw_gap_ext_query, '-h', sw_hit_threshold, query_fasta_end2, input_target_file, '>>', shrimp_outfile, '2>>', shrimp_log])
-        
-        try:
-            os.system(command_end1)
-            os.system(command_end2)
-        except Exception, e:
-            if os.path.exists(query_fasta_end1): os.remove(query_fasta_end1)
-            if os.path.exists(query_fasta_end2): os.remove(query_fasta_end2)
-            if os.path.exists(query_qual_end1): os.remove(query_qual_end1)
-            if os.path.exists(query_qual_end2): os.remove(query_qual_end2)
-            stop_err(str(e))
-    
-    # check SHRiMP output: count number of lines
-    num_hits = 0
-    if shrimp_outfile:
-        for i, line in enumerate(file(shrimp_outfile)):
-            line = line.rstrip('\r\n')
-            if not line or line.startswith('#'): continue
-            try:
-                fields = line.split()
-                num_hits += 1
-            except Exception, e:
-                stop_err(str(e))
-                
-    if num_hits == 0:   # no hits generated
-        err_msg = ''
-        if shrimp_log:
-            for i, line in enumerate(file(shrimp_log)):
-                if line.startswith('error'):            # deal with memory error: 
-                    err_msg += line                     # error: realloc failed: Cannot allocate memory
-                if re.search('Reads Matched', line):    # deal with zero hits
-                    if int(line[8:].split()[2]) == 0:
-                        err_msg = 'Zero hits found.\n' 
-        stop_err('SHRiMP Failed due to:\n' + err_msg)
-        
-    # convert to table
-    if type_of_reads == 'single':
-        return_value = generate_sub_table(shrimp_outfile, input_target_file, query_qual, table_outfile, hit_per_read, insertion_size)
-    else:
-        return_value = generate_sub_table(shrimp_outfile, input_target_file, query_qual_end1+','+query_qual_end2, table_outfile, hit_per_read, insertion_size)
-        
-    # remove temp. files
-    if type_of_reads == 'single':
-        if os.path.exists(query_fasta): os.remove(query_fasta)
-        if os.path.exists(query_qual): os.remove(query_qual)
-    else:
-        if os.path.exists(query_fasta_end1): os.remove(query_fasta_end1)
-        if os.path.exists(query_fasta_end2): os.remove(query_fasta_end2)
-        if os.path.exists(query_qual_end1): os.remove(query_qual_end1)
-        if os.path.exists(query_qual_end2): os.remove(query_qual_end2)    
-    
-    if os.path.exists(shrimp_log): os.remove(shrimp_log)
-
-    
-if __name__ == '__main__': __main__()
-    
--- a/tools/metag_tools/shrimp_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,279 +0,0 @@
-<tool id="shrimp_wrapper" name="SHRiMP for Letter-space" version="1.0.0">
-  <description>maps reads against a reference sequence</description>
-  <command interpreter="python">
-    #if     ($type_of_reads.single_or_paired=="single" and $param.skip_or_full=="skip") #shrimp_wrapper.py $input_target $output1 $output2 $input_query
-    #elif   ($type_of_reads.single_or_paired=="paired" and $param.skip_or_full=="skip") #shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input1,$type_of_reads.input2,$type_of_reads.insertion_size
-    #elif   ($type_of_reads.single_or_paired=="single" and $param.skip_or_full=="full") #shrimp_wrapper.py $input_target $output1 $output2 $input_query                                                              $param.spaced_seed $param.seed_matches_per_window $param.seed_hit_taboo_length $param.seed_generation_taboo_length $param.seed_window_length $param.max_hits_per_read $param.max_read_length $param.kmer $param.sw_match_value $param.sw_mismatch_value $param.sw_gap_open_ref $param.sw_gap_open_query $param.sw_gap_ext_ref $param.sw_gap_ext_query $param.sw_hit_threshold 
-    #elif   ($type_of_reads.single_or_paired=="paired" and $param.skip_or_full=="full") #shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input1,$type_of_reads.input2,$type_of_reads.insertion_size $param.spaced_seed $param.seed_matches_per_window $param.seed_hit_taboo_length $param.seed_generation_taboo_length $param.seed_window_length $param.max_hits_per_read $param.max_read_length $param.kmer $param.sw_match_value $param.sw_mismatch_value $param.sw_gap_open_ref $param.sw_gap_open_query $param.sw_gap_ext_ref $param.sw_gap_ext_query $param.sw_hit_threshold
-    #end if#
-  </command>
-    <inputs>
-        <page>
-        <conditional name="type_of_reads">
-            <param name="single_or_paired" type="select" label="Single- or Paired-ends">
-                <option value="single">Single-end</option>
-                <option value="paired">Paired-end</option>
-            </param>
-            <when value="single">
-                <param name="input_query" type="data" format="fastqsolexa" label="Align sequencing reads" help="No dataset? Read tip below"/>
-            </when>
-            <when value="paired">
-                <param name="insertion_size" type="integer" size="5" value="600" label="Insertion length between two ends" help="bp" />
-                <param name="input1" type="data" format="fastqsolexa" label="Align sequencing reads, one end" />
-                <param name="input2" type="data" format="fastqsolexa" label="and the other end" />
-            </when> 
-        </conditional>
-        <param name="input_target" type="data" format="fasta" label="against reference" />
-        <conditional name="param">
-            <param name="skip_or_full" type="select" label="SHRiMP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full List">
-                <option value="skip">Commonly used</option>
-                <option value="full">Full Parameter List</option>
-            </param>
-            <when value="skip" />
-            <when value="full">
-                <param name="spaced_seed"                   type="text"     size="30"   value="111111011111"    label="Spaced Seed" />
-                <param name="seed_matches_per_window"       type="integer"  size="5"    value="2"               label="Seed Matches per Window" />
-                <param name="seed_hit_taboo_length"         type="integer"  size="5"    value="4"               label="Seed Hit Taboo Length" />
-                <param name="seed_generation_taboo_length"  type="integer"  size="5"    value="0"               label="Seed Generation Taboo Length" />
-                <param name="seed_window_length"            type="float"    size="10"   value="115.0"           label="Seed Window Length"          help="in percentage"/>
-                <param name="max_hits_per_read"             type="integer"  size="10"   value="100"             label="Maximum Hits per Read" />
-                <param name="max_read_length"               type="integer"  size="10"   value="1000"            label="Maximum Read Length" />
-                <param name="kmer"                          type="integer"  size="10"   value="-1"              label="Kmer Std. Deviation Limit"   help="-1 as None"/>
-                <param name="sw_match_value"                type="integer"  size="10"   value="100"             label="S-W Match Value" />
-                <param name="sw_mismatch_value"             type="integer"  size="10"   value="-150"            label="S-W Mismatch Value" />
-                <param name="sw_gap_open_ref"               type="integer"  size="10"   value="-400"            label="S-W Gap Open Penalty (Reference)" />
-                <param name="sw_gap_open_query"             type="integer"  size="10"   value="-400"            label="S-W Gap Open Penalty (Query)" />
-                <param name="sw_gap_ext_ref"                type="integer"  size="10"   value="-70"             label="S-W Gap Extend Penalty (Reference)" />
-                <param name="sw_gap_ext_query"              type="integer"  size="10"   value="-70"             label="S-W Gap Extend Penalty (Query)" />
-                <param name="sw_hit_threshold"              type="float"    size="10"   value="68.0"            label="S-W Hit Threshold"           help="in percentage"/>
-            </when>
-        </conditional>
-        </page>
-    </inputs>
-    <outputs>
-        <data name="output1" format="tabular"/>
-        <data name="output2" format="tabular"/>
-    </outputs>
-    <requirements>
-      <requirement type="binary">rmapper-ls</requirement>
-    </requirements>
-    <tests>
-        <test>
-            <param name="single_or_paired" value="single" />
-            <param name="skip_or_full" value="skip" />
-            <param name="input_target" value="shrimp_phix_anc.fa" ftype="fasta" />
-            <param name="input_query" value="shrimp_wrapper_test1.fastq" ftype="fastqsolexa"/>
-            <output name="output1" file="shrimp_wrapper_test1.out1" />
-        </test>
-        <!--  
-        <test>
-            <param name="single_or_paired" value="paired" />
-            <param name="skip_or_full" value="skip" />
-            <param name="input_target" value="shrimp_eca_chrMT.fa" ftype="fasta" />
-            <param name="input1" value="shrimp_wrapper_test2_end1.fastq" ftype="fastqsolexa" />
-            <param name="input2" value="shrimp_wrapper_test2_end2.fastq" ftype="fastqsolexa" />
-            <param name="insertion_size" value="600" />
-            <output name="output1" file="shrimp_wrapper_test2.out1" />
-        </test>
-        <test>
-            <param name="single_or_paired" value="single" />
-            <param name="skip_or_full" value="full" />
-            <param name="input_target" value="shrimp_phix_anc.fa" ftype="fasta" />
-            <param name="input_query" value="shrimp_wrapper_test1.fastq" ftype="fastqsolexa"/>
-            <param name="spaced_seed" value="111111011111" />
-            <param name="seed_matches_per_window" value="2" />
-            <param name="seed_hit_taboo_length" value="4" />
-            <param name="seed_generation_taboo_length" value="0" />
-            <param name="seed_window_length" value="115.0" />
-            <param name="max_hits_per_read" value="100" />
-            <param name="max_read_length" value="1000" />
-            <param name="kmer" value="-1" />
-            <param name="sw_match_value" value="100" />
-            <param name="sw_mismatch_value" value="-150" />
-            <param name="sw_gap_open_ref" value="-400" />
-            <param name="sw_gap_open_query" value="-400" />
-            <param name="sw_gap_ext_ref" value="-70" />
-            <param name="sw_gap_ext_query" value="-70" />
-            <param name="sw_hit_threshold" value="68.0" />
-            <output name="output1" file="shrimp_wrapper_test1.out1" />
-        </test> 
-        <test>
-            <param name="single_or_paired" value="paired" />
-            <param name="skip_or_full" value="full" />
-            <param name="input_target" value="shrimp_eca_chrMT.fa" ftype="fasta" />
-            <param name="spaced_seed" value="111111011111" />
-            <param name="seed_matches_per_window" value="2" />
-            <param name="seed_hit_taboo_length" value="4" />
-            <param name="seed_generation_taboo_length" value="0" />
-            <param name="seed_window_length" value="115.0" />
-            <param name="max_hits_per_read" value="100" />
-            <param name="max_read_length" value="1000" />
-            <param name="kmer" value="-1" />
-            <param name="sw_match_value" value="100" />
-            <param name="sw_mismatch_value" value="-150" />
-            <param name="sw_gap_open_ref" value="-400" />
-            <param name="sw_gap_open_query" value="-400" />
-            <param name="sw_gap_ext_ref" value="-70" />
-            <param name="sw_gap_ext_query" value="-70" />
-            <param name="sw_hit_threshold" value="68.0" />
-            <param name="input1" value="shrimp_wrapper_test2_end1.fastq" ftype="fastqsolexa"/>
-            <param name="input2" value="shrimp_wrapper_test2_end2.fastq" ftype="fastqsolexa"/>
-            <param name="insertion_size" value="600" />
-            <output name="output1" file="shrimp_wrapper_test2.out1" />
-        </test>
-        -->
-    </tests>
-<help>
-
-.. class:: warningmark
-
-IMPORTANT: This tool currently supports only data whose quality scores are integers or ASCII characters with a base-64 offset. Click the pencil icon next to your dataset to set its datatype to *fastqsolexa*.
-
-
------
-    
-**What it does**
- 
-SHRiMP (SHort Read Mapping Package) is a software package for aligning genomic reads against a target genome. 
-
-This wrapper post-processes the default SHRiMP/rmapper-ls output and generates a table combining read and reference information for each mapping. The tool accepts single- or paired-end reads. For single-end reads, only uniquely mapped reads are considered. For paired-end reads, only pairs that meet both of the following criteria are used to generate the table: (1) the two ends fall within the insertion size, and (2) the two ends map to opposite strands (see the sketch below). If multiple mappings remain after applying these criteria, the paired-end read is discarded. 
-  
-
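-The pairing test applied to each candidate pair of end mappings is, in 
-essence (a simplified sketch of the wrapper's logic; the function name is 
-illustrative, and the wrapper additionally requires both ends to hit the 
-same reference sequence)::
-
-    def is_proper_pair(strand1, start1, strand2, start2, insertion_size):
-        # the two ends must map to opposite strands
-        if strand1 == strand2:
-            return False
-        if strand1 == '+':
-            plus_start, minus_start = start1, start2
-        else:
-            plus_start, minus_start = start2, start1
-        # the minus-strand hit must start within insertion_size
-        # of the plus-strand hit
-        return (minus_start - plus_start) <= insertion_size
-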
------
-
-**Input formats**
-
-A multiple-fastq file, for example::
-
-    @seq1
-    TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTT
-    +seq1
-    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
-
-
------
-
-**Outputs**
-
-The tool gives two outputs.
-
-**Table output**
-
-Table output contains 8 columns::
-
-     1     2        3       4     5     6     7     8 
-  ----------------------------------------------------
-  chrM   14711     seq1     0     T     A    40     1 
-  chrM   14712     seq1     1     T     T    40     1 
-
-where::
-    
-  1. (chrM)   - Reference sequence id
-  2. (14711)  - Position of the mapping in the reference
-  3. (seq1)   - Read id
-  4. (0)      - Position of the mapping in the read
-  5. (T)      - Nucleotide in the reference
-  6. (A)      - Nucleotide in the read
-  7. (40)     - Quality score for the nucleotide in the position of the read
-  8. (1)      - The number of times this position is covered by reads
-
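-Rows of this table can be filtered on any column with a few lines of 
-Python; for example, keeping positions covered by at least two reads 
-(the file name is a placeholder)::
-
-    for line in open('table_output.tabular'):
-        fields = line.rstrip('\r\n').split('\t')
-        if int(fields[7]) >= 2:      # column 8: coverage
-            print line.rstrip('\r\n')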
-     
-**SHRiMP output**
-
-This is the default output from SHRiMP/rmapper-ls::
- 
-     1     2     3       4      5      6     7     8      9      10
-  -------------------------------------------------------------------
-   seq1  chrM    +     3644    3679    1    36     36    3600    36  
-
-where::
-
-  1. (seq1)   - Read id 
-  2. (chrM)   - Reference sequence id
-  3. (+)      - Strand of the read
-  4. (3644)   - Start position of the alignment in the reference
-  5. (3679)   - End position of the alignment in the reference
-  6. (1)      - Start position of the alignment in the read
-  7. (36)     - End position of the alignment in the read
-  8. (36)     - Length of the read
-  9. (3600)   - Score 
- 10. (36)     - Edit string
-
- 
------
-
-**SHRiMP parameter list**
-
-The commonly used parameters with default value setting::
-
-    -s    Spaced Seed                             (default: 111111011111)
-          The spaced seed is a single contiguous string of 0's and 1's. 
-          0's represent wildcards, or positions which will always be 
-          considered as matching, whereas 1's dictate positions that 
-          must match. A string of all 1's will result in a simple kmer scan.
-    -n    Seed Matches per Window                 (default: 2)
-          The number of seed matches per window dictates how many seeds 
-          must match within some window length of the genome before that 
-          region is considered for Smith-Waterman alignment. A lower 
-          value will increase sensitivity while drastically increasing 
-          running time. Higher values will have the opposite effect.
-    -t    Seed Hit Taboo Length                   (default: 4)
-          The seed taboo length specifies how many target genome bases 
-          or colours must exist prior to a previous seed match in order 
-          to count another seed match as a hit.
-    -9    Seed Generation Taboo Length            (default: 0)
-          
-    -w    Seed Window Length                      (default: 115.00%)
-          This parameter specifies the genomic span in bases (or colours) 
-          in which *seed_matches_per_window* must exist before the read 
-          is given consideration by the Smith-Waterman alignment machinery.
-    -o    Maximum Hits per Read                   (default: 100)
-          This parameter specifies how many hits to remember for each read. 
-          If more hits are encountered, ones with lower scores are dropped 
-          to make room.
-    -r    Maximum Read Length                     (default: 1000)
-          This parameter specifies the maximum length of reads that will 
-          be encountered in the dataset. If larger reads than the default 
-          are used, an appropriate value must be passed to *rmapper*.
-    -d    Kmer Std. Deviation Limit               (default: -1 [None])
-          This option permits pruning read kmers, which occur with 
-          frequencies greater than *kmer_std_dev_limit* standard 
-          deviations above the average. This can shorten running 
-          time at the cost of some sensitivity. 
-          *Note*: A negative value disables this option.            
-    -m    S-W Match Value                         (default: 100)
-          The value applied to matches during the Smith-Waterman score calculation.
-    -i    S-W Mismatch Value                      (default: -150)
-          The value applied to mismatches during the Smith-Waterman 
-          score calculation.
-    -g    S-W Gap Open Penalty (Reference)        (default: -400)
-          The value applied to gap opens along the reference sequence 
-          during the Smith-Waterman score calculation.
-          *Note*: For backward compatibility, if -g is set 
-          and -q is not set, the gap open penalty for the query will 
-          be set to the same value as specified for the reference.
-    -q    S-W Gap Open Penalty (Query)            (default: -400)
-          The value applied to gap opens along the query sequence during 
-          the Smith-Waterman score calculation.        
-    -e    S-W Gap Extend Penalty (Reference)      (default: -70)
-          The value applied to gap extends during the Smith-Waterman score calculation.
-          *Note*: For backward compatibility, if -e is set 
-          and -f is not set, the gap extend penalty for the query will 
-          be set to the same value as specified for the reference. 
-    -f    S-W Gap Extend Penalty (Query)          (default: -70)
-          The value applied to gap extends during the Smith-Waterman score calculation.
-    -h    S-W Hit Threshold                       (default: 68.00%)
-          In letter-space, this parameter determines the threshold 
-          score for both vectored and full Smith-Waterman alignments. 
-          Any values less than this quantity will be thrown away.
-          *Note* This option differs slightly in meaning between letter-space and color-space.
-
-
------
-
-**Reference**
- 
- **SHRiMP**: Stephen M. Rumble, Michael Brudno, Phil Lacroute, Vladimir Yanovsky, Marc Fiume, Adrian Dalca. shrimp at cs dot toronto dot edu. 
-
-</help>
-</tool>
--- a/tools/metag_tools/split_paired_reads.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Split fixed length paired end reads
-"""
-
-import os, sys
-
-if __name__ == '__main__':
-    
-    infile = sys.argv[1]
-    outfile_end1 = open(sys.argv[2], 'w')
-    outfile_end2 = open(sys.argv[3], 'w')
-    
-    i = 0
-    
-    for line in file( infile ):
-        line = line.rstrip()
-        
-        if not line:
-            continue 
-        
-        end1 = ''
-        end2 = ''
-        
-        line_index = i % 4
-        
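-        # fastq records span 4 lines: 0 = @title, 1 = sequence,
-        # 2 = +title, 3 = qualities; the title lines are tagged /1 and /2,
-        # the sequence and quality lines are split in half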
-        if line_index == 0:
-            end1 = line + '/1'
-            end2 = line + '/2'
-        
-        elif line_index == 1:
-            seq_len = len(line)/2
-            end1 = line[0:seq_len]
-            end2 = line[seq_len:]
-        
-        elif line_index == 2:
-            end1 = line + '/1'
-            end2 = line + '/2'
-        
-        else:
-            qual_len = len(line)/2
-            end1 = line[0:qual_len]
-            end2 = line[qual_len:]
-            
-        outfile_end1.write('%s\n' %(end1))
-        outfile_end2.write('%s\n' %(end2))
-        
-        i += 1
-        
-    if i % 4 != 0:
-        sys.stderr.write("WARNING: Number of lines in the input file was not divisible by 4.\nCheck consistency of the input fastq file.\n")
-    outfile_end1.close()
-    outfile_end2.close()    
\ No newline at end of file
--- a/tools/metag_tools/split_paired_reads.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-<tool id="split_paired_reads" name="Split paired end reads" version="1.0.0">
-  <description></description>
-  <command interpreter="python">
-    split_paired_reads.py $input $output1 $output2
-  </command>
-    <inputs>
-        <param name="input" type="data" format="fastqsanger" label="Your paired-end file" />
-    </inputs>
-    <outputs>
-        <data name="output1" format="fastqsanger"/>
-        <data name="output2" format="fastqsanger"/>
-    </outputs>
-    <tests>
-        <test>
-            <param name="input" value="3.fastqsanger" ftype="fastqsanger"/>
-            <output name="output1" file="split_pair_reads_1.fastqsanger" ftype="fastqsanger"/>
-            <output name="output2" file="split_pair_reads_2.fastqsanger" ftype="fastqsanger"/>
-        </test>
-    </tests>
-<help>
-        
-**What it does**
- 
-Splits a single fastq dataset representing a paired-end run into two datasets (one for each end). This tool works only for datasets where both ends have **the same** length.
-
------
-
-**Input formats**
-
-A FASTQ file of joined paired-end reads, for example::
-
-    @HWI-EAS91_1_30788AAXX:7:21:1542:1758
-    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
-    +HWI-EAS91_1_30788AAXX:7:21:1542:1758
-    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
-
-
------
-
-**Outputs**
-
-One end::
-
-    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
-    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC
-    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
-    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
-
-The other end::
-
-    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
-    GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
-    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
-    hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
-    
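-A minimal Python sketch of the rule illustrated above (the header gets
-a '/1' or '/2' suffix; sequence and quality strings are cut in half)::
-
-    header = "@HWI-EAS91_1_30788AAXX:7:21:1542:1758"
-    seq = ("GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC"
-           "GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA")
-    half = len(seq) // 2
-    end1_header, end2_header = header + "/1", header + "/2"
-    end1_seq, end2_seq = seq[:half], seq[half:]
-    # end1_seq == "GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC"
-    # end2_seq == "GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA"
-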
-</help>
-</tool>
--- a/tools/multivariate_stats/cca.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,159 +0,0 @@
-#!/usr/bin/env python
-
-from galaxy import eggs
-import sys, string
-from rpy import *
-import numpy
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-infile = sys.argv[1]
-x_cols = sys.argv[2].split(',')
-y_cols = sys.argv[3].split(',')
-
-x_scale = x_center = "FALSE"
-if sys.argv[4] == 'both':
-    x_scale = x_center = "TRUE"
-elif sys.argv[4] == 'center':
-    x_center = "TRUE"
-elif sys.argv[4] == 'scale':
-    x_scale = "TRUE"
-    
-y_scale = y_center = "FALSE"
-if sys.argv[5] == 'both':
-    y_scale = y_center = "TRUE"
-elif sys.argv[5] == 'center':
-    y_center = "TRUE"
-elif sys.argv[5] == 'scale':
-    y_scale = "TRUE"
-
-std_scores = "FALSE"   
-if sys.argv[6] == "yes":
-    std_scores = "TRUE"
-    
-outfile = sys.argv[7]
-outfile2 = sys.argv[8]
-
-fout = open(outfile,'w')
-elems = []
-for i, line in enumerate( file ( infile )):
-    line = line.rstrip('\r\n')
-    if len( line )>0 and not line.startswith( '#' ):
-        elems = line.split( '\t' )
-        break 
-    if i == 30:
-        break # Hopefully we'll never get here...
-
-if len( elems )<1:
-    stop_err( "The data in your input dataset is either missing or not formatted properly." )
-
-x_vals = []
-
-for k,col in enumerate(x_cols):
-    x_cols[k] = int(col)-1
-    x_vals.append([])
-
-y_vals = []
-
-for k,col in enumerate(y_cols):
-    y_cols[k] = int(col)-1
-    y_vals.append([])
-
-skipped = 0
-for ind,line in enumerate( file( infile )):
-    if line and not line.startswith( '#' ):
-        try:
-            fields = line.strip().split("\t")
-            valid_line = True
-            for col in x_cols+y_cols:
-                try:
-                    assert float(fields[col])
-                except:
-                    skipped += 1
-                    valid_line = False
-                    break
-            if valid_line:
-                for k,col in enumerate(x_cols):
-                    try:
-                        xval = float(fields[col])
-                    except:
-                        xval = NaN
-                    x_vals[k].append(xval)
-                for k,col in enumerate(y_cols):
-                    try:
-                        yval = float(fields[col])
-                    except:
-                        yval = NaN
-                    y_vals[k].append(yval)
-        except:
-            skipped += 1
-
-x_vals1 = numpy.asarray(x_vals).transpose()
-y_vals1 = numpy.asarray(y_vals).transpose()
-
-x_dat= r.list(array(x_vals1))
-y_dat= r.list(array(y_vals1))
-
-try:
-    r.suppressWarnings(r.library("yacca"))
-except:
-    stop_err("Missing R library yacca.")
-    
-set_default_mode(NO_CONVERSION)
-try:
-    xcolnames = ["c%d" %(el+1) for el in x_cols]
-    ycolnames = ["c%d" %(el+1) for el in y_cols]
-    cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames, xcenter=r(x_center), ycenter=r(y_center), xscale=r(x_scale), yscale=r(y_scale), standardize_scores=r(std_scores))
-    ftest = r.F_test_cca(cc)
-except RException, rex:
-    stop_err("Encountered error while performing CCA on the input data: %s" %(rex))
-
-set_default_mode(BASIC_CONVERSION)
-summary = r.summary(cc)
-
-ncomps = len(summary['corr'])
-comps = summary['corr'].keys()
-corr = summary['corr'].values()
-xlab = summary['xlab']
-ylab = summary['ylab']
-
-# Reorder the correlations so that component i appears in column i.
-for i in range(ncomps):
-    corr[comps.index('CV %s' %(i+1))] = summary['corr'].values()[i]
-
-ftest=ftest.as_py()
-print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-print >>fout, "#Correlation\t%s" %("\t".join(["%.4g" % el for el in corr]))
-print >>fout, "#F-statistic\t%s" %("\t".join(["%.4g" % el for el in ftest['statistic']]))
-print >>fout, "#p-value\t%s" %("\t".join(["%.4g" % el for el in ftest['p.value']]))
-
-print >>fout, "#X-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-for i,val in enumerate(summary['xcoef']):
-    print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
-
-print >>fout, "#Y-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-for i,val in enumerate(summary['ycoef']):
-    print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
-       
-print >>fout, "#X-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-for i,val in enumerate(summary['xstructcorr']):
-    print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
-
-print >>fout, "#Y-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-for i,val in enumerate(summary['ystructcorr']):
-    print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
-
-print >>fout, "#X-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-for i,val in enumerate(summary['xcrosscorr']):
-    print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
-
-print >>fout, "#Y-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-for i,val in enumerate(summary['ycrosscorr']):
-    print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
-
-r.pdf( outfile2, 8, 8 )
-#r.plot(cc)
-for i in range(ncomps):
-    r.helio_plot(cc, cv = i+1, main = r.paste("Explained Variance for CV",i+1), type = "variance")
-r.dev_off()
\ No newline at end of file
--- a/tools/multivariate_stats/cca.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,95 +0,0 @@
-<tool id="cca1" name="Canonical Correlation Analysis" version="1.0.0">
-  <description> </description>
-  <command interpreter="python">
-    cca.py 
-      $input1
-      $x_cols
-      $y_cols
-      $x_scale
-      $y_scale
-      $std_scores
-      $out_file1
-      $out_file2
-  </command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
-    <param name="x_cols" label="Select columns containing X variables " type="data_column" data_ref="input1" numerical="True" multiple="true" >
-        <validator type="no_options" message="Please select at least one column."/>
-    </param>
-    <param name="y_cols" label="Select columns containing Y variables " type="data_column" data_ref="input1" numerical="True" multiple="true" >
-        <validator type="no_options" message="Please select at least one column."/>
-    </param>
-    <param name="x_scale" type="select" label="Type of Scaling for X variables" help="Can be used to center and/or scale variables">
-        <option value="none" selected="true">None</option>
-        <option value="center">Center only</option>
-        <option value="scale">Scale only</option>
-        <option value="both">Center and Scale</option>
-    </param>
-    <param name="y_scale" type="select" label="Type of Scaling for Y variables" help="Can be used to center and/or scale variables">
-        <option value="none" selected="true">None</option>
-        <option value="center">Center only</option>
-        <option value="scale">Scale only</option>
-        <option value="both">Center and Scale</option>
-    </param>
-    <param name="std_scores" type="select" label="Report standardized scores?" help="Selecting 'Yes' will rescale scores (and coefficients) to produce scores of unit variance">
-        <option value="no" selected="true">No</option>
-        <option value="yes">Yes</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input1" />
-    <data format="pdf" name="out_file2" />
-  </outputs>
-  <requirements>
-    <requirement type="python-module">rpy</requirement>
-  </requirements>
-  <tests>
-    <test>
-        <param name="input1" value="iris.tabular"/>
-        <param name="x_cols" value="3,4"/>
-        <param name="y_cols" value="1,2"/>
-        <param name="x_scale" value="both"/>
-        <param name="y_scale" value="scale"/>
-        <param name="std_scores" value="yes"/>
-        <output name="out_file1" file="cca_out1.tabular"/>
-        <output name="out_file2" file="cca_out2.pdf"/>
-    </test>
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Edit Datasets-&gt;Convert characters*
-
------
-
-.. class:: infomark
-
-**What it does**
-
-This tool uses functions from the 'yacca' library of the R statistical package to perform Canonical Correlation Analysis (CCA) on the input data. It outputs two files: one containing the summary statistics of the performed CCA, and the other containing helioplots, which display the structural loadings of the X and Y variables on the different canonical components.
-
-*Carter T. Butts (2009). yacca: Yet Another Canonical Correlation Analysis Package. R package version 1.1.*
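-
-For reference, the canonical correlations are the singular values of
-Sxx^(-1/2) Sxy Syy^(-1/2), where Sxx, Syy, and Sxy are the sample
-(co)variance matrices of the two variable sets. A minimal numpy sketch
-of this computation, independent of the rpy-based script above
-(illustrative only)::
-
-    import numpy as np
-
-    def canonical_correlations(X, Y):
-        # Center each block of variables (one observation per row).
-        X = X - X.mean(axis=0)
-        Y = Y - Y.mean(axis=0)
-        n = X.shape[0]
-        Sxx = np.dot(X.T, X) / (n - 1)
-        Syy = np.dot(Y.T, Y) / (n - 1)
-        Sxy = np.dot(X.T, Y) / (n - 1)
-        def inv_sqrt(S):
-            # Inverse matrix square root via eigendecomposition
-            # (assumes S is positive definite).
-            w, V = np.linalg.eigh(S)
-            return np.dot(V * (w ** -0.5), V.T)
-        M = np.dot(np.dot(inv_sqrt(Sxx), Sxy), inv_sqrt(Syy))
-        # The singular values of M are the canonical correlations.
-        return np.linalg.svd(M, compute_uv=False)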
-
------
-
-.. class:: warningmark
-
-**Note**
-
-- This tool currently treats all predictor and response variables as continuous numeric variables. Running the tool on categorical variables may produce incorrect results.
-
-- Rows containing non-numeric (or missing) data in any of the chosen columns will be excluded from the analysis.
-
-- The summary statistics in the output are described below:
-
-  - correlation: Canonical correlation between the canonical variates (i.e. transformed variables)
-  - F-statistic: F-value obtained from F Test for Canonical Correlations Using Rao's Approximation
-  - p-value: denotes significance of canonical correlations
-  - Coefficients: represent the coefficients of X and Y variables on each canonical variate
-  - Loadings: represent the correlations between the original variables in each set and their respective canonical variates 
-  - CrossLoadings: represent the correlations between the original variables in each set and the opposite canonical variates 
-  
-  </help>
-</tool>
--- a/tools/multivariate_stats/kcca.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,146 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Run kernel CCA using kcca() from R 'kernlab' package
-
-usage: %prog [options]
-   -i, --input=i: Input file
-   -o, --output1=o: Summary output
-   -x, --x_cols=x: X-Variable columns
-   -y, --y_cols=y: Y-Variable columns
-   -k, --kernel=k: Kernel function
-   -f, --features=f: Number of canonical components to return
-   -s, --sigma=s: sigma
-   -d, --degree=d: degree
-   -l, --scale=l: scale
-   -t, --offset=t: offset
-   -r, --order=r: order
-
-usage: %prog input output1 x_cols y_cols kernel features sigma(or_None) degree(or_None) scale(or_None) offset(or_None) order(or_None)
-"""
-
-from galaxy import eggs
-import sys, string
-from rpy import *
-import numpy
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-#Parse Command Line
-options, args = doc_optparse.parse( __doc__ )
-
-infile = options.input
-x_cols = options.x_cols.split(',')
-y_cols = options.y_cols.split(',')
-kernel = options.kernel
-outfile = options.output1
-ncomps = int(options.features)
-fout = open(outfile,'w')
-
-if ncomps < 1:
-    print "You chose to return '0' canonical components. Please try rerunning the tool with number of components = 1 or more."
-    sys.exit()
-elems = []
-for i, line in enumerate( file ( infile )):
-    line = line.rstrip('\r\n')
-    if len( line )>0 and not line.startswith( '#' ):
-        elems = line.split( '\t' )
-        break 
-    if i == 30:
-        break # Hopefully we'll never get here...
-
-if len( elems )<1:
-    stop_err( "The data in your input dataset is either missing or not formatted properly." )
-
-x_vals = []
-for k,col in enumerate(x_cols):
-    x_cols[k] = int(col)-1
-    x_vals.append([])
-y_vals = []
-for k,col in enumerate(y_cols):
-    y_cols[k] = int(col)-1
-    y_vals.append([])
-NA = 'NA'
-skipped = 0
-for ind,line in enumerate( file( infile )):
-    if line and not line.startswith( '#' ):
-        try:
-            fields = line.strip().split("\t")
-            valid_line = True
-            for col in x_cols+y_cols:
-                try:
-                    assert float(fields[col])
-                except:
-                    skipped += 1
-                    valid_line = False
-                    break
-            if valid_line:
-                for k,col in enumerate(x_cols):
-                    try:
-                        xval = float(fields[col])
-                    except:
-                        xval = NaN
-                    x_vals[k].append(xval)
-                for k,col in enumerate(y_cols):
-                    try:
-                        yval = float(fields[col])
-                    except:
-                        yval = NaN
-                    y_vals[k].append(yval)
-        except:
-            skipped += 1
-
-x_vals1 = numpy.asarray(x_vals).transpose()
-y_vals1 = numpy.asarray(y_vals).transpose()
-
-x_dat= r.list(array(x_vals1))
-y_dat= r.list(array(y_vals1))
-
-try:
-    r.suppressWarnings(r.library('kernlab'))
-except:
-    stop_err('Missing R library kernlab')
-            
-set_default_mode(NO_CONVERSION)
-if kernel=="rbfdot" or kernel=="anovadot":
-    pars = r.list(sigma=float(options.sigma))
-elif kernel=="polydot":
-    pars = r.list(degree=float(options.degree),scale=float(options.scale),offset=float(options.offset))
-elif kernel=="tanhdot":
-    pars = r.list(scale=float(options.scale),offset=float(options.offset))
-elif kernel=="besseldot":
-    pars = r.list(degree=float(options.degree),sigma=float(options.sigma),order=float(options.order))
-elif kernel=="anovadot":
-    pars = r.list(degree=float(options.degree),sigma=float(options.sigma))
-else:
-    pars = r.list()
-    
-try:
-    kcc = r.kcca(x=x_dat, y=y_dat, kernel=kernel, kpar=pars, ncomps=ncomps)
-except RException, rex:
-    stop_err("Encountered error while performing kCCA on the input data: %s" %(rex))
-
-set_default_mode(BASIC_CONVERSION)    
-kcor = r.kcor(kcc)
-if ncomps == 1:
-    kcor = [kcor]
-xcoef = r.xcoef(kcc)
-ycoef = r.ycoef(kcc)
-
-print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-
-print >>fout, "#Correlation\t%s" %("\t".join(["%.4g" % el for el in kcor]))
-    
-print >>fout, "#Estimated X-coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-for obs,val in enumerate(xcoef):
-    print >>fout, "%s\t%s" %(obs+1, "\t".join(["%.4g" % el for el in val]))
-
-print >>fout, "#Estimated Y-coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-for obs,val in enumerate(ycoef):
-    print >>fout, "%s\t%s" %(obs+1, "\t".join(["%.4g" % el for el in val]))
--- a/tools/multivariate_stats/kcca.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,150 +0,0 @@
-<tool id="kcca1" name="Kernel Canonical Correlation Analysis" version="1.0.0">
-  <description> </description>
-  <command interpreter="python">
-    kcca.py 
-      --input=$input1
-      --output1=$out_file1
-      --x_cols=$x_cols
-      --y_cols=$y_cols
-      --kernel=$kernelChoice.kernel
-      --features=$features
-      #if $kernelChoice.kernel == "rbfdot" or $kernelChoice.kernel == "anovadot":
-      --sigma=$kernelChoice.sigma
-      --degree="None"
-      --scale="None"
-      --offset="None"
-      --order="None"
-      #elif $kernelChoice.kernel == "polydot":
-      --sigma="None"
-      --degree=$kernelChoice.degree
-      --scale=$kernelChoice.scale
-      --offset=$kernelChoice.offset
-      --order="None"
-      #elif $kernelChoice.kernel == "tanhdot":
-      --sigma="None"
-      --degree="None"
-      --scale=$kernelChoice.scale
-      --offset=$kernelChoice.offset
-      --order="None"
-      #elif $kernelChoice.kernel == "besseldot":
-      --sigma=$kernelChoice.sigma
-      --degree=$kernelChoice.degree
-      --scale="None"
-      --offset="None"
-      --order=$kernelChoice.order
-      #elif $kernelChoice.kernel == "anovadot":
-      --sigma=$kernelChoice.sigma
-      --degree=$kernelChoice.degree
-      --scale="None"
-      --offset="None"
-      --order="None"
-      #else:
-      --sigma="None"
-      --degree="None"
-      --scale="None"
-      --offset="None"
-      --order="None"
-      #end if
-  </command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
-    <param name="x_cols" label="Select columns containing X variables " type="data_column" data_ref="input1" numerical="True" multiple="true" >
-        <validator type="no_options" message="Please select at least one column."/>
-    </param>
-    <param name="y_cols" label="Select columns containing Y variables " type="data_column" data_ref="input1" numerical="True" multiple="true" >
-        <validator type="no_options" message="Please select at least one column."/>
-    </param>
-    <param name="features" size="10" type="integer" value="2" label="Number of canonical components to return" help="Enter an integer value greater than 0"/>
-    <conditional name="kernelChoice">
-        <param name="kernel" type="select" label="Kernel function">
-            <option value="rbfdot" selected="true">Gaussian Radial Basis Function</option>
-            <option value="polydot">Polynomial</option>
-            <option value="vanilladot">Linear</option>
-            <option value="tanhdot">Hyperbolic</option>
-            <option value="laplacedot">Laplacian</option>
-            <option value="besseldot">Bessel</option>
-            <option value="anovadot">ANOVA Radial Basis Function</option>
-            <option value="splinedot">Spline</option>
-        </param>
-        <when value="vanilladot" />
-        <when value="splinedot" />
-        <when value="rbfdot">
-            <param name="sigma" size="10" type="float" value="1" label="sigma (inverse kernel width)" />
-        </when>
-        <when value="laplacedot">
-            <param name="sigma" size="10" type="float" value="1" label="sigma (inverse kernel width)" />
-        </when>
-        <when value="polydot">
-            <param name="degree" size="10" type="float" value="1" label="degree" />
-            <param name="scale" size="10" type="float" value="1" label="scale" />
-            <param name="offset" size="10" type="float" value="1" label="offset" />
-        </when>
-        <when value="tanhdot">
-            <param name="scale" size="10" type="float" value="1" label="scale" />
-            <param name="offset" size="10" type="float" value="1" label="offset" />
-        </when>
-        <when value="besseldot">
-            <param name="sigma" size="10" type="float" value="1" label="sigma" />
-            <param name="order" size="10" type="float" value="1" label="order" />
-            <param name="degree" size="10" type="float" value="1" label="degree" />
-        </when>
-        <when value="anovadot">
-            <param name="sigma" size="10" type="float" value="1" label="sigma" />
-            <param name="degree" size="10" type="float" value="1" label="degree" />
-        </when>
-    </conditional>    
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input1" />
-  </outputs>
-  <requirements>
-    <requirement type="python-module">rpy</requirement>
-  </requirements>
-  <tests>
-    <test>
-        <param name="input1" value="iris.tabular"/>
-        <param name="x_cols" value="1,2"/>
-        <param name="y_cols" value="3,4"/>
-        <param name="kernel" value="anovadot"/>
-        <param name="features" value="4"/>
-        <param name="sigma" value="0.1"/>
-        <param name="degree" value="2"/>
-        <output name="out_file1" file="kcca_out1.tabular" compare="re_match"/>
-    </test>
-    <test>
-        <param name="input1" value="iris.tabular"/>
-        <param name="x_cols" value="3,4"/>
-        <param name="y_cols" value="1,2"/>
-        <param name="kernel" value="rbfdot"/>
-        <param name="features" value="2"/>
-        <param name="sigma" value="0.5"/>
-        <output name="out_file1" file="kcca_out2.tabular" compare="re_match"/>
-    </test>
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Edit Datasets-&gt;Convert characters*
-
------
-
-.. class:: infomark
-
-**What it does**
-
-This tool uses functions from the 'kernlab' library of the R statistical package to perform Kernel Canonical Correlation Analysis (kCCA) on the input data. 
-
-*Alexandros Karatzoglou, Alex Smola, Kurt Hornik, Achim Zeileis (2004). kernlab - An S4 Package for Kernel Methods in R. Journal of Statistical Software 11(9), 1-20. URL http://www.jstatsoft.org/v11/i09/*
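-
-The kernel choices above correspond to kernlab's kernel functions: for
-example, rbfdot is k(x, y) = exp(-sigma * ||x - y||^2) and polydot is
-k(x, y) = (scale * x'y + offset)^degree. A minimal numpy sketch of the
-Gram matrices these two kernels produce (illustrative only)::
-
-    import numpy as np
-
-    def rbf_gram(X, sigma=1.0):
-        # rbfdot: exp(-sigma * squared Euclidean distance)
-        sq = np.sum(X ** 2, axis=1)
-        d2 = sq[:, None] + sq[None, :] - 2 * np.dot(X, X.T)
-        return np.exp(-sigma * d2)
-
-    def poly_gram(X, degree=1.0, scale=1.0, offset=1.0):
-        # polydot: (scale * inner product + offset) ** degree
-        return (scale * np.dot(X, X.T) + offset) ** degree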
-
------
-
-.. class:: warningmark
-
-**Note**
-
-This tool currently treats all variables as continuous numeric variables. Running the tool on categorical variables may produce incorrect results. Rows containing non-numeric (or missing) data in any of the chosen columns will be excluded from the analysis.
-
-  </help>
-</tool>
--- a/tools/multivariate_stats/kpca.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Run kernel PCA using kpca() from R 'kernlab' package
-
-usage: %prog [options]
-   -i, --input=i: Input file
-   -o, --output1=o: Summary output
-   -p, --output2=p: Figures output
-   -c, --var_cols=c: Variable columns
-   -k, --kernel=k: Kernel function
-   -f, --features=f: Number of principal components to return
-   -s, --sigma=s: sigma
-   -d, --degree=d: degree
-   -l, --scale=l: scale
-   -t, --offset=t: offset
-   -r, --order=r: order
-
-usage: %prog input output1 output2 var_cols kernel features sigma(or_None) degree(or_None) scale(or_None) offset(or_None) order(or_None)
-"""
-
-from galaxy import eggs
-import sys, string
-from rpy import *
-import numpy
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-#Parse Command Line
-options, args = doc_optparse.parse( __doc__ )
-
-infile = options.input
-x_cols = options.var_cols.split(',')
-kernel = options.kernel
-outfile = options.output1
-outfile2 = options.output2
-ncomps = int(options.features)
-fout = open(outfile,'w')
-
-elems = []
-for i, line in enumerate( file ( infile )):
-    line = line.rstrip('\r\n')
-    if len( line )>0 and not line.startswith( '#' ):
-        elems = line.split( '\t' )
-        break 
-    if i == 30:
-        break # Hopefully we'll never get here...
-
-if len( elems )<1:
-    stop_err( "The data in your input dataset is either missing or not formatted properly." )
-
-x_vals = []
-
-for k,col in enumerate(x_cols):
-    x_cols[k] = int(col)-1
-    x_vals.append([])
-
-NA = 'NA'
-skipped = 0
-for ind,line in enumerate( file( infile )):
-    if line and not line.startswith( '#' ):
-        try:
-            fields = line.strip().split("\t")
-            for k,col in enumerate(x_cols):
-                try:
-                    xval = float(fields[col])
-                except:
-                    xval = NaN
-                x_vals[k].append(xval)
-        except:
-            skipped += 1
-
-x_vals1 = numpy.asarray(x_vals).transpose()
-dat= r.list(array(x_vals1))
-
-try:
-    r.suppressWarnings(r.library('kernlab'))
-except:
-    stop_err('Missing R library kernlab')
-            
-set_default_mode(NO_CONVERSION)
-if kernel=="rbfdot" or kernel=="anovadot":
-    pars = r.list(sigma=float(options.sigma))
-elif kernel=="polydot":
-    pars = r.list(degree=float(options.degree),scale=float(options.scale),offset=float(options.offset))
-elif kernel=="tanhdot":
-    pars = r.list(scale=float(options.scale),offset=float(options.offset))
-elif kernel=="besseldot":
-    pars = r.list(degree=float(options.degree),sigma=float(options.sigma),order=float(options.order))
-elif kernel=="anovadot":
-    pars = r.list(degree=float(options.degree),sigma=float(options.sigma))
-else:
-    pars = r.list()
-    
-try:
-    kpc = r.kpca(x=r.na_exclude(dat), kernel=kernel, kpar=pars, features=ncomps)
-except RException, rex:
-    stop_err("Encountered error while performing kPCA on the input data: %s" %(rex))
-set_default_mode(BASIC_CONVERSION)
-    
-eig = r.eig(kpc)
-pcv = r.pcv(kpc)
-rotated = r.rotated(kpc)
-
-comps = eig.keys()
-eigv = eig.values()
-# Reorder the eigenvalues so that component i appears in column i.
-for i in range(ncomps):
-    eigv[comps.index('Comp.%s' %(i+1))] = eig.values()[i]
-
-print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-
-print >>fout, "#Eigenvalue\t%s" %("\t".join(["%.4g" % el for el in eigv]))
-    
-print >>fout, "#Principal component vectors\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-for obs,val in enumerate(pcv):
-    print >>fout, "%s\t%s" %(obs+1, "\t".join(["%.4g" % el for el in val]))
-
-print >>fout, "#Rotated values\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-for obs,val in enumerate(rotated):
-    print >>fout, "%s\t%s" %(obs+1, "\t".join(["%.4g" % el for el in val]))
-
-r.pdf( outfile2, 8, 8 )
-if ncomps != 1:
-    r.pairs(rotated,labels=r.list(range(1,ncomps+1)),main="Scatterplot of rotated values")
-else:
-    r.plot(rotated, ylab='Comp.1', main="Scatterplot of rotated values")
-r.dev_off()
\ No newline at end of file
--- a/tools/multivariate_stats/kpca.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,140 +0,0 @@
-<tool id="kpca1" name="Kernel Principal Component Analysis" version="1.0.0">
-  <description> </description>
-  <command interpreter="python">
-    kpca.py 
-      --input=$input1
-      --output1=$out_file1
-      --output2=$out_file2
-      --var_cols=$var_cols
-      --kernel=$kernelChoice.kernel
-      --features=$features
-      #if $kernelChoice.kernel == "rbfdot" or $kernelChoice.kernel == "anovadot":
-      --sigma=$kernelChoice.sigma
-      --degree="None"
-      --scale="None"
-      --offset="None"
-      --order="None"
-      #elif $kernelChoice.kernel == "polydot":
-      --sigma="None"
-      --degree=$kernelChoice.degree
-      --scale=$kernelChoice.scale
-      --offset=$kernelChoice.offset
-      --order="None"
-      #elif $kernelChoice.kernel == "tanhdot":
-      --sigma="None"
-      --degree="None"
-      --scale=$kernelChoice.scale
-      --offset=$kernelChoice.offset
-      --order="None"
-      #elif $kernelChoice.kernel == "besseldot":
-      --sigma=$kernelChoice.sigma
-      --degree=$kernelChoice.degree
-      --scale="None"
-      --offset="None"
-      --order=$kernelChoice.order
-      #elif $kernelChoice.kernel == "anovadot":
-      --sigma=$kernelChoice.sigma
-      --degree=$kernelChoice.degree
-      --scale="None"
-      --offset="None"
-      --order="None"
-      #else:
-      --sigma="None"
-      --degree="None"
-      --scale="None"
-      --offset="None"
-      --order="None"
-      #end if
-  </command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
-    <param name="var_cols" label="Select columns containing input variables " type="data_column" data_ref="input1" numerical="True" multiple="true" >
-        <validator type="no_options" message="Please select at least one column."/>
-    </param>
-    <param name="features" size="10" type="integer" value="2" label="Number of principal components to return" help="To return all, enter 0"/>
-    <conditional name="kernelChoice">
-        <param name="kernel" type="select" label="Kernel function">
-            <option value="rbfdot" selected="true">Gaussian Radial Basis Function</option>
-            <option value="polydot">Polynomial</option>
-            <option value="vanilladot">Linear</option>
-            <option value="tanhdot">Hyperbolic</option>
-            <option value="laplacedot">Laplacian</option>
-            <option value="besseldot">Bessel</option>
-            <option value="anovadot">ANOVA Radial Basis Function</option>
-            <option value="splinedot">Spline</option>
-        </param>
-        <when value="vanilladot" />
-        <when value="splinedot" />
-        <when value="rbfdot">
-            <param name="sigma" size="10" type="float" value="1" label="sigma (inverse kernel width)" />
-        </when>
-        <when value="laplacedot">
-            <param name="sigma" size="10" type="float" value="1" label="sigma (inverse kernel width)" />
-        </when>
-        <when value="polydot">
-            <param name="degree" size="10" type="integer" value="1" label="degree" />
-            <param name="scale" size="10" type="integer" value="1" label="scale" />
-            <param name="offset" size="10" type="integer" value="1" label="offset" />
-        </when>
-        <when value="tanhdot">
-            <param name="scale" size="10" type="integer" value="1" label="scale" />
-            <param name="offset" size="10" type="integer" value="1" label="offset" />
-        </when>
-        <when value="besseldot">
-            <param name="sigma" size="10" type="integer" value="1" label="sigma" />
-            <param name="order" size="10" type="integer" value="1" label="order" />
-            <param name="degree" size="10" type="integer" value="1" label="degree" />
-        </when>
-        <when value="anovadot">
-            <param name="sigma" size="10" type="integer" value="1" label="sigma" />
-            <param name="degree" size="10" type="integer" value="1" label="degree" />
-        </when>
-    </conditional>    
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input1" />
-    <data format="pdf" name="out_file2" />
-  </outputs>
-  <requirements>
-    <requirement type="python-module">rpy</requirement>
-  </requirements>
-  <tests>
-    <test>
-        <param name="input1" value="iris.tabular"/>
-        <param name="var_cols" value="1,2,3,4"/>
-        <param name="kernel" value="polydot"/>
-        <param name="features" value="2"/>
-        <param name="offset" value="0"/>
-        <param name="scale" value="1"/>
-        <param name="degree" value="2"/>
-        <output name="out_file1" file="kpca_out1.tabular"/>
-        <output name="out_file2" file="kpca_out2.pdf"/>
-    </test>
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Edit Datasets-&gt;Convert characters*
-
------
-
-.. class:: infomark
-
-**What it does**
-
-This tool uses functions from the 'kernlab' library of the R statistical package to perform Kernel Principal Component Analysis (kPCA) on the input data. It outputs two files: one containing the summary statistics of the performed kPCA, and the other containing a scatterplot matrix of the rotated values reported by kPCA.
-
-*Alexandros Karatzoglou, Alex Smola, Kurt Hornik, Achim Zeileis (2004). kernlab - An S4 Package for Kernel Methods in R. Journal of Statistical Software 11(9), 1-20. URL http://www.jstatsoft.org/v11/i09/*
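-
-The core computation behind kPCA can be sketched in a few lines of
-numpy: double-center the kernel (Gram) matrix, eigendecompose it, and
-project onto the leading eigenvectors. This is illustrative only;
-kernlab's implementation differs in its details::
-
-    import numpy as np
-
-    def kernel_pca(K, ncomps):
-        # K is an n x n Gram matrix, e.g. K[i, j] = kernel(x_i, x_j).
-        n = K.shape[0]
-        one = np.ones((n, n)) / n
-        Kc = (K - np.dot(one, K) - np.dot(K, one)
-              + np.dot(np.dot(one, K), one))  # double-center K
-        w, V = np.linalg.eigh(Kc)
-        w, V = w[::-1], V[:, ::-1]            # largest eigenvalues first
-        # Scale the eigenvectors (assumes the top ncomps eigenvalues
-        # are positive) and return the rotated values (scores).
-        alphas = V[:, :ncomps] / np.sqrt(w[:ncomps])
-        return np.dot(Kc, alphas)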
-
------
-
-.. class:: warningmark
-
-**Note**
-
-This tool currently treats all variables as continuous numeric variables. Running the tool on categorical variables may produce incorrect results. Rows containing non-numeric (or missing) data in any of the chosen columns will be excluded from the analysis.
-
-  </help>
-</tool>
--- a/tools/multivariate_stats/pca.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,130 +0,0 @@
-#!/usr/bin/env python
-
-from galaxy import eggs
-import sys, string
-from rpy import *
-import numpy
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-infile = sys.argv[1]
-x_cols = sys.argv[2].split(',')
-method = sys.argv[3]
-outfile = sys.argv[4]
-outfile2 = sys.argv[5]
-
-if method == 'svd':
-    scale = center = "FALSE"
-    if sys.argv[6] == 'both':
-        scale = center = "TRUE"
-    elif sys.argv[6] == 'center':
-        center = "TRUE"
-    elif sys.argv[6] == 'scale':
-        scale = "TRUE"
-    
-fout = open(outfile,'w')
-elems = []
-for i, line in enumerate( file ( infile )):
-    line = line.rstrip('\r\n')
-    if len( line )>0 and not line.startswith( '#' ):
-        elems = line.split( '\t' )
-        break 
-    if i == 30:
-        break # Hopefully we'll never get here...
-
-if len( elems )<1:
-    stop_err( "The data in your input dataset is either missing or not formatted properly." )
-
-x_vals = []
-
-for k,col in enumerate(x_cols):
-    x_cols[k] = int(col)-1
-    x_vals.append([])
-
-NA = 'NA'
-skipped = 0
-for ind,line in enumerate( file( infile )):
-    if line and not line.startswith( '#' ):
-        try:
-            fields = line.strip().split("\t")
-            valid_line = True
-            for k,col in enumerate(x_cols):
-                try:
-                    xval = float(fields[col])
-                except:
-                    skipped += 1 
-                    valid_line = False
-                    break
-            if valid_line:
-                for k,col in enumerate(x_cols):
-                    xval = float(fields[col])
-                    x_vals[k].append(xval)
-        except:
-            skipped += 1
-
-x_vals1 = numpy.asarray(x_vals).transpose()
-dat= r.list(array(x_vals1))
-
-set_default_mode(NO_CONVERSION)
-try:
-    if method == "cor":
-        pc = r.princomp(r.na_exclude(dat), cor = r("TRUE"))
-    elif method == "cov":
-        pc = r.princomp(r.na_exclude(dat), cor = r("FALSE"))
-    elif method=="svd":
-        pc = r.prcomp(r.na_exclude(dat), center = r(center), scale = r(scale))
-except RException, rex:
-    stop_err("Encountered error while performing PCA on the input data: %s" %(rex))
-
-set_default_mode(BASIC_CONVERSION)
-summary = r.summary(pc, loadings="TRUE")
-ncomps = len(summary['sdev'])
-
-if type(summary['sdev']) == type({}):
-    comps_unsorted = summary['sdev'].keys()
-    comps=[]
-    sd = summary['sdev'].values()
-    for i in range(ncomps):
-        sd[i] = summary['sdev'].values()[comps_unsorted.index('Comp.%s' %(i+1))]
-        comps.append('Comp.%s' %(i+1))
-elif type(summary['sdev']) == type([]):
-    sd = summary['sdev']
-    comps=[]
-    for i in range(ncomps):
-        comps.append('Comp.%s' %(i+1))
-
-print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-print >>fout, "#Std. deviation\t%s" %("\t".join(["%.4g" % el for el in sd]))
-total_var = 0
-vars = []
-for s in sd:
-    var = s*s
-    total_var += var
-    vars.append(var)
-for i,var in enumerate(vars):
-    vars[i] = vars[i]/total_var
-       
-print >>fout, "#Proportion of variance explained\t%s" %("\t".join(["%.4g" % el for el in vars]))
-
-print >>fout, "#Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-xcolnames = ["c%d" %(el+1) for el in x_cols]
-if 'loadings' in summary: #in case of princomp
-    loadings = 'loadings'
-elif 'rotation' in summary: #in case of prcomp
-    loadings = 'rotation'
-for i,val in enumerate(summary[loadings]):
-    print >>fout, "%s\t%s" %(xcolnames[i], "\t".join(["%.4g" % el for el in val]))
-
-print >>fout, "#Scores\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
-if 'scores' in summary: #in case of princomp
-    scores = 'scores'
-elif 'x' in summary: #in case of prcomp
-    scores = 'x'
-for obs,sc in enumerate(summary[scores]):
-    print >>fout, "%s\t%s" %(obs+1, "\t".join(["%.4g" % el for el in sc]))
-
-r.pdf( outfile2, 8, 8 )
-r.biplot(pc)
-r.dev_off()
\ No newline at end of file
--- a/tools/multivariate_stats/pca.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,100 +0,0 @@
-<tool id="pca1" name="Principal Component Analysis" version="1.0.2">
-  <description> </description>
-  <command interpreter="python">
-    pca.py 
-      $input1
-      $var_cols
-      $methodChoice.method
-      $out_file1
-      $out_file2
-      #if $methodChoice.method == "svd":
-      $methodChoice.scale
-      #end if
-  </command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
-    <param name="var_cols" label="Select columns containing input variables " type="data_column" data_ref="input1" numerical="True" multiple="true" >
-        <validator type="no_options" message="Please select at least one column."/>
-    </param>
-    <conditional name="methodChoice">
-        <param name="method" type="select" label="Method" help="The correlation matrix can only be used if there are no constant variables">
-            <option value="cor" selected="true">Eigenvectors of Correlation (princomp)</option>
-            <option value="cov">Eigenvectors of Covariance (princomp)</option>
-            <option value="svd">Singular Value Decomposition (prcomp)</option>
-        </param>
-        <when value="cor" />
-        <when value="cov" />
-        <when value="svd">
-            <param name="scale" type="select" label="Centering and Scaling" help="Can be used to center and/or scale variables">
-                <option value="none" selected="true">None</option>
-                <option value="center">Center only</option>
-                <option value="scale">Scale only</option>
-                <option value="both">Center and Scale</option>
-            </param>        
-        </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input1" />
-    <data format="pdf" name="out_file2" />
-  </outputs>
-  <requirements>
-    <requirement type="python-module">rpy</requirement>
-  </requirements>
-  <tests>
-    <test>
-        <param name="input1" value="iris.tabular"/>
-        <param name="var_cols" value="1,2,3,4"/>
-        <param name="method" value="cor"/>
-        <output name="out_file1" file="pca_out1.tabular"/>
-        <output name="out_file2" file="pca_out2.pdf"/>
-    </test>
-    <test>
-        <param name="input1" value="iris.tabular"/>
-        <param name="var_cols" value="1,2,3,4"/>
-        <param name="method" value="cov"/>
-        <output name="out_file1" file="pca_out3.tabular"/>
-        <output name="out_file2" file="pca_out4.pdf"/>
-    </test>
-    <test>
-        <param name="input1" value="iris.tabular"/>
-        <param name="var_cols" value="1,2,3,4"/>
-        <param name="method" value="svd"/>
-        <param name="scale" value="both"/>
-        <output name="out_file1" file="pca_out5.tabular"/>
-        <output name="out_file2" file="pca_out6.pdf"/>
-    </test>
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Edit Datasets-&gt;Convert characters*
-
------
-
-.. class:: infomark
-
-**What it does**
-
-This tool performs Principal Component Analysis on the given numeric input data using functions from the R statistical package: 'princomp' (for the eigenvector-based solution) and 'prcomp' (for the singular value decomposition based solution). It outputs two files: one containing the summary statistics of the PCA, and the other containing biplots of the observations and principal components.
-
-*R Development Core Team (2009). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. ISBN 3-900051-07-0, URL http://www.R-project.org.*
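-
-The singular value decomposition route (prcomp) can be sketched
-directly in numpy; the three values returned below correspond to the
-Std. deviation, Loadings, and Scores sections of the summary output
-(illustrative only)::
-
-    import numpy as np
-
-    def pca_svd(X, center=True, scale=False):
-        # SVD of the (optionally centered and scaled) data matrix.
-        if center:
-            X = X - X.mean(axis=0)
-        if scale:
-            X = X / X.std(axis=0, ddof=1)
-        U, s, Vt = np.linalg.svd(X, full_matrices=False)
-        sdev = s / np.sqrt(X.shape[0] - 1)  # component std. deviations
-        loadings = Vt.T                     # one column per component
-        scores = U * s                      # observation scores
-        return sdev, loadings, scores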
-
------
-
-.. class:: warningmark
-
-**Note**
-
-- This tool currently treats all variables as continuous numeric variables. Running the tool on categorical variables may produce incorrect results. Rows containing non-numeric (or missing) data in any of the chosen columns will be excluded from the analysis.
-
-- The summary statistics in the output are described below:
-
-  - Std. deviation: Standard deviations of the principal components
-  - Loadings: a list of eigenvectors/variable loadings
-  - Scores: Scores of the input data on the principal components
-
-  </help>
-</tool>
--- a/tools/mutation/visualize.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,391 +0,0 @@
-#!/usr/bin/env python
-
-'''
-Mutation Visualizer tool
-'''
-
-from __future__ import division
-
-import sys, csv, os, math
-import optparse
-
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "SVGFig" )
-import svgfig as svg
-
-
-SVGPan = """
-/**
- *  SVGPan library 1.2
- * ====================
- *
- * Given a unique existing element with id "viewport", including
- * the library into any SVG adds the following capabilities:
- *
- *  - Mouse panning
- *  - Mouse zooming (using the wheel)
- *  - Object dragging
- *
- * Known issues:
- *
- *  - Zooming (while panning) on Safari has still some issues
- *
- * Releases:
- *
- * 1.2, Sat Mar 20 08:42:50 GMT 2010, Zeng Xiaohui
- *      Fixed a bug with browser mouse handler interaction
- *
- * 1.1, Wed Feb  3 17:39:33 GMT 2010, Zeng Xiaohui
- *      Updated the zoom code to support the mouse wheel on Safari/Chrome
- *
- * 1.0, Andrea Leofreddi
- *      First release
- *
- * This code is licensed under the following BSD license:
- *
- * Copyright 2009-2010 Andrea Leofreddi (a.leofreddi@itcharm.com). All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without modification, are
- * permitted provided that the following conditions are met:
- *
- *    1. Redistributions of source code must retain the above copyright notice, this list of
- *       conditions and the following disclaimer.
- *
- *    2. Redistributions in binary form must reproduce the above copyright notice, this list
- *       of conditions and the following disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY Andrea Leofreddi ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Andrea Leofreddi OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * The views and conclusions contained in the software and documentation are those of the
- * authors and should not be interpreted as representing official policies, either expressed
- * or implied, of Andrea Leofreddi.
- */
-
-var root = document.documentElement;
-
-var state = 'none', stateTarget, stateOrigin, stateTf;
-
-setupHandlers(root);
-
-/**
- * Register handlers
- */
-function setupHandlers(root){
-        setAttributes(root, {
-                "onmouseup" : "add(evt)",
-                "onmousedown" : "handleMouseDown(evt)",
-                "onmousemove" : "handleMouseMove(evt)",
-                "onmouseup" : "handleMouseUp(evt)",
-                //"onmouseout" : "handleMouseUp(evt)", // Decomment this to stop the pan functionality when dragging out of the SVG element
-        });
-
-        if(navigator.userAgent.toLowerCase().indexOf('webkit') >= 0)
-                window.addEventListener('mousewheel', handleMouseWheel, false); // Chrome/Safari
-        else
-                window.addEventListener('DOMMouseScroll', handleMouseWheel, false); // Others
-}
-
-/**
- * Instance an SVGPoint object with given event coordinates.
- */
-function getEventPoint(evt) {
-        var p = root.createSVGPoint();
-
-        p.x = evt.clientX;
-        p.y = evt.clientY;
-
-        return p;
-}
-
-/**
- * Sets the current transform matrix of an element.
- */
-function setCTM(element, matrix) {
-        var s = "matrix(" + matrix.a + "," + matrix.b + "," + matrix.c + "," + matrix.d + "," + matrix.e + "," + matrix.f + ")";
-
-        element.setAttribute("transform", s);
-}
-
-/**
- * Dumps a matrix to a string (useful for debug).
- */
-function dumpMatrix(matrix) {
-        var s = "[ " + matrix.a + ", " + matrix.c + ", " + matrix.e + "\\n  " + matrix.b + ", " + matrix.d + ", " + matrix.f + "\\n  0, 0, 1 ]";
-
-        return s;
-}
-
-/**
- * Sets attributes of an element.
- */
-function setAttributes(element, attributes){
-        for (i in attributes)
-                element.setAttributeNS(null, i, attributes[i]);
-}
-
-/**
- * Handle mouse move event.
- */
-function handleMouseWheel(evt) {
-        if(evt.preventDefault)
-                evt.preventDefault();
-
-        evt.returnValue = false;
-
-        var svgDoc = evt.target.ownerDocument;
-
-        var delta;
-
-        if(evt.wheelDelta)
-                delta = evt.wheelDelta / 3600; // Chrome/Safari
-        else
-                delta = evt.detail / -90; // Mozilla
-
-        var z = 1 + delta; // Zoom factor: 0.9/1.1
-
-        var g = svgDoc.getElementById("viewport");
-       
-        var p = getEventPoint(evt);
-
-        p = p.matrixTransform(g.getCTM().inverse());
-
-        // Compute new scale matrix in current mouse position
-        var k = root.createSVGMatrix().translate(p.x, p.y).scale(z).translate(-p.x, -p.y);
-
-        setCTM(g, g.getCTM().multiply(k));
-
-        stateTf = stateTf.multiply(k.inverse());
-}
-
-/**
- * Handle mouse move event.
- */
-function handleMouseMove(evt) {
-        if(evt.preventDefault)
-                evt.preventDefault();
-
-        evt.returnValue = false;
-
-        var svgDoc = evt.target.ownerDocument;
-
-        var g = svgDoc.getElementById("viewport");
-
-        if(state == 'pan') {
-                // Pan mode
-                var p = getEventPoint(evt).matrixTransform(stateTf);
-
-                setCTM(g, stateTf.inverse().translate(p.x - stateOrigin.x, p.y - stateOrigin.y));
-        } else if(state == 'move') {
-                // Move mode
-                var p = getEventPoint(evt).matrixTransform(g.getCTM().inverse());
-
-                setCTM(stateTarget, root.createSVGMatrix().translate(p.x - stateOrigin.x, p.y - stateOrigin.y).multiply(g.getCTM().inverse()).multiply(stateTarget.getCTM()));
-
-                stateOrigin = p;
-        }
-}
-
-/**
- * Handle click event.
- */
-function handleMouseDown(evt) {
-        if(evt.preventDefault)
-                evt.preventDefault();
-
-        evt.returnValue = false;
-
-        var svgDoc = evt.target.ownerDocument;
-
-        var g = svgDoc.getElementById("viewport");
-
-        if(evt.target.tagName == "svg") {
-                // Pan mode
-                state = 'pan';
-
-                stateTf = g.getCTM().inverse();
-
-                stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
-        }
-        /*else {
-                // Move mode
-                state = 'move';
-
-                stateTarget = evt.target;
-
-                stateTf = g.getCTM().inverse();
-
-                stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
-        }*/
-}
-/**
- * Handle mouse button release event.
- */
-function handleMouseUp(evt) {
-        if(evt.preventDefault)
-                evt.preventDefault();
-
-        evt.returnValue = false;
-
-        var svgDoc = evt.target.ownerDocument;
-
-        if(state == 'pan' || state == 'move') {
-                // Quit pan mode
-                state = '';
-        }
-}
-"""
-
-COLS_PER_SAMPLE = 7
-HEADER_COLS = 4
-
-HEIGHT = 6
-WIDTH = 12
-BAR_WIDTH = 1.5
-GAP = 2
-
-
-colors = {'A':'blue', 'C':'green', 'G':'orange', 'T':'red'}
-bases = ['A', 'C', 'G', 'T' ]
-
-def stop_error(message):
-    print >> sys.stderr, message
-    sys.exit(1)
-
-def validate_bases(n_a, n_c, n_g, n_t, total):
-    # Return the first base whose read count exceeds the coverage
-    # total (an inconsistency in the input), or None if all are valid.
-    if n_a > total:
-        return 'A'
-    elif n_c > total:
-        return 'C'
-    elif n_g > total:
-        return 'G'
-    elif n_t > total:
-        return 'T'
-    return None
-
-def main(opts, args):
-    s = svg.SVG('g', id='viewport')
-    
-    # display legend
-    for i, b in enumerate( bases ):
-        bt = svg.SVG("tspan", b, style="font-family:Verdana;font-size:20%")
-        s.append(svg.SVG("text", bt, x=12+(i*10), y=3, stroke="none", fill="black"))
-        s.append(svg.SVG("rect", x=14+(i*10), y=0, width=4, height=3, 
-                         stroke="none", fill=colors[b], fill_opacity=0.5))
-
-    reader = open(opts.input_file, 'U')
-
-    samples = []
-    for i in range(int(len(args)/3)):
-        index = i*3
-        samples.append(dict(name=args[index],
-                            a_col=args[index+1],
-                            totals_col=args[index+2]))
-
-    if opts.zoom == 'interactive':
-        y = 35
-    else:
-        y = 25
-    for i, sample in enumerate(samples):
-        x = 23+(i*(WIDTH+GAP))
-        t = svg.SVG("text", svg.SVG("tspan", sample['name'], style="font-family:Verdana;font-size:25%"), 
-                    x=x, y=y, transform="rotate(-90 %i,%i)" % (x, y), stroke="none", fill="black")
-        s.append(t)
-    
-    count=1
-    for line in reader:
-        row = line.split('\t')
-        highlighted_position = False
-        show_pos = True
-        position = row[int(opts.position_col)-1]
-        ref = row[int(opts.ref_col)-1].strip().upper()
-        # validate
-        if ref not in bases: 
-            stop_error( "The reference column (col%s) contains invalid character '%s' at row %i of the dataset." % ( opts.ref_col, ref, count ) )
-        # display positions
-        if opts.zoom == 'interactive':
-            textx = 0
-        else:
-            textx = 7
-        bt = svg.SVG("tspan", str(position), style="font-family:Verdana;font-size:25%")
-        s.append(svg.SVG("text", bt, x=textx, y=34+(count*(HEIGHT+GAP)), stroke="none", fill="black"))
-        s.append(svg.SVG("rect", x=0, y=30+(count*(HEIGHT+GAP)), width=14, height=HEIGHT, 
-                         stroke='none', fill=colors[ref.upper()], fill_opacity=0.2))
-        
-        for sample_index, sample in enumerate(samples):
-            n_a = int(row[int(sample['a_col'])-1])
-            n_c = int(row[int(sample['a_col'])+1-1])
-            n_g = int(row[int(sample['a_col'])+2-1])
-            n_t = int(row[int(sample['a_col'])+3-1])
-            total = int(row[int(sample['totals_col'])-1])
-            # validate
-            base_error = validate_bases(n_a, n_c, n_g, n_t, total)
-            if base_error:
-                stop_error("For sample %i (%s), the number of base %s reads is more than the coverage on row %i." % (sample_index+1, 
-                                                                                                                     sample['name'], 
-                                                                                                                     base_error, 
-                                                                                                                     count))
- 
-            if total:
-                x = 16+(sample_index*(WIDTH+GAP))
-                y = 30+(count*(HEIGHT+GAP))
-                width = WIDTH
-                height = HEIGHT
-                # Grey background cell for this sample.
-                s.append(svg.SVG("rect", x=x, y=y, width=width, height=height, 
-                                 stroke='none', fill='grey', fill_opacity=0.25))
-                
-                for base, value in enumerate([n_a, n_c, n_g, n_t]):
-                    width = int(math.ceil(value / total * WIDTH))
-                    s.append(svg.SVG("rect", x=x, y=y, width=width, height=BAR_WIDTH, 
-                                     stroke='none', fill=colors[bases[base]], fill_opacity=0.6))
-                    y = y + BAR_WIDTH
-
-        count=count+1
-        
-    if opts.zoom == 'interactive':
-        canv = svg.canvas(s)
-        canv.save(opts.output_file)
-        import fileinput
-        flag = False
-        for line in fileinput.input(opts.output_file, inplace=1):
-            if line.startswith('<svg'):
-                print '<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">'
-                flag = True
-                continue
-            else:
-                if flag:
-                    print '<script type="text/javascript">%s</script>' % SVGPan
-                flag = False
-            print line,
-    else:
-        zoom = int(opts.zoom)
-        w = "%ipx" % (x*(10+zoom))
-        h = "%ipx" % (y*(2+zoom))
-        canv = svg.canvas(s, width=w, height=h, viewBox="0 0 %i %i" %(x+100, y+100)) 
-        canv.save(opts.output_file)
-
-if __name__ == '__main__':
-    parser = optparse.OptionParser()
-    parser.add_option('-i', '--input-file', dest='input_file', action='store')
-    parser.add_option('-o', '--output-file', dest='output_file', action='store')
-    parser.add_option('-z', '--zoom', dest='zoom', action='store', default='1')
-    parser.add_option('-p', '--position_col', dest='position_col', action='store', default='c0')
-    parser.add_option('-r', '--ref_col', dest='ref_col', action='store', default='c1')
-    (opts, args) = parser.parse_args()
-    main(opts, args)
-
-    
\ No newline at end of file
--- a/tools/mutation/visualize.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-<tool id="mutation_visualize" name="Mutation Visualization" version="1.0.0">
-  <description></description>
-  <command interpreter="python">
-    visualize.py 
-        --input-file=$input1
-        --output-file=$out_file1
-        --zoom=$zoom_value
-        --position_col=$position_col
-        --ref_col=$ref_col
-        #for $f in $sample_chooser:
-            "${f.name}"
-            ${f.a_col}
-            ${f.totals_col}
-        #end for
-  </command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Compare sequences in"></param>
-    <param name="position_col" type="data_column" data_ref="input1" multiple="false" numerical="false" label="Position Column" help="" />
-    <param name="ref_col" type="data_column" data_ref="input1" multiple="false" numerical="false" label="Reference Base Column" help="" />
-
-    <repeat name="sample_chooser" title="Sample">
-      <param name="name" type="text" label="Label" help="Optional" />
-      <param name="a_col" type="data_column" data_ref="input1" multiple="false" numerical="false" label="Base A Column" help="" />
-      <param name="totals_col" type="data_column" data_ref="input1" multiple="false" numerical="false" label="Coverage Column" help="" />
-    </repeat>
-
-    <param name="zoom_value" type="select" label="Zoom">
-        <option value="interactive">Interactive</option>
-        <option value="1">1x</option>
-        <option value="2">2x</option>
-        <option value="3">3x</option>
-        <option value="4">4x</option>
-        <option value="5">5x</option>
-        <option value="6">6x</option>
-        <option value="7">7x</option>
-        <option value="8">8x</option>
-        <option value="9">9x</option>
-        <option value="10">10x</option>
-    </param>
-    
-  </inputs>
-  <outputs>
-    <data format="svg" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="mutation_data1.txt" ftype="tabular" />
-      <param name="position_col" value="2" />
-      <param name="ref_col" value="4" />
-      <param name="zoom_value" value="interactive" />
-      <param name="name" value="s1" />
-      <param name="a_col" value="5" />
-      <param name="totals_col" value="9" />
-      <output name="output" file="mutation_data1_interactive.svg" ftype="svg" />
-    </test>
-    <test>
-      <param name="input1" value="mutation_data1.txt" ftype="tabular" />
-      <param name="position_col" value="2" />
-      <param name="ref_col" value="4" />
-      <param name="zoom_value" value="3" />
-      <param name="name" value="s1" />
-      <param name="a_col" value="5" />
-      <param name="totals_col" value="9" />
-      <output name="output" file="mutation_data1_zoom3x.svg" ftype="svg" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-This tool allows you to visualize mutations described in a tabular input file. It generates an SVG image which can be viewed in any web browser.
-
-You will need to specify the position and reference columns in the input file. Then click 'Add new Sample' to add each sample in the input file that you would like to visualize. For each sample, enter a label and specify the column for base 'A' and the coverage (totals) column.
-This tool assumes the columns specifying bases A, C, G, T are placed consecutively and in that order in the input file.
-
-Interactivity: if the interactive zoom option is selected, the resulting image can be zoomed in or out with the mouse scroll wheel and panned by dragging with the left mouse button.
-
------
-
-**General Example**
-
-Given the input file::
-  
-  chrM    72      73      G   26394   4       49  0   26447   26398   1   23389   3       45  0   23437   23392   1
-  chrM    149     150     T   11      50422   2   96  50531   50435   1   4       45417   1   65  45487   45422   1
-  
-To visualize the two samples in the input file, the following parameters are selected before running the tool::
-  
-  Position column:        2
-  Reference Base column:  4
-  Sample 1 Label:         gm blood 
-  Sample 1 Base A column: 5 
-  Sample 1 Totals column: 9 
-  Sample 2 Label:         gm cheek 
-  Sample 2 Base A column: 12 
-  Sample 2 Totals column: 16 
-
-Visualization output:
-
-.. image:: ./static/images/mutation_visualization_example.png 
-   :width: 150
-   
-Here the left-most column represents the position, with the background colored by the reference base. Each column to its right describes one sample.
-In the output above, the blue bar is the longest, meaning that base A is the most abundant base at position 72 in both samples.
-
-  </help>
-</tool>
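For reference, the Cheetah command template above appends one positional (label, base-A column, coverage column) triple per repeated Sample block after the named options. A hedged sketch of the resulting invocation for the two-sample example in the help text, with placeholder file names::

  import subprocess

  # File names are placeholders; column numbers follow the General Example.
  subprocess.call([
      "python", "visualize.py",
      "--input-file=mutations.tabular",
      "--output-file=mutations.svg",
      "--zoom=interactive",
      "--position_col=2",
      "--ref_col=4",
      "gm blood", "5", "9",    # sample 1: label, base A column, coverage column
      "gm cheek", "12", "16",  # sample 2
  ])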
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,254 +0,0 @@
-#!/usr/bin/env python
-"""Convert a BLAST XML file to 12 column tabular output
-
-Takes three command line options, input BLAST XML filename, output tabular
-BLAST filename, output format (std for standard 12 columns, or ext for the
-extended 24 columns offered in the BLAST+ wrappers).
-
-The 12 columns output are 'qseqid sseqid pident length mismatch gapopen qstart
-qend sstart send evalue bitscore' or 'std' at the BLAST+ command line, which
-mean:
-   
-====== ========= ============================================
-Column NCBI name Description
------- --------- --------------------------------------------
-     1 qseqid    Query Seq-id (ID of your sequence)
-     2 sseqid    Subject Seq-id (ID of the database hit)
-     3 pident    Percentage of identical matches
-     4 length    Alignment length
-     5 mismatch  Number of mismatches
-     6 gapopen   Number of gap openings
-     7 qstart    Start of alignment in query
-     8 qend      End of alignment in query
-     9 sstart    Start of alignment in subject (database hit)
-    10 send      End of alignment in subject (database hit)
-    11 evalue    Expectation value (E-value)
-    12 bitscore  Bit score
-====== ========= ============================================
-
-The additional columns offered in the Galaxy BLAST+ wrappers are:
-
-====== ============= ===========================================
-Column NCBI name     Description
------- ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
-    14 score         Raw score
-    15 nident        Number of identical matches
-    16 positive      Number of positive-scoring matches
-    17 gaps          Total number of gaps
-    18 ppos          Percentage of positive-scoring matches
-    19 qframe        Query frame
-    20 sframe        Subject frame
-    21 qseq          Aligned part of query sequence
-    22 sseq          Aligned part of subject sequence
-    23 qlen          Query sequence length
-    24 slen          Subject sequence length
-====== ============= ===========================================
-
-Most of these fields are given explicitly in the XML file; others, like the
-percentage identity and the number of gap openings, must be calculated.
-
-Be aware that the sequence in the extended tabular output or XML direct from
-BLAST+ may or may not use XXXX masking on regions of low complexity. This
-can throw off the calculation of percentage identity and gap openings.
-[In fact, both BLAST 2.2.24+ and 2.2.25+ have a subtle bug in this regard,
-with these numbers changing depending on whether or not the low complexity
-filter is used.]
-
-This script attempts to produce identical output to what BLAST+ would have done.
-However, check this with "diff -b ..." since BLAST+ sometimes includes an extra
-space character (probably a bug).
-"""
-import sys
-import re
-
-if sys.version_info[:2] >= ( 2, 5 ):
-    import xml.etree.cElementTree as ElementTree
-else:
-    from galaxy import eggs
-    import pkg_resources; pkg_resources.require( "elementtree" )
-    from elementtree import ElementTree
-
-def stop_err( msg ):
-    sys.stderr.write("%s\n" % msg)
-    sys.exit(1)
-
-#Parse Command Line
-try:
-    in_file, out_file, out_fmt = sys.argv[1:]
-except ValueError:
-    stop_err("Expect 3 arguments: input BLAST XML file, output tabular file, out format (std or ext)")
-
-if out_fmt == "std":
-    extended = False
-elif out_fmt == "x22":
-    stop_err("Format argument x22 has been replaced with ext (extended 24 columns)")
-elif out_fmt == "ext":
-    extended = True
-else:
-    stop_err("Format argument should be std (12 column) or ext (extended 24 columns)")
-
-
-# get an iterable
-try: 
-    context = ElementTree.iterparse(in_file, events=("start", "end"))
-except Exception:
-    stop_err("Invalid data format.")
-# turn it into an iterator
-context = iter(context)
-# get the root element
-try:
-    event, root = context.next()
-except Exception:
-    stop_err( "Invalid data format." )
-
-
-re_default_query_id = re.compile("^Query_\d+$")
-assert re_default_query_id.match("Query_101")
-assert not re_default_query_id.match("Query_101a")
-assert not re_default_query_id.match("MyQuery_101")
-re_default_subject_id = re.compile("^Subject_\d+$")
-assert re_default_subject_id.match("Subject_1")
-assert not re_default_subject_id.match("Subject_")
-assert not re_default_subject_id.match("Subject_12a")
-assert not re_default_subject_id.match("TheSubject_1")
-
-
-outfile = open(out_file, 'w')
-blast_program = None
-for event, elem in context:
-    if event == "end" and elem.tag == "BlastOutput_program":
-        blast_program = elem.text
-    # for every <Iteration> tag
-    if event == "end" and elem.tag == "Iteration":
-        #Expecting either this, from BLAST 2.2.25+ using FASTA vs FASTA
-        # <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>
-        # <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>
-        # <Iteration_query-len>406</Iteration_query-len>
-        # <Iteration_hits></Iteration_hits>
-        #
-        #Or, from BLAST 2.2.24+ run online
-        # <Iteration_query-ID>Query_1</Iteration_query-ID>
-        # <Iteration_query-def>Sample</Iteration_query-def>
-        # <Iteration_query-len>516</Iteration_query-len>
-        # <Iteration_hits>...
-        qseqid = elem.findtext("Iteration_query-ID")
-        if re_default_query_id.match(qseqid):
-            #Place holder ID, take the first word of the query definition
-            qseqid = elem.findtext("Iteration_query-def").split(None,1)[0]
-        qlen = int(elem.findtext("Iteration_query-len"))
-                                        
-        # for every <Hit> within <Iteration>
-        for hit in elem.findall("Iteration_hits/Hit"):
-            #Expecting either this,
-            # <Hit_id>gi|3024260|sp|P56514.1|OPSD_BUFBU</Hit_id>
-            # <Hit_def>RecName: Full=Rhodopsin</Hit_def>
-            # <Hit_accession>P56514</Hit_accession>
-            #or,
-            # <Hit_id>Subject_1</Hit_id>
-            # <Hit_def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</Hit_def>
-            # <Hit_accession>Subject_1</Hit_accession>
-            #
-            #apparently depending on the parse_deflines switch
-            sseqid = hit.findtext("Hit_id").split(None,1)[0]
-            hit_def = sseqid + " " + hit.findtext("Hit_def")
-            if re_default_subject_id.match(sseqid) \
-            and sseqid == hit.findtext("Hit_accession"):
-                #Place holder ID, take the first word of the subject definition
-                hit_def = hit.findtext("Hit_def")
-                sseqid = hit_def.split(None,1)[0]
-            # for every <Hsp> within <Hit>
-            for hsp in hit.findall("Hit_hsps/Hsp"):
-                nident = hsp.findtext("Hsp_identity")
-                length = hsp.findtext("Hsp_align-len")
-                pident = "%0.2f" % (100*float(nident)/float(length))
-
-                q_seq = hsp.findtext("Hsp_qseq")
-                h_seq = hsp.findtext("Hsp_hseq")
-                m_seq = hsp.findtext("Hsp_midline")
-                assert len(q_seq) == len(h_seq) == len(m_seq) == int(length)
-                gapopen = str(len(q_seq.replace('-', ' ').split())-1  + \
-                              len(h_seq.replace('-', ' ').split())-1)
-
-                mismatch = m_seq.count(' ') + m_seq.count('+') \
-                         - q_seq.count('-') - h_seq.count('-')
-                #TODO - Remove this alternative mismatch calculation and test
-                #once satisfied there are no problems
-                expected_mismatch = len(q_seq) \
-                                  - sum(1 for q,h in zip(q_seq, h_seq) \
-                                        if q == h or q == "-" or h == "-")
-                xx = sum(1 for q,h in zip(q_seq, h_seq) if q=="X" and h=="X")
-                if not (expected_mismatch - q_seq.count("X") <= int(mismatch) <= expected_mismatch + xx):
-                    stop_err("%s vs %s mismatches, expected %i <= %i <= %i" \
-                             % (qseqid, sseqid, expected_mismatch - q_seq.count("X"),
-                                int(mismatch), expected_mismatch))
-
-                #TODO - Remove this alternative identity calculation and test
-                #once satisfied there are no problems
-                expected_identity = sum(1 for q,h in zip(q_seq, h_seq) if q == h)
-                if not (expected_identity - xx <= int(nident) <= expected_identity + q_seq.count("X")):
-                    stop_err("%s vs %s identities, expected %i <= %i <= %i" \
-                             % (qseqid, sseqid, expected_identity, int(nident),
-                                expected_identity + q_seq.count("X")))
-                
-
-                evalue = hsp.findtext("Hsp_evalue")
-                if evalue == "0":
-                    evalue = "0.0"
-                else:
-                    evalue = "%0.0e" % float(evalue)
-                
-                bitscore = float(hsp.findtext("Hsp_bit-score"))
-                if bitscore < 100:
-                    #Seems to show one decimal place for lower scores
-                    bitscore = "%0.1f" % bitscore
-                else:
-                    #Note BLAST does not round to nearest int, it truncates
-                    bitscore = "%i" % bitscore
-
-                values = [qseqid,
-                          sseqid,
-                          pident,
-                          length, #hsp.findtext("Hsp_align-len")
-                          str(mismatch),
-                          gapopen,
-                          hsp.findtext("Hsp_query-from"), #qstart,
-                          hsp.findtext("Hsp_query-to"), #qend,
-                          hsp.findtext("Hsp_hit-from"), #sstart,
-                          hsp.findtext("Hsp_hit-to"), #send,
-                          evalue, #hsp.findtext("Hsp_evalue") in scientific notation
-                          bitscore, #hsp.findtext("Hsp_bit-score") rounded
-                          ]
-
-                if extended:
-                    sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(">"))
-                    #print hit_def, "-->", sallseqid
-                    positive = hsp.findtext("Hsp_positive")
-                    ppos = "%0.2f" % (100*float(positive)/float(length))
-                    qframe = hsp.findtext("Hsp_query-frame")
-                    sframe = hsp.findtext("Hsp_hit-frame")
-                    if blast_program == "blastp":
-                        #Probably a bug in BLASTP that they use 0 or 1 depending on format
-                        if qframe == "0": qframe = "1"
-                        if sframe == "0": sframe = "1"
-                    slen = int(hit.findtext("Hit_len"))
-                    values.extend([sallseqid,
-                                   hsp.findtext("Hsp_score"), #score,
-                                   nident,
-                                   positive,
-                                   hsp.findtext("Hsp_gaps"), #gaps,
-                                   ppos,
-                                   qframe,
-                                   sframe,
-                                   #NOTE - for blastp, XML shows original seq, tabular uses XXX masking
-                                   q_seq,
-                                   h_seq,
-                                   str(qlen),
-                                   str(slen),
-                                   ])
-                #print "\t".join(values) 
-                outfile.write("\t".join(values) + "\n")
-        # prevents ElementTree from growing large datastructure
-        root.clear()
-        elem.clear()
-outfile.close()
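Two of the calculated columns in the converter deserve a standalone illustration: gap openings are counted as the number of '-' runs in either aligned sequence, and mismatches come from the midline after discounting gap columns. A small sketch with a toy alignment (not taken from the test data)::

  def gapopen_and_mismatch(q_seq, h_seq, m_seq):
      # Splitting on '-' runs: pieces minus one = gap openings per sequence.
      gapopen = (len(q_seq.replace('-', ' ').split()) - 1 +
                 len(h_seq.replace('-', ' ').split()) - 1)
      # Midline blanks and '+' are non-identities; subtract the gap columns.
      mismatch = (m_seq.count(' ') + m_seq.count('+')
                  - q_seq.count('-') - h_seq.count('-'))
      return gapopen, mismatch

  # Toy alignment: one gap opening in the query, one true mismatch.
  print(gapopen_and_mismatch("ACD-EFG", "ACDQEYG", "ACD E G"))  # -> (1, 1)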
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,127 +0,0 @@
-<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.0.8">
-    <description>Convert BLAST XML output to tabular</description>
-    <command interpreter="python">
-      blastxml_to_tabular.py $blastxml_file $tabular_file $out_format
-    </command>
-    <inputs>
-        <param name="blastxml_file" type="data" format="blastxml" label="BLAST results as XML"/> 
-        <param name="out_format" type="select" label="Output format">
-            <option value="std" selected="True">Tabular (standard 12 columns)</option>
-            <option value="ext">Tabular (extended 24 columns)</option>
-        </param>
-    </inputs>
-    <outputs>
-        <data name="tabular_file" format="tabular" label="BLAST results as tabular" />
-    </outputs>
-    <requirements>
-    </requirements>
-    <tests>
-        <test>
-            <param name="blastxml_file" value="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" />
-            <param name="out_format" value="std" />
-            <!-- Note this has some white space differences from the actual blastp output blast_four_human_vs_rhodopsin.tabular -->
-            <output name="tabular_file" file="blastp_four_human_vs_rhodopsin_converted.tabular" ftype="tabular" />
-        </test>
-        <test>
-            <param name="blastxml_file" value="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" />
-            <param name="out_format" value="ext" />
-            <!-- Note this has some white space differences from the actual blastp output blast_four_human_vs_rhodopsin_22c.tabular -->
-            <output name="tabular_file" file="blastp_four_human_vs_rhodopsin_converted_ext.tabular" ftype="tabular" />
-        </test>
-        <test>
-            <param name="blastxml_file" value="blastp_sample.xml" ftype="blastxml" />
-            <param name="out_format" value="std" />
-            <!-- Note this has some white space differences from the actual blastp output -->
-            <output name="tabular_file" file="blastp_sample_converted.tabular" ftype="tabular" />
-        </test>
-        <test>
-            <param name="blastxml_file" value="blastx_rhodopsin_vs_four_human.xml" ftype="blastxml" />
-            <param name="out_format" value="std" />
-            <!-- Note this has some white space differences from the actual blastx output -->
-            <output name="tabular_file" file="blastx_rhodopsin_vs_four_human_converted.tabular" ftype="tabular" />
-        </test>
-        <test>
-            <param name="blastxml_file" value="blastx_rhodopsin_vs_four_human.xml" ftype="blastxml" />
-            <param name="out_format" value="ext" />
-            <!-- Note this has some white space and XXXX masking differences from the actual blastx output -->
-            <output name="tabular_file" file="blastx_rhodopsin_vs_four_human_converted_ext.tabular" ftype="tabular" />
-        </test>
-        <test>
-            <param name="blastxml_file" value="blastx_sample.xml" ftype="blastxml" />
-            <param name="out_format" value="std" />
-            <!-- Note this has some white space differences from the actual blastx output -->
-            <output name="tabular_file" file="blastx_sample_converted.tabular" ftype="tabular" />
-        </test>
-        <test>
-            <param name="blastxml_file" value="blastp_human_vs_pdb_seg_no.xml" ftype="blastxml" />
-            <param name="out_format" value="std" />
-            <!-- Note this has some white space differences from the actual blastp output -->
-            <output name="tabular_file" file="blastp_human_vs_pdb_seg_no_converted_std.tabular" ftype="tabular" />
-        </test>
-        <test>
-            <param name="blastxml_file" value="blastp_human_vs_pdb_seg_no.xml" ftype="blastxml" />
-            <param name="out_format" value="ext" />
-            <!-- Note this has some white space differences from the actual blastp output -->
-            <output name="tabular_file" file="blastp_human_vs_pdb_seg_no_converted_ext.tabular" ftype="tabular" />
-        </test>
-    </tests>
-    <help>
-    
-**What it does**
-
-NCBI BLAST+ (and the older NCBI 'legacy' BLAST) can output in a range of
-formats including tabular and a more detailed XML format. A complex workflow
-may need both the XML and the tabular output - but running BLAST twice is
-slow and wasteful.
-
-This tool takes the BLAST XML output and by default converts it into the
-standard 12 column tabular equivalent:
-
-====== ========= ============================================
-Column NCBI name Description
------- --------- --------------------------------------------
-     1 qseqid    Query Seq-id (ID of your sequence)
-     2 sseqid    Subject Seq-id (ID of the database hit)
-     3 pident    Percentage of identical matches
-     4 length    Alignment length
-     5 mismatch  Number of mismatches
-     6 gapopen   Number of gap openings
-     7 qstart    Start of alignment in query
-     8 qend      End of alignment in query
-     9 sstart    Start of alignment in subject (database hit)
-    10 send      End of alignment in subject (database hit)
-    11 evalue    Expectation value (E-value)
-    12 bitscore  Bit score
-====== ========= ============================================
-
-The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
-included by selecting the extended tabular output. The extra columns are
-included *after* the standard 12 columns. This is so that you can write
-workflow filtering steps that accept either the 12 or 24 column tabular
-BLAST output.
-
-====== ============= ===========================================
-Column NCBI name     Description
------- ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
-    14 score         Raw score
-    15 nident        Number of identical matches
-    16 positive      Number of positive-scoring matches
-    17 gaps          Total number of gaps
-    18 ppos          Percentage of positive-scoring matches
-    19 qframe        Query frame
-    20 sframe        Subject frame
-    21 qseq          Aligned part of query sequence
-    22 sseq          Aligned part of subject sequence
-    23 qlen          Query sequence length
-    24 slen          Subject sequence length
-====== ============= ===========================================
-
-Beware that the XML file (and thus the conversion) and the tabular output
-direct from BLAST+ may differ in the presence of XXXX masking on regions of
-low complexity (columns 21 and 22), and thus also in calculated figures like
-the percentage identity (column 3).
-
-    </help>
-</tool>
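Outside Galaxy, the converter can be run directly, taking exactly the three positional arguments described in its docstring. A hedged example with placeholder file names::

  import subprocess

  # 'std' selects the 12-column output, 'ext' the extended 24 columns.
  subprocess.call(["python", "blastxml_to_tabular.py",
                   "blast_output.xml", "blast_output.tabular", "std"])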
--- a/tools/ncbi_blast_plus/hide_stderr.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-#!/usr/bin/env python
-"""A simple script to redirect stderr to stdout when the return code is zero.
-
-See https://bitbucket.org/galaxy/galaxy-central/issue/325/
-
-Currently Galaxy ignores the return code from command line tools (even if it
-is non-zero, which by convention indicates an error) and treats any output on
-stderr as an error (even though by convention stderr is used for errors or
-warnings).
-
-This script runs the given command line, capturing all stdout and stderr in
-memory, and gets the return code. For a zero return code, any stderr (which
-should be warnings only) is added to the stdout. That way Galaxy believes
-everything is fine. For a non-zero return code, we output stdout as is, and
-any stderr, plus the return code to ensure there is some output on stderr.
-That way Galaxy treats this as an error.
-
-Once issue 325 is fixed, this script will not be needed.
-"""
-import sys
-import subprocess
-
-#Avoid using shell=True when we call subprocess, to ensure that if the Python
-#script is killed, the BLAST process is killed too.
-try:
-    words = []
-    for w in sys.argv[1:]:
-       if " " in w:
-           words.append('"%s"' % w)
-       else:
-           words.append(w)
-    cmd = " ".join(words)
-    child = subprocess.Popen(sys.argv[1:],
-                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-except Exception, err:
-    sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
-    sys.exit(1)
-#Use .communicate() rather than .wait(), which can deadlock once a pipe buffer fills.
-stdout, stderr = child.communicate()
-return_code = child.returncode
-
-if return_code:
-    sys.stdout.write(stdout)
-    sys.stderr.write(stderr)
-    sys.stderr.write("Return error code %i from command:\n" % return_code)
-    sys.stderr.write("%s\n" % cmd)
-else:
-    sys.stdout.write(stdout)
-    sys.stdout.write(stderr)
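A quick way to observe the wrapper's behaviour is to run it around a child that warns on stderr but exits zero: the warning should come back on stdout, so Galaxy would not flag the run as failed. A sketch, assuming hide_stderr.py is in the working directory::

  import subprocess

  # Child succeeds (exit 0) but writes a warning to stderr.
  child = "import sys; sys.stderr.write('just a warning\\n'); sys.exit(0)"
  merged = subprocess.check_output(
      ["python", "hide_stderr.py", "python", "-c", child])
  print(merged)  # the warning arrives on stdout; stderr stays empty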
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,209 +0,0 @@
-<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.11">
-    <description>Search nucleotide database with nucleotide query sequence(s)</description>
-    <version_command>blastn -version</version_command>
-    <command interpreter="python">hide_stderr.py
-## The command is a Cheetah template which allows some Python based syntax.
-## Lines starting with two hashes are comments. Galaxy will turn newlines into spaces.
-blastn
--query "$query"
-#if $db_opts.db_opts_selector == "db":
-  -db "${db_opts.database.fields.path}"
-#else:
-  -subject "$db_opts.subject"
-#end if
--task $blast_type
--evalue $evalue_cutoff
--out $output1
-##Set the extended list here so if/when we add things, saved workflows are not affected
-#if str($out_format)=="ext":
-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
-#else:
-    -outfmt $out_format
-#end if
--num_threads 8
-#if $adv_opts.adv_opts_selector=="advanced":
-$adv_opts.filter_query
-$adv_opts.strand
-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
-## Note -max_target_seqs overrides -num_descriptions and -num_alignments
-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
--max_target_seqs $adv_opts.max_hits
-#end if
-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
--word_size $adv_opts.word_size
-#end if
-$adv_opts.ungapped
-$adv_opts.parse_deflines
-## End of advanced options:
-#end if
-    </command>
-    <inputs>
-        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
-        <conditional name="db_opts">
-            <param name="db_opts_selector" type="select" label="Subject database/sequences">
-              <option value="db" selected="True">BLAST Database</option>
-              <option value="file">FASTA file</option>
-            </param>
-            <when value="db">
-                <param name="database" type="select" label="Nucleotide BLAST database">
-                    <options from_file="blastdb.loc">
-                      <column name="value" index="0"/>
-                      <column name="name" index="1"/>
-                      <column name="path" index="2"/>
-                    </options>
-                </param>
-                <param name="subject" type="hidden" value="" /> 
-            </when>
-            <when value="file">
-                <param name="database" type="hidden" value="" /> 
-                <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> 
-            </when>
-        </conditional>
-        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
-            <option value="megablast">megablast</option>
-            <option value="blastn">blastn</option>
-            <option value="blastn-short">blastn-short</option>
-            <option value="dc-megablast">dc-megablast</option>
-            <!-- Using BLAST 2.2.24+ this gives an error:
-            BLAST engine error: Program type 'vecscreen' not supported
-            <option value="vecscreen">vecscreen</option>
-            -->
-        </param>
-        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
-        <param name="out_format" type="select" label="Output format">
-            <option value="6" selected="True">Tabular (standard 12 columns)</option>
-            <option value="ext">Tabular (extended 24 columns)</option>
-            <option value="5">BLAST XML</option>
-            <option value="0">Pairwise text</option>
-            <option value="0 -html">Pairwise HTML</option>
-            <option value="2">Query-anchored text</option>
-            <option value="2 -html">Query-anchored HTML</option>
-            <option value="4">Flat query-anchored text</option>
-            <option value="4 -html">Flat query-anchored HTML</option>
-            <!--
-            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
-            -->
-        </param>
-        <conditional name="adv_opts">
-            <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
-              <option value="advanced">Show Advanced Options</option>
-            </param>
-            <when value="basic" />
-            <when value="advanced">
-                <!-- Could use a select (yes, no, other) where other allows setting 'level window linker' -->
-                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with DUST)" truevalue="-dust yes" falsevalue="-dust no" checked="true" />
-                <param name="strand" type="select" label="Query strand(s) to search against database/subject">
-                    <option value="-strand both">Both</option>
-                    <option value="-strand plus">Plus (forward)</option>
-                    <option value="-strand minus">Minus (reverse complement)</option>
-                </param>
-                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
-                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!-- I'd like word_size to be optional, with minimum 4 for blastn -->
-                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 4.">
-                    <validator type="in_range" min="0" />
-                </param>
-                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
-                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
-            </when>
-        </conditional>
-    </inputs>
-    <outputs>
-        <data name="output1" format="tabular" label="${blast_type.value_label} on ${db_opts.db_opts_selector}">
-            <change_format>
-                <when input="out_format" value="0" format="txt"/>
-                <when input="out_format" value="0 -html" format="html"/>
-                <when input="out_format" value="2" format="txt"/>
-                <when input="out_format" value="2 -html" format="html"/>
-                <when input="out_format" value="4" format="txt"/>
-                <when input="out_format" value="4 -html" format="html"/>
-                <when input="out_format" value="5" format="blastxml"/>
-            </change_format>
-        </data>
-    </outputs>
-    <requirements>
-        <requirement type="binary">blastn</requirement>
-    </requirements>
-    <help>
-    
-.. class:: warningmark
-
-**Note**. Database searches may take a substantial amount of time.
-For large input datasets it is advisable to allow overnight processing.  
-
------
-
-**What it does**
-
-Search a *nucleotide database* using a *nucleotide query*,
-using the NCBI BLAST+ blastn command line tool.
-Algorithms include blastn, megablast, and discontiguous megablast.
-
------
-
-**Output format**
-
-Because Galaxy focuses on processing tabular data, the default output of this
-tool is tabular. The standard BLAST+ tabular output contains 12 columns:
-
-====== ========= ============================================
-Column NCBI name Description
------- --------- --------------------------------------------
-     1 qseqid    Query Seq-id (ID of your sequence)
-     2 sseqid    Subject Seq-id (ID of the database hit)
-     3 pident    Percentage of identical matches
-     4 length    Alignment length
-     5 mismatch  Number of mismatches
-     6 gapopen   Number of gap openings
-     7 qstart    Start of alignment in query
-     8 qend      End of alignment in query
-     9 sstart    Start of alignment in subject (database hit)
-    10 send      End of alignment in subject (database hit)
-    11 evalue    Expectation value (E-value)
-    12 bitscore  Bit score
-====== ========= ============================================
-
-The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
-included by selecting the extended tabular output. The extra columns are
-included *after* the standard 12 columns. This is so that you can write
-workflow filtering steps that accept either the 12 or 24 column tabular
-BLAST output.
-
-====== ============= ===========================================
-Column NCBI name     Description
------- ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
-    14 score         Raw score
-    15 nident        Number of identical matches
-    16 positive      Number of positive-scoring matches
-    17 gaps          Total number of gaps
-    18 ppos          Percentage of positive-scoring matches
-    19 qframe        Query frame
-    20 sframe        Subject frame
-    21 qseq          Aligned part of query sequence
-    22 sseq          Aligned part of subject sequence
-    23 qlen          Query sequence length
-    24 slen          Subject sequence length
-====== ============= ===========================================
-
-The third option is BLAST XML output, which is designed to be parsed by
-another program, and is understood by some Galaxy tools.
-
-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
-
--------
-
-**References**
-
-Zhang et al. A Greedy Algorithm for Aligning DNA Sequences. 2000. JCB: 203-214.
-
-    </help>
-</tool>
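The out_format handling in the command template above is deliberate: the 'ext' choice is expanded server-side into an explicit 24-column -outfmt string, so adding columns later cannot silently change saved workflows. A sketch of that mapping (mirroring the Cheetah #if branch, not part of the wrapper itself)::

  EXT = ("6 std sallseqid score nident positive gaps ppos "
         "qframe sframe qseq sseq qlen slen")

  def outfmt_args(out_format):
      # 'ext' maps to the explicit column list; other values pass through.
      if out_format == "ext":
          return ["-outfmt", EXT]
      # Choices like "0 -html" expand into two tokens on the command line.
      return ["-outfmt"] + out_format.split()

  print(outfmt_args("ext"))
  print(outfmt_args("0 -html"))  # -> ['-outfmt', '0', '-html']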
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,276 +0,0 @@
-<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.11">
-    <description>Search protein database with protein query sequence(s)</description>
-    <version_command>blastp -version</version_command>
-    <command interpreter="python">hide_stderr.py
-## The command is a Cheetah template which allows some Python based syntax.
-## Lines starting with two hashes are comments. Galaxy will turn newlines into spaces.
-blastp
--query "$query"
-#if $db_opts.db_opts_selector == "db":
-  -db "${db_opts.database.fields.path}"
-#else:
-  -subject "$db_opts.subject"
-#end if
--task $blast_type
--evalue $evalue_cutoff
--out $output1
-##Set the extended list here so if/when we add things, saved workflows are not affected
-#if str($out_format)=="ext":
-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
-#else:
-    -outfmt $out_format
-#end if
--num_threads 8
-#if $adv_opts.adv_opts_selector=="advanced":
-$adv_opts.filter_query
--matrix $adv_opts.matrix
-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
-## Note -max_target_seqs overrides -num_descriptions and -num_alignments
-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
--max_target_seqs $adv_opts.max_hits
-#end if
-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
--word_size $adv_opts.word_size
-#end if
-##Ungapped disabled for now - see comments below
-##$adv_opts.ungapped
-$adv_opts.parse_deflines
-## End of advanced options:
-#end if
-    </command>
-    <inputs>
-        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> 
-        <conditional name="db_opts">
-            <param name="db_opts_selector" type="select" label="Subject database/sequences">
-              <option value="db" selected="True">BLAST Database</option>
-              <option value="file">FASTA file</option>
-            </param>
-            <when value="db">
-                <param name="database" type="select" label="Protein BLAST database">
-                    <options from_file="blastdb_p.loc">
-                      <column name="value" index="0"/>
-                      <column name="name" index="1"/>
-                      <column name="path" index="2"/>
-                    </options>
-                </param>
-                <param name="subject" type="hidden" value="" /> 
-            </when>
-            <when value="file">
-                <param name="database" type="hidden" value="" /> 
-                <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> 
-            </when>
-        </conditional>
-        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
-            <option value="blastp">blastp</option>
-            <option value="blastp-short">blastp-short</option>
-        </param>
-        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
-        <param name="out_format" type="select" label="Output format">
-            <option value="6" selected="True">Tabular (standard 12 columns)</option>
-            <option value="ext">Tabular (extended 24 columns)</option>
-            <option value="5">BLAST XML</option>
-            <option value="0">Pairwise text</option>
-            <option value="0 -html">Pairwise HTML</option>
-            <option value="2">Query-anchored text</option>
-            <option value="2 -html">Query-anchored HTML</option>
-            <option value="4">Flat query-anchored text</option>
-            <option value="4 -html">Flat query-anchored HTML</option>
-            <!--
-            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
-            -->
-        </param>
-        <conditional name="adv_opts">
-            <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
-              <option value="advanced">Show Advanced Options</option>
-            </param>
-            <when value="basic" />
-            <when value="advanced">
-                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
-                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />
-                <param name="matrix" type="select" label="Scoring matrix">
-                    <option value="BLOSUM90">BLOSUM90</option>
-                    <option value="BLOSUM80">BLOSUM80</option>
-                    <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
-                    <option value="BLOSUM50">BLOSUM50</option> 
-                    <option value="BLOSUM45">BLOSUM45</option>
-                    <option value="PAM250">PAM250</option>
-                    <option value="PAM70">PAM70</option>
-                    <option value="PAM30">PAM30</option>
-                </param>
-                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
-                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!-- I'd like word_size to be optional, with minimum 2 for blastp -->
-                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!--
-                Can't use '-ungapped' on its own, error back is:
-                Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
-                Tried using '-ungapped -comp_based_stats F' and blastp crashed with 'Attempt to access NULL pointer.'
-                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
-                -->
-                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
-            </when>
-        </conditional>
-    </inputs>
-    <outputs>
-        <data name="output1" format="tabular" label="${blast_type.value_label} on ${db_opts.db_opts_selector}">
-            <change_format>
-                <when input="out_format" value="0" format="txt"/>
-                <when input="out_format" value="0 -html" format="html"/>
-                <when input="out_format" value="2" format="txt"/>
-                <when input="out_format" value="2 -html" format="html"/>
-                <when input="out_format" value="4" format="txt"/>
-                <when input="out_format" value="4 -html" format="html"/>
-                <when input="out_format" value="5" format="blastxml"/>
-            </change_format>
-        </data>
-    </outputs>
-    <requirements>
-        <requirement type="binary">blastp</requirement>
-    </requirements>
-    <tests>
-        <test>
-            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
-            <param name="db_opts_selector" value="file" />
-            <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
-            <param name="database" value="" />
-            <param name="evalue_cutoff" value="1e-8" />
-            <param name="blast_type" value="blastp" />
-            <param name="out_format" value="5" />
-            <param name="adv_opts_selector" value="advanced" />
-            <param name="filter_query" value="False" />
-            <param name="matrix" value="BLOSUM62" />
-            <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
-            <param name="parse_deflines" value="True" />
-            <output name="output1" file="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" />
-        </test>
-        <test>
-            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
-            <param name="db_opts_selector" value="file" />
-            <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
-            <param name="database" value="" />
-            <param name="evalue_cutoff" value="1e-8" />
-            <param name="blast_type" value="blastp" />
-            <param name="out_format" value="6" />
-            <param name="adv_opts_selector" value="advanced" />
-            <param name="filter_query" value="False" />
-            <param name="matrix" value="BLOSUM62" />
-            <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
-            <param name="parse_deflines" value="True" />
-            <output name="output1" file="blastp_four_human_vs_rhodopsin.tabular" ftype="tabular" />
-        </test>
-        <test>
-            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
-            <param name="db_opts_selector" value="file" />
-            <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
-            <param name="database" value="" />
-            <param name="evalue_cutoff" value="1e-8" />
-            <param name="blast_type" value="blastp" />
-            <param name="out_format" value="ext" />
-            <param name="adv_opts_selector" value="advanced" />
-            <param name="filter_query" value="False" />
-            <param name="matrix" value="BLOSUM62" />
-            <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
-            <param name="parse_deflines" value="True" />
-            <output name="output1" file="blastp_four_human_vs_rhodopsin_ext.tabular" ftype="tabular" />
-        </test>
-        <test>
-            <param name="query" value="rhodopsin_proteins.fasta" ftype="fasta" />
-            <param name="db_opts_selector" value="file" />
-            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
-            <param name="database" value="" />
-            <param name="evalue_cutoff" value="1e-8" />
-            <param name="blast_type" value="blastp" />
-            <param name="out_format" value="6" />
-            <param name="adv_opts_selector" value="basic" />
-            <output name="output1" file="blastp_rhodopsin_vs_four_human.tabular" ftype="tabular" />
-        </test>
-    </tests>
-    <help>
-    
-.. class:: warningmark
-
-**Note**. Database searches may take a substantial amount of time.
-For large input datasets it is advisable to allow overnight processing.  
-
------
-
-**What it does**
-
-Search a *protein database* using a *protein query*,
-using the NCBI BLAST+ blastp command line tool.
-
------
-
-**Output format**
-
-Because Galaxy focuses on processing tabular data, the default output of this
-tool is tabular. The standard BLAST+ tabular output contains 12 columns:
-
-====== ========= ============================================
-Column NCBI name Description
------- --------- --------------------------------------------
-     1 qseqid    Query Seq-id (ID of your sequence)
-     2 sseqid    Subject Seq-id (ID of the database hit)
-     3 pident    Percentage of identical matches
-     4 length    Alignment length
-     5 mismatch  Number of mismatches
-     6 gapopen   Number of gap openings
-     7 qstart    Start of alignment in query
-     8 qend      End of alignment in query
-     9 sstart    Start of alignment in subject (database hit)
-    10 send      End of alignment in subject (database hit)
-    11 evalue    Expectation value (E-value)
-    12 bitscore  Bit score
-====== ========= ============================================
-
-The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
-included by selecting the extended tabular output. The extra columns are
-included *after* the standard 12 columns. This is so that you can write
-workflow filtering steps that accept either the 12 or 24 column tabular
-BLAST output.
-
-====== ============= ===========================================
-Column NCBI name     Description
------- ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
-    14 score         Raw score
-    15 nident        Number of identical matches
-    16 positive      Number of positive-scoring matches
-    17 gaps          Total number of gaps
-    18 ppos          Percentage of positive-scoring matches
-    19 qframe        Query frame
-    20 sframe        Subject frame
-    21 qseq          Aligned part of query sequence
-    22 sseq          Aligned part of subject sequence
-    23 qlen          Query sequence length
-    24 slen          Subject sequence length
-====== ============= ===========================================
-
-The third option is BLAST XML output, which is designed to be parsed by
-another program, and is understood by some Galaxy tools.
-
-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
-
--------
-
-**References**
-
-Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
-
-Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
-
-    </help>
-</tool>
--- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,240 +0,0 @@
-<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.11">
-    <description>Search protein database with translated nucleotide query sequence(s)</description>
-    <version_command>blastx -version</version_command>
-    <command interpreter="python">hide_stderr.py
-## The command is a Cheetah template which allows some Python based syntax.
-## Lines starting with two hashes are comments. Galaxy will turn newlines into spaces.
-blastx
--query "$query"
-#if $db_opts.db_opts_selector == "db":
-  -db "${db_opts.database.fields.path}"
-#else:
-  -subject "$db_opts.subject"
-#end if
--evalue $evalue_cutoff
--out $output1
-##Set the extended list here so if/when we add things, saved workflows are not affected
-#if str($out_format)=="ext":
-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
-#else:
-    -outfmt $out_format
-#end if
--num_threads 8
-#if $adv_opts.adv_opts_selector=="advanced":
-$adv_opts.filter_query
-$adv_opts.strand
--matrix $adv_opts.matrix
-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
-## Note -max_target_seqs overrides -num_descriptions and -num_alignments
-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
--max_target_seqs $adv_opts.max_hits
-#end if
-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
--word_size $adv_opts.word_size
-#end if
-$adv_opts.ungapped
-$adv_opts.parse_deflines
-## End of advanced options:
-#end if
-    </command>
-    <inputs>
-        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
-        <conditional name="db_opts">
-            <param name="db_opts_selector" type="select" label="Subject database/sequences">
-              <option value="db" selected="True">BLAST Database</option>
-              <option value="file">FASTA file</option>
-            </param>
-            <when value="db">
-                <param name="database" type="select" label="Protein BLAST database">
-                    <options from_file="blastdb_p.loc">
-                      <column name="value" index="0"/>
-                      <column name="name" index="1"/>
-                      <column name="path" index="2"/>
-                    </options>
-                </param>
-                <param name="subject" type="hidden" value="" /> 
-            </when>
-            <when value="file">
-                <param name="database" type="hidden" value="" /> 
-                <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> 
-            </when>
-        </conditional>
-        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
-        <param name="out_format" type="select" label="Output format">
-            <option value="6" selected="True">Tabular (standard 12 columns)</option>
-            <option value="ext">Tabular (extended 24 columns)</option>
-            <option value="5">BLAST XML</option>
-            <option value="0">Pairwise text</option>
-            <option value="0 -html">Pairwise HTML</option>
-            <option value="2">Query-anchored text</option>
-            <option value="2 -html">Query-anchored HTML</option>
-            <option value="4">Flat query-anchored text</option>
-            <option value="4 -html">Flat query-anchored HTML</option>
-            <!--
-            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
-            -->
-        </param>
-        <conditional name="adv_opts">
-            <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
-              <option value="advanced">Show Advanced Options</option>
-            </param>
-            <when value="basic" />
-            <when value="advanced">
-                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
-                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
-                <param name="strand" type="select" label="Query strand(s) to search against database/subject">
-                    <option value="-strand both">Both</option>
-                    <option value="-strand plus">Plus (forward)</option>
-                    <option value="-strand minus">Minus (reverse complement)</option>
-                </param>
-                <param name="matrix" type="select" label="Scoring matrix">
-                    <option value="BLOSUM90">BLOSUM90</option>
-                    <option value="BLOSUM80">BLOSUM80</option>
-                    <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
-                    <option value="BLOSUM50">BLOSUM50</option> 
-                    <option value="BLOSUM45">BLOSUM45</option>
-                    <option value="PAM250">PAM250</option>
-                    <option value="PAM70">PAM70</option>
-                    <option value="PAM30">PAM30</option>
-                </param>
-                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
-                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!-- I'd like word_size to be optional, with minimum 2 for blastx -->
-                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
-                    <validator type="in_range" min="0" />
-                </param>
-                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
-                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
-            </when>
-        </conditional>
-    </inputs>
-    <outputs>
-        <data name="output1" format="tabular" label="blastx on ${db_opts.db_opts_selector}">
-            <change_format>
-                <when input="out_format" value="0" format="txt"/>
-                <when input="out_format" value="0 -html" format="html"/>
-                <when input="out_format" value="2" format="txt"/>
-                <when input="out_format" value="2 -html" format="html"/>
-                <when input="out_format" value="4" format="txt"/>
-                <when input="out_format" value="4 -html" format="html"/>
-                <when input="out_format" value="5" format="blastxml"/>
-            </change_format>
-        </data>
-    </outputs>
-    <requirements>
-        <requirement type="binary">blastx</requirement>
-    </requirements>
-    <tests>
-        <test>
-            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
-            <param name="db_opts_selector" value="file" />
-            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
-            <param name="database" value="" />
-            <param name="evalue_cutoff" value="1e-10" />
-            <param name="out_format" value="5" />
-            <param name="adv_opts_selector" value="basic" />
-            <output name="output1" file="blastx_rhodopsin_vs_four_human.xml" ftype="blastxml" />
-        </test>
-        <test>
-            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
-            <param name="db_opts_selector" value="file" />
-            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
-            <param name="database" value="" />
-            <param name="evalue_cutoff" value="1e-10" />
-            <param name="out_format" value="6" />
-            <param name="adv_opts_selector" value="basic" />
-            <output name="output1" file="blastx_rhodopsin_vs_four_human.tabular" ftype="tabular" />
-        </test>
-        <test>
-            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
-            <param name="db_opts_selector" value="file" />
-            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
-            <param name="database" value="" />
-            <param name="evalue_cutoff" value="1e-10" />
-            <param name="out_format" value="ext" />
-            <param name="adv_opts_selector" value="basic" />
-            <output name="output1" file="blastx_rhodopsin_vs_four_human_ext.tabular" ftype="tabular" />
-        </test>
-    </tests>
-    <help>
-    
-.. class:: warningmark
-
-**Note**. Database searches may take a substantial amount of time.
-For large input datasets it is advisable to allow overnight processing.  
-
------
-
-**What it does**
-
-Search a *protein database* using a *translated nucleotide query*,
-via the NCBI BLAST+ blastx command line tool.
-
------
-
-**Output format**
-
-Because Galaxy focuses on processing tabular data, the default output of this
-tool is tabular. The standard BLAST+ tabular output contains 12 columns:
-
-====== ========= ============================================
-Column NCBI name Description
------- --------- --------------------------------------------
-     1 qseqid    Query Seq-id (ID of your sequence)
-     2 sseqid    Subject Seq-id (ID of the database hit)
-     3 pident    Percentage of identical matches
-     4 length    Alignment length
-     5 mismatch  Number of mismatches
-     6 gapopen   Number of gap openings
-     7 qstart    Start of alignment in query
-     8 qend      End of alignment in query
-     9 sstart    Start of alignment in subject (database hit)
-    10 send      End of alignment in subject (database hit)
-    11 evalue    Expectation value (E-value)
-    12 bitscore  Bit score
-====== ========= ============================================
-
-The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
-included by selecting the extended tabular output. The extra columns are
-included *after* the standard 12 columns. This is so that you can write
-workflow filtering steps that accept either the 12 or 24 column tabular
-BLAST output.
-
-====== ============= ===========================================
-Column NCBI name     Description
------- ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
-    14 score         Raw score
-    15 nident        Number of identical matches
-    16 positive      Number of positive-scoring matches
-    17 gaps          Total number of gaps
-    18 ppos          Percentage of positive-scoring matches
-    19 qframe        Query frame
-    20 sframe        Subject frame
-    21 qseq          Aligned part of query sequence
-    22 sseq          Aligned part of subject sequence
-    23 qlen          Query sequence length
-    24 slen          Subject sequence length 
-====== ============= ===========================================
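-
-For example (an illustrative sketch, not part of the wrapper itself; the
-helper name parse_blast_tabular is hypothetical), a downstream script can
-accept either the 12 or 24 column layout simply by reading only the first
-12 fields of each row::
-
-  import csv
-
-  def parse_blast_tabular( handle ):
-      # Works on both the 12 and 24 column variants, because the
-      # standard columns always come first.
-      for row in csv.reader( handle, delimiter='\t' ):
-          qseqid, sseqid = row[0], row[1]
-          pident = float( row[2] )
-          evalue, bitscore = float( row[10] ), float( row[11] )
-          yield qseqid, sseqid, pident, evalue, bitscore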
-
-The third option is BLAST XML output, which is designed to be parsed by
-another program, and is understood by some Galaxy tools.
-
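-One way to consume the XML output downstream (assuming Biopython is
-installed; the wrapper itself does not do this, and the filename below is
-hypothetical) is a sketch like::
-
-  from Bio.Blast import NCBIXML
-
-  handle = open( 'blastx_output.xml' )  # hypothetical output file
-  for record in NCBIXML.parse( handle ):
-      for alignment in record.alignments:
-          print alignment.hit_id, alignment.hsps[0].expect
-  handle.close()
-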
-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
-
--------
-
-**References**
-
-Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
-
-    </help>
-</tool>
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,286 +0,0 @@
-<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.11">
-    <description>Search translated nucleotide database with protein query sequence(s)</description>
-    <version_command>tblastn -version</version_command>
-    <command interpreter="python">hide_stderr.py
-## The command is a Cheetah template which allows some Python based syntax.
-## Lines starting with two hashes are comments. Galaxy will turn newlines into spaces
-tblastn
--query "$query"
-#if $db_opts.db_opts_selector == "db":
-  -db "${db_opts.database.fields.path}"
-#else:
-  -subject "$db_opts.subject"
-#end if
--evalue $evalue_cutoff
--out $output1
-##Set the extended list here so if/when we add things, saved workflows are not affected
-#if str($out_format)=="ext":
-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
-#else:
-    -outfmt $out_format
-#end if
--num_threads 8
-#if $adv_opts.adv_opts_selector=="advanced":
-$adv_opts.filter_query
--matrix $adv_opts.matrix
-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
-## Note -max_target_seqs overrides -num_descriptions and -num_alignments
-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
--max_target_seqs $adv_opts.max_hits
-#end if
-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
--word_size $adv_opts.word_size
-#end if
-##Ungapped disabled for now - see comments below
-##$adv_opts.ungapped
-$adv_opts.parse_deflines
-## End of advanced options:
-#end if
-    </command>
-    <inputs>
-        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> 
-        <conditional name="db_opts">
-            <param name="db_opts_selector" type="select" label="Subject database/sequences">
-              <option value="db" selected="True">BLAST Database</option>
-              <option value="file">FASTA file</option>
-            </param>
-            <when value="db">
-                <param name="database" type="select" label="Nucleotide BLAST database">
-                    <options from_file="blastdb.loc">
-                      <column name="value" index="0"/>
-                      <column name="name" index="1"/>
-                      <column name="path" index="2"/>
-                    </options>
-                </param>
-                <param name="subject" type="hidden" value="" /> 
-            </when>
-            <when value="file">
-                <param name="database" type="hidden" value="" /> 
-                <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> 
-            </when>
-        </conditional>
-        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
-        <param name="out_format" type="select" label="Output format">
-            <option value="6" selected="True">Tabular (standard 12 columns)</option>
-            <option value="ext">Tabular (extended 24 columns)</option>
-            <option value="5">BLAST XML</option>
-            <option value="0">Pairwise text</option>
-            <option value="0 -html">Pairwise HTML</option>
-            <option value="2">Query-anchored text</option>
-            <option value="2 -html">Query-anchored HTML</option>
-            <option value="4">Flat query-anchored text</option>
-            <option value="4 -html">Flat query-anchored HTML</option>
-            <!--
-            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
-            -->
-        </param>
-        <conditional name="adv_opts">
-            <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
-              <option value="advanced">Show Advanced Options</option>
-            </param>
-            <when value="basic" />
-            <when value="advanced">
-                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
-                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
-                <param name="matrix" type="select" label="Scoring matrix">
-                    <option value="BLOSUM90">BLOSUM90</option>
-                    <option value="BLOSUM80">BLOSUM80</option>
-                    <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
-                    <option value="BLOSUM50">BLOSUM50</option> 
-                    <option value="BLOSUM45">BLOSUM45</option>
-                    <option value="PAM250">PAM250</option>
-                    <option value="PAM70">PAM70</option>
-                    <option value="PAM30">PAM30</option>
-                </param>
-                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
-                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!-- I'd like word_size to be optional, with minimum 2 for blastp -->
-                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!--
-                Can't use '-ungapped' on its own, error back is:
-                Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
-                Tried using '-ungapped -comp_based_stats F' and tblastn crashed with 'Attempt to access NULL pointer.'
-                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
-                -->
-                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
-            </when>
-        </conditional>
-    </inputs>
-    <outputs>
-        <data name="output1" format="tabular" label="tblastn on ${db_opts.db_opts_selector}">
-            <change_format>
-                <when input="out_format" value="0" format="txt"/>
-                <when input="out_format" value="0 -html" format="html"/>
-                <when input="out_format" value="2" format="txt"/>
-                <when input="out_format" value="2 -html" format="html"/>
-                <when input="out_format" value="4" format="txt"/>
-                <when input="out_format" value="4 -html" format="html"/>
-                <when input="out_format" value="5" format="blastxml"/>
-            </change_format>
-        </data>
-    </outputs>
-    <requirements>
-        <requirement type="binary">tblastn</requirement>
-    </requirements>
-    <tests>
-        <test>
-            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
-            <param name="db_opts_selector" value="file" />
-            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
-            <param name="database" value="" />
-            <param name="evalue_cutoff" value="1e-10" />
-            <param name="out_format" value="5" />
-            <param name="adv_opts_selector" value="advanced" />
-            <param name="filter_query" value="false" />
-            <param name="matrix" value="BLOSUM80" />
-            <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
-            <param name="parse_deflines" value="false" />
-            <output name="output1" file="tblastn_four_human_vs_rhodopsin.xml" ftype="blastxml" />
-        </test>
-        <test>
-            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
-            <param name="db_opts_selector" value="file" />
-            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
-            <param name="database" value="" />
-            <param name="evalue_cutoff" value="1e-10" />
-            <param name="out_format" value="ext" />
-            <param name="adv_opts_selector" value="advanced" />
-            <param name="filter_query" value="false" />
-            <param name="matrix" value="BLOSUM80" />
-            <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
-            <param name="parse_deflines" value="false" />
-            <output name="output1" file="tblastn_four_human_vs_rhodopsin_ext.tabular" ftype="tabular" />
-        </test>
-        <test>
-            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
-            <param name="db_opts_selector" value="file" />
-            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
-            <param name="database" value="" />
-            <param name="evalue_cutoff" value="1e-10" />
-            <param name="out_format" value="6" />
-            <param name="adv_opts_selector" value="advanced" />
-            <param name="filter_query" value="false" />
-            <param name="matrix" value="BLOSUM80" />
-            <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
-            <param name="parse_deflines" value="false" />
-            <output name="output1" file="tblastn_four_human_vs_rhodopsin.tabular" ftype="tabular" />
-        </test>
-        <test>
-            <!-- Same as above, but parse deflines - on BLAST 2.2.25+ makes no difference -->
-            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
-            <param name="db_opts_selector" value="file" />
-            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
-            <param name="database" value="" />
-            <param name="evalue_cutoff" value="1e-10" />
-            <param name="out_format" value="6" />
-            <param name="adv_opts_selector" value="advanced" />
-            <param name="filter_query" value="false" />
-            <param name="matrix" value="BLOSUM80" />
-            <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
-            <param name="parse_deflines" value="true" />
-            <output name="output1" file="tblastn_four_human_vs_rhodopsin.tabular" ftype="tabular" />
-        </test>
-        <test>
-            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
-            <param name="db_opts_selector" value="file" />
-            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
-            <param name="database" value="" />
-            <param name="evalue_cutoff" value="1e-10" />
-            <param name="out_format" value="0 -html" />
-            <param name="adv_opts_selector" value="advanced" />
-            <param name="filter_query" value="false" />
-            <param name="matrix" value="BLOSUM80" />
-            <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
-            <param name="parse_deflines" value="false" />
-            <output name="output1" file="tblastn_four_human_vs_rhodopsin.html" ftype="html" />
-        </test>
-    </tests>
-    <help>
-    
-.. class:: warningmark
-
-**Note**. Database searches may take a substantial amount of time.
-For large input datasets it is advisable to allow overnight processing.  
-
------
-
-**What it does**
-
-Search a *translated nucleotide database* using a *protein query*,
-via the NCBI BLAST+ tblastn command line tool.
-
------
-
-**Output format**
-
-Because Galaxy focuses on processing tabular data, the default output of this
-tool is tabular. The standard BLAST+ tabular output contains 12 columns:
-
-====== ========= ============================================
-Column NCBI name Description
------- --------- --------------------------------------------
-     1 qseqid    Query Seq-id (ID of your sequence)
-     2 sseqid    Subject Seq-id (ID of the database hit)
-     3 pident    Percentage of identical matches
-     4 length    Alignment length
-     5 mismatch  Number of mismatches
-     6 gapopen   Number of gap openings
-     7 qstart    Start of alignment in query
-     8 qend      End of alignment in query
-     9 sstart    Start of alignment in subject (database hit)
-    10 send      End of alignment in subject (database hit)
-    11 evalue    Expectation value (E-value)
-    12 bitscore  Bit score
-====== ========= ============================================
-
-The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
-included by selecting the extended tabular output. The extra columns are
-included *after* the standard 12 columns. This is so that you can write
-workflow filtering steps that accept either the 12 or 24 column tabular
-BLAST output.
-
-====== ============= ===========================================
-Column NCBI name     Description
------- ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
-    14 score         Raw score
-    15 nident        Number of identical matches
-    16 positive      Number of positive-scoring matches
-    17 gaps          Total number of gaps
-    18 ppos          Percentage of positive-scoring matches
-    19 qframe        Query frame
-    20 sframe        Subject frame
-    21 qseq          Aligned part of query sequence
-    22 sseq          Aligned part of subject sequence
-    23 qlen          Query sequence length
-    24 slen          Subject sequence length
-====== ============= ===========================================
-
-The third option is BLAST XML output, which is designed to be parsed by
-another program, and is understood by some Galaxy tools.
-
-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
-
--------
-
-**References**
-
-Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
-
-    </help>
-</tool>
--- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,206 +0,0 @@
-<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.11">
-    <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description>
-    <version_command>tblastx -version</version_command>
-    <command interpreter="python">hide_stderr.py
-## The command is a Cheetah template which allows some Python based syntax.
-## Lines starting with two hashes are comments. Galaxy will turn newlines into spaces
-tblastx
--query "$query"
-#if $db_opts.db_opts_selector == "db":
-  -db "${db_opts.database.fields.path}"
-#else:
-  -subject "$db_opts.subject"
-#end if
--evalue $evalue_cutoff
--out $output1
-##Set the extended list here so if/when we add things, saved workflows are not affected
-#if str($out_format)=="ext":
-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
-#else:
-    -outfmt $out_format
-#end if
--num_threads 8
-#if $adv_opts.adv_opts_selector=="advanced":
-$adv_opts.filter_query
-$adv_opts.strand
--matrix $adv_opts.matrix
-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
-## Note -max_target_seqs overrides -num_descriptions and -num_alignments
-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
--max_target_seqs $adv_opts.max_hits
-#end if
-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
--word_size $adv_opts.word_size
-#end if
-$adv_opts.parse_deflines
-## End of advanced options:
-#end if
-    </command>
-    <inputs>
-        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
-        <conditional name="db_opts">
-            <param name="db_opts_selector" type="select" label="Subject database/sequences">
-              <option value="db" selected="True">BLAST Database</option>
-              <option value="file">FASTA file</option>
-            </param>
-            <when value="db">
-                <param name="database" type="select" label="Nucleotide BLAST database">
-                    <options from_file="blastdb.loc">
-                      <column name="value" index="0"/>
-                      <column name="name" index="1"/>
-                      <column name="path" index="2"/>
-                    </options>
-                </param>
-                <param name="subject" type="hidden" value="" /> 
-            </when>
-            <when value="file">
-                <param name="database" type="hidden" value="" /> 
-                <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> 
-            </when>
-        </conditional>
-        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
-        <param name="out_format" type="select" label="Output format">
-            <option value="6" selected="True">Tabular (standard 12 columns)</option>
-            <option value="ext">Tabular (extended 24 columns)</option>
-            <option value="5">BLAST XML</option>
-            <option value="0">Pairwise text</option>
-            <option value="0 -html">Pairwise HTML</option>
-            <option value="2">Query-anchored text</option>
-            <option value="2 -html">Query-anchored HTML</option>
-            <option value="4">Flat query-anchored text</option>
-            <option value="4 -html">Flat query-anchored HTML</option>
-            <!--
-            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
-            -->
-        </param>
-        <conditional name="adv_opts">
-            <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
-              <option value="advanced">Show Advanced Options</option>
-            </param>
-            <when value="basic" />
-            <when value="advanced">
-                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
-                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
-                <param name="strand" type="select" label="Query strand(s) to search against database/subject">
-                    <option value="-strand both">Both</option>
-                    <option value="-strand plus">Plus (forward)</option>
-                    <option value="-strand minus">Minus (reverse complement)</option>
-                </param>
-                <param name="matrix" type="select" label="Scoring matrix">
-                    <option value="BLOSUM90">BLOSUM90</option>
-                    <option value="BLOSUM80">BLOSUM80</option>
-                    <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
-                    <option value="BLOSUM50">BLOSUM50</option> 
-                    <option value="BLOSUM45">BLOSUM45</option>
-                    <option value="PAM250">PAM250</option>
-                    <option value="PAM70">PAM70</option>
-                    <option value="PAM30">PAM30</option>
-                </param>
-                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
-                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!-- I'd like word_size to be optional, with minimum 2 for tblastx -->
-                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
-                    <validator type="in_range" min="0" />
-                </param>
-                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
-            </when>
-        </conditional>
-    </inputs>
-    <outputs>
-        <data name="output1" format="tabular" label="tblastx on ${db_opts.db_opts_selector}">
-            <change_format>
-                <when input="out_format" value="0" format="txt"/>
-                <when input="out_format" value="0 -html" format="html"/>
-                <when input="out_format" value="2" format="txt"/>
-                <when input="out_format" value="2 -html" format="html"/>
-                <when input="out_format" value="4" format="txt"/>
-                <when input="out_format" value="4 -html" format="html"/>
-                <when input="out_format" value="5" format="blastxml"/>
-            </change_format>
-        </data>
-    </outputs>
-    <requirements>
-        <requirement type="binary">tblastx</requirement>
-    </requirements>
-    <help>
-    
-.. class:: warningmark
-
-**Note**. Database searches may take a substantial amount of time.
-For large input datasets it is advisable to allow overnight processing.  
-
------
-
-**What it does**
-
-Search a *translated nucleotide database* using a *translated nucleotide query*,
-via the NCBI BLAST+ tblastx command line tool.
-
------
-
-**Output format**
-
-Because Galaxy focuses on processing tabular data, the default output of this
-tool is tabular. The standard BLAST+ tabular output contains 12 columns:
-
-====== ========= ============================================
-Column NCBI name Description
------- --------- --------------------------------------------
-     1 qseqid    Query Seq-id (ID of your sequence)
-     2 sseqid    Subject Seq-id (ID of the database hit)
-     3 pident    Percentage of identical matches
-     4 length    Alignment length
-     5 mismatch  Number of mismatches
-     6 gapopen   Number of gap openings
-     7 qstart    Start of alignment in query
-     8 qend      End of alignment in query
-     9 sstart    Start of alignment in subject (database hit)
-    10 send      End of alignment in subject (database hit)
-    11 evalue    Expectation value (E-value)
-    12 bitscore  Bit score
-====== ========= ============================================
-
-The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
-included by selecting the extended tabular output. The extra columns are
-included *after* the standard 12 columns. This is so that you can write
-workflow filtering steps that accept either the 12 or 24 column tabular
-BLAST output.
-
-====== ============= ===========================================
-Column NCBI name     Description
------- ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
-    14 score         Raw score
-    15 nident        Number of identical matches
-    16 positive      Number of positive-scoring matches
-    17 gaps          Total number of gaps
-    18 ppos          Percentage of positive-scoring matches
-    19 qframe        Query frame
-    20 sframe        Subject frame
-    21 qseq          Aligned part of query sequence
-    22 sseq          Aligned part of subject sequence
-    23 qlen          Query sequence length
-    24 slen          Subject sequence length
-====== ============= ===========================================
-
-The third option is BLAST XML output, which is designed to be parsed by
-another program, and is understood by some Galaxy tools.
-
-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
-
--------
-
-**References**
-
-Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
-
-    </help>
-</tool>
--- a/tools/new_operations/basecoverage.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-<tool id="gops_basecoverage_1" name="Base Coverage">
-  <description>of all intervals</description>
-  <command interpreter="python">gops_basecoverage.py $input1 $output -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol}</command>
-  <inputs>
-    <param format="interval" name="input1" type="data">
-      <label>Compute coverage for</label>
-    </param>
-   </inputs>
-  <outputs>
-    <data format="txt" name="output" />
-  </outputs>
-  <code file="operation_filter.py"/>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed" />
-      <output name="output" file="gops_basecoverage_out.txt" />     
-    </test>
-    <test>
-      <param name="input1" value="gops_bigint.interval" />
-      <output name="output" file="gops_basecoverage_out2.txt" />     
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.
-
-This operation counts the total bases covered by a set of intervals.  Bases that are covered by more than one interval are **not** counted more than once towards the total.
-
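-For illustration (a minimal sketch, not the tool's actual implementation in
-gops_basecoverage.py; the function name base_coverage is hypothetical),
-unique base coverage can be computed by sorting the intervals and merging
-overlaps before summing::
-
-  def base_coverage( intervals ):
-      # intervals: (start, end) pairs, end exclusive
-      total = 0
-      cur_start = cur_end = None
-      for start, end in sorted( intervals ):
-          if cur_end is None or start > cur_end:
-              # disjoint from the running block; bank it and start a new one
-              if cur_end is not None:
-                  total += cur_end - cur_start
-              cur_start, cur_end = start, end
-          else:
-              # overlapping or adjacent; extend the running block
-              cur_end = max( cur_end, end )
-      if cur_end is not None:
-          total += cur_end - cur_start
-      return total
-
-  # base_coverage( [ (0, 10), (5, 15), (20, 25) ] ) == 20
-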
------
-
-**Screencasts!**
-
-See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
-
-.. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations
-
-
-</help>
-</tool>
\ No newline at end of file
--- a/tools/new_operations/cluster.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-<tool id="gops_cluster_1" name="Cluster">
-  <description>the intervals of a dataset</description>
-  <command interpreter="python">gops_cluster.py $input1 $output -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} -d $distance -m $minregions -o $returntype</command>
-  <inputs>
-    <param format="interval" name="input1" type="data">
-      <label>Cluster intervals of</label>
-    </param>
-    <param name="distance" size="5" type="integer" value="1" help="(bp)">
-      <label>max distance between intervals</label>
-    </param>
-    <param name="minregions" size="5" type="integer" value="2">
-      <label>min number of intervals per cluster</label>
-    </param>
-	<param name="returntype" type="select" label="Return type">
-		<option value="1">Merge clusters into single intervals</option>
-		<option value="2">Find cluster intervals; preserve comments and order</option>
-		<option value="3">Find cluster intervals; output grouped by clusters</option>
-		<option value="4">Find the smallest interval in each cluster</option>
-		<option value="5">Find the largest interval in each cluster</option>
-	</param>
-   </inputs>
-  <outputs>
-    <data format="input" name="output" metadata_source="input1" />
-  </outputs>
-  <code file="operation_filter.py">
-    <hook exec_after_process="exec_after_cluster" />
-  </code>
-  <tests>
-    <test>
-      <param name="input1" value="5.bed" />
-      <param name="distance" value="1" />
-      <param name="minregions" value="2" />
-      <param name="returntype" value="1" />
-      <output name="output" file="gops-cluster-1.bed" />     
-    </test>
-    <test>
-      <param name="input1" value="gops_cluster_bigint.bed" />
-      <param name="distance" value="1" />
-      <param name="minregions" value="2" />
-      <param name="returntype" value="1" />
-      <output name="output" file="gops-cluster-1.bed" />     
-    </test>
-    <test>
-      <param name="input1" value="5.bed" />
-      <param name="distance" value="1" />
-      <param name="minregions" value="2" />
-      <param name="returntype" value="2" />
-      <output name="output" file="gops-cluster-2.bed" />     
-    </test>    
-    <test>
-      <param name="input1" value="5.bed" />
-      <param name="distance" value="1" />
-      <param name="minregions" value="2" />
-      <param name="returntype" value="3" />
-      <output name="output" file="gops-cluster-3.bed" />     
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.
-
------
-
-**Screencasts!**
-
-See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
-
-.. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations
-
------
-
-**Syntax**
-
-- **Maximum distance** is the greatest distance in base pairs allowed between intervals for them to be considered &quot;clustered&quot;.  **Negative** values for distance are allowed, and are useful for clustering intervals that overlap.
-- **Minimum intervals per cluster** allows a threshold to be set on the minimum number of intervals to be considered a cluster.  Any area with fewer intervals than this minimum will not be included in the output.
-- **Merge clusters into single intervals** outputs intervals that span the entire cluster (see the sketch below).
-- **Find cluster intervals; preserve comments and order** filters out non-cluster intervals while maintaining the original ordering and comments in the file.
-- **Find cluster intervals; output grouped by clusters** filters out non-cluster intervals, but outputs the cluster intervals so that they are grouped together. Comments and original ordering in the file are lost.
-- **Find the smallest interval in each cluster** and **Find the largest interval in each cluster** each output a single interval per cluster, as named.
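-
-As an illustration of the merge behaviour only (a minimal sketch, not the
-tool's own gops_cluster.py code; merge_clusters is a hypothetical name),
-return type 1 could be implemented as::
-
-  def merge_clusters( intervals, max_dist=1, min_regions=2 ):
-      # intervals: (start, end) pairs on one chromosome, end exclusive
-      clusters, cur, cur_end = [], [], None
-      for start, end in sorted( intervals ):
-          if cur and start - cur_end <= max_dist:
-              # within the allowed gap of the running cluster; absorb it
-              cur.append( ( start, end ) )
-              cur_end = max( cur_end, end )
-          else:
-              if len( cur ) >= min_regions:
-                  clusters.append( ( cur[0][0], cur_end ) )
-              cur, cur_end = [ ( start, end ) ], end
-      if len( cur ) >= min_regions:
-          clusters.append( ( cur[0][0], cur_end ) )
-      return clusters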
-
------
-
-**Example**
-
-.. image:: ./static/operation_icons/gops_cluster.gif
-
-</help>
-</tool>
\ No newline at end of file
--- a/tools/new_operations/column_join.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,290 +0,0 @@
-#!/usr/bin/env python
-
-"""
-This tool joins two or more tab-delimited text files on a shared set of leading 'hinge' columns, keeping only the requested columns in the output. The tool will skip over invalid lines within the file, informing the user about the number of lines skipped.
-
-usage: %prog -o output -1 input1 -2 input2 -c column1[,column2[,column3[,...]]] -g hinge1[,hinge2[,hinge3[,...]]] -f <fill_options_file> [other_input1 [other_input2 [other_input3 ...]]]
-    -o, output=0: the output pileup
-    -1, input1=1: the pileup file to start with
-    -2, input2=2: the second pileup file to join
-    -g, hinge=h: the columns to be used for matching
-    -c, columns=c: the columns that should appear in the output
-    -f, fill_options_file=f: the file specifying the fill value to use
-    other_inputs: the other input files to join
-"""
-
-import optparse, os, re, struct, sys, tempfile
-
-try:
-    simplejson_exception = None
-    from galaxy import eggs
-    from galaxy.util.bunch import Bunch
-    from galaxy.util import stringify_dictionary_keys
-    import pkg_resources
-    pkg_resources.require("simplejson")
-    import simplejson
-except Exception, e:
-    simplejson_exception = e
-    simplejson = None
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def split_nums( text ):
-    """
-    Splits a string into pieces of numbers and non-numbers, like 'abc23B3' --> [ 'abc', 23, 'B', 3 ]
-    """
-    split_t = []
-    c = ''
-    n = ''
-    for ch in text:
-        try:
-            v = int( ch )
-            n += ch
-            if c:
-                split_t.append( ''.join( c ) )
-                c = ''
-        except ValueError:
-            c += ch
-            if n:
-                split_t.append( int( ''.join( n ) ) )
-                n = ''
-    if c:
-        split_t.append( ''.join( c ) )
-    if n:
-        split_t.append( int( ''.join( n ) ) )
-    return split_t
-
-def hinge_compare( hinge1, hinge2 ):
-    """
-    Compares items like 'chr10' and 'chrM' or 'scaffold2' and 'scaffold10' so that
-    the leading part is compared as text and the trailing part as a number
-    """
-    split_hinge1 = hinge1.split( '\t' )
-    split_hinge2 = hinge2.split( '\t' )
-    # quick check if either hinge is empty
-    if not ''.join( split_hinge2 ):
-        if ''.join( split_hinge1 ):
-            return 1
-        elif not ''.join( split_hinge1 ):
-            return 0
-    else:
-        if not ''.join( split_hinge1 ):
-            return -1
-    # go through all parts of the hinges and compare
-    for i, sh1 in enumerate( split_hinge1 ):
-        # if these hinge segments are the same, just move on to the next ones
-        if sh1 == split_hinge2[ i ]:
-            continue
-        # check all parts of each hinge
-        h1 = split_nums( sh1 )
-        h2 = split_nums( split_hinge2[ i ] )
-        for j, h in enumerate( h1 ):
-            # if second hinge has no more parts, first is considered larger
-            if j > 0 and len( h2 ) <= j:
-                return 1
-            # if these two parts are the same, move on to next
-            if h == h2[ j ]:
-                continue
-            # do actual comparison, depending on whether letter or number
-            if type( h ) == int:
-                if type( h2[ j ] ) == int:
-                    if h > h2[ j ]:
-                        return 1
-                    elif h < h2[ j ]:
-                        return -1
-                # numbers are less than letters
-                elif type( h2[ j ] ) == str:
-                    return -1
-            elif type( h ) == str:
-                if type( h2[ j ] ) == str:
-                    if h > h2[ j ]:
-                        return 1
-                    elif h < h2[ j ]:
-                        return -1
-                # numbers are less than letters
-                elif type( h2[ j ] ) == int:
-                    return 1
-    # if all else has failed, just do basic string comparison
-    if hinge1 > hinge2:
-        return 1
-    elif hinge1 == hinge2:
-        return 0
-    elif hinge1 < hinge2:
-        return -1
-
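-# Illustrative expectations for hinge_compare (comments only, not a test suite):
-#   hinge_compare( 'chr2', 'chr10' )  == -1  (2 < 10, compared numerically)
-#   hinge_compare( 'chr10', 'chrM' )  == -1  ('chr' < 'chrM' as text)
-#   hinge_compare( 'chr1\t5', 'chr1\t5' ) == 0
-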
-def hinge_sort( infile, outfile, hinge ):
-    """Given input file name, sorts logically (text vs. numeric) into provided output file name."""
-    hinge_locs = {}
-    bad_lines = []
-    fin = open( infile, 'rb' )
-    line = fin.readline()
-    while line.strip():
-        try:
-            hinge_parts = line.split( '\t' )[ :hinge ]
-            try:
-                hinge_locs[ '\t'.join( hinge_parts ) ].append( fin.tell() - len( line ) )
-            except KeyError:
-                hinge_locs[ '\t'.join( hinge_parts ) ] = [ fin.tell() - len( line ) ]
-        except ValueError:
-            bad_lines.append( line )
-        line = fin.readline()
-    fin.close()
-    fin = open( infile, 'rb' )
-    fout = open( outfile, 'wb' )
-    hinge_locs_sorted = hinge_locs.keys()
-    hinge_locs_sorted.sort( hinge_compare )
-    for hinge_loc in hinge_locs_sorted:
-        locs = hinge_locs[ hinge_loc ]
-        for loc in locs:
-            fin.seek( loc )
-            fout.write( fin.readline() )
-    fout.close()
-    fin.close()
-
-def __main__():
-    parser = optparse.OptionParser()
-    parser.add_option( '-o', '--output', dest='output', help='The name of the output file' )
-    parser.add_option( '-1', '--input1', dest='input1', help='The name of the first input file' )
-    parser.add_option( '-2', '--input2', dest='input2', help='The name of the second input file' )
-    parser.add_option( '-g', '--hinge', dest='hinge', help='The "hinge" to use (the value to compare)' )
-    parser.add_option( '-c', '--columns', dest='columns', help='The columns to include in the output file' )
-    parser.add_option( '-f', '--fill_options_file', dest='fill_options_file', default=None, help='The file specifying the fill value to use' )
-    (options, args) = parser.parse_args()
-    hinge = int( options.hinge )
-    cols = [ int( c ) for c in str( options.columns ).split( ',' ) if int( c ) > hinge ]
-    inputs = [ options.input1, options.input2 ]
-    # any extra positional arguments are additional input files to join
-    inputs.extend( args )
-    fill_options = None
-    if options.fill_options_file != 'None' and options.fill_options_file is not None:
-        try:
-            if simplejson is None:
-                raise simplejson_exception
-            fill_options = Bunch( **stringify_dictionary_keys( simplejson.load( open( options.fill_options_file ) ) ) )
-        except Exception, e:
-            print 'Warning: Ignoring fill options due to simplejson error (%s).' % e
-    if fill_options is None:
-        fill_options = Bunch()
-    if 'file1_columns' not in fill_options:
-        fill_options.file1_columns = None
-    if fill_options and fill_options.file1_columns:
-        fill_empty = {}
-        for col in cols:
-            fill_empty[ col ] = fill_options.file1_columns[ col - 1 ]
-    else:
-        fill_empty = None
-    assert len( cols ) > 0, 'You need to select at least one column in addition to the hinge'
-    delimiter = '\t'
-    # make sure all files are sorted in same way, ascending
-    tmp_input_files = []
-    input_files = inputs[:]
-    for in_file in input_files:
-        tmp_file = tempfile.NamedTemporaryFile()
-        tmp_file_name = tmp_file.name
-        tmp_file.close()
-        hinge_sort( in_file, tmp_file_name, hinge )
-        tmp_file = open( tmp_file_name, 'rb' )
-        tmp_input_files.append( tmp_file )
-    # cycle through files, getting smallest line of all files one at a time
-    # also have to keep track of vertical position of extra columns
-    fout = file( options.output, 'w' )
-    old_current = ''
-    first_line = True
-    current_lines = [ f.readline().rstrip( '\r\n' ) for f in tmp_input_files ]
-    last_lines = ''.join( current_lines )
-    last_loc = -1
-    while last_lines:
-        # get the "minimum" hinge, which should come first, and the file location in list
-        hinges = [ delimiter.join( line.split( delimiter )[ :hinge ] ) for line in current_lines ]
-        hinge_dict = {}
-        for i in range( len( hinges ) ):
-            if not hinge_dict.has_key( hinges[ i ] ):
-                hinge_dict[ hinges[ i ] ] = i
-        hinges.sort( hinge_compare )
-        hinges = [ h for h in hinges if h ]
-        current, loc = hinges[0], hinge_dict[ hinges[0] ]
-        # first output empty columns for vertical alignment (account for "missing" files)
-        # write output for leading and trailing empty columns
-        # columns missing from actual file handled further below
-        current_data = []
-        if current != old_current:
-            # fill trailing empty columns with appropriate fill value
-            if not first_line:
-                if last_loc < len( inputs ) - 1:
-                    if not fill_empty:
-                        filler = [ '' for col in range( ( len( inputs ) - last_loc - 1 ) * len( cols ) ) ]
-                    else:
-                        filler = [ fill_empty[ cols[ col % len( cols ) ] ] for col in range( ( len( inputs ) - last_loc - 1 ) * len( cols ) ) ]
-                    fout.write( '%s%s' % ( delimiter, delimiter.join( filler ) ) )
-                # insert line break before current line
-                fout.write( '\n' )
-            # fill leading empty columns with appropriate fill value
-            if loc > 0:
-                if not fill_empty:
-                    current_data = [ '' for col in range( loc * len( cols ) ) ]
-                else:
-                    current_data = [ fill_empty[ cols[ col % len( cols ) ] ] for col in range( loc * len( cols ) ) ]
-        else:
-            if loc - last_loc > 1:
-                if not fill_empty:
-                    current_data = [ '' for col in range( ( loc - last_loc - 1 ) * len( cols ) ) ]
-                else:
-                    current_data = [ fill_empty[ cols[ col % len( cols ) ] ] for col in range( ( loc - last_loc - 1 ) * len( cols ) ) ]
-        # now output actual data
-        split_line = current_lines[ loc ].split( delimiter )
-        # fill empties within actual line if appropriate
-        if fill_empty:
-            new_split_line = split_line[:]
-            split_line = []
-            for i, item in enumerate( new_split_line ):
-                col = i + 1
-                if not item:
-                    try:
-                        split_line.append( fill_empty[ i + 1 ] )
-                    except KeyError:
-                        split_line.append( item )
-                else:
-                    split_line.append( item )
-        # add actual data to be output below
-        if ''.join( split_line ):
-            for col in cols:
-                if col > hinge:
-                    # if this column doesn't exist, add the appropriate filler or empty column
-                    try:
-                        new_item = split_line[ col - 1 ]
-                    except IndexError:
-                        if fill_empty:
-                            new_item = fill_empty[ col ]
-                        else:
-                            new_item = ''
-                    current_data.append( new_item )
-            # grab next line for selected file
-            current_lines[ loc ] = tmp_input_files[ loc ].readline().rstrip( '\r\n' )
-            # write relevant data to file
-            if current == old_current and current_data:
-                fout.write( '%s%s' % ( delimiter, delimiter.join( current_data ) ) )
-            elif current_data:
-                fout.write( '%s%s%s' % ( current, delimiter, delimiter.join( current_data ) ) )
-            last_lines = ''.join( current_lines )
-        else:
-            last_lines = None
-        last_loc = loc
-        old_current = current
-        first_line = False
-    # fill trailing empty columns for final line
-    if last_loc < len( inputs ) - 1:
-        if not fill_empty:
-            filler = [ '' for col in range( ( len( inputs ) - last_loc - 1 ) * len( cols ) ) ]
-        else:
-            filler = [ fill_empty[ cols[ col % len( cols ) ] ] for col in range( ( len( inputs ) - last_loc - 1 ) * len( cols ) ) ]
-        fout.write( '%s%s' % ( delimiter, delimiter.join( filler ) ) )
-    fout.write( '\n' )
-    fout.close()
-    for f in tmp_input_files:
-        os.unlink( f.name )
-
-if __name__ == "__main__" : __main__()
--- a/tools/new_operations/column_join.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,260 +0,0 @@
-<tool id="column_join" name="Column Join" version="1.1.0">
-  <description></description>
-  <command interpreter="python">
-    column_join.py
-        --output=$output
-        --input1=$input1
-        --input2=$input2
-        --hinge=$hinge
-        --columns=$columns
-        #if $fill_empty_columns.fill_empty_columns_switch == "fill_empty":
-            --fill_options_file=$fill_options_file
-        #end if
-        #for $f in $file_chooser:
-            ${f.input}
-        #end for
-  </command>
-  <inputs>
-    <param name="input1" type="data" format="tabular" label="Choose the first file for the join" />
-    <param name="hinge" type="data_column" data_ref="input1" multiple="false" numerical="false" label="Use this column and columns to left the 'hinge' (matching data for each join)" help="All columns to left of selected column (plus selected column) will be used. Select 2 for pileup" />
-    <param name="columns" type="data_column" data_ref="input1" multiple="true" numerical="false" label="Include these column" help="Multi-select list - hold the appropriate key while clicking to select multiple columns" />
-    <conditional name="fill_empty_columns">
-      <param name="fill_empty_columns_switch" type="select" label="Fill empty columns">
-        <option value="no_fill" selected="True">No</option>
-        <option value="fill_empty">Yes</option>
-      </param>
-      <when value="no_fill" />
-      <when value="fill_empty">
-        <conditional name="do_fill_empty_columns">
-          <param name="column_fill_type" type="select" label="Fill Columns by">
-            <option value="single_fill_value" selected="True">Single fill value</option>
-            <option value="fill_value_by_column">Values by column</option>
-          </param>
-          <when value="single_fill_value">
-            <param type="text" name="fill_value" label="Fill value" value="." />
-          </when>
-          <when value="fill_value_by_column">
-            <repeat name="column_fill" title="Fill Column">
-              <param name="column_number" label="Column" type="data_column" data_ref="input1" />
-              <param type="text" name="fill_value" value="." />
-            </repeat>
-          </when>
-        </conditional>
-      </when>
-    </conditional>
-    <param name="input2" type="data" format="tabular" label="Choose the second file for the join" />
-    <repeat name="file_chooser" title="Additional Input">
-      <param name="input" label="Additional input file" type="data" format="tabular" />
-    </repeat>
-  </inputs>
-  <configfiles>
-    <configfile name="fill_options_file">&lt;%
-import simplejson
-%&gt;
-#set $__fill_options = {}
-#if $fill_empty_columns['fill_empty_columns_switch'] == 'fill_empty':
-    #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'single_fill_value':
-        #set $__start_fill = $fill_empty_columns['do_fill_empty_columns']['fill_value'].value
-    #else:
-        #set $__start_fill = ""
-    #end if
-    #set $__fill_options['file1_columns'] = [ __start_fill for i in range( int( $input1.metadata.columns ) ) ]
-    #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'fill_value_by_column':
-        #for column_fill in $fill_empty_columns['do_fill_empty_columns']['column_fill']:
-            #set $__fill_options['file1_columns'][ int( column_fill['column_number'].value ) - 1 ] = column_fill['fill_value'].value
-        #end for
-    #end if
-#end if
-${simplejson.dumps( __fill_options )}
-    </configfile>
-  </configfiles>
-  <outputs>
-    <data name="output" format="tabular" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="column_join_in1.pileup" ftype="pileup" />
-      <param name="hinge" value="2" />
-      <param name="columns" value="1,2,3,4,5,7" />
-      <param name="fill_empty_columns_switch" value="fill_empty" />
-      <param name="column_fill_type" value="single_fill_value" />
-      <param name="fill_value" value="?" />
-      <param name="input2" value="column_join_in2.pileup" ftype="pileup" />
-      <param name="input" value="column_join_in3.pileup" ftype="pileup" />
-      <output name="output" file="column_join_out1.pileup" ftype="tabular" />
-    </test>
-    <test>
-      <param name="input1" value="column_join_in4.pileup" ftype="pileup" />
-      <param name="hinge" value="2" />
-      <param name="columns" value="1,2,3,4" />
-      <param name="fill_empty_columns_switch" value="no_fill" />
-      <param name="input2" value="column_join_in5.pileup" ftype="pileup" />
-      <param name="input" value="column_join_in6.pileup" ftype="pileup" />
-      <output name="output" file="column_join_out2.pileup" ftype="tabular" />
-    </test>
-<!--  This test is failing for an unclear reason (the column values do not get
-      passed into the script), but passes in the browser
-    <test>
-      <param name="input1" value="column_join_in7.pileup" ftype="tabular" />
-      <param name="hinge" value="2" />
-      <param name="columns" value="3,4,5" />
-      <param name="fill_empty_columns_switch" value="fill_empty" />
-      <param name="column_fill_type" value="fill_value_by_column" />
-      <param name="column_number" value="5" />
-      <param name="fill_value" value="X" />
-      <param name="input2" value="column_join_in8.pileup" ftype="tabular" />
-      <param name="input" value="column_join_in9.pileup" ftype="tabular" />
-      <output name="output" file="column_join_out3.pileup" ftype="tabular" />
-    </test>
--->
-    <test>
-      <param name="input1" value="column_join_in10.pileup" ftype="pileup" />
-      <param name="hinge" value="1" />
-      <param name="columns" value="2,7" />
-      <param name="fill_empty_columns_switch" value="no_fill" />
-      <param name="input2" value="column_join_in11.pileup" ftype="pileup" />
-      <param name="input" value="column_join_in12.pileup" ftype="pileup" />
-      <output name="output" file="column_join_out4.pileup" ftype="tabular" />
-    </test>
-    <test>
-      <!-- Test for handling missing column -->
-      <param name="input1" value="column_join_in13.tabular" ftype="tabular" />
-      <param name="hinge" value="1" />
-      <param name="columns" value="5" />
-      <param name="fill_empty_columns_switch" value="fill_empty" />
-      <param name="column_fill_type" value="single_fill_value" />
-      <param name="fill_value" value="0" />
-      <param name="input2" value="column_join_in14.tabular" ftype="tabular" />
-      <param name="input" value="column_join_in15.tabular" ftype="tabular" />
-      <output name="output" file="column_join_out5.tabular" ftype="tabular" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-This tool allows you to join several files with the same column structure into one file, removing certain columns if necessary. You need to select a 'hinge', which is the number of left-most columns to match on, and the columns to include in the join, which should include the hinge columns too.
-
-Note that the files are expected to have the same number of columns. If the join column is missing (this can only happen for the last column(s) of a row), the tool inserts an empty item (or the appropriate fill value) for that column on that row. This can produce a row that has a hinge but otherwise entirely empty (or filled) columns, when the hinge exists in at least one file but every file containing it is missing the join column. Also note that the tool does not distinguish between a file missing the hinge altogether and a file having the hinge but missing the join column; in both cases the column is left empty or filled. There is an example of this below.
-
------
-
-**General Example**
-
-Given the following files::
-
-  FILE 1
-  chr2    1    T    6    .C...,     I$$III
-  chr2    2    G    6    ..N..,     III@II
-  chr2    3    C    7    ..C...,    I$IIIII
-  chr2    4    G    7    .G....,    I#IIIII
-  chr2    5    G    7    ...N..,    IIII#BI
-  chr2    6    A    7    ..T...,    I$IDIII
-  chr1    1    C    1    ^:.        I
-  chr1    2    G    2    .^:.       $I
-  chr1    3    A    2    ..         I%
-  chr1    4    C    2    ..         I$
-  chr1    5    T    3    ..^:.      I#I
-  chr1    6    G    3    ..^:,      I#I
-
-  FILE 2
-  chr1    3    T    1    ^:.        I
-  chr1    4    G    2    .^:.       $I
-  chr1    5    T    2    ..         I%
-  chr1    6    C    3    ..^:.      III
-  chr1    7    G    3    ..^:.      I#I
-  chr1    8    T    4    ...^:,     I#II
-  chr2    77   C    6    .G...,     I$$III
-  chr2    78   G    6    ..N..,     III@II
-  chr2    79   T    7    ..N...,    I$IIIII
-  chr2    80   C    7    .G....,    I#IIIII
-  chr2    81   G    7    ...A..,    IIII#BI
-  chr2    82   A    8    ...G...,   I$IDIIII
-  chr2    83   T    8    .A.....N   IIIIIIII
-  chr2    84   A    9    ......T.   I$IIIIIII
-
-  FILE 3
-  chr1    1    A    1    .          I
-  chr1    2    T    2    G.         I$
-  chr1    3    C    2    .,         I@
-  chr1    4    C    3    ..N        III
-  chr1    42   C    5    ...N^:.    III@I
-  chr1    43   C    5    .N..^:.    IIIII
-  chr1    44   T    5    .A..,      IA@II
-  chr1    45   A    6    .N...^:.   IIIII$
-  chr1    46   G    6    .GN..^:.   I@IIII
-  chr1    47   A    7    ....^:..,  IIIII$I
-  chr2    73   T    5    .N..,      II$II
-  chr2    74   A    5    ....,      IIIII
-  chr2    75   T    5    ....,      IIIII
-  chr2    76   T    5    ....,      IIIII
-  chr2    77   C    5    ....,      IIIBI
-  chr2    78   T    5    ....,      IDIII
-
-To join on columns 3 and 4 while matching on columns 1 and 2, select columns 1-4 for the 'Include these columns' option and a 'hinge' of 2. With these settings, the following would be output::
-
-  chr1    1    C    1              A    1
-  chr1    2    G    2              T    2
-  chr1    3    A    2    T    1    C    2
-  chr1    4    C    2    G    2    C    3
-  chr1    5    T    3    T    2
-  chr1    6    G    3    C    3
-  chr1    7              G    3
-  chr1    8              T    4
-  chr1    42                       C    5
-  chr1    43                       C    5
-  chr1    44                       T    5
-  chr1    45                       A    6
-  chr1    46                       G    6
-  chr1    47                       A    7
-  chr2    1    T    6
-  chr2    2    G    6
-  chr2    3    C    7
-  chr2    4    G    7
-  chr2    5    G    7
-  chr2    6    A    7
-  chr2    73                       T    5
-  chr2    74                       A    5
-  chr2    75                       T    5
-  chr2    76                       T    5
-  chr2    77             C    6    C    5
-  chr2    78             G    6    T    5
-  chr2    79             T    7
-  chr2    80             C    7
-  chr2    81             G    7
-  chr2    82             A    8
-  chr2    83             T    8
-  chr2    84             A    9
-
-**Example with missing columns**
-
-Given the following input files::
-
-  FILE 1
-  1   A
-  2   B   b
-  4   C   c
-  5   D
-  6   E   e
-
-  FILE 2
-  1   M   m
-  2   N
-  3   O   o
-  4   P   p
-  5   Q
-  7   R   r
-
-if we join only column 3 using column 1 as the hinge and with a fill value of '0', this is what will be output::
-
-  1   0   m
-  2   b   0
-  3   0   o
-  4   c   p
-  5   0   0
-  6   e   0
-  7   0   r
-
-Row 5 appears in both files with the column missing, so it contains only fill values in the output file.
-
-  </help>
-</tool>
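
For reference, a minimal Python sketch of the hinge-based join described in the
help text above (not the tool's actual implementation; function and variable
names are illustrative, and it splits on whitespace rather than tabs). It
reproduces the "missing columns" example: rows are matched on the first
"hinge" columns, and the fill value is substituted wherever a row or a
trailing column is absent::

  def column_join(files, hinge, columns, fill):
      tables = []
      for lines in files:
          rows = {}
          for line in lines:
              f = line.split()
              # Key each row on its first `hinge` columns; requested columns
              # that fall off the end of a short row become the fill value.
              rows[tuple(f[:hinge])] = [f[c - 1] if c - 1 < len(f) else fill
                                        for c in columns]
          tables.append(rows)
      for key in sorted(set(k for t in tables for k in t)):
          vals = []
          for t in tables:
              vals.extend(t.get(key, [fill] * len(columns)))
          yield "\t".join(list(key) + vals)

  file1 = ["1 A", "2 B b", "4 C c", "5 D", "6 E e"]
  file2 = ["1 M m", "2 N", "3 O o", "4 P p", "5 Q", "7 R r"]
  for row in column_join([file1, file2], hinge=1, columns=[3], fill="0"):
      print(row)
  # 1 0 m / 2 b 0 / 3 0 o / 4 c p / 5 0 0 / 6 e 0 / 7 0 r
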
--- a/tools/new_operations/complement.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-<tool id="gops_complement_1" name="Complement">
-  <description>intervals of a dataset</description>
-  <command interpreter="python">gops_complement.py $input1 $output -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} -l ${chromInfo} $allchroms</command>
-  <inputs>
-    <param format="interval" name="input1" type="data">
-      <label>Complement regions of</label>
-    </param>
-    <param name="allchroms" type="boolean" truevalue="--all" falsevalue="" label="Genome-wide complement">
-    </param>
-   </inputs>
-  <outputs>
-    <data format="input" name="output" metadata_source="input1" />
-  </outputs>
-  <code file="operation_filter.py"/>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed" dbkey="?" />
-      <param name="allchroms" value="true" />
-      <output name="output" file="gops_complement_out.bed" />
-    </test>
-    <test>
-      <param name="input1" value="2_mod.bed" ftype="interval" dbkey="?" />
-      <param name="allchroms" value="true" />
-      <output name="output" file="gops_complement_out_diffCols.dat" />
-    </test>
-    <test>
-      <param name="input1" value="gops_bigint.interval" dbkey="?" />
-      <param name="allchroms" value="true" />
-      <output name="output" file="gops_complement_out2.bed" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.
-
-This operation complements the regions of a set of intervals.  Regions are returned that represent the empty space in the input interval.
-
------
-
-**Screencasts!**
-
-See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
-
-.. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations
-
------
-
-**Syntax**
-
-- **Genome-wide complement** will complement all chromosomes of the genome.  Leaving this option unchecked will only complement chromosomes present in the dataset.
-
------
-
-**Example**
-
-.. image:: ./static/operation_icons/gops_complement.gif
-
-</help>
-</tool>
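
A rough sketch of the complement operation described above, assuming 0-based,
half-open intervals and a dict of chromosome lengths (illustrative; the tool
itself delegates to bx-python)::

  def complement(intervals, lens):
      # Group intervals by chromosome, then emit the gaps between them.
      by_chrom = {}
      for chrom, start, end in intervals:
          by_chrom.setdefault(chrom, []).append((start, end))
      for chrom, ivs in sorted(by_chrom.items()):
          pos = 0
          for start, end in sorted(ivs):
              if start > pos:
                  yield (chrom, pos, start)    # gap before this interval
              pos = max(pos, end)
          if pos < lens.get(chrom, 0):
              yield (chrom, pos, lens[chrom])  # tail gap to chromosome end

  print(list(complement([("chr1", 10, 20), ("chr1", 15, 30)], {"chr1": 50})))
  # [('chr1', 0, 10), ('chr1', 30, 50)]
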
--- a/tools/new_operations/concat.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-<tool id="gops_concat_1" name="Concatenate">
-  <description>two datasets into one dataset</description>
-  <command interpreter="python">gops_concat.py $input1 $input2 $output -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol} $sameformat</command>
-  <inputs>
-    <param format="interval" name="input1" type="data" help="First dataset">
-      <label>Concatenate</label>
-    </param>
-    <param format="interval" name="input2" type="data" help="Second dataset">
-      <label>with</label>
-    </param>
-    <param name="sameformat" type="boolean" truevalue="--sameformat" falsevalue="" label="Both datasets are same filetype?" checked="true" help="If unchecked, the Second dataset will be forced into the format of the First dataset">
-    </param>
-   </inputs>
-  <outputs>
-    <data format="input" name="output" metadata_source="input1" />
-  </outputs>
-  <code file="operation_filter.py"/>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="sameformat" value="true" />
-      <output name="output" file="gops_concat_out1.bed" />     
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="1.interval" />
-      <param name="sameformat" value="false" />
-      <output name="output" file="gops_concat_out2.bed" />     
-    </test>   
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.
-
------
-
-**Screencasts!**
-
-See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
-
-.. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations
-
------
-
-**Syntax**
-
-- **Both datasets are exactly the same filetype** will preserve all extra fields in both files.  Leaving this unchecked will force the second dataset to use the same column assignments for chrom, start, end, and strand, but will fill extra fields with a period (.).  In both cases, the output fields are truncated or padded with fields of periods to maintain a truly tabular output.
-
------
-
-**Example**
-
-.. image:: ./static/operation_icons/gops_concatenate.gif
-
-</help>
-</tool>
\ No newline at end of file
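
A small sketch of the pad-or-truncate behaviour described under **Syntax**
above for datasets that are not the same filetype (illustrative; the tool
delegates to bx-python's concat operation)::

  def concat(primary_rows, secondary_rows):
      # Force every row to the column count of the first dataset,
      # truncating extra fields and padding missing ones with periods.
      width = len(primary_rows[0])
      for row in primary_rows + secondary_rows:
          row = row[:width]
          yield row + ["."] * (width - len(row))

  rows1 = [["chr1", "10", "100", "feat1", "0", "+"]]
  rows2 = [["chr1", "200", "300"]]
  for r in concat(rows1, rows2):
      print("\t".join(r))
  # chr1  10   100  feat1  0  +
  # chr1  200  300  .      .  .
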
--- a/tools/new_operations/coverage.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,91 +0,0 @@
-<tool id="gops_coverage_1" name="Coverage">
-  <description>of a set of intervals on a second set of intervals</description>
-  <command interpreter="python">gops_coverage.py $input1 $input2 $output -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol}</command>
-  <inputs>
-    <param format="interval" name="input1" type="data" help="First dataset">
-      <label>What portion of</label>
-    </param>
-    <param format="interval" name="input2" type="data" help="Second dataset">
-      <label>is covered by</label>
-    </param>
-   </inputs>
-  <outputs>
-    <data format="interval" name="output" metadata_source="input1" />
-  </outputs>
-  <code file="operation_filter.py"/>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <output name="output" file="gops_coverage_out.interval" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2_mod.bed" ftype="interval"/>
-      <output name="output" file="gops_coverage_out_diffCols.interval" />
-    </test>
-    <test>
-      <param name="input1" value="gops_bigint.interval" />
-      <param name="input2" value="gops_bigint2.interval" />
-      <output name="output" file="gops_coverage_out2.interval" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.
-
-Find the coverage of intervals in the first dataset on intervals in the second dataset.  The coverage is added as two columns, the first being bases covered, and the second being the fraction of bases covered by that interval.
-
------
-
-**Screencasts!**
-
-See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
-
-.. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations
-
------
-
-**Example**
-
-
-    if **First dataset** consists of genes::
-
-      chr11 5203271 5204877 NM_000518 0 -
-      chr11 5210634 5212434 NM_000519 0 -
-      chr11 5226077 5227663 NM_000559 0 -
-      chr11 5226079 5232587 BC020719  0 -
-      chr11 5230996 5232587 NM_000184 0 -
-
-    and **Second dataset** consists of repeats::
-
-       chr11      5203895 5203991 L1MA6     500 +
-       chr11      5204163 5204239 A-rich    219 +
-       chr11      5211034 5211167 (CATATA)n 245 +
-       chr11      5211642 5211673 AT_rich    24 +
-       chr11      5226551 5226606 (CA)n     303 +
-       chr11      5228782 5228825 (TTTTTG)n 208 +
-       chr11      5229045 5229121 L1PA11    440 +
-       chr11      5229133 5229319 MER41A   1106 +
-       chr11      5229374 5229485 L2        244 -
-       chr11      5229751 5230083 MLT1A     913 -
-       chr11      5231469 5231526 (CA)n     330 +
-
-    the result is the coverage density of repeats in the genes::
-
-       chr11 5203271 5204877 NM_000518 0 - 172   0.107098
-       chr11 5210634 5212434 NM_000519 0 - 164   0.091111
-       chr11 5226077 5227663 NM_000559 0 -  55   0.034678
-       chr11 5226079 5232587 BC020719  0 - 860   0.132145
-       chr11 5230996 5232587 NM_000184 0 -  57   0.035827
-
-    For example, the following line of output::
-
-      chr11 5203271 5204877 NM_000518 0 - 172   0.107098
-
-    implies that 172 nucleotides, accounting for 10.7% of this interval (chr11:5203271-5204877), overlap with repetitive elements.
-
-</help>
-</tool>
\ No newline at end of file
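
A sketch of the per-interval coverage computation (illustrative, assuming
0-based, half-open coordinates); it reproduces the NM_000518 line from the
example above using the two repeats that fall inside that gene::

  def coverage(interval, features):
      # Count the bases of `interval` covered by the sorted features and
      # return both the base count and the covered fraction.
      start, end = interval
      covered, pos = 0, start
      for fs, fe in sorted(features):
          lo, hi = max(fs, pos), min(fe, end)
          if hi > lo:
              covered += hi - lo
              pos = hi
      return covered, float(covered) / (end - start)

  gene = (5203271, 5204877)                            # NM_000518
  repeats = [(5203895, 5203991), (5204163, 5204239)]   # L1MA6, A-rich
  print(coverage(gene, repeats))                       # (172, 0.107098...)
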
--- a/tools/new_operations/flanking_features.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,214 +0,0 @@
-#!/usr/bin/env python
-#By: Guruprasad Ananda
-"""
-Fetch the closest upstream and/or downstream interval in the features file for every interval in the primary file
-
-usage: %prog primary_file features_file out_file direction
-    -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in first file
-    -2, --cols2=N,N,N,N: Columns for chrom, start, end, strand in second file
-    -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval
-    -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
-from bx.cookbook import doc_optparse
-from galaxy.tools.util.galaxyops import *
-from bx.intervals.io import *
-from bx.intervals.operations import quicksect
-from galaxy.datatypes.util.gff_util import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def get_closest_feature (node, direction, threshold_up, threshold_down, report_func_up, report_func_down):
-    #direction=1 for the +ve strand upstream and -ve strand downstream cases; direction=0 for the +ve strand downstream and -ve strand upstream cases
-    #threshold_up is the interval start for the +ve strand, and the interval end for the -ve strand
-    #threshold_down is the interval end for the +ve strand, and the interval start for the -ve strand
-    if direction == 1: 
-        if node.maxend <= threshold_up:
-            if node.end == node.maxend:
-                report_func_up(node)
-            elif node.right and node.left:
-                if node.right.maxend == node.maxend:
-                    get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down)
-                elif node.left.maxend == node.maxend:
-                    get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down)
-            elif node.right and node.right.maxend == node.maxend:
-                get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down)
-            elif node.left and node.left.maxend == node.maxend:
-                get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down)
-        elif node.minend <= threshold_up:
-            if node.end <= threshold_up:
-                report_func_up(node)
-            if node.left and node.right:
-                if node.right.minend <= threshold_up:
-                    get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down)
-                if node.left.minend <= threshold_up:
-                    get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down)
-            elif node.left:
-                if node.left.minend <= threshold_up:
-                    get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down)
-            elif node.right:
-                if node.right.minend <= threshold_up:
-                    get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down)
-    elif direction == 0:
-        if node.start > threshold_down:
-            report_func_down(node)
-            if node.left:
-                get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down)
-        else:
-            if node.right:
-                get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down)
-
-def proximal_region_finder(readers, region, comments=True):
-    """
-    Returns an iterator that yields elements of the form [ <original_interval>, <closest_feature> ]. 
-    Intervals are GenomicInterval objects. 
-    """
-    primary = readers[0]
-    features = readers[1]
-    either = False
-    if region == 'Upstream':
-        up, down = True, False
-    elif region == 'Downstream':
-        up, down = False, True
-    else:
-        up, down = True, True
-        if region == 'Either':
-            either = True
-        
-    # Read features into memory:
-    rightTree = quicksect.IntervalTree()
-    for item in features:
-        if type( item ) is GenomicInterval:
-            rightTree.insert( item, features.linenum, item )
-            
-    for interval in primary:
-        if type( interval ) is Header:
-            yield interval
-        if type( interval ) is Comment and comments:
-            yield interval
-        elif type( interval ) == GenomicInterval:
-            chrom = interval.chrom
-            start = int(interval.start)
-            end = int(interval.end)
-            strand = interval.strand
-            if chrom not in rightTree.chroms:
-                continue
-            else:
-                root = rightTree.chroms[chrom]    #root node for the chrom tree
-                result_up = []
-                result_down = []
-                if (strand == '+' and up) or (strand == '-' and down): 
-                    #upstream +ve strand and downstream -ve strand cases
-                    get_closest_feature (root, 1, start, None, lambda node: result_up.append( node ), None)
-                    
-                if (strand == '+' and down) or (strand == '-' and up):
-                    #downstream +ve strand and upstream -ve strand case
-                    get_closest_feature (root, 0, None, end-1, None, lambda node: result_down.append( node ))
-                
-                if result_up:
-                    if len(result_up) > 1: #The results_up list has a list of intervals upstream to the given interval. 
-                        ends = []
-                        for n in result_up:
-                            ends.append(n.end)
-                        res_ind = ends.index(max(ends)) #fetch the index of the closest interval i.e. the interval with the max end from the results_up list
-                    else:
-                        res_ind = 0
-                    if not(either):
-                        yield [ interval, result_up[res_ind].other ]
-                
-                if result_down:    
-                    if not(either):
-                        #The last element of result_down will be the closest element to the given interval
-                        yield [ interval, result_down[-1].other ] 
-                
-                if either and (result_up or result_down):
-                    iter_val = []
-                    if result_up and result_down:
-                        if abs(start - int(result_up[res_ind].end)) <= abs(end - int(result_down[-1].start)):
-                            iter_val = [ interval, result_up[res_ind].other ]
-                        else:
-                            #The last element of result_down will be the closest element to the given interval
-                            iter_val = [ interval, result_down[-1].other ]
-                    elif result_up:
-                        iter_val = [ interval, result_up[res_ind].other ]
-                    elif result_down:
-                        #The last element of result_down will be the closest element to the given interval
-                        iter_val = [ interval, result_down[-1].other ]
-                    yield iter_val
-                        
-def main():
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
-        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )
-        in1_gff_format = bool( options.gff1 )
-        in2_gff_format = bool( options.gff2 )
-        in_fname, in2_fname, out_fname, direction = args
-    except:
-        doc_optparse.exception()
-        
-    # Set readers to handle either GFF or default format.
-    if in1_gff_format:
-        in1_reader_wrapper = GFFIntervalToBEDReaderWrapper
-    else:
-        in1_reader_wrapper = NiceReaderWrapper
-    if in2_gff_format:
-        in2_reader_wrapper = GFFIntervalToBEDReaderWrapper
-    else:
-        in2_reader_wrapper = NiceReaderWrapper
-
-    g1 = in1_reader_wrapper( fileinput.FileInput( in_fname ),
-                            chrom_col=chr_col_1,
-                            start_col=start_col_1,
-                            end_col=end_col_1,
-                            strand_col=strand_col_1,
-                            fix_strand=True )
-    g2 = in2_reader_wrapper( fileinput.FileInput( in2_fname ),
-                            chrom_col=chr_col_2,
-                            start_col=start_col_2,
-                            end_col=end_col_2,
-                            strand_col=strand_col_2,
-                            fix_strand=True )
-
-    # Find flanking features.
-    out_file = open( out_fname, "w" )
-    try:
-        for result in proximal_region_finder([g1,g2], direction):
-            if type( result ) is list:
-                line, closest_feature = result
-                # Need to join outputs differently depending on file types.
-                if in1_gff_format:
-                    # Output is GFF with added attribute 'closest feature.'
-
-                    # Intervals are in BED coordinates; need to convert to GFF.
-                    line = convert_bed_coords_to_gff( line )
-                    closest_feature = convert_bed_coords_to_gff( closest_feature )
-                    
-                    # Replace double quotes with single quotes in closest feature's attributes.
-                    out_file.write( "%s closest_feature \"%s\" \n" % 
-                                    ( "\t".join( line.fields ), \
-                                      "\t".join( closest_feature.fields ).replace( "\"", "\\\"" )
-                                     ) )
-                else:
-                    # Output is BED + closest feature fields.
-                    output_line_fields = []
-                    output_line_fields.extend( line.fields )
-                    output_line_fields.extend( closest_feature.fields )
-                    out_file.write( "%s\n" % ( "\t".join( output_line_fields ) ) )
-            else:
-                out_file.write( "%s\n" % result )
-    except ParseError, exc:
-        fail( "Invalid file format: %s" % str( exc ) )
-
-    print "Direction: %s" %(direction)
-    if g1.skipped > 0:
-        print skipped( g1, filedesc=" of 1st dataset" )
-    if g2.skipped > 0:
-        print skipped( g2, filedesc=" of 2nd dataset" )
-
-if __name__ == "__main__":
-    main()
--- a/tools/new_operations/flanking_features.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,127 +0,0 @@
-<tool id="flanking_features_1" name="Fetch closest non-overlapping feature" version="4.0.1">
-  <description>  for every interval</description>
-  <command interpreter="python">
-      flanking_features.py $input1 $input2 $out_file1 $direction
-      
-      #if isinstance( $input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
-          -1 1,4,5,7 --gff1
-      #else:
-          -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol}
-      #end if
-          
-      #if isinstance( $input2.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
-          -2 1,4,5,7 --gff2
-      #else:
-          -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol}
-      #end if
-  </command>
-  <inputs>
-    <param format="interval,gff" name="input1" type="data" label="For every interval in"/>
-    <param format="interval,gff" name="input2" type="data" label="Fetch closest feature(s) from"/>
-    <param name="direction" type="select" label="Located">
-      <option value="Either">Either Upstream or Downstream</option>
-      <option value="Both">Both Upstream and Downstream</option>
-      <option value="Upstream">Upstream</option>
-      <option value="Downstream">Downstream</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input1"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="4_windows.bed"/>
-      <param name="input2" value="4_windows_2.bed"/>
-      <param name="direction" value="Either"/>
-      <output name="out_file1" file="closest_features_either.interval"/>
-    </test>
-    <test>
-      <param name="input1" value="4_windows.bed"/>
-      <param name="input2" value="4_windows_2.bed"/>
-      <param name="direction" value="Both"/>
-      <output name="out_file1" file="closest_features.interval"/>
-    </test>
-    <test>
-      <param name="input1" value="4_windows.bed"/>
-      <param name="input2" value="4_windows_2.bed"/>
-      <param name="direction" value="Upstream"/>
-      <output name="out_file1" file="closest_features_up.interval"/>
-    </test>
-    <test>
-      <param name="input1" value="4_windows.bed"/>
-      <param name="input2" value="4_windows_2.bed"/>
-      <param name="direction" value="Downstream"/>
-      <output name="out_file1" file="closest_features_down.interval"/>
-    </test>
-    <test>
-      <param name="input1" value="4_windows.bed"/>
-      <param name="input2" value="4_windows_3.bed"/>
-      <param name="direction" value="Both"/>
-      <output name="out_file1" file="closest_features_both.interval"/>
-    </test>
-    <!-- Tests for GFF functionality. -->
-
-    <test>
-      <param name="input1" value="4_windows.bed"/>
-      <param name="input2" value="4_windows_2.gff"/>
-      <param name="direction" value="Either"/>
-      <output name="out_file1" file="closest_features_both.gff"/>
-    </test>
-    <test>
-      <param name="input1" value="4_windows.gff"/>
-      <param name="input2" value="4_windows_2.gff"/>
-      <param name="direction" value="Either"/>
-      <output name="out_file1" file="closest_features_both2.gff"/>
-    </test>
-    
-  </tests>
- <help> 
-
-.. class:: infomark
-
-**What it does**
-
-For every interval in the **interval** dataset, this tool fetches the **closest non-overlapping** upstream and / or downstream features from the **features** dataset.
-
------
-
-.. class:: warningmark
-
-**Note:** 
-
-Every line should contain at least 3 columns: chromosome number, start and stop coordinates. If any of these columns is missing or if start and stop coordinates are not numerical, the lines will be treated as invalid and skipped. The number of skipped lines is documented in the resulting history item as a "data issue".
-
-If the strand column is missing from your input interval dataset, the intervals will be considered to be on positive strand. You can add a strand column to your input dataset by using the *Text Manipulation->Add column* tool.
-
-For GFF files, features are added as a GTF-style attribute at the end of the line.
-
------
-
-**Example**
-
-If the **intervals** are::
-
-   chr1 10   100  Query1.1
-   chr1 500  1000 Query1.2
-   chr1 1100 1250 Query1.3
-
-and the **features** are::
-
-   chr1 120  180  Query2.1
-   chr1 140  200  Query2.2
-   chr1 580  1050 Query2.3
-   chr1 2000 2204 Query2.4
-   chr1 2500 3000 Query2.5
-
-Running this tool for **Both Upstream and Downstream** will return::
-
-   chr1 10   100  Query1.1 chr1 120  180  Query2.1
-   chr1 500  1000 Query1.2 chr1 140  200  Query2.2
-   chr1 500  1000 Query1.2 chr1 2000 2204 Query2.4
-   chr1 1100 1250 Query1.3 chr1 580  1050 Query2.3
-   chr1 1100 1250 Query1.3 chr1 2000 2204 Query2.4
-
-</help>  
-
-
-</tool>
\ No newline at end of file
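
The script above finds flanking features with an interval tree; a simplified
linear-scan sketch of the same idea for a single interval on the + strand
(illustrative only, ignoring strand and chromosome handling)::

  def closest_flanks(start, end, features):
      # Closest non-overlapping feature on each side: the upstream feature
      # ending latest at or before `start`, and the downstream feature
      # starting earliest at or after `end`.
      up = [f for f in features if f[1] <= start]
      down = [f for f in features if f[0] >= end]
      return (max(up, key=lambda f: f[1]) if up else None,
              min(down, key=lambda f: f[0]) if down else None)

  features = [(120, 180), (140, 200), (580, 1050), (2000, 2204), (2500, 3000)]
  print(closest_flanks(500, 1000, features))
  # ((140, 200), (2000, 2204))  -> the Query1.2 rows in the example above
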
--- a/tools/new_operations/get_flanks.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,191 +0,0 @@
-#!/usr/bin/env python
-#Done by: Guru
-
-"""
-Get Flanking regions.
-
-usage: %prog input out_file size direction region
-   -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
-   -o, --off=N: Offset
-"""
-
-import sys, re, os
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-from galaxy.tools.util.galaxyops import *
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def main(): 
-    try:
-        if int( sys.argv[3] ) < 0:
-            raise Exception
-    except:
-        stop_err( "Length of flanking region(s) must be a non-negative integer." )
-
-    # Parsing Command Line here
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
-        inp_file, out_file, size, direction, region = args
-        if strand_col_1 <= 0:
-            strand = "+"        #if strand is not defined, default it to +
-    except:
-        stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." )
-    try:
-        offset = int(options.off)
-        size = int(size)
-    except:    
-        stop_err( "Invalid offset or length entered. Try again by entering valid integer values." )
-
-    fo = open(out_file,'w')
-    
-    skipped_lines = 0
-    first_invalid_line = 0
-    invalid_line = None
-    elems = []
-    j=0
-    for i, line in enumerate( file( inp_file ) ):
-        line = line.strip()
-        if line and (not line.startswith( '#' )) and line != '':
-            j+=1
-            try:
-                elems = line.split('\t')
-                #if the start and/or end columns are not numbers, skip that line.
-                assert int(elems[start_col_1])
-                assert int(elems[end_col_1])
-                if strand_col_1 != -1:
-                    strand = elems[strand_col_1]
-                #if the strand value is not + or -, skip that line.
-                assert strand in ['+', '-']
-                if direction == 'Upstream':
-                    if strand == '+':
-                        if region == 'end':
-                            elems[end_col_1] = str(int(elems[end_col_1]) + offset)
-                            elems[start_col_1] = str( int(elems[end_col_1]) - size )
-                        else:
-                            elems[end_col_1] = str(int(elems[start_col_1]) + offset)
-                            elems[start_col_1] = str( int(elems[end_col_1]) - size )
-                    elif strand == '-':
-                        if region == 'end':
-                            elems[start_col_1] = str(int(elems[start_col_1]) - offset)
-                            elems[end_col_1] = str(int(elems[start_col_1]) + size)
-                        else:
-                            elems[start_col_1] = str(int(elems[end_col_1]) - offset)
-                            elems[end_col_1] = str(int(elems[start_col_1]) + size)
-                    assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
-                    fo.write( "%s\n" % '\t'.join( elems ) )
-                                
-                elif direction == 'Downstream':
-                    if strand == '-':
-                        if region == 'start':
-                           elems[end_col_1] = str(int(elems[end_col_1]) - offset)
-                           elems[start_col_1] = str( int(elems[end_col_1]) - size )
-                        else:
-                           elems[end_col_1] = str(int(elems[start_col_1]) - offset)
-                           elems[start_col_1] = str( int(elems[end_col_1]) - size )
-                    elif strand == '+':
-                        if region == 'start':
-                            elems[start_col_1] = str(int(elems[start_col_1]) + offset)
-                            elems[end_col_1] = str(int(elems[start_col_1]) + size)
-                        else:
-                            elems[start_col_1] = str(int(elems[end_col_1]) + offset)
-                            elems[end_col_1] = str(int(elems[start_col_1]) + size)
-                    assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
-                    fo.write( "%s\n" % '\t'.join( elems ) )
-                    
-                elif direction == 'Both':
-                    if strand == '-':
-                        if region == 'start':
-                            start = str(int(elems[end_col_1]) - offset)
-                            end1 = str(int(start) + size)
-                            end2 = str(int(start) - size)
-                            elems[start_col_1]=start
-                            elems[end_col_1]=end1
-                            assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
-                            fo.write( "%s\n" % '\t'.join( elems ) )
-                            elems[start_col_1]=end2
-                            elems[end_col_1]=start
-                            assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
-                            fo.write( "%s\n" % '\t'.join( elems ) )
-                        elif region == 'end':
-                            start = str(int(elems[start_col_1]) - offset)
-                            end1 = str(int(start) + size)
-                            end2 = str(int(start) - size)
-                            elems[start_col_1]=start
-                            elems[end_col_1]=end1
-                            assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
-                            fo.write( "%s\n" % '\t'.join( elems ) )
-                            elems[start_col_1]=end2
-                            elems[end_col_1]=start
-                            assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
-                            fo.write( "%s\n" % '\t'.join( elems ) )
-                        else:
-                            start1 = str(int(elems[end_col_1]) - offset)
-                            end1 = str(int(start1) + size)
-                            start2 = str(int(elems[start_col_1]) - offset)
-                            end2 = str(int(start2) - size)
-                            elems[start_col_1]=start1
-                            elems[end_col_1]=end1
-                            assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
-                            fo.write( "%s\n" % '\t'.join( elems ) )
-                            elems[start_col_1]=end2
-                            elems[end_col_1]=start2
-                            assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
-                            fo.write( "%s\n" % '\t'.join( elems ) )
-                    elif strand == '+':
-                        if region == 'start':
-                            start = str(int(elems[start_col_1]) + offset)
-                            end1 = str(int(start) - size)
-                            end2 = str(int(start) + size)
-                            elems[start_col_1]=end1
-                            elems[end_col_1]=start
-                            assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
-                            fo.write( "%s\n" % '\t'.join( elems ) )
-                            elems[start_col_1]=start
-                            elems[end_col_1]=end2
-                            assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
-                            fo.write( "%s\n" % '\t'.join( elems ) )
-                        elif region == 'end':
-                            start = str(int(elems[end_col_1]) + offset)
-                            end1 = str(int(start) - size)
-                            end2 = str(int(start) + size)
-                            elems[start_col_1]=end1
-                            elems[end_col_1]=start
-                            assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
-                            fo.write( "%s\n" % '\t'.join( elems ) )
-                            elems[start_col_1]=start
-                            elems[end_col_1]=end2
-                            assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
-                            fo.write( "%s\n" % '\t'.join( elems ) )
-                        else:
-                            start1 = str(int(elems[start_col_1]) + offset)
-                            end1 = str(int(start1) - size)
-                            start2 = str(int(elems[end_col_1]) + offset)
-                            end2 = str(int(start2) + size)
-                            elems[start_col_1]=end1
-                            elems[end_col_1]=start1
-                            assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
-                            fo.write( "%s\n" % '\t'.join( elems ) )
-                            elems[start_col_1]=start2
-                            elems[end_col_1]=end2
-                            assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
-                            fo.write( "%s\n" % '\t'.join( elems ) )
-            except:
-                skipped_lines += 1
-                if not invalid_line:
-                    first_invalid_line = i + 1
-                    invalid_line = line
-    fo.close()
-
-    if skipped_lines == j:
-        stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
-    if skipped_lines > 0:
-        print 'Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
-    print 'Location: %s, Region: %s, Flank-length: %d, Offset: %d ' %( direction, region, size, offset )
-    
-if __name__ == "__main__":
-    main()
--- a/tools/new_operations/get_flanks.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-<tool id="get_flanks1" name="Get flanks">
-  <description>returns flanking region/s for every gene</description>
-  <command interpreter="python">get_flanks.py $input $out_file1 $size $direction $region -o $offset -l ${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol}</command>
-  <inputs>
-    <param format="interval" name="input" type="data" label="Select data"/>
-    <param name="region" type="select" label="Region">
-      <option value="whole" selected="true">Whole feature</option>
-      <option value="start">Around Start</option>
-      <option value="end">Around End</option>
-    </param>
-    <param name="direction" type="select" label="Location of the flanking region/s">
-      <option value="Upstream">Upstream</option>
-      <option value="Downstream">Downstream</option>
-      <option value="Both">Both</option>
-    </param>
-    <param name="offset" size="10" type="integer" value="0" label="Offset" help="Use positive values to offset co-ordinates in the direction of transcription and negative values to offset in the opposite direction."/>
-    <param name="size" size="10" type="integer" value="50" label="Length of the flanking region(s)" help="Use non-negative value for length"/>
-    
-    
-  </inputs>
-  <outputs>
-    <data format="interval" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="flanks_inp.bed"/>
-      <param name="offset" value="-500"/>
-      <param name="size" value="1000"/>
-      <param name="direction" value="Both"/>
-      <param name="region" value="whole"/>
-      <output name="out_file1" file="flanks_out1.bed"/>
-    </test>
-    <test>
-      <param name="input" value="flanks_inp.bed"/>
-      <param name="offset" value="200"/>
-      <param name="size" value="1000"/>
-      <param name="direction" value="Downstream"/>
-      <param name="region" value="start" />
-      <output name="out_file1" file="flanks_out2.bed"/>
-    </test>
-  </tests>
- <help> 
-
-This tool finds the upstream and/or downstream flanking region(s) of all the selected regions in the input file. 
-
-**Note:** Every line should contain at least 3 columns: chromosome number, start and stop co-ordinates. If any of these columns is missing, or if the start and stop co-ordinates are not numerical, such lines are treated as invalid and skipped. The number of invalid skipped lines is documented in the resulting history item as a "Data issue".
-
------
-
-
-**Example 1**
-
-- For the following query::
-
-   chr22  1000  7000  NM_174568 0 +
-
-- running get flanks with Region: Around start, Offset: -200, Flank-length: 300 and Location: Upstream will return **(Red: Query positive strand; Blue: Flanks output)**::
-
-   chr22  500  800  NM_174568 0 +
-
-.. image:: ./static/operation_icons/flanks_ex1.gif
-
-**Example 2**
-
-- For the following query::
-
-   chr22  1000  7000  NM_028946 0 -
-
-- running get flanks with Region: Whole, Offset: 200, Flank-length: 300 and Location: Downstream will return **(Orange: Query negative strand; Magenta: Flanks output)**::
-
-   chr22  500  800  NM_028946 0 -
-
-.. image:: ./static/operation_icons/flanks_ex2.gif
-
-</help>  
-
-
-</tool>
\ No newline at end of file
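
A worked check of Example 1 above (illustrative; the helper name is made up):
for a feature on the + strand, an upstream flank around the start follows the
arithmetic in get_flanks.py, with the offset applied in the direction of
transcription::

  def upstream_flank_around_start(start, offset, size):
      new_end = start + offset     # shift in the direction of transcription
      new_start = new_end - size   # extend `size` bases upstream
      return new_start, new_end

  # Query chr22 1000 7000 +, with Offset: -200 and Flank-length: 300
  print(upstream_flank_around_start(1000, offset=-200, size=300))  # (500, 800)
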
--- a/tools/new_operations/gops_basecoverage.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-#!/usr/bin/env python
-"""
-Count total base coverage.
-
-usage: %prog in_file out_file
-    -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in first file
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
-from bx.intervals import *
-from bx.intervals.io import *
-from bx.intervals.operations.base_coverage import *
-from bx.cookbook import doc_optparse
-from galaxy.tools.util.galaxyops import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():
-    upstream_pad = 0
-    downstream_pad = 0
-
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
-        in_fname, out_fname = args
-    except:
-        doc_optparse.exception()
-        
-    g1 = NiceReaderWrapper( fileinput.FileInput( in_fname ),
-                            chrom_col=chr_col_1,
-                            start_col=start_col_1,
-                            end_col=end_col_1,
-                            strand_col = strand_col_1,
-                            fix_strand=True )
-    
-    try:
-        bases = base_coverage(g1)
-    except ParseError, exc:
-        fail( "Invalid file format: %s" % str( exc ) )
-    out_file = open( out_fname, "w" )
-    out_file.write( "%s\n" % str( bases ) )
-    out_file.close()
-    if g1.skipped > 0:
-        print skipped( g1, filedesc="" )
-
-if __name__ == "__main__":
-    main()
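
The counting above happens in bx-python's base_coverage; a self-contained
sketch of the idea (illustrative), counting each base once even where
intervals overlap::

  def base_coverage(intervals):
      total, pos = 0, None
      for start, end in sorted(intervals):
          if pos is None or start > pos:   # disjoint from what came before
              total += end - start
          elif end > pos:                  # overlaps: count only the new part
              total += end - pos
          pos = end if pos is None else max(pos, end)
      return total

  print(base_coverage([(10, 20), (15, 30), (100, 110)]))   # 30
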
--- a/tools/new_operations/gops_cluster.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,132 +0,0 @@
-#!/usr/bin/env python
-"""
-Cluster regions of intervals.
-
-usage: %prog in_file out_file
-    -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in file
-    -d, --distance=N: Maximum distance between clustered intervals
-    -v, --overlap=N: Minimum overlap required (negative distance)
-    -m, --minregions=N: Minimum regions per cluster
-    -o, --output=N: 1) merged 2) filtered 3) clustered 4) minimum 5) maximum
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
-from bx.intervals import *
-from bx.intervals.io import *
-from bx.intervals.operations.find_clusters import *
-from bx.cookbook import doc_optparse
-from galaxy.tools.util.galaxyops import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():
-    distance = 0
-    minregions = 2
-    output = 1
-    upstream_pad = 0
-    downstream_pad = 0
-
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
-        if options.distance: distance = int( options.distance )
-        if options.overlap: distance = -1 * int( options.overlap )
-        if options.output: output = int( options.output )
-        if options.minregions: minregions = int( options.minregions )
-        in_fname, out_fname = args
-    except:
-        doc_optparse.exception()
-
-    g1 = NiceReaderWrapper( fileinput.FileInput( in_fname ),
-                            chrom_col=chr_col_1,
-                            start_col=start_col_1,
-                            end_col=end_col_1,
-                            strand_col=strand_col_1,
-                            fix_strand=True )
-
-    # Get the cluster tree
-    try:
-        clusters, extra = find_clusters( g1, mincols=distance, minregions=minregions)
-    except ParseError, exc:
-        fail( "Invalid file format: %s" % str( exc ) )
-
-    f1 = open( in_fname, "r" )
-    out_file = open( out_fname, "w" )
-    
-    # If "merge"
-    if output == 1:
-        fields = ["."  for x in range(max(g1.chrom_col, g1.start_col, g1.end_col)+1)]
-        for chrom, tree in clusters.items():
-            for start, end, lines in tree.getregions():
-                fields[g1.chrom_col] = chrom
-                fields[g1.start_col] = str(start)
-                fields[g1.end_col] = str(end)
-                out_file.write( "%s\n" % "\t".join( fields ) )
-
-    # If "filtered" we preserve order of file and comments, etc.
-    if output == 2:
-        linenums = dict()
-        for chrom, tree in clusters.items():
-            for linenum in tree.getlines():
-                linenums[linenum] = 0
-        linenum = -1
-        f1.seek(0)
-        for line in f1.readlines():
-            linenum += 1
-            if linenum in linenums or linenum in extra:
-                out_file.write( "%s\n" % line.rstrip( "\n\r" ) )
-
-    # If "clustered" we output original intervals, but near each other (i.e. clustered)
-    if output == 3:
-        linenums = list()
-        f1.seek(0)
-        fileLines = f1.readlines()
-        for chrom, tree in clusters.items():
-            for linenum in tree.getlines():
-                out_file.write( "%s\n" % fileLines[linenum].rstrip( "\n\r" ) )
-
-    # If "minimum" or "maximum" we output the smallest or largest interval in each cluster
-    if output == 4 or output == 5:
-        linenums = list()
-        f1.seek(0)
-        fileLines = f1.readlines()
-        for chrom, tree in clusters.items():
-            regions = tree.getregions()
-            for start, end, lines in tree.getregions():
-                outsize = -1
-                outinterval = None
-                for line in lines:
-                    # three nested for loops?
-                    # should only execute this code once per line
-                    fileline = fileLines[line].rstrip("\n\r")
-                    try:
-                        cluster_interval = GenomicInterval( g1, fileline.split("\t"), 
-                                                            g1.chrom_col, 
-                                                            g1.start_col,
-                                                            g1.end_col, 
-                                                            g1.strand_col, 
-                                                            g1.default_strand,
-                                                            g1.fix_strand )
-                    except Exception, exc:
-                        print >> sys.stderr, str( exc )
-                        f1.close()
-                        sys.exit()
-                    interval_size = cluster_interval.end - cluster_interval.start
-                    if outsize == -1 or \
-                       ( outsize > interval_size and output == 4 ) or \
-                       ( outsize < interval_size and output == 5 ) :
-                        outinterval = cluster_interval
-                        outsize = interval_size
-                out_file.write( "%s\n" % outinterval )
-
-    f1.close()
-    out_file.close()
-    
-    if g1.skipped > 0:
-        print skipped( g1, filedesc="" )
-
-if __name__ == "__main__":
-    main()
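
A compact sketch of the "merged" output mode above (output == 1), under the
same defaults (illustrative): intervals within "distance" of one another form
a cluster, clusters with fewer than "minregions" members are dropped, and each
surviving cluster is emitted as a single region::

  def merge_clusters(intervals, distance=0, minregions=2):
      members, cstart, cend = 0, None, None
      for start, end in sorted(intervals):
          if cend is not None and start - cend > distance:
              if members >= minregions:    # flush the finished cluster
                  yield (cstart, cend)
              members, cstart, cend = 0, None, None
          cstart = start if cstart is None else cstart
          cend = end if cend is None else max(cend, end)
          members += 1
      if cend is not None and members >= minregions:
          yield (cstart, cend)

  print(list(merge_clusters([(10, 20), (25, 40), (100, 110)], distance=10)))
  # [(10, 40)]  (the singleton (100, 110) falls below minregions)
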
--- a/tools/new_operations/gops_complement.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,98 +0,0 @@
-#!/usr/bin/env python
-"""
-Complement regions.
-
-usage: %prog in_file out_file
-    -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in file
-    -l, --lengths=N: Filename of .len file for species (chromosome lengths)
-    -a, --all: Complement all chromosomes (Genome-wide complement)
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
-from bx.intervals import *
-from bx.intervals.io import *
-from bx.intervals.operations.complement import complement
-from bx.intervals.operations.subtract import subtract
-from bx.cookbook import doc_optparse
-from galaxy.tools.util.galaxyops import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():
-    allchroms = False
-    upstream_pad = 0
-    downstream_pad = 0
-
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
-        lengths = options.lengths
-        if options.all: allchroms = True
-        in_fname, out_fname = args
-    except:
-        doc_optparse.exception()
-
-    g1 = NiceReaderWrapper( fileinput.FileInput( in_fname ),
-                            chrom_col=chr_col_1,
-                            start_col=start_col_1,
-                            end_col=end_col_1,
-                            strand_col=strand_col_1,
-                            fix_strand=True )
-
-    lens = dict()
-    chroms = list()
-    # dbfile is used to determine the length of each chromosome.  The lengths
-    # are added to the lens dict and passed to the complement operation code in bx.
-    dbfile = fileinput.FileInput( lengths )
-    
-    if dbfile:
-        if not allchroms:
-            try:
-                for line in dbfile:
-                    fields = line.split("\t")
-                    lens[fields[0]] = int(fields[1])
-            except:
-                # assume LEN doesn't exist or is corrupt somehow
-                pass
-        elif allchroms:
-            try:
-                for line in dbfile:
-                    fields = line.split("\t")
-                    end = int(fields[1])
-                    chroms.append("\t".join([fields[0],"0",str(end)]))
-            except:
-                pass
-
-    # Safety...if the dbfile didn't exist and we're on allchroms, then
-    # default to generic complement
-    if allchroms and len(chroms) == 0:
-        allchroms = False
-
-    if allchroms:
-        chromReader = GenomicIntervalReader(chroms)
-        generator = subtract([chromReader, g1])
-    else:
-        generator = complement(g1, lens)
-
-    out_file = open( out_fname, "w" )
-
-    try:
-        for interval in generator:
-            if type( interval ) is GenomicInterval:
-                out_file.write( "%s\n" % "\t".join( interval ) )
-            else:
-                out_file.write( "%s\n" % interval )
-    except ParseError, exc:
-        out_file.close()
-        fail( "Invalid file format: %s" % str( exc ) )
-
-    out_file.close()
-
-    if g1.skipped > 0:
-        print skipped( g1, filedesc="" )
-
-if __name__ == "__main__":
-    main()
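
Note the design choice above: the genome-wide complement is computed by
subtracting the dataset from whole-chromosome intervals built from the .len
file, so chromosomes absent from the input come back in full. A
per-chromosome sketch of that subtraction (illustrative)::

  def subtract(minuend, subtrahend):
      # Subtract sorted intervals on one chromosome from each minuend interval.
      for start, end in minuend:
          pos = start
          for s, e in sorted(subtrahend):
              if s >= end:
                  break
              if s > pos:
                  yield (pos, s)
              pos = max(pos, e)
          if pos < end:
              yield (pos, end)

  whole_chrom = [(0, 50)]      # one interval per chromosome, from the .len file
  print(list(subtract(whole_chrom, [(10, 20), (30, 40)])))
  # [(0, 10), (20, 30), (40, 50)]
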
--- a/tools/new_operations/gops_concat.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-#!/usr/bin/env python
-"""
-Concatenate two bed files.  The concatenated files are returned in the
-same format as the first.  If --sameformat is specified, then all
-columns will be treated as the same, and all fields will be saved,
-although the output will be trimmed to match the primary input.  In
-addition, if --sameformat is specified, missing fields will be padded
-with a period (.).
-
-usage: %prog in_file_1 in_file_2 out_file
-    -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in first file
-    -2, --cols2=N,N,N,N: Columns for chrom, start, end, strand in second file
-    -s, --sameformat: All files are precisely the same format.
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
-from bx.intervals import *
-from bx.intervals.io import *
-from bx.intervals.operations.concat import *
-from bx.cookbook import doc_optparse
-from galaxy.tools.util.galaxyops import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():
-    sameformat=False
-    upstream_pad = 0
-    downstream_pad = 0
-
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
-        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )
-        if options.sameformat: sameformat = True
-        in_file_1, in_file_2, out_fname = args
-    except:
-        doc_optparse.exception()
-
-    g1 = NiceReaderWrapper( fileinput.FileInput( in_file_1 ),
-                            chrom_col=chr_col_1,
-                            start_col=start_col_1,
-                            end_col=end_col_1,
-                            fix_strand=True )
-
-    g2 = NiceReaderWrapper( fileinput.FileInput( in_file_2 ),
-                            chrom_col=chr_col_2,
-                            start_col=start_col_2,
-                            end_col=end_col_2,
-                            strand_col=strand_col_2,
-                            fix_strand=True )
-
-    if strand_col_1 >= 0:
-        g1.strand_col = strand_col_1
-
-    out_file = open( out_fname, "w" )
-
-    try:
-        for line in concat( [g1, g2], sameformat=sameformat ):
-            if type( line ) is GenomicInterval:
-                out_file.write( "%s\n" % "\t".join( line.fields ) )
-            else:
-                out_file.write( "%s\n" % line )
-    except ParseError, exc:
-        out_file.close()
-        fail( "Invalid file format: %s" % str( exc ) )
-
-    out_file.close()
-
-    if g1.skipped > 0:
-        print skipped( g1, filedesc=" of 1st dataset" )
-    if g2.skipped > 0:
-        print skipped( g2, filedesc=" of 2nd dataset" )
-        
-if __name__ == "__main__":
-    main()
--- a/tools/new_operations/gops_coverage.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-#!/usr/bin/env python
-"""
-Calculate coverage of one query on another, and append the coverage to
-the last two columns as bases covered and percent coverage.
-
-usage: %prog bed_file_1 bed_file_2 out_file
-    -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in first file
-    -2, --cols2=N,N,N,N: Columns for chrom, start, end, strand in second file
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
-from bx.intervals import *
-from bx.intervals.io import *
-from bx.intervals.operations.coverage import *
-from bx.cookbook import doc_optparse
-from galaxy.tools.util.galaxyops import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():
-    upstream_pad = 0
-    downstream_pad = 0
-
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
-        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )      
-        in_fname, in2_fname, out_fname = args
-    except:
-        doc_optparse.exception()
-
-    g1 = NiceReaderWrapper( fileinput.FileInput( in_fname ),
-                            chrom_col=chr_col_1,
-                            start_col=start_col_1,
-                            end_col=end_col_1,
-                            strand_col=strand_col_1,
-                            fix_strand=True )
-    g2 = NiceReaderWrapper( fileinput.FileInput( in2_fname ),
-                            chrom_col=chr_col_2,
-                            start_col=start_col_2,
-                            end_col=end_col_2,
-                            strand_col=strand_col_2,
-                            fix_strand=True )
-
-    out_file = open( out_fname, "w" )
-
-    try:
-        for line in coverage( [g1,g2] ):
-            if type( line ) is GenomicInterval:
-                out_file.write( "%s\n" % "\t".join( line.fields ) )
-            else:
-                out_file.write( "%s\n" % line )
-    except ParseError, exc:
-        out_file.close()
-        fail( "Invalid file format: %s" % str( exc ) )
-
-    out_file.close()
-
-    if g1.skipped > 0:
-        print skipped( g1, filedesc=" of 1st dataset" )
-    if g2.skipped > 0:
-        print skipped( g2, filedesc=" of 2nd dataset" )
-
-if __name__ == "__main__":
-    main()
--- a/tools/new_operations/gops_intersect.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,98 +0,0 @@
-#!/usr/bin/env python
-"""
-Find regions of first interval file that overlap regions in a second interval file.
-Interval files can either be BED or GFF format.
-
-usage: %prog interval_file_1 interval_file_2 out_file
-    -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in first file
-    -2, --cols2=N,N,N,N: Columns for chrom, start, end, strand in second file
-    -m, --mincols=N: Require this much overlap (default 1bp)
-    -p, --pieces: print the overlapping pieces of intervals from the first dataset (after padding)
-    -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval
-    -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
-from bx.intervals import *
-from bx.intervals.io import *
-from bx.intervals.operations.intersect import *
-from bx.cookbook import doc_optparse
-from galaxy.tools.util.galaxyops import *
-from galaxy.datatypes.util.gff_util import GFFFeature, GFFReaderWrapper, convert_bed_coords_to_gff
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():
-    mincols = 1
-    upstream_pad = 0
-    downstream_pad = 0
-
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
-        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )      
-        if options.mincols: mincols = int( options.mincols )
-        pieces = bool( options.pieces )
-        in1_gff_format = bool( options.gff1 )
-        in2_gff_format = bool( options.gff2 )
-        in_fname, in2_fname, out_fname = args
-    except:
-        doc_optparse.exception()
-        
-    # Set readers to handle either GFF or default format.
-    if in1_gff_format:
-        in1_reader_wrapper = GFFReaderWrapper
-    else:
-        in1_reader_wrapper = NiceReaderWrapper
-    if in2_gff_format:
-        in2_reader_wrapper = GFFReaderWrapper
-    else:
-        in2_reader_wrapper = NiceReaderWrapper
-        
-    g1 = in1_reader_wrapper( fileinput.FileInput( in_fname ),
-                            chrom_col=chr_col_1,
-                            start_col=start_col_1,
-                            end_col=end_col_1,
-                            strand_col=strand_col_1,
-                            fix_strand=True )
-    if in1_gff_format:
-        # Intersect requires coordinates in BED format.
-        g1.convert_to_bed_coord=True
-    g2 = in2_reader_wrapper( fileinput.FileInput( in2_fname ),
-                            chrom_col=chr_col_2,
-                            start_col=start_col_2,
-                            end_col=end_col_2,
-                            strand_col=strand_col_2,
-                            fix_strand=True )
-    if in2_gff_format:
-        # Intersect requires coordinates in BED format.
-        g2.convert_to_bed_coord=True
-        
-    out_file = open( out_fname, "w" )
-    try:
-        for feature in intersect( [g1,g2], pieces=pieces, mincols=mincols ):
-            if isinstance( feature, GFFFeature ):
-                # Convert back to GFF coordinates since reader converted automatically.
-                convert_bed_coords_to_gff( feature )
-                for interval in feature.intervals:
-                    out_file.write( "%s\n" % "\t".join( interval.fields ) )
-            elif isinstance( feature, GenomicInterval ):
-                out_file.write( "%s\n" % "\t".join( feature.fields ) )
-            else:
-                out_file.write( "%s\n" % feature )
-    except ParseError, e:
-        out_file.close()
-        fail( "Invalid file format: %s" % str( e ) )
-
-    out_file.close()
-
-    if g1.skipped > 0:
-        print skipped( g1, filedesc=" of 1st dataset" )
-    if g2.skipped > 0:
-        print skipped( g2, filedesc=" of 2nd dataset" )
-
-if __name__ == "__main__":
-    main()
--- a/tools/new_operations/gops_join.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-#!/usr/bin/env python
-"""
-Join two sets of intervals using their overlap as the key.
-
-usage: %prog bed_file_1 bed_file_2 out_file
-    -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in first file
-    -2, --cols2=N,N,N,N: Columns for chrom, start, end, strand in second file
-    -m, --mincols=N: Require this much overlap (default 1bp)
-    -f, --fill=N: none, right, left, both
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
-from bx.intervals import *
-from bx.intervals.io import *
-from bx.intervals.operations.join import *
-from bx.cookbook import doc_optparse
-from galaxy.tools.util.galaxyops import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():
-    mincols = 1
-    upstream_pad = 0
-    downstream_pad = 0
-    leftfill = False
-    rightfill = False
-    
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
-        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )      
-        if options.mincols: mincols = int( options.mincols )
-        if options.fill:
-            if options.fill == "both":
-                rightfill = leftfill = True
-            else:
-                rightfill = options.fill == "right"
-                leftfill = options.fill == "left"
-        in_fname, in2_fname, out_fname = args
-    except:
-        doc_optparse.exception()
-
-    g1 = NiceReaderWrapper( fileinput.FileInput( in_fname ),
-                            chrom_col=chr_col_1,
-                            start_col=start_col_1,
-                            end_col=end_col_1,
-                            strand_col=strand_col_1,
-                            fix_strand=True )
-    g2 = NiceReaderWrapper( fileinput.FileInput( in2_fname ),
-                            chrom_col=chr_col_2,
-                            start_col=start_col_2,
-                            end_col=end_col_2,
-                            strand_col=strand_col_2,
-                            fix_strand=True )
-
-    out_file = open( out_fname, "w" )
-
-    try:
-        for outfields in join(g1, g2, mincols=mincols, rightfill=rightfill, leftfill=leftfill):
-            if type( outfields ) is list:
-                out_file.write( "%s\n" % "\t".join( outfields ) )
-            else:
-                out_file.write( "%s\n" % outfields )
-    except ParseError, exc:
-        out_file.close()
-        fail( "Invalid file format: %s" % str( exc ) )
-    except MemoryError:
-        out_file.close()
-        fail( "Input datasets were too large to complete the join operation." )
-
-    out_file.close()
-
-    if g1.skipped > 0:
-        print skipped( g1, filedesc=" of 1st dataset" )
-    if g2.skipped > 0:
-        print skipped( g2, filedesc=" of 2nd dataset" )
-
-if __name__ == "__main__":
-    main()
--- a/tools/new_operations/gops_merge.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-#!/usr/bin/env python
-"""
-Merge overlapping regions.
-
-usage: %prog in_file out_file
-    -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in first file
-    -m, --mincols=N: Require this much overlap (default 1bp)
-    -3, --threecol: Output 3 column bed
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
-from bx.intervals import *
-from bx.intervals.io import *
-from bx.intervals.operations.merge import *
-from bx.cookbook import doc_optparse
-from galaxy.tools.util.galaxyops import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():
-    mincols = 1
-    upstream_pad = 0
-    downstream_pad = 0
-
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
-        if options.mincols: mincols = int( options.mincols )
-        in_fname, out_fname = args
-    except:
-        doc_optparse.exception()
-
-    g1 = NiceReaderWrapper( fileinput.FileInput( in_fname ),
-                            chrom_col=chr_col_1,
-                            start_col=start_col_1,
-                            end_col=end_col_1,
-                            strand_col = strand_col_1,
-                            fix_strand=True )
-
-    out_file = open( out_fname, "w" )
-
-    try:
-        for line in merge(g1,mincols=mincols):
-            if options.threecol:
-                if type( line ) is GenomicInterval:
-                    out_file.write( "%s\t%s\t%s\n" % ( line.chrom, str( line.startCol ), str( line.endCol ) ) )
-                elif type( line ) is list:
-                    out_file.write( "%s\t%s\t%s\n" % ( line[chr_col_1], str( line[start_col_1] ), str( line[end_col_1] ) ) )
-                else:
-                    out_file.write( "%s\n" % line )
-            else:
-                if type( line ) is GenomicInterval:
-                    out_file.write( "%s\n" % "\t".join( line.fields ) )
-                elif type( line ) is list:
-                    out_file.write( "%s\n" % "\t".join( line ) )
-                else:
-                    out_file.write( "%s\n" % line )
-    except ParseError, exc:
-        out_file.close()
-        fail( "Invalid file format: %s" % str( exc ) )
-
-    out_file.close()
-
-    if g1.skipped > 0:
-        print skipped( g1, filedesc=" of 1st dataset" )
-
-if __name__ == "__main__":
-    main()
--- a/tools/new_operations/gops_subtract.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-#!/usr/bin/env python
-"""
-Find regions of first interval file that do not overlap regions in a second
-interval file. Interval files can either be BED or GFF format.
-
-usage: %prog interval_file_1 interval_file_2 out_file
-    -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in first file
-    -2, --cols2=N,N,N,N: Columns for chrom, start, end, strand in second file
-    -m, --mincols=N: Require this much overlap (default 1bp)
-    -p, --pieces: print the non-overlapping pieces of intervals from the first dataset (after padding)
-    -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval
-    -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
-from bx.intervals import *
-from bx.intervals.io import *
-from bx.intervals.operations.subtract import *
-from bx.cookbook import doc_optparse
-from galaxy.tools.util.galaxyops import *
-from galaxy.datatypes.util.gff_util import GFFFeature, GFFReaderWrapper, convert_bed_coords_to_gff
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():
-    mincols = 1
-    upstream_pad = 0
-    downstream_pad = 0
-
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
-        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )      
-        if options.mincols: mincols = int( options.mincols )
-        pieces = bool( options.pieces )
-        in1_gff_format = bool( options.gff1 )
-        in2_gff_format = bool( options.gff2 )
-        in_fname, in2_fname, out_fname = args
-    except:
-        doc_optparse.exception()
-
-    # Set readers to handle either GFF or default format.
-    if in1_gff_format:
-        in1_reader_wrapper = GFFReaderWrapper
-    else:
-        in1_reader_wrapper = NiceReaderWrapper
-    if in2_gff_format:
-        in2_reader_wrapper = GFFReaderWrapper
-    else:
-        in2_reader_wrapper = NiceReaderWrapper
-        
-    g1 = in1_reader_wrapper( fileinput.FileInput( in_fname ),
-                            chrom_col=chr_col_1,
-                            start_col=start_col_1,
-                            end_col=end_col_1,
-                            strand_col=strand_col_1,
-                            fix_strand=True )
-    if in1_gff_format:
-        # Subtract requires coordinates in BED format.
-        g1.convert_to_bed_coord=True
-        
-    g2 = in2_reader_wrapper( fileinput.FileInput( in2_fname ),
-                            chrom_col=chr_col_2,
-                            start_col=start_col_2,
-                            end_col=end_col_2,
-                            strand_col=strand_col_2,
-                            fix_strand=True )
-    if in2_gff_format:
-        # Subtract requires coordinates in BED format.
-        g2.convert_to_bed_coord=True
-        
-    out_file = open( out_fname, "w" )
-    try:
-        for feature in subtract( [g1,g2], pieces=pieces, mincols=mincols ):
-            if isinstance( feature, GFFFeature ):
-                # Convert back to GFF coordinates since reader converted automatically.
-                convert_bed_coords_to_gff( feature )
-                for interval in feature.intervals:
-                    out_file.write( "%s\n" % "\t".join( interval.fields ) )
-            elif isinstance( feature, GenomicInterval ):
-                out_file.write( "%s\n" % "\t".join( feature.fields ) )
-            else:
-                out_file.write( "%s\n" % feature )
-    except ParseError, exc:
-        out_file.close()
-        fail( "Invalid file format: %s" % str( exc ) )
-
-    out_file.close()
-
-    if g1.skipped > 0:
-        print skipped( g1, filedesc=" of 1st dataset" )
-    if g2.skipped > 0:
-        print skipped( g2, filedesc=" of 2nd dataset" )
-
-if __name__ == "__main__":
-    main()
--- a/tools/new_operations/intersect.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,137 +0,0 @@
-<tool id="gops_intersect_1" name="Intersect">
-  <description>the intervals of two datasets</description>
-  <command interpreter="python">gops_intersect.py 
-      $input1 $input2 $output
-
-      #if isinstance( $input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
-        -1 1,4,5,7 --gff1
-      #else:
-        -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol}
-      #end if
-
-      #if isinstance( $input2.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
-        -2 1,4,5,7 --gff2
-      #else:
-          -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol} 
-      #end if
-
-      -m $min $returntype
-  </command>
-  <inputs>
-      <param name="returntype" type="select" label="Return" help="(see figure below)">
-          <option value="">Overlapping Intervals</option>
-          <option value="-p">Overlapping pieces of Intervals</option>
-      </param>
-      <param format="interval,gff" name="input1" type="data" help="First dataset">
-          <label>of</label>
-      </param>
-      <param format="interval,gff" name="input2" type="data" help="Second dataset">
-          <label>that intersect</label>
-      </param>
-      <param name="min" size="4" type="integer" value="1" min="1" help="(bp)">
-          <label>for at least</label>
-      </param>
-  </inputs>
-  <outputs>
-      <data format="input" name="output" metadata_source="input1"/>
-  </outputs>
-  <code file="operation_filter.py"/>
-  <trackster_conf/>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops_intersect_out.bed" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2_mod.bed" ftype="interval"/>
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops_intersect_diffCols.bed" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2_mod.bed" ftype="interval"/>
-      <param name="min" value="1" />
-      <param name="returntype" value="Overlapping pieces of Intervals" />
-      <output name="output" file="gops_intersect_p_diffCols.bed" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="10" />
-      <param name="returntype" value="Overlapping pieces of Intervals" />
-      <output name="output" file="gops_intersect_p_out.bed" />     
-    </test>
-    <test>
-      <param name="input1" value="gops_bigint.interval" ftype="interval" />
-      <param name="input2" value="gops_bigint2.interval" ftype="interval" />
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops_intersect_bigint_out.interval" />     
-    </test>
-    <test>
-      <param name="input1" value="gops_bigint2.interval" ftype="interval" />
-      <param name="input2" value="gops_bigint.interval" ftype="interval" />
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops_intersect_bigint_out.interval" />     
-    </test>
-    <test>
-      <param name="input1" value="12.bed" ftype="bed" />
-      <param name="input2" value="1.bed" ftype="bed" />
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops_intersect_no_strand_out.bed" />     
-    </test>
-    <!-- Intersect two GFF files. -->
-    <test>
-        <param name="input1" value="gops_subtract_in1.gff" />
-        <param name="input2" value="gops_subtract_in2.gff" />
-        <param name="min" value="1" />
-        <param name="returntype" value="" />
-        <output name="output" file="gops_intersect_out2.gff" />        
-    </test>
-    <!-- Intersect GFF file and bed file. -->
-    <test>
-        <param name="input1" value="gops_subtract_in1.gff" />
-        <param name="input2" value="gops_subtract_in2.bed" />
-        <param name="min" value="1" />
-        <param name="returntype" value="" />
-        <output name="output" file="gops_intersect_out2.gff" />        
-    </test>
-    
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.
-
------
-
-**Screencasts!**
-
-See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
-
-.. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations
-
------
-
-**Syntax**
-
-- **Where overlap is at least** sets the minimum length (in base pairs) of overlap between elements of the two datasets.
-- **Overlapping Intervals** returns entire intervals from the first dataset that overlap the second dataset.  The returned intervals are completely unchanged; this option only filters out intervals that do not overlap the second dataset.
-- **Overlapping pieces of Intervals** returns intervals that indicate the exact base-pair overlap between the first dataset and the second dataset.  The intervals returned are from the first dataset, and all fields besides start and end are guaranteed to remain unchanged.  A code sketch of both modes follows.
-
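-For readers who prefer code, here is a minimal Python sketch of the two
-return modes.  It is illustrative only: coordinates are half-open
-(BED-style), the function names are hypothetical, and the tool itself
-delegates this work to bx-python::
-
-   def overlap( a_start, a_end, b_start, b_end ):
-       # Length of the overlap between two half-open intervals (0 if none).
-       return max( 0, min( a_end, b_end ) - max( a_start, b_start ) )
-
-   def intersect_whole( a, b, mincols=1 ):
-       # "Overlapping Intervals": keep a first-dataset interval unchanged
-       # if it overlaps anything in the second dataset by >= mincols bp.
-       return [ ( s1, e1 ) for s1, e1 in a
-                if any( overlap( s1, e1, s2, e2 ) >= mincols for s2, e2 in b ) ]
-
-   def intersect_pieces( a, b, mincols=1 ):
-       # "Overlapping pieces of Intervals": clip first-dataset intervals
-       # down to the exact overlapping base pairs.
-       return [ ( max( s1, s2 ), min( e1, e2 ) ) for s1, e1 in a for s2, e2 in b
-                if overlap( s1, e1, s2, e2 ) >= mincols ]
-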
------
-
-**Example**
-
-.. image:: ./static/operation_icons/gops_intersect.gif
-
-</help>
-</tool>
\ No newline at end of file
--- a/tools/new_operations/join.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,139 +0,0 @@
-<tool id="gops_join_1" name="Join">
-  <description>the intervals of two datasets side-by-side</description>
-  <command interpreter="python">gops_join.py $input1 $input2 $output -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol} -m $min -f $fill</command>
-  <inputs>
-    <param format="interval" name="input1" type="data" help="First dataset">
-      <label>Join</label>
-    </param>
-    <param format="interval" name="input2" type="data" help="Second dataset">
-      <label>with</label>
-    </param>
-    <param name="min" size="4" type="integer" value="1" help="(bp)">
-      <label>with min overlap</label>
-    </param>
-  <param name="fill" type="select" label="Return">
-    <option value="none">Only records that are joined (INNER JOIN)</option>
-    <option value="right">All records of first dataset (fill null with ".")</option>
-    <option value="left">All records of second dataset (fill null with ".")</option>
-    <option value="both">All records of both datasets (fill nulls with ".")</option>
-  </param>
-   </inputs>
-  <outputs>
-    <data format="interval" name="output" metadata_source="input1" />
-  </outputs>
-  <code file="operation_filter.py"/>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="1" />
-      <param name="fill" value="none" />
-      <output name="output" file="gops-join-none.dat" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="1" />
-      <param name="fill" value="right" />
-      <output name="output" file="gops-join-right.dat" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="1" />
-      <param name="fill" value="left" />
-      <output name="output" file="gops-join-left.dat" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="1" />
-      <param name="fill" value="both" />
-      <output name="output" file="gops-join-both.dat" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="500" />
-      <param name="fill" value="none" />
-      <output name="output" file="gops-join-none-500.dat" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="100" />
-      <param name="fill" value="both" />
-      <output name="output" file="gops-join-both-100.dat" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.
-
------
-
-**Screencasts!**
-
-See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
-
-.. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations
-
------
-
-**Syntax**
-
-- **Where overlap** specifies the minimum overlap between intervals that allows them to be joined.
-- **Return only records that are joined** returns only the records of the first dataset that join to a record in the second dataset.  This is analogous to an INNER JOIN.
-- **Return all records of first dataset (fill null with &quot;.&quot;)** returns all intervals of the first dataset, and any intervals that do not join an interval from the second dataset are filled in with a period (.).  This is analogous to a LEFT JOIN.
-- **Return all records of second dataset (fill null with &quot;.&quot;)** returns all intervals of the second dataset, and any intervals that do not join an interval from the first dataset are filled in with a period (.).  This is analogous to a RIGHT JOIN.  **Note that this may produce an invalid interval file, since a period (.) is not a valid chrom, start, end or strand.**
-- **Return all records of both datasets (fill nulls with &quot;.&quot;)** returns all records from both datasets, and fills on either the right or left with periods.  This is analogous to a FULL OUTER JOIN.  **Note that this may produce an invalid interval file, since a period (.) is not a valid chrom, start, end or strand.**  A code sketch of the fill behaviour follows the example below.
-
------
-
-**Example**
-
-If **First dataset** is::
-
-   chr1 10   100  Query1.1
-   chr1 500  1000 Query1.2
-   chr1 1100 1250 Query1.3
-
-and **Second dataset** is::
-
-   chr1 20   80   Query2.1
-   chr1 2000 2204 Query2.2
-   chr1 2500 3000 Query2.3
-
-
-The four return options will generate:
-
-
-- **Return only records that are joined**::
-
-   chr1 10 100 Query1.1 chr1 20 80 Query2.1
-
-- **Return all records of first dataset**::
-
-   chr1 10   100  Query1.1 chr1 20 80 Query2.1
-   chr1 500  1000 Query1.2 .    .  .  .
-   chr1 1100 1250 Query1.3 .    .  .  .
-
-- **Return all records of second dataset**::
-
-   chr1 10 100 Query1.1 chr1 20   80   Query2.1
-   .    .  .   .        chr1 2000 2204 Query2.2
-   .    .  .   .        chr1 2500 3000 Query2.3
-
-- **Return all records of both datasets**::
-
-   chr1 10   100  Query1.1 chr1 20   80   Query2.1
-   chr1 500  1000 Query1.2 .    .    .    .
-   chr1 1100 1250 Query1.3 .    .    .    .
-   .    .    .    .        chr1 2000 2204 Query2.2
-   .    .    .    .        chr1 2500 3000 Query2.3
-   
-
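-The fill behaviour can be summarised in a few lines of Python.  This is a
-sketch of the semantics only (rows are lists of fields, ``matches`` is the
-set of overlapping row-index pairs, and all names are hypothetical), not the
-tool's actual implementation::
-
-   def join_fill( rows1, rows2, matches, leftfill, rightfill ):
-       out = [ rows1[i] + rows2[j] for i, j in matches ]   # INNER JOIN
-       if rightfill:  # keep unmatched rows of the first dataset
-           matched1 = set( i for i, j in matches )
-           out += [ rows1[i] + [ "." ] * len( rows2[0] )
-                    for i in range( len( rows1 ) ) if i not in matched1 ]
-       if leftfill:   # keep unmatched rows of the second dataset
-           matched2 = set( j for i, j in matches )
-           out += [ [ "." ] * len( rows1[0] ) + rows2[j]
-                    for j in range( len( rows2 ) ) if j not in matched2 ]
-       return out
-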
-</help>
-</tool>
\ No newline at end of file
--- a/tools/new_operations/merge.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-<tool id="gops_merge_1" name="Merge">
-  <description>the overlapping intervals of a dataset</description>
-  <command interpreter="python">gops_merge.py $input1 $output -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} $returntype</command>
-  <inputs>
-    <param format="interval" name="input1" type="data">
-      <label>Merge overlapping regions of</label>
-    </param>
-    <param name="returntype" type="boolean" truevalue="-3" falsevalue="">
-      <label>Output 3 column bed</label>
-    </param>
-   </inputs>
-  <outputs>
-    <data format="input" name="output" metadata_source="input1" />
-  </outputs>
-  <code file="operation_filter.py">
-    <hook exec_after_process="exec_after_merge" />
-  </code>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed" />
-      <output name="output" file="gops-merge.dat" />
-      <param name="returntype" value="true" />
-    </test>
-    <test>
-      <param name="input1" value="2_mod.bed" ftype="interval"/>
-      <output name="output" file="gops_merge_diffCols.dat" />
-      <param name="returntype" value="true" />
-    </test>
-    <test>
-      <param name="input1" value="gops_bigint.interval" />
-      <output name="output" file="gops_merge_out2.bed" />
-      <param name="returntype" value="true" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.
-
------
-
-**Screencasts!**
-
-See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
-
-.. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations
-
------
-
-This operation merges all overlapping intervals into single intervals, as sketched below.
-
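-A minimal Python sketch of the idea, assuming the intervals of one chromosome
-as (start, end) pairs (the function name is hypothetical)::
-
-   def merge_intervals( intervals ):
-       # Merge any intervals that overlap into single covering intervals.
-       merged = []
-       for start, end in sorted( intervals ):
-           if merged and start < merged[-1][1]:  # overlaps the previous one
-               merged[-1] = ( merged[-1][0], max( merged[-1][1], end ) )
-           else:
-               merged.append( ( start, end ) )
-       return merged
-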
-**Example**
-
-.. image:: ./static/operation_icons/gops_merge.gif
-
-</help>
-</tool>
\ No newline at end of file
--- a/tools/new_operations/operation_filter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-# runs after the job (and after the default post-filter)
-import os
-from galaxy import eggs
-from galaxy import jobs
-from galaxy.tools.parameters import DataToolParameter
-# Older py compatibility
-try:
-    set()
-except:
-    from sets import Set as set
-
-#def exec_before_process(app, inp_data, out_data, param_dict, tool=None):
-#    """Sets the name of the data"""
-#    dbkeys = sets.Set( [data.dbkey for data in inp_data.values() ] ) 
-#    if len(dbkeys) != 1:
-#        raise Exception, '<p><font color="yellow">Both Queries must be from the same genome build</font></p>'
-
-def validate_input( trans, error_map, param_values, page_param_map ):
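-    # Collect the dbkey of every dataset parameter and sanity-check interval
-    # column metadata; mismatched builds or unset metadata are reported back
-    # through error_map, keyed by parameter name.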
-    dbkeys = set()
-    data_param_names = set()
-    data_params = 0
-    for name, param in page_param_map.iteritems():
-        if isinstance( param, DataToolParameter ):
-            # for each dataset parameter
-            if param_values.get(name, None) != None:
-                dbkeys.add( param_values[name].dbkey )
-                data_params += 1
-                # check meta data
-                try:
-                    param = param_values[name]
-                    if isinstance( param.datatype, trans.app.datatypes_registry.get_datatype_by_extension( 'gff' ).__class__ ):
-                        # TODO: currently cannot validate GFF inputs b/c they are not derived from interval.
-                        pass
-                    else: # Validate interval datatype.
-                        startCol = int( param.metadata.startCol )
-                        endCol = int( param.metadata.endCol )
-                        chromCol = int( param.metadata.chromCol )
-                        if param.metadata.strandCol is not None:
-                            strandCol = int ( param.metadata.strandCol )
-                        else:
-                            strandCol = 0
-                except:
-                    error_msg = "The attributes of this dataset are not properly set. " + \
-                    "Click the pencil icon in the history item to set the chrom, start, end and strand columns."
-                    error_map[name] = error_msg
-            data_param_names.add( name )
-    if len( dbkeys ) > 1:
-        for name in data_param_names:
-            error_map[name] = "All datasets must belong to same genomic build, " \
-                "this dataset is linked to build '%s'" % param_values[name].dbkey
-    if data_params != len(data_param_names):
-        for name in data_param_names:
-            error_map[name] = "A dataset of the appropriate type is required"
-
-# Commented out by INS, 5/30/2007.  What is the PURPOSE of this?
-def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
-    """Verify the output data after each run"""
-    items = out_data.items()
-
-    for name, data in items:
-        try:
-            if stderr and len( stderr ) > 0:
-                raise Exception( stderr )
-
-        except Exception, exc:
-            data.blurb = jobs.JOB_ERROR
-            data.state = jobs.JOB_ERROR
-
-## def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
-##     pass
-
-
-def exec_after_merge(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
-    exec_after_process(
-        app, inp_data, out_data, param_dict, tool=tool, stdout=stdout, stderr=stderr)
-
-    # reset column metadata for 3-column BED output; merge always strips the strand column
-    items = out_data.items()
-    for name, data in items:
-        if param_dict['returntype'] == True:
-            data.metadata.chromCol = 1
-            data.metadata.startCol = 2
-            data.metadata.endCol = 3
-        # merge always clobbers strand
-        data.metadata.strandCol = None
-            
-
-def exec_after_cluster(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
-    exec_after_process(
-        app, inp_data, out_data, param_dict, tool=tool, stdout=stdout, stderr=stderr)
-
-    # strip strand column if clusters were merged
-    if param_dict["returntype"] == '1':
-        items = out_data.items()
-        for name, data in items:
-            data.metadata.strandCol = None
--- a/tools/new_operations/subtract.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,118 +0,0 @@
-<tool id="gops_subtract_1" name="Subtract">
-  <description>the intervals of two datasets</description>
-  <command interpreter="python">gops_subtract.py 
-      $input1 $input2 $output
-
-      #if isinstance( $input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
-        -1 1,4,5,7 --gff1
-      #else:
-        -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol}
-      #end if
-
-      #if isinstance( $input2.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
-        -2 1,4,5,7 --gff2
-      #else:
-          -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol} 
-      #end if
-
-      -m $min $returntype
-  </command>
-  <inputs>
-    <param format="interval,gff" name="input2" type="data" help="Second dataset">
-      <label>Subtract</label>
-    </param>
-
-    <param format="interval,gff" name="input1" type="data" help="First dataset">
-      <label>from</label>
-    </param>
-
-    <param name="returntype" type="select" label="Return" help="of the first dataset (see figure below)">
-      <option value="">Intervals with no overlap</option>
-      <option value="-p">Non-overlapping pieces of intervals</option>
-    </param>
-    
-    <param name="min" size="4" type="integer" value="1" min="1" help="(bp)">
-      <label>where minimal overlap is</label>
-    </param>
-   </inputs>
-  <outputs>
-    <data format="input" name="output" metadata_source="input1"/>
-  </outputs>
-  <code file="operation_filter.py"/>
-  <trackster_conf/>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops-subtract.dat" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2_mod.bed" ftype="interval"/>
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops_subtract_diffCols.dat" />
-    </test>
-    <test>
-      <param name="input1" value="gops_subtract_bigint.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops-subtract.dat" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="10" />
-      <param name="returntype" value="Non-overlapping pieces of intervals" />
-      <output name="output" file="gops-subtract-p.dat" />     
-    </test>
-    <!-- Subtract two GFF files. -->
-    <test>
-        <param name="input1" value="gops_subtract_in1.gff" />
-        <param name="input2" value="gops_subtract_in2.gff" />
-        <param name="min" value="1" />
-        <param name="returntype" value="" />
-        <output name="output" file="gops_subtract_out1.gff" />
-    </test>
-    <!-- Subtract BED file from GFF file. -->
-    <test>
-        <param name="input1" value="gops_subtract_in1.gff" />
-        <param name="input2" value="gops_subtract_in2.bed" />
-        <param name="min" value="1" />
-        <param name="returntype" value="" />
-        <output name="output" file="gops_subtract_out1.gff" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.
-
------
-
-**Screencasts!**
-
-See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
-
-.. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations
-
------
-
-**Syntax**
-
-- **Where overlap is at least** sets the minimum length (in base pairs) of overlap between elements of the two datasets.
-- **Intervals with no overlap** returns entire intervals from the first dataset that do not overlap the second dataset.  The returned intervals are completely unchanged, and this option only filters out intervals that overlap with the second dataset.
-- **Non-overlapping pieces of intervals** returns intervals from the first dataset that have the intervals from the second dataset removed.  Any overlapping base pairs are removed from the range of the interval.  All fields besides start and end are guaranteed to remain unchanged.  A code sketch of both modes follows.
-
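-A minimal Python sketch of the two modes, using half-open coordinates and
-hypothetical function names (the tool itself delegates to bx-python)::
-
-   def overlap( a_start, a_end, b_start, b_end ):
-       return max( 0, min( a_end, b_end ) - max( a_start, b_start ) )
-
-   def subtract_whole( a, b, mincols=1 ):
-       # "Intervals with no overlap": keep first-dataset intervals untouched
-       # unless they overlap the second dataset by >= mincols bp.
-       return [ ( s1, e1 ) for s1, e1 in a
-                if not any( overlap( s1, e1, s2, e2 ) >= mincols for s2, e2 in b ) ]
-
-   def subtract_pieces( start, end, b ):
-       # "Non-overlapping pieces of intervals": cut the covered base pairs
-       # out of one first-dataset interval, possibly splitting it.
-       pieces = [ ( start, end ) ]
-       for s2, e2 in sorted( b ):
-           kept = []
-           for s1, e1 in pieces:
-               if overlap( s1, e1, s2, e2 ) > 0:
-                   if s1 < s2: kept.append( ( s1, s2 ) )
-                   if e2 < e1: kept.append( ( e2, e1 ) )
-               else:
-                   kept.append( ( s1, e1 ) )
-           pieces = kept
-       return pieces
-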
------
-
-**Example**
-
-.. image:: ./static/operation_icons/gops_subtract.gif
-
-</help>
-</tool>
\ No newline at end of file
--- a/tools/new_operations/subtract_query.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-#!/usr/bin/env python
-# Greg Von Kuster
-
-"""
-Subtract an entire query from another query
-usage: %prog in_file_1 in_file_2 begin_col end_col output 
-"""
-import sys, re
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-# Older py compatibility
-try:
-    set()
-except:
-    from sets import Set as set
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def get_lines(fname, begin_col='', end_col=''):
-    lines = set([])
-    i = 0
-    for i, line in enumerate(file(fname)):
-        line = line.rstrip('\r\n')
-        if line and not line.startswith('#'):
-            if begin_col and end_col:
-                """Both begin_col and end_col must be integers at this point."""
-                try:
-                    line = line.split('\t')
-                    line = '\t'.join([line[j] for j in range(begin_col-1, end_col)])
-                    lines.add( line )
-                except: pass
-            else:
-                lines.add( line )
-    if i: return (i+1, lines)
-    else: return (i, lines)
-
-def main():
-    
-    # Parsing Command Line here
-    options, args = doc_optparse.parse( __doc__ )
-
-    try:
-        inp1_file, inp2_file, begin_col, end_col, out_file = args
-    except:
-        doc_optparse.exception()
-    
-    begin_col = begin_col.strip()
-    end_col = end_col.strip()
-    
-    if begin_col != 'None' or end_col != 'None':
-        """
-        The user selected columns for restriction.  We'll allow default
-        values for both begin_col and end_col as long as the user selected
-        at least one of them for restriction.
-        """
-        if begin_col == 'None':
-            begin_col = end_col
-        elif end_col == 'None':
-            end_col = begin_col
-        begin_col = int(begin_col)
-        end_col = int(end_col)
-        """Make sure that begin_col <= end_col (switch if not)"""
-        if begin_col > end_col:
-            tmp_col = end_col
-            end_col = begin_col
-            begin_col = tmp_col
-    else:
-        begin_col = end_col = ''
-
-    try:
-        fo = open(out_file,'w')
-    except:
-        print >> sys.stderr, "Unable to open output file"
-        sys.exit()
-
-    """
-    len1 is the number of lines in inp1_file
-    lines1 is the set of unique lines in inp1_file
-    diff1 is the number of duplicate lines removed from inp1_file
-    """
-    len1, lines1 = get_lines(inp1_file, begin_col, end_col)
-    diff1 = len1 - len(lines1)
-    len2, lines2 = get_lines(inp2_file, begin_col, end_col)
-    
-    lines1.difference_update(lines2)
-    """lines1 is now the set of unique lines in inp1_file - the set of unique lines in inp2_file"""
-
-    for line in lines1:
-        print >> fo, line
-
-    fo.close()
-    
-    info_msg = 'Subtracted %d lines. ' %((len1 - diff1) - len(lines1))
-    
-    if begin_col and end_col:
-        info_msg += 'Restricted to columns c' + str(begin_col) + ' thru c' + str(end_col) + '. '
-
-    if diff1 > 0:
-        info_msg += 'Eliminated %d duplicate/blank/comment/invalid lines from first query.' %diff1
-    
-    print info_msg
-
-if __name__ == "__main__":
-    main()
--- a/tools/new_operations/subtract_query.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,110 +0,0 @@
-<tool id="subtract_query1" name="Subtract Whole Dataset">
-  <description>from another dataset</description>
-  <command interpreter="python">subtract_query.py $input1 $input2 $begin_col $end_col $output</command>
-  <inputs>
-    <param format="txt" name="input2" type="data" label="Subtract" help="Second dataset" />
-    <param format="txt" name="input1" type="data" label="from" help="First dataset" />
-    <param name="begin_col" type="data_column" data_ref="input1" force_select="False" label="Restrict subtraction between 'begin column'" />
-    <param name="end_col" type="data_column" data_ref="input1" force_select="False" label="and 'end column'" help="Specifying columns for restricting subtraction is available only for tabular formatted datasets" />
-  </inputs>
-  <outputs>
-    <data format="input" name="output" metadata_source="input1" />
-  </outputs>
-  <tests>
-  	<!-- Subtract 2 non-tabular files with no column restrictions. -->
-  	<!-- Cannot figure out why this test won't pass, it works in real time...
-    <test>
-      <param name="input1" value="1.txt" />
-      <param name="input2" value="2.txt" />
-      <param name="begin_col" value="None" />
-      <param name="end_col" value="None" />
-      <output name="output" file="subtract-query-1.dat" />
-    </test>
-    -->
-  	<!-- Subtract 2 tabular files with no column restrictions. -->
-    <test>
-      <param name="input1" value="eq-showbeginning.dat" />
-      <param name="input2" value="eq-showtail.dat" />
-      <param name="begin_col" value="None" />
-      <param name="end_col" value="None" />
-      <output name="output" file="subtract-query-2.dat" />
-    </test>
-  	<!-- Subtract 2 tabular files with column restrictions. -->
-    <test>
-      <param name="input1" value="eq-showbeginning.dat" />
-      <param name="input2" value="eq-removebeginning.dat" />
-      <param name="begin_col" value="c1" />
-      <param name="end_col" value="c3" />
-      <output name="output" file="subtract-query-3.dat" />
-    </test>
-  	<!-- Subtract a non-tabular file from a tabular file with no column restrictions. -->
-    <test>
-      <param name="input1" value="eq-showbeginning.dat" />
-      <param name="input2" value="2.txt" />
-      <param name="begin_col" value="None" />
-      <param name="end_col" value="None" />
-      <output name="output" file="subtract-query-4.dat" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** This tool complements the tool in the **Operate on Genomic Intervals** tool set which subtracts the intervals of two datasets.
-
-
------
-
-**Syntax**
-
-This tool subtracts an entire dataset from another dataset.  
-
-- Any text format is valid.
-- If both dataset formats are tabular, you may restrict the subtraction to specific columns **contained in both datasets** and the resulting dataset will include only the columns specified. 
-- The begin column must be less than or equal to the end column.  If it is not, begin column is switched with end column.
-- If begin column is specified but end column is not, end column will default to begin column (and vice versa).
-- All blank and comment lines are skipped and not included in the resulting dataset (comment lines are lines beginning with a # character).
-- Duplicate lines are eliminated from both datasets prior to subtraction.  If any duplicate lines were eliminated from the first dataset, the number is displayed in the resulting history item.  A code sketch of this set difference follows.
-
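-Conceptually the tool computes a set difference on (optionally
-column-restricted) lines.  A minimal sketch, assuming tab-separated lines and
-1-based inclusive column numbers (the names are hypothetical)::
-
-   def subtract_lines( lines1, lines2, begin_col=None, end_col=None ):
-       def key( line ):
-           # Restrict comparison (and output) to the selected columns.
-           if begin_col and end_col:
-               return '\t'.join( line.split( '\t' )[begin_col - 1:end_col] )
-           return line
-       remove = set( key( line ) for line in lines2 )
-       return set( key( line ) for line in lines1 ) - remove
-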
------
-
-**Example**
-
-If this is the **First dataset**::
-
-  chr1            4225    19670
-  chr10           6       8
-  chr1            24417   24420
-  chr6_hla_hap2   0       150
-  chr2            1       5
-  chr10           2       10
-  chr1            30      55
-  chrY            1       20
-  chr1            1225979 42287290
-  chr10           7       8 
-
-and this is the **Second dataset**::
-
-  chr1            4225    19670
-  chr10           6       8
-  chr1            24417   24420
-  chr6_hla_hap2   0       150
-  chr2            1       5
-  chr1            30      55
-  chrY            1       20
-  chr1            1225979 42287290
-
-Subtracting the **Second dataset** from the **First dataset** (including all columns) will yield::
-
-  chr10           7       8 
-  chr10           2       10
-
-Conversely, subtracting the **First dataset** from the **Second dataset** (including all columns) will result in an empty dataset.
-
-Subtracting the **Second dataset** from the **First dataset** (restricting to columns c1 and c2) will yield::
-
-  chr10           7
-  chr10           2
-
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/new_operations/tables_arithmetic_operations.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-# A program to implement arithmetic operations on the data in tabular files. The program takes three inputs:
-# The first input is a TABULAR format file containing numbers only.
-# The second input is a TABULAR format file containing numbers only.
-# The two files must have the same number of columns and the same number of rows
-# The third input is an arithmetic operation: +, -, *, or / for addition, subtraction, multiplication, or division, respectively 
-# The output file is a TABULAR format file containing the result of implementing the arithmetic operation on both input files.
-# The output file has the same number of columns and the same number of rows as each of the two input files.
-# Note: in the case of division, none of the values in the second input file may be 0.
-
-use strict;
-use warnings;
-
-#variables to handle information of the first input tabular file
-my $lineData1 = "";
-my @lineDataArray1 = ();
-my $lineArraySize = 0;
-my $lineCounter1 = 0;
-
-#variables to handle information of the second input tabular file
-my $lineData2= "";
-my @lineDataArray2 = ();
-my $lineCounter2 = 0;
-
-my $result = 0;
-
-# check to make sure having the correct number of arguments
-my $usage = "usage: tables_arithmetic_operations.pl [TABULAR.in] [TABULAR.in] [ArithmeticOperation] [TABULAR.out] \n";
-die $usage unless @ARGV == 4;
-
-#variables to store the names of input and output files
-my $inputTabularFile1 = $ARGV[0];
-my $inputTabularFile2 = $ARGV[1];
-my $arithmeticOperation = $ARGV[2];
-my $outputTabularFile = $ARGV[3];
-
-#open the input and output files
-open (INPUT1, "<", $inputTabularFile1) || die("Could not open file $inputTabularFile1 \n"); 
-open (INPUT2, "<", $inputTabularFile2) || die("Could not open file $inputTabularFile2 \n"); 
-open (OUTPUT, ">", $outputTabularFile) || die("Could not open file $outputTabularFile \n");
-
-#store the first input file in the array @tabularData1
-my @tabularData1 = <INPUT1>;
-	
-#store the second input file in the array @tabularData2
-my @tabularData2 = <INPUT2>;
-
-#reset the $lineCounter1 to 0	
-$lineCounter1 = 0;
-
-#iterate through the lines of the first input file
-INDEL1:
-foreach $lineData1 (@tabularData1){
-	chomp ($lineData1);
-	$lineCounter1++;
-	
-	#reset the $lineCounter2 to 0
-	$lineCounter2 = 0;
-	
-	#iterate through the lines of the second input file
-	foreach $lineData2 (@tabularData2){
-		chomp ($lineData2);
-		$lineCounter2++;
-
-		#process the pair of lines that share the same row number in the two input files
-		if ($lineCounter1 == $lineCounter2){
-			
-			@lineDataArray1 = split(/\t/, $lineData1);
-			@lineDataArray2 = split(/\t/, $lineData2);
-			
-			$lineArraySize = @lineDataArray1;
-			
-			for (my $index = 0; $index < $lineArraySize; $index++){
-				
-				if ($arithmeticOperation eq "Addition"){
-					#compute the addition of both values
-					$result = $lineDataArray1[$index] + $lineDataArray2[$index];
-				}
-				
-				if ($arithmeticOperation eq "Subtraction"){
-					#compute the subtraction of both values
-					$result = $lineDataArray1[$index] - $lineDataArray2[$index];
-				}	
-				
-				if ($arithmeticOperation eq "Multiplication"){
-					#compute the multiplication of both values
-					$result = $lineDataArray1[$index] * $lineDataArray2[$index];
-				}
-				
-				if ($arithmeticOperation eq "Division"){
-					
-					#check if the denominator is 0
-					if ($lineDataArray2[$index] != 0){
-						#compute the division of both values
-						$result = $lineDataArray1[$index] / $lineDataArray2[$index];
-					}
-					else{
-						die("A denominator could not be zero \n"); 
-					}
-				}
-				
-				#store the result in the output file
-				if ($index < $lineArraySize - 1){
-					print OUTPUT $result . "\t";
-				}
-				else{
-					print OUTPUT $result . "\n";
-				}
-			}
-			next INDEL1;
-		}
-	}
-}	 
-
-#close the input and output files
-close(OUTPUT);
-close(INPUT2);
-close(INPUT1);
\ No newline at end of file
--- a/tools/new_operations/tables_arithmetic_operations.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-<tool id="tables_arithmetic_operations" name="Arithmetic Operations " version="1.0.0">
-  <description>on tables</description>
-  
-  <command interpreter="perl">
-  	tables_arithmetic_operations.pl $inputFile1 $inputFile2 $inputArithmeticOperation3 $outputFile1
-  </command>
-
-  <inputs>
-  	<param format="tabular" name="inputFile1" type="data" label="Select the first input tabular file"/>
-  	<param format="tabular" name="inputFile2" type="data" label="Select the second input tabular file"/>
-  	
-    <param name="inputArithmeticOperation3" type="select" label="Choose the arithmetic operation:">
-    	<option value="Addition">Addition</option>
-      	<option value="Subtraction">Subtraction</option>
-      	<option value="Multiplication">Multiplication</option>
-      	<option value="Division">Division</option>
-    </param>
-  </inputs>
-  
-  <outputs>
-    <data format="tabular" name="outputFile1"/>
-  </outputs>
-  
-  <tests>
-  	<test>
-  		<param name="inputFile1" value="numericalTable1.tabular" />
-  		<param name="inputFile2" value="numericalTable1.tabular" />
-    	<param name="inputArithmeticOperation3" value="Addition" />
-    	<output name="outputFile1" file="table_addition_result.tabular" />
-  	</test>
-  	
-  	<test>
-  		<param name="inputFile1" value="numericalTable1.tabular" />
-  		<param name="inputFile2" value="numericalTable1.tabular" />
-    	<param name="inputArithmeticOperation3" value="Subtraction" />
-    	<output name="outputFile1" file="table_subtraction_result.tabular" />
-  	</test>
-  	
-  	<test>
-  		<param name="inputFile1" value="numericalTable1.tabular" />
-  		<param name="inputFile2" value="numericalTable1.tabular" />
-    	<param name="inputArithmeticOperation3" value="Multiplication" />
-    	<output name="outputFile1" file="table_multiplication_result.tabular" />
-  	</test>
-  	
-  	<test>
-  		<param name="inputFile1" value="numericalTable1.tabular" />
-  		<param name="inputFile2" value="numericalTable1.tabular" />
-    	<param name="inputArithmeticOperation3" value="Division" />
-    	<output name="outputFile1" file="table_division_result.tabular" />
-  	</test>
-  	
-  </tests>
-  
-  
-  <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This program implements arithmetic operations on tabular files data. The program takes three inputs:
-
-- The first input is a TABULAR format file containing numbers only.
-- The second input is a TABULAR format file containing numbers only.
-- The third input is an arithmetic operation: +, -, *, or / for addition, subtraction, multiplication, or division, respectively. 
-- The output file is a TABULAR format file containing the result of implementing the arithmetic operation on both input files.
-
-
-Notes: 
-
-- The two files must have the same number of columns and the same number of rows.
-- The output file has the same number of columns and the same number of rows as each of the two input files.
-- In the case of division, none of the values in the second input file may be 0; otherwise the program will stop and report an error.
-
-**Example**
-
-Let us have the first input file as follows::
-
-	5	4	0
-	10	11	12
-	1	3	1
-	1	2	1
-	2	0	4
-	
-And the second input file as follows::
-
-	5	4	4
-	2	5	8
-	1	2	1
-	3	2	5
-	2	4	4
-
-Running the program and choosing "Addition" as an arithmetic operation will give the following output::
-
-	10	8	4
-	12	16	20
-	2	5	2
-	4	4	6
-	4	4	8
-
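-The same elementwise computation can be sketched in a few lines of Python
-(rows are lists of numbers and ``op`` is one of the four operations; this is
-an illustration, not the tool's Perl implementation)::
-
-	import operator
-
-	def table_arithmetic( table1, table2, op=operator.add ):
-	    # Apply op elementwise; the tables must have identical dimensions.
-	    # Division by zero is left to raise, mirroring the tool's error.
-	    return [ [ op( a, b ) for a, b in zip( row1, row2 ) ]
-	             for row1, row2 in zip( table1, table2 ) ]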
-
-  </help>  
-  
-</tool>
--- a/tools/next_gen_conversion/bwa_solid2fastq_modified.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-#!/usr/bin/perl -w
-
-# Author: lh3
-# Note: Ideally, this script should be written in C. It is a bit slow at present.
-
-use strict;
-use warnings;
-use Getopt::Std;
-
-my %opts;
-my $version = '0.1.3';
-my $usage = qq{
-Usage: solid2fastq.pl <paired> <outfile1> <outfile2> <F3.csfasta> <F3.qual> <R3.csfasta> <R3.qual> 
-
-Note: <in.title> is the string shown in the `# Title:' line of a
-      ".csfasta" read file. Then <in.title>F3.csfasta is the read sequence
-      file and <in.title>F3_QV.qual is the quality file. If
-      <in.title>R3.csfasta is present, this script assumes reads are
-      paired; otherwise reads will be regarded as single-end.
-
-      The read name will be <out.prefix>:panel_x_y/[12] with `1' for the R3
-      tag and `2' for F3. You will usually want to use a short <out.prefix>
-      to save disk space; a long <out.prefix> also causes trouble for maq.
-
-};
-
-getopts('', \%opts);
-die($usage) if (@ARGV != 7);
-my ($is_paired,$outfile1,$outfile2,$f3reads,$f3qual,$r3reads,$r3qual) = @ARGV;
-my (@fhr, @fhw);
-my $fn = '';
-my @fn_suff = ($f3reads,$f3qual,$r3reads,$r3qual);
-if ($is_paired eq "yes") { # paired end
-  for (0 .. 3) {
-	$fn = $fn_suff[$_];
-	$fn = "gzip -dc $fn.gz |" if (!-f $fn && -f "$fn.gz");
-	open($fhr[$_], $fn) || die("** Fail to open '$fn'.\n");
-  }
-  open($fhw[0], "|gzip >$outfile2") || die;
-  open($fhw[1], "|gzip >$outfile1") || die;
-  my (@df, @dr);
-  @df = &read1(1); @dr = &read1(2);
-  while (@df && @dr) {
-	if ($df[0] eq $dr[0]) { # mate pair
-	  print {$fhw[0]} $df[1]; print {$fhw[1]} $dr[1];
-	  @df = &read1(1); @dr = &read1(2);
-	}
-  }
-  close($fhr[$_]) for (0 .. $#fhr);
-  close($fhw[$_]) for (0 .. $#fhw);
-} else { # single end
-  for (0 .. 1) {
-	my $fn = "$fn_suff[$_]";
-	$fn = "gzip -dc $fn.gz |" if (!-f $fn && -f "$fn.gz");
-	open($fhr[$_], $fn) || die("** Fail to open '$fn'.\n");
-  }
-  open($fhw[2], "|gzip >$outfile1") || die;
-  my @df;
-  while (@df = &read1(1, $fhr[0], $fhr[1])) {
-	print {$fhw[2]} $df[1];
-  }
-  close($fhr[$_]) for (0 .. $#fhr);
-  close($fhw[2]);
-}
-
-sub read1 {
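-  # Read one record from the csfasta/qual handle pair selected by $i
-  # ($i == 1 reads the F3 files, $i == 2 the R3 files in this modified
-  # script); returns (sortable key, FASTQ record), or an empty list at EOF.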
-  my $i = shift(@_);
-  my $j = ($i-1)<<1;
-  my ($key, $seq);
-  my ($fhs, $fhq) = ($fhr[$j], $fhr[$j|1]);
-  while (<$fhs>) {
-	my $t = <$fhq>;
-	if (/^>(\d+)_(\d+)_(\d+)_[FR]3/) {
-	  $key = sprintf("%.4d_%.4d_%.4d", $1, $2, $3); # this line could be improved on 64-bit machines
-	  #print $key;
-	  die(qq/** unmatched read name: '$_' != '$t'\n/) unless ($_ eq $t);
-	  my $name = "$1_$2_$3/$i";
-	  $_ = substr(<$fhs>, 2);
-	  tr/0123./ACGTN/;
-	  my $s = $_;
-	  $_ = <$fhq>;
-	  s/^(\d+)\s*//;
-	  s/(\d+)\s*/chr($1+33)/eg;
-	  $seq = qq/\@$name\n$s+\n$_\n/;
-	  last;
-	} 
-  }
-  return defined($seq)? ($key, $seq) : ();
-}
--- a/tools/next_gen_conversion/fastq_conversions.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Performs various conversions between FASTQ data and other data formats
-
-usage: %prog [options]
-   -c, --command=c: Command to run
-   -i, --input=i: Input file to be converted
-   -o, --outputFastqsanger=o: FASTQ Sanger converted output file for sol2std
-   -s, --outputFastqsolexa=s: FASTQ Solexa converted output file 
-   -f, --outputFasta=f: FASTA converted output file
-
-usage: %prog command input_file output_file
-"""
-
-import os, sys, tempfile
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
- 
-def __main__():
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-
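-    # Delegate the actual conversion to the external fq_all2std.pl script;
-    # the template is filled in below with (command, input, output) for
-    # whichever conversion mode was requested.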
-    cmd = "fq_all2std.pl %s %s > %s"
-    if options.command == 'sol2std':
-        cmd = cmd % (options.command, options.input, options.outputFastqsanger)
-    elif options.command == 'std2sol':
-        cmd = cmd % (options.command, options.input, options.outputFastqsolexa)
-    elif options.command == 'fq2fa':
-        cmd = cmd % (options.command, options.input, options.outputFasta)
-    try:
-        os.system(cmd)
-    except Exception, eq:
-        stop_err("Error converting data format.\n" + str(eq))        
-
-if __name__=="__main__": __main__()
--- a/tools/next_gen_conversion/fastq_conversions.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,133 +0,0 @@
-<tool id="fastq_conversions" name="FASTQ Conversions" version="1.0.0">
-  <description>converts between FASTQ data and other data formats</description>
-  <command interpreter="python">
-    fastq_conversions.py 
-    --command=$conversionType.type
-    --input=$input
-    #if $conversionType.type == "sol2std":
-     --outputFastqsanger=$outputFastqsanger
-    #else:
-     --outputFastqsanger="None"
-    #end if
-    #if $conversionType.type == "std2sol":
-     --outputFastqsolexa=$outputFastqsolexa
-    #else:
-     --outputFastqsolexa="None"
-    #end if
-    #if $conversionType.type == "fq2fa":
-     --outputFasta=$outputFasta
-    #else:
-     --outputFasta="None"
-    #end if
-  </command>
-  <inputs>
-    <conditional name="conversionType">
-      <param name="type" type="select" label="What type of conversion do you want to do?">
-        <option value="sol2std">Solexa/Illumina FASTQ to standard Sanger FASTQ</option>
-        <option value="std2sol">Standard Sanger FASTQ to Solexa/Illumina FASTQ</option>
-        <option value="fq2fa">Various FASTQ to FASTA</option>
-      </param>
-      <when value="sol2std">
-        <param name="input" type="data" format="fastqsolexa" label="File to convert" />
-      </when>
-      <when value="std2sol">
-        <param name="input" type="data" format="fastqsanger" label="File to convert" />
-      </when>
-      <when value="fq2fa">
-        <param name="input" type="data" format="fastqsolexa, fastqsanger" label="File to convert" />
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data name="outputFastqsanger" format="fastqsanger">
-      <filter>conversionType['type'] == 'sol2std'</filter>
-    </data>
-    <data name="outputFastqsolexa" format="fastqsolexa">
-      <filter>conversionType['type'] == 'std2sol'</filter>
-    </data>
-    <data name="outputFasta" format="fasta">
-      <filter>conversionType['type'] == 'fq2fa'</filter>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <param name="type" value="sol2std" />
-      <param name="input" value="fastq_conv_in1.fastq" ftype="fastqsolexa" />
-      <output name="outputFastqsanger" file="fastq_conv_out1.fastqsanger" />
-    </test>
-    <test>
-      <param name="type" value="std2sol" />
-      <param name="input" value="1.fastqsanger" ftype="fastqsanger" />
-      <output name="outputFastqsolexa" file="fastq_conv_out2.fastqsolexa" />
-    </test>
-    <test>
-      <param name="type" value="fq2fa" />
-      <param name="input" value="1.fastqsanger" ftype="fastqsanger" />
-      <output name="outputFasta" file="fastq_conv_out4.fasta" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-This tool offers several conversion options relating to the FASTQ format.
-
------
-
-**Examples**
-
-- Converting the Solexa/Illumina FASTQ data::
-
-	@081017-and-081020:1:1:1715:1759
-	GGACTCAGATAGTAATCCACGCTCCTTTAAAATATC
-	+
-	II#IIIIIII$5+.(9IIIIIII$%*$G$A31I&amp;&amp;B
-
-- will produce the following Sanger FASTQ data::
-
-	@081017-and-081020:1:1:1715:1759
-	GGACTCAGATAGTAATCCACGCTCCTTTAAAATATC
-	+
-	++!+++++++!!!!!"+++++++!!!!)!%!!+!!%
-	
-- Converting standard Sanger FASTQ::
-
-	@1831_573_1004/1
-	AATACTTTCGGCGCCCTAAACCAGCTCACTGGGG
-	+
-	>&lt;C&amp;&amp;9952+C>5&lt;.?&lt;79,=42&lt;292:&lt;(9/-7
-	@1831_573_1050/1
-	TTTATGGGTATGGCCGCTCACAGGCCAGCGGCCT
-	+
-	;@@17?@=>7??@A8?==@4A?A4)&amp;+.'&amp;+'1,
-
-- will produce the following Solexa/Illumina FASTQ data::
-
-	@1831_573_1004/1
-	AATACTTTCGGCGCCCTAAACCAGCTCACTGGGG
-	+
-	][bEEXXTQJb]T[M^[VXK\SQ[QXQY[GXNLV
-	@1831_573_1050/1
-	TTTATGGGTATGGCCGCTCACAGGCCAGCGGCCT
-	+
-	Z__PV^_\]V^^_`W^\\_S`^`SHEJMFEJFPK
-
-- Converting the Sanger FASTQ data::
-
-	@1831_573_1004/1
-	AATACTTTCGGCGCCCTAAACCAGCTCACTGGGG
-	+
-	>&lt;C&amp;&amp;9952+C>5&lt;.?&lt;79,=42&lt;292:&lt;(9/-7
-	@1831_573_1050/1
-	TTTATGGGTATGGCCGCTCACAGGCCAGCGGCCT
-	+
-	;@@17?@=>7??@A8?==@4A?A4)&amp;+.'&amp;+'1,
-	
-- will produce the following FASTA data::
-
-	>1831_573_1004/1
-	AATACTTTCGGCGCCCTAAACCAGCTCACTGGGG
-	>1831_573_1050/1
-	TTTATGGGTATGGCCGCTCACAGGCCAGCGGCCT
-
-  </help>
-</tool>
--- a/tools/next_gen_conversion/fastq_gen_conv.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,172 +0,0 @@
-"""
-Converts any type of FASTQ file to Sanger type and makes small adjustments if necessary.
-
-usage: %prog [options]
-   -i, --input=i: Input FASTQ candidate file
-   -r, --origType=r: Original type
-   -a, --allOrNot=a: Whether or not to check all blocks
-   -b, --blocks=b: Number of blocks to check
-   -o, --output=o: Output file
-
-usage: %prog input_file output_file
-"""
-
-import math, sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-    
-def all_bases_valid(seq):
-    """Confirm that the sequence contains only bases"""
-    valid_bases = ['a', 'A', 'c', 'C', 'g', 'G', 't', 'T', 'N']
-    for base in seq:
-        if base not in valid_bases:
-            return False
-    return True
-
-def __main__():
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    orig_type = options.origType
-    if orig_type == 'sanger' and options.allOrNot == 'not':
-        max_blocks = int(options.blocks)
-    else:
-        max_blocks = -1
-    fin = file(options.input, 'r')
-    fout = file(options.output, 'w')
-    range_min = 1000
-    range_max = -5
-    block_num = 0
-    bad_blocks = 0
-    base_len = -1
-    line_count = 0
-    lines = []
-    line = fin.readline()
-    while line:
-        if line.strip() and max_blocks >= 0 and block_num > 0 and orig_type == 'sanger' and block_num >= max_blocks:
-            fout.write(line)
-            if line_count % 4 == 0:
-                block_num += 1
-            line_count += 1
-        elif line.strip():
-            # the line that starts a block, with a name
-            if line_count % 4 == 0 and line.startswith('@'):
-                lines.append(line)
-            else:
-                # if we expect a sequence of bases
-                if line_count % 4 == 1 and all_bases_valid(line.strip()):
-                    lines.append(line)
-                    base_len = len(line.strip())
-                # if we expect the second name line
-                elif line_count % 4 == 2 and line.startswith('+'):
-                    lines.append(line)
-                # if we expect a sequence of qualities and it's the expected length
-                elif line_count % 4 == 3:
-                    split_line = line.strip().split()
-                    # decimal qualities
-                    if len(split_line) == base_len:
-                        # convert
-                        phred_list = []
-                        for ch in split_line:
-                            int_ch = int(ch)
-                            if int_ch < range_min:
-                                range_min = int_ch
-                            if int_ch > range_max:
-                                range_max = int_ch
-                            if int_ch >= 0 and int_ch <= 93:
-                                phred_list.append(chr(int_ch + 33))
-                        # make sure we haven't lost any quality values
-                        if len(phred_list) == base_len:
-                            # print first three lines
-                            for l in lines:
-                                fout.write(l)
-                            # print converted quality line
-                            fout.write(''.join(phred_list) + '\n')
-                            # reset
-                            lines = []
-                            base_len = -1
-                        # abort if so
-                        else:
-                            bad_blocks += 1
-                            lines = []
-                            base_len = -1
-                    # ascii qualities
-                    elif len(split_line[0]) == base_len:
-                        qualities = []
-                        # print converted quality line
-                        if orig_type == 'illumina':
-                            for c in line.strip():
-                                if ord(c) - 64 < range_min:
-                                    range_min = ord(c) - 64
-                                if ord(c) - 64 > range_max:
-                                    range_max = ord(c) - 64
-                                if ord(c) < 64 or ord(c) > 126:
-                                    bad_blocks += 1
-                                    base_len = -1
-                                    lines = []
-                                    break
-                                else:
-                                    qualities.append( chr( ord(c) - 31 ) )
-                            quals = ''.join(qualities)
-                        elif orig_type == 'solexa':
-                            for c in line.strip():
-                                if ord(c) - 64 < range_min:
-                                    range_min = ord(c) - 64
-                                if ord(c) - 64 > range_max:
-                                    range_max = ord(c) - 64
-                                if ord(c) < 59 or ord(c) > 126:
-                                    bad_blocks += 1
-                                    base_len = -1
-                                    lines = []
-                                    break
-                                else:
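-                                    # Solexa scores are odds-based: Q_solexa = -10*log10(p/(1-p)).
-                                    # Recover the error probability p, then re-encode it as a
-                                    # standard Phred score, -10*log10(p), offset by 33 for Sanger.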
-                                    p = 10.0**( ( ord(c) - 64 ) / -10.0 ) / ( 1 + 10.0**( ( ord(c) - 64 ) / -10.0 ) )
-                                    qualities.append( chr( int( -10.0*math.log10( p ) ) + 33 ) )
-                            quals = ''.join(qualities)
-                        else:  # 'sanger'
-                            for c in line.strip():
-                                if ord(c) - 33 < range_min:
-                                    range_min = ord(c) - 33
-                                if ord(c) - 33 > range_max:
-                                    range_max = ord(c) - 33
-                                if ord(c) < 33 or ord(c) > 126:
-                                    bad_blocks += 1
-                                    base_len = -1
-                                    lines = []
-                                    break
-                                else:
-                                    qualities.append(c)
-                            quals = ''.join(qualities)
-                        # make sure we don't have bad qualities
-                        if len(quals) == base_len:
-                            # print first three lines
-                            for l in lines:
-                                fout.write(l)
-                            # print out quality line
-                            fout.write(quals+'\n')
-                        # reset
-                        lines = []
-                        base_len = -1
-                    else:
-                        bad_blocks += 1
-                        base_len = -1
-                        lines = []
-                    # mark the successful end of a block
-                    block_num += 1
-            line_count += 1
-        line = fin.readline()
-    fout.close()
-    fin.close()
-    if range_min != 1000 and range_max != -5:
-        outmsg = 'The range of quality values found was: %s to %s' % (range_min, range_max)
-    else:
-        outmsg = ''
-    if bad_blocks > 0:
-        outmsg += '\nThere were %s bad blocks skipped' % (bad_blocks)
-    sys.stdout.write(outmsg)
-
-if __name__=="__main__": __main__() 
\ No newline at end of file
--- a/tools/next_gen_conversion/fastq_gen_conv.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-<tool id="fastq_gen_conv" name="FASTQ Groomer" version="1.0.0">
-  <description>converts any FASTQ to Sanger</description>
-  <command interpreter="python">
-    fastq_gen_conv.py 
-     --input=$input 
-     --origType=$origTypeChoice.origType
-     #if $origTypeChoice.origType == "sanger":
-      --allOrNot=$origTypeChoice.howManyBlocks.allOrNot 
-      #if $origTypeChoice.howManyBlocks.allOrNot == "not":
-       --blocks=$origTypeChoice.howManyBlocks.blocks
-      #else:
-       --blocks="None"
-      #end if
-     #else:
-      --allOrNot="None"
-      --blocks="None"
-     #end if
-     --output=$output
-  </command>
-  <inputs>
-    <param name="input" type="data" format="fastq" label="Groom this dataset" />
-    <conditional name="origTypeChoice">
-      <param name="origType" type="select" label="How do you think quality values are scaled?" help="See below for explanation">
-        <option value="solexa">Solexa/Illumina 1.0</option>
-        <option value="illumina">Illumina 1.3+</option>
-        <option value="sanger">Sanger (validation only)</option>
-      </param>
-      <when value="solexa" />
-      <when value="illumina" />
-      <when value="sanger">
-        <conditional name="howManyBlocks">
-          <param name="allOrNot" type="select" label="Since your fastq is already in Sanger format you can check it for consistency">
-            <option value="all">Check all (may take a while)</option> 
-            <option selected="true" value="not">Check selected number of blocks</option>
-          </param>
-          <when value="all" />
-          <when value="not">
-            <param name="blocks" type="integer" value="1000" label="How many blocks (four lines each) do you want to check?" />
-          </when>
-        </conditional>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data name="output" format="fastqsanger"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="fastq_gen_conv_in1.fastq" ftype="fastq" />
-      <param name="origType" value="solexa" />
-      <output name="output" format="fastqsanger" file="fastq_gen_conv_out1.fastqsanger" />
-    </test>
-    <test>
-      <param name="input" value="fastq_gen_conv_in2.fastq" ftype="fastq" />
-      <param name="origType" value="sanger" />
-      <param name="allOrNot" value="not" />
-      <param name="blocks" value="3" />
-      <output name="output" format="fastqsanger" file="fastq_gen_conv_out2.fastqsanger" />
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-The Galaxy pipeline for mapping Illumina data requires data to be in fastq format with quality values conforming to the so-called "Sanger" format. Unfortunately, there are many other types of fastq. Thus the main objective of this tool is to "groom" multiple types of fastq into Sanger-conforming fastq that can be used in downstream applications such as mapping.
-
-.. class:: infomark
-
-**TIP**: If the input dataset is already in Sanger format the tool does not perform conversion. However validation (described below) is still performed.
-
------
-
-**Types of fastq datasets**
-
-A good description of fastq datasets can be found `here`__, while a description of Galaxy's fastq "logic" can be found `here`__. Because the ranges of quality values within different types of fastq datasets overlap, it is very difficult to detect them automatically. This tool supports conversion of two commonly found types (Solexa/Illumina 1.0 and Illumina 1.3+) into fastq Sanger.
-
- .. __: http://en.wikipedia.org/wiki/FASTQ_format
- .. __: http://wiki.g2.bx.psu.edu/Admin/NGS%20Local%20Setup
-
-.. class:: warningmark
-
-**NOTE** that there is also a type of fastq format where quality values are represented by a list of space-delimited integers (e.g., 40 40 20 15 -5 20 ...). This tool **does not** handle such fastq. If you have such a dataset, it needs to be converted into ASCII-type fastq (where quality values are encoded by characters) by the "Numeric-to-ASCII" utility before it can be accepted by this tool.
-
------
-
-**Validation**
-
-In addition to converting quality values to Sanger format, the tool also checks the input dataset for consistency. Specifically, it performs the following checks:
-
-- skips empty lines
-- checks that blocks are properly formed by making sure that:
-
-  #. there are four lines per block
-  #. the first line starts with "@"
-  #. the third line starts with "+"
-  #. the lengths of the second line (sequence) and the fourth line (quality string) are identical
-  
-- checks that quality values are within range for the chosen fastq format (i.e., the format selected by the user in the **How do you think quality values are scaled?** drop-down); a minimal sketch of these checks is shown below.
-
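-A minimal sketch of the block-structure checks above (standalone Python 2,
-not the tool's actual code path)::
-
-    def block_ok( lines ):
-        # 'lines' holds one four-line fastq block, newlines stripped
-        return ( len( lines ) == 4 and lines[0].startswith( '@' )
-                 and lines[2].startswith( '+' )
-                 and len( lines[1] ) == len( lines[3] ) )
-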
-To see exactly what the tool does you can take a look at its source code `here`__.
-
- .. __: http://bitbucket.org/galaxy/galaxy-central/src/tip/tools/next_gen_conversion/fastq_gen_conv.py
-
-
-    </help>
-</tool>
--- a/tools/next_gen_conversion/solid2fastq.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,214 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import string
-import optparse
-import tempfile
-import sqlite3
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-    
-def solid2sanger( quality_string, min_qual = 0 ):
-    sanger = ""
-    quality_string = quality_string.rstrip( " " )
-    for qv in quality_string.split(" "):
-        try:
-            if int( qv ) < 0:
-                qv = '0'
-            if int( qv ) < min_qual:
-            if int( qv ) < min_qual:
-                return False
-            sanger += chr( int( qv ) + 33 )
-        except:
-            pass    
-    return sanger
-
-def Translator(frm='', to='', delete='', keep=None):
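-    # Build a reusable translation callable (Python 2 string.translate):
-    # a single 'to' character is broadcast across all of 'frm', and the
-    # optional keep/delete arguments control which characters survive.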
-    allchars = string.maketrans('','')
-    if len(to) == 1:
-        to = to * len(frm)
-    trans = string.maketrans(frm, to)
-    if keep is not None:
-        delete = allchars.translate(allchars, keep.translate(allchars, delete))
-    def callable(s):
-        return s.translate(trans, delete)
-    return callable
-    
-def merge_reads_qual( f_reads, f_qual, f_out, trim_name=False, out='fastq', double_encode = False, trim_first_base = False, pair_end_flag = '', min_qual = 0, table_name=None ):
- 
-    # Reads from two files, f_reads (reads) and f_qual (quality values), and produces output in one of three
-    # formats depending on the out parameter, which can have three values: fastq, txt, and db
-    # fastq = fastq format
-    # txt = space delimited format with defline, reads, and qvs
-    # db = dump data into a sqlite3 db
-    # IMPORTANT! If out = db, two options must be provided:
-    #   1. f_out must be a db connection object initialized with sqlite3.connect()
-    #   2. table_name must be provided
-    
-    if out == 'db':
-        cursor = f_out.cursor()
-        sql = "create table %s (name varchar(50) not null, read blob, qv blob)" % table_name
-        cursor.execute(sql)
-    
-    lines = []
-    line = " "
-    while line:
-        for f in [ f_reads, f_qual ]:
-            line = f.readline().rstrip( '\n\r' )
-            while line.startswith( '#' ):
-                line = f.readline().rstrip( '\n\r' )
-            lines.append( line )
-    
-            
-        if lines[0].startswith( '>' ) and lines[1].startswith( '>' ):
-            
-            if lines[0] != lines[1]:
-                stop_err('Reads and quality score files are out of sync and likely corrupted. Please check your input data')
-            
-            defline = lines[0][1:]
-            if trim_name and ( defline[ len( defline )-3: ] == "_F3" or defline[ len( defline )-3: ] == "_R3" ):
-                defline = defline[ : len( defline )-3 ]
-                
-        elif ( not lines[0].startswith( '>' ) and not lines[1].startswith( '>' ) and len( lines[0] ) > 0 and len( lines[1] ) > 0 ):
-
-            if trim_first_base:
-                lines[0] = lines[0][1:]
-            if double_encode:
-                de = Translator(frm="0123.", to="ACGTN")
-                lines[0] = de(lines[0])
-            qual = solid2sanger( lines[1], int( min_qual ) )
-            if qual:
-                if out == 'fastq':
-                    f_out.write( "@%s%s\n%s\n+\n%s\n" % ( defline, pair_end_flag, lines[0], qual ) ) 
-                if out == 'txt':
-                    f_out.write( '%s %s %s\n' % (defline, lines[0], qual ) )
-                if out == 'db':
-                    cursor.execute('insert into %s values("%s","%s","%s")' % (table_name, defline, lines[0], qual ) )     
-        lines = []
-
-def main():
-
-    usage = "%prog --fr F3.csfasta --fq F3.qual --fout fastq_output_file [option]"
-    parser = optparse.OptionParser(usage=usage)
-    
-        
-    parser.add_option(
-        '--fr','--f_reads',
-        metavar="F3_CSFASTA_FILE",
-        dest='fr',
-        help='Name of F3 file with color space reads')
-        
-    parser.add_option(
-        '--fq','--f_qual',
-        metavar="F3_QUAL_FILE",
-        dest='fq',
-        help='Name of F3 file with color quality values')
-        
-    parser.add_option(
-        '--fout','--f3_fastq_output',
-        metavar="F3_OUTPUT",
-        dest='fout',
-        help='Name for F3 output file')
-                
-    parser.add_option(
-        '--rr','--r_reads',
-        metavar="R3_CSFASTA_FILE",
-        dest='rr',
-        default = False,
-        help='Name of R3 file with color space reads')
-        
-    parser.add_option(
-        '--rq','--r_qual',
-        metavar="R3_QUAL_FILE",
-        dest='rq',
-        default = False,
-        help='Name of R3 file with color quality values')
-        
-    parser.add_option(
-        '--rout',
-        metavar="R3_OUTPUT",
-        dest='rout',
-        help='Name for R3 output file')
-    
-    parser.add_option(
-        '-q','--min_qual',
-        dest='min_qual',
-        default = '-1000',
-        help='Minimum quality threshold for printing reads. If a read contains a single call with QV lower than this value, it will not be reported. Default is -1000')
-        
-    parser.add_option(
-        '-t','--trim_name',
-        dest='trim_name',
-        action='store_true',
-        default = False,
-        help='Trim _R3 and _F3 off read names. Default is False')
-    
-    parser.add_option(
-        '-f','--trim_first_base',
-        dest='trim_first_base',
-        action='store_true',
-        default = False,
-        help='Remove the first base of reads in color-space. Default is False')
-        
-    parser.add_option(
-        '-d','--double_encode',
-        dest='de',
-        action='store_true',
-        default = False,
-        help='Double encode color calls as nucleotides: 0123. becomes ACGTN. Default is False')
-    
-    options, args = parser.parse_args()
-    
-    if not ( options.fout and options.fr and options.fq ):
-        parser.error("""
-        One or more of the three required parameters is missing:
-        (1) --fr F3.csfasta file
-        (2) --fq F3.qual file
-        (3) --fout name of output file
-        Use --help for more info
-        """)
-
-    fr =  open ( options.fr , 'r' ) 
-    fq =  open ( options.fq , 'r' ) 
-    f_out = open ( options.fout , 'w' )
-    
-    if options.rr and options.rq:
-        rr =  open ( options.rr , 'r' ) 
-        rq =  open ( options.rq , 'r' ) 
-        if not options.rout:
-            parser.error("Provide the name for the R3 output using the --rout option. Use --help for more info")
-        r_out = open ( options.rout, 'w' )
-    
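-        # Pair mates via a throwaway SQLite database: load F3 and R3 reads
-        # into two tables, index both by read name, and join on the name so
-        # that only properly mated reads reach the two fastq outputs.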
-        db = tempfile.NamedTemporaryFile()
-        
-        try:
-            con = sqlite3.connect(db.name)
-            cur = con.cursor()
-        except:
-            stop_err('Cannot connect to %s\n' % db.name)
-    
-         
-        merge_reads_qual( fr, fq, con, trim_name=options.trim_name, out='db', double_encode=options.de, trim_first_base=options.trim_first_base, min_qual=options.min_qual, table_name="f3" )
-        merge_reads_qual( rr, rq, con, trim_name=options.trim_name, out='db', double_encode=options.de, trim_first_base=options.trim_first_base, min_qual=options.min_qual, table_name="r3" )
-        cur.execute('create index f3_name on f3( name )')
-        cur.execute('create index r3_name on r3( name )')
-         
-        cur.execute('select * from f3,r3 where f3.name = r3.name')
-        for item in cur:
-            f_out.write( "@%s%s\n%s\n+\n%s\n" % (item[0], "/1", item[1], item[2]) )
-            r_out.write( "@%s%s\n%s\n+\n%s\n" % (item[3], "/2", item[4], item[5]) )
-        
-            
-    else:
-        merge_reads_qual( fr, fq, f_out, trim_name=options.trim_name, out='fastq', double_encode = options.de, trim_first_base = options.trim_first_base, min_qual=options.min_qual )
-        
-    
-      
-    f_out.close()
-
-if __name__ == "__main__":
-    main()
-    
- 
\ No newline at end of file
--- a/tools/next_gen_conversion/solid2fastq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,160 +0,0 @@
-<tool id="solid2fastq" name="Convert">
-  <description>SOLiD output to fastq</description>
-  <command interpreter="python">
-    #if   $is_run.paired == "no"    #solid2fastq.py --fr=$input1 --fq=$input2 --fout=$out_file1 -q $qual $trim_name $trim_first_base $double_encode
-    #elif $is_run.paired == "yes"   #solid2fastq.py --fr=$input1 --fq=$input2 --fout=$out_file1 --rr=$input3 --rq=$input4 --rout=$out_file2 -q $qual $trim_name $trim_first_base $double_encode
-    #end if#
-  </command>
-  <inputs>
-    <param name="input1" type="data" format="csfasta" label="Select reads"/>
-    <param name="input2" type="data" format="qualsolid" label="Select qualities"/>
-    <conditional name="is_run">
-        <param name="paired" type="select" label="Is this a mate-pair run?">
-            <option value="no" selected="true">No</option>
-            <option value="yes">Yes</option>
-        </param>
-        <when value="yes">
-            <param name="input3" type="data" format="csfasta" label="Select Reverse reads"/>
-            <param name="input4" type="data" format="qualsolid" label="Select Reverse qualities"/>
-        </when>
-        <when value="no">
-        </when>
-    </conditional>
-    <param name="qual" label="Remove reads containing color qualities below this value" type="integer" value="0"/>
-    <param name="trim_name" type="select" label="Trim trailing &quot;_F3&quot; and &quot;_R3&quot; ?">
-        <option value="-t" selected="true">Yes</option>
-        <option value="">No</option>
-    </param>
-    <param name="trim_first_base" type="select" label="Trim first base?">
-        <option value="-f">Yes (BWA)</option>
-        <option value="" selected="true">No (bowtie)</option>
-    </param>
-    <param name="double_encode" type="select" label="Double encode?">
-        <option value="-d">Yes (BWA)</option>
-        <option value="" selected="true">No (bowtie)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fastqcssanger" name="out_file1"/>
-    <data format="fastqcssanger" name="out_file2">
-        <filter>is_run['paired'] == 'yes'</filter>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="fr.csfasta" ftype="csfasta"/>
-      <param name="input2" value="fr.qualsolid" ftype="qualsolid" />
-      <param name="paired" value="no"/>
-      <param name="qual" value="0" />
-      <param name="trim_first_base" value="No" />
-      <param name="trim_name" value="No" />
-      <param name="double_encode" value="No"/>
-      <output name="out_file1" file="solid2fastq_out_1.fastq"/>
-    </test>
-    <test>
-      <param name="input1" value="fr.csfasta" ftype="csfasta"/>
-      <param name="input2" value="fr.qualsolid" ftype="qualsolid" />
-      <param name="paired" value="yes"/>
-      <param name="input3" value="rr.csfasta" ftype="csfasta"/>
-      <param name="input4" value="rr.qualsolid" ftype="qualsolid" />
-      <param name="qual" value="0" />
-      <param name="trim_first_base" value="No" />
-      <param name="trim_name" value="Yes" />
-      <param name="double_encode" value="No"/>
-      <output name="out_file1" file="solid2fastq_out_2.fastq"/>
-      <output name="out_file2" file="solid2fastq_out_3.fastq"/>
-    </test>
- </tests>
-<help>
-
-**What it does**
-
-Converts the output of SOLiD instruments (versions 3.5 and earlier) to fastq format suitable for the bowtie, bwa, and PerM mappers.
-
---------
-
-**Input datasets**
-
-Below are examples of forward (F3) reads and quality scores:
-
-Reads::
-
-    >1831_573_1004_F3
-    T00030133312212111300011021310132222
-    >1831_573_1567_F3
-    T03330322230322112131010221102122113
-
-Quality scores::
-
-    >1831_573_1004_F3
-    4 29 34 34 32 32 24 24 20 17 10 34 29 20 34 13 30 34 22 24 11 28 19 17 34 17 24 17 25 34 7 24 14 12 22
-    >1831_573_1567_F3
-    8 26 31 31 16 22 30 31 28 29 22 30 30 31 32 23 30 28 28 31 19 32 30 32 19 8 32 10 13 6 32 10 6 16 11
-
-
-**Mate pairs**
-
-If your data is from a mate-paired run, you will have additional read and quality datasets that will look similar to the ones above, with one exception: the read names will end with &quot;_R3&quot;.
-In this case choose **Yes** from the *Is this a mate-pair run?* drop down and you will be able to select the R reads. When processing mate pairs this tool generates two output files: one for F3 reads and the other for R3 reads.
-The reads are guaranteed to be paired -- mated reads will be in the same positions in the F3 and R3 fastq files. However, because pairing is verified, it may take a while to process an entire SOLiD run (several hours).
-
-------
-
-**Explanation of parameters**
-
-**Remove reads containing color qualities below this value** - any read that contains at least one color call with quality lower than the specified value **will not** be reported.
-
-**Trim trailing &quot;_F3&quot; and &quot;_R3&quot;?** - does just that. Not necessary for bowtie. Required for BWA.
-
-**Trim first base?** - SOLiD reads contain an adapter base such as the first T in this read::
-
-    >1831_573_1004_F3
-    T00030133312212111300011021310132222
-  
-this option removes this base, leaving only color calls. Not necessary for bowtie. Required for BWA.
-
-**Double encode?** - converts color calls (0123.) to pseudo-nucleotides (ACGTN). Not necessary for bowtie. Required for BWA.
-
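-The same double encoding in one line of Python 2 (an illustration, not the
-tool's code)::
-
- import string
- print '0123.'.translate( string.maketrans( '0123.', 'ACGTN' ) )  # prints ACGTN
-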
-------
-
-**Examples of output**
-
-When all parameters are left &quot;as-is&quot; you will get this (using reads and qualities shown above)::
-
- @1831_573_1004
- T00030133312212111300011021310132222
- +
- %>CCAA9952+C>5C.?C79,=42C292:C(9/-7
- @1831_573_1567
- T03330322230322112131010221102122113
- +
- );@@17?@=>7??@A8?==@4A?A4)A+.'A+'1,
-
-Setting *Trim first base from reads* to **Yes** will produce this::
-
- @1831_573_1004
- 00030133312212111300011021310132222
- +
- %>CCAA9952+C>5C.?C79,=42C292:C(9/-7
- @1831_573_1567
- 03330322230322112131010221102122113
- +
- );@@17?@=>7??@A8?==@4A?A4)A+.'A+'1,
-
-Finally, setting *Double encode* to **Yes** will yield::
-
- @1831_573_1004
- TAAATACTTTCGGCGCCCTAAACCAGCTCACTGGGG
- +
- %>CCAA9952+C>5C.?C79,=42C292:C(9/-7
- @1831_573_1567
- TATTTATGGGTATGGCCGCTCACAGGCCAGCGGCCT
- +
- );@@17?@=>7??@A8?==@4A?A4)A+.'A+'1,
-
-
- 
-
- 
-</help>
-</tool>
--- a/tools/next_gen_conversion/solid_to_fastq.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Converts SOLiD data to Sanger FASTQ format.
-
-usage: %prog [options]
-   -i, --input1=i: Forward reads file
-   -q, --input2=q: Forward qual file
-   -I, --input3=I: Reverse reads file
-   -Q, --input4=Q: Reverse qual file
-   -o, --output1=o: Forward output
-   -r, --output2=r: Reverse output
-
-usage: %prog forward_reads_file forward_qual_file reverse_reads_file(or_None) reverse_qual_file(or_None) output_file output_id output_dir
-"""
-
-import os, sys, tempfile
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-    
-def replaceNeg1(fin, fout):
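-    # SOLiD qual files can contain -1 for missing color calls; rewrite those
-    # as 1 so the downstream converter only sees non-negative scores, then
-    # rewind the temp file so it can be read back from the start.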
-    line = fin.readline()
-    while line.strip():
-        fout.write(line.replace('-1', '1'))
-        line = fin.readline()
-    fout.seek(0)
-    return fout
- 
-def __main__():
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    # common temp file setup
-    tmpf = tempfile.NamedTemporaryFile()    #forward reads
-    tmpqf = tempfile.NamedTemporaryFile()
-    tmpqf = replaceNeg1(file(options.input2,'r'), tmpqf)
-    # if paired-end data (have reverse input files)
-    if options.input3 != "None" and options.input4 != "None":
-        tmpr = tempfile.NamedTemporaryFile()    #reverse reads
-        # replace the -1 in the qualities file 
-        tmpqr = tempfile.NamedTemporaryFile()
-        tmpqr = replaceNeg1(file(options.input4,'r'), tmpqr)
-        cmd1 = "%s/bwa_solid2fastq_modified.pl 'yes' %s %s %s %s %s %s 2>&1" %(os.path.split(sys.argv[0])[0], tmpf.name, tmpr.name, options.input1, tmpqf.name, options.input3, tmpqr.name)
-        try:
-            os.system(cmd1)
-            os.system('gunzip -c %s >> %s' %(tmpf.name,options.output1))
-            os.system('gunzip -c %s >> %s' %(tmpr.name,options.output2))
-        except Exception, eq:
-            stop_err("Error converting data to fastq format.\n" + str(eq))
-        tmpr.close()
-        tmpqr.close()
-    # if single-end data
-    else:
-        cmd1 = "%s/bwa_solid2fastq_modified.pl 'no' %s %s %s %s %s %s 2>&1" % (os.path.split(sys.argv[0])[0], tmpf.name, None, options.input1, tmpqf.name, None, None)
-        try:
-            os.system(cmd1)
-            os.system('gunzip -c %s >> %s' % (tmpf.name, options.output1))
-        except Exception, eq:
-            stop_err("Error converting data to fastq format.\n" + str(eq))
-    tmpqf.close()
-    tmpf.close()
-    sys.stdout.write('converted SOLiD data')
-
-if __name__=="__main__": __main__()
--- a/tools/next_gen_conversion/solid_to_fastq.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-<tool id="solid_to_fastq" name="SOLiD-to-FASTQ" version="1.0.0">
-  <description>converts SOLiD data to FASTQ data</description>
-  <command interpreter="python">
-    solid_to_fastq.py 
-    --input1=$input1 
-    --input2=$input2
-    #if $paired.pairedSingle == "single":
-     --input3="None"
-     --input4="None"
-    #else:
-     --input3=$input3
-     --input4=$input4
-    #end if
-    --output1=$output1
-    #if $paired.pairedSingle == "single":
-     --output2="None"
-    #else:
-     --output2=$output2
-    #end if
-  </command>
-  <inputs>
-    <conditional name="paired">
-      <param name="pairedSingle" type="select" label="Is this library mate-paired?">
-        <option value="single">Single</option>
-        <option value="paired">Paired</option>
-      </param>
-      <when value="single">
-        <param name="input1" type="data" format="csfasta" label="F3 read file" />
-        <param name="input2" type="data" format="qualsolid" label="F3 qual file" />
-      </when>
-      <when value="paired">
-        <param name="input1" type="data" format="csfasta" label="F3 read file" />
-        <param name="input2" type="data" format="qualsolid" label="F3 qual file" />
-        <param name="input3" type="data" format="csfasta" label="R3 read file" />
-        <param name="input4" type="data" format="qualsolid" label="R3 qual file" />      
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <!-- Variable number of outputs. Either one (for single-end) or two (for paired-end) -->
-    <data name="output1" format="fastqsanger"/>
-    <data name="output2" format="fastqsanger">
-      <filter>paired['pairedSingle'] == 'paired'</filter>
-    </data>    
-  </outputs>
-  <tests>
-    <test>
-      <param name="pairedSingle" value="single" />
-      <param name="input1" value="s2fq_phiX.csfasta" ftype="csfasta" />
-      <param name="input2" value="s2fq_phiX.qualsolid" ftype="qualsolid" />
-      <output name="output1" file="s2fq_out1.fastqsanger" />
-    </test>
-    <test>
-      <param name="pairedSingle" value="paired" />
-      <param name="input1" value="s2fq_paired_F3.csfasta" ftype="csfasta" />
-      <param name="input2" value="s2fq_paired_F3_QV.qualsolid" ftype="qualsolid" />
-      <param name="input3" value="s2fq_paired_R3.csfasta" ftype="csfasta" />
-      <param name="input4" value="s2fq_paired_R3_QV.qualsolid" ftype="qualsolid" />
-      <output name="output1" file="s2fq_out2.fastqsanger" />
-      <!-- testing framework does not deal with multiple outputs yet
-      <output name="output2" file="s2fq_out3.fastqsanger" />
-      -->
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool takes reads and quality files and converts them to FASTQ data (Sanger variant). Any -1 qualities are converted to 1 before being converted to FASTQ. Note that it also double-encodes the color-space sequences as nucleotide bases.
-
------
-
-**Example**
-
-- Converting the following sequences::
-
-    >1831_573_1004_F3
-    T00030133312212111300011021310132222
-    >1831_573_1567_F3
-    T03330322230322112131010221102122113
-
-- and quality scores::
-
-    >1831_573_1004_F3
-    4 29 34 34 32 32 24 24 20 17 10 34 29 20 34 13 30 34 22 24 11 28 19 17 34 17 24 17 25 34 7 24 14 12 22
-    >1831_573_1567_F3
-    8 26 31 31 16 22 30 31 28 29 22 30 30 31 32 23 30 28 28 31 19 32 30 32 19 8 32 10 13 6 32 10 6 16 11
-
-- will produce the following Sanger FASTQ data::
-
-    @1831_573_1004/1
-    AATACTTTCGGCGCCCTAAACCAGCTCACTGGGG
-    +
-    >CCAA9952+C>5C.?C79,=42C292:C(9/-7
-    @1831_573_1567/1
-    TTTATGGGTATGGCCGCTCACAGGCCAGCGGCCT
-    +
-    ;@@17?@=>7??@A8?==@4A?A4)A+.'A+'1,
-
-    </help>
-</tool>
--- a/tools/ngs_rna/cuffcompare_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,131 +0,0 @@
-#!/usr/bin/env python
-
-import optparse, os, shutil, subprocess, sys, tempfile
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-# Copied from sam_to_bam.py:
-def check_seq_file( dbkey, cached_seqs_pointer_file ):
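-    # Each data line of the .loc file is expected to look like
-    #   index<TAB>dbkey<TAB>/path/to/genome.fa
-    # Return the path whose dbkey matches, or '' if none does.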
-    seq_path = ''
-    for line in open( cached_seqs_pointer_file ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ) and line.startswith( 'index' ):
-            fields = line.split( '\t' )
-            if len( fields ) < 3:
-                continue
-            if fields[1] == dbkey:
-                seq_path = fields[2].strip()
-                break
-    return seq_path
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option( '-r', dest='ref_annotation', help='An optional "reference" annotation GTF. Each sample is matched against this file, and sample isoforms are tagged as overlapping, matching, or novel where appropriate. See the refmap and tmap output file descriptions below.' )
-    parser.add_option( '-R', action="store_true", dest='ignore_nonoverlap', help='If -r was specified, this option causes cuffcompare to ignore reference transcripts that are not overlapped by any transcript in one of cuff1.gtf,...,cuffN.gtf. Useful for ignoring annotated transcripts that are not present in your RNA-Seq samples and thus adjusting the "sensitivity" calculation in the accuracy report written in the transcripts accuracy file' )
-    parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffcompare to look into <seq_dir> for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that <seq_dir> must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.')
-    
-    # Wrapper / Galaxy options.
-    parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' )
-    parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' )
-    parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
-    
-    # Outputs.
-    parser.add_option( '', '--combined-transcripts', dest='combined_transcripts' )
-    
-    (options, args) = parser.parse_args()
-    
-    # output version # of tool
-    try:
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='cuffcompare 2>&1', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'cuffcompare v' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( '%s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine Cuffcompare version\n' )
-        
-    # Set/link to sequence file.
-    if options.use_seq_data:
-        cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' )
-        if not os.path.exists( cached_seqs_pointer_file ):
-            stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file )
-        # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa,
-        # and the equCab2.fa file will contain fasta sequences.
-        seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file )
-        if options.ref_file != 'None':
-            # Create symbolic link to ref_file so that index will be created in working directory.
-            seq_path = "ref.fa"
-            os.symlink( options.ref_file, seq_path  )
-    
-    # Build command.
-    
-    # Base.
-    cmd = "cuffcompare -o cc_output "
-    
-    # Add options.
-    if options.ref_annotation:
-        cmd += " -r %s " % options.ref_annotation
-    if options.ignore_nonoverlap:
-        cmd += " -R "
-    if options.use_seq_data:
-        cmd += " -s %s " % seq_path
-        
-    # Add input files.
-        
-    # Need to symlink inputs so that output files are written to temp directory.
-    for i, arg in enumerate( args ):
-        input_file_name = "./input%i" % ( i+1 )
-        os.symlink( arg, input_file_name )
-        cmd += "%s " % input_file_name
-
-    # Debugging.
-    print cmd
-    
-    # Run command.
-    try:        
-        tmp_name = tempfile.NamedTemporaryFile( dir="." ).name
-        tmp_stderr = open( tmp_name, 'wb' )
-        proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
-        returncode = proc.wait()
-        tmp_stderr.close()
-        
-        # Get stderr, allowing for case where it's very large.
-        tmp_stderr = open( tmp_name, 'rb' )
-        stderr = ''
-        buffsize = 1048576
-        try:
-            while True:
-                stderr += tmp_stderr.read( buffsize )
-                if not stderr or len( stderr ) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-        tmp_stderr.close()
-        
-        # Error checking.
-        if returncode != 0:
-            raise Exception, stderr
-            
-        # Copy outputs.
-        shutil.copyfile( "cc_output.combined.gtf" , options.combined_transcripts )    
-            
-        # check that there are results in the output file
-        cc_output_fname = "cc_output.stats"
-        if len( open( cc_output_fname, 'rb' ).read().strip() ) == 0:
-            raise Exception, 'The main output file is empty, there may be an error with your input file or settings.'
-    except Exception, e:
-        stop_err( 'Error running cuffcompare. ' + str( e ) )
-        
-if __name__=="__main__": __main__()
--- a/tools/ngs_rna/cuffcompare_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,217 +0,0 @@
-<tool id="cuffcompare" name="Cuffcompare" version="0.0.5">
-    <!-- Wrapper supports Cuffcompare versions v1.0.0-v1.0.3 -->
-    <description>compare assembled transcripts to a reference annotation and track Cufflinks transcripts across multiple experiments</description>
-    <requirements>
-        <requirement type="package">cufflinks</requirement>
-    </requirements>
-    <command interpreter="python">
-        cuffcompare_wrapper.py 
-            
-            ## Use annotation reference?
-            #if $annotation.use_ref_annotation == "Yes":
-                -r $annotation.reference_annotation
-                #if $annotation.ignore_nonoverlapping_reference:
-                    -R
-                #end if
-            #end if
-            
-            ## Use sequence data?
-            #if $seq_data.use_seq_data == "Yes":
-	        -s
-                #if $seq_data.seq_source.index_source == "history":
-                    --ref_file=$seq_data.seq_source.ref_file
-                #else:
-                    --ref_file="None"
-                #end if
-                --dbkey=${first_input.metadata.dbkey} 
-                --index_dir=${GALAXY_DATA_INDEX_DIR}
-            #end if
-            
-            ## Outputs.
-            --combined-transcripts=${transcripts_combined}
-            
-            ## Inputs.
-            ${first_input}
-            #for $input_file in $input_files:
-              ${input_file.additional_input}
-            #end for
-            
-    </command>
-    <inputs>
-        <param format="gtf" name="first_input" type="data" label="GTF file produced by Cufflinks" help=""/>
-        <repeat name="input_files" title="Additional GTF Input Files">
-            <param format="gtf" name="additional_input" type="data" label="GTF file produced by Cufflinks" help=""/>
-        </repeat>
-        <conditional name="annotation">
-            <param name="use_ref_annotation" type="select" label="Use Reference Annotation">
-                <option value="No">No</option>
-                <option value="Yes">Yes</option>
-            </param>
-            <when value="Yes">
-                <param format="gtf" name="reference_annotation" type="data" label="Reference Annotation" help="Make sure your annotation file is in GTF format and that Galaxy knows that your file is GTF--not GFF."/>    
-                <param name="ignore_nonoverlapping_reference" type="boolean" label="Ignore reference transcripts that are not overlapped by any transcript in input files"/>
-            </when>
-            <when value="No">
-            </when>
-        </conditional>
-        <conditional name="seq_data">
-            <param name="use_seq_data" type="select" label="Use Sequence Data" help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff.">
-                <option value="Yes">Yes</option>
-                <option value="No">No</option>
-            </param>
-            <when value="No"></when>
-            <when value="Yes">
-                <conditional name="seq_source">
-                  <param name="index_source" type="select" label="Choose the source for the reference list">
-                    <option value="cached">Locally cached</option>
-                    <option value="history">History</option>
-                  </param>
-                  <when value="cached"></when>
-                  <when value="history">
-                      <param name="ref_file" type="data" format="fasta" label="Using reference file" />
-                  </when>
-                </conditional>
-            </when>
-        </conditional>
-    </inputs>
-
-    <outputs>
-        <data format="txt" name="transcripts_accuracy" label="${tool.name} on ${on_string}: transcript accuracy" 
-            from_work_dir="cc_output.stats" />
-        <data format="tabular" name="input1_tmap" label="${tool.name} on ${on_string}: ${first_input.hid} data tmap file"
-            from_work_dir="cc_output.input1.tmap" />
-        <data format="tabular" name="input1_refmap" label="${tool.name} on ${on_string}: data ${first_input.hid} refmap file" 
-            from_work_dir="cc_output.input1.refmap"/>
-        <data format="tabular" name="input2_tmap" label="${tool.name} on ${on_string}: data ${input_files[0]['additional_input'].hid} tmap file" from_work_dir="cc_output.input2.tmap">
-            <filter>len( input_files ) > 0</filter>
-        </data>
-        <data format="tabular" name="input2_refmap" label="${tool.name} on ${on_string}: data ${input_files[0]['additional_input'].hid} refmap file" from_work_dir="cc_output.input2.refmap">
-            <filter>len( input_files ) > 0</filter>
-        </data>
-        <data format="tabular" name="transcripts_tracking" label="${tool.name} on ${on_string}: transcript tracking" from_work_dir="cc_output.tracking">
-            <filter>len( input_files ) > 0</filter>
-        </data>
-        <data format="gtf" name="transcripts_combined" label="${tool.name} on ${on_string}: combined transcripts"/>
-    </outputs>
-
-    <tests>
-        <!-- 
-            cuffcompare -r cuffcompare_in3.gtf -R cuffcompare_in1.gtf cuffcompare_in2.gtf
-        -->
-        <test>
-            <param name="first_input" value="cuffcompare_in1.gtf" ftype="gtf"/>
-            <param name="additional_input" value="cuffcompare_in2.gtf" ftype="gtf"/>
-            <param name="use_ref_annotation" value="Yes"/>
-            <param name="reference_annotation" value="cuffcompare_in3.gtf" ftype="gtf"/>
-            <param name="ignore_nonoverlapping_reference" value="Yes"/>
-            <param name="use_seq_data" value="No"/>
-            <!-- Line diffs are the result of different locations for input files; this cannot be fixed as cuffcompare outputs
-                full input path for each input. -->
-            <output name="transcripts_accuracy" file="cuffcompare_out7.txt" lines_diff="16"/>
-            <output name="input1_tmap" file="cuffcompare_out1.tmap"/>
-            <output name="input1_refmap" file="cuffcompare_out2.refmap"/>
-            <output name="input2_tmap" file="cuffcompare_out3.tmap"/>
-            <output name="input2_refmap" file="cuffcompare_out4.refmap"/>
-            <output name="transcripts_tracking" file="cuffcompare_out6.tracking"/>
-            <output name="transcripts_combined" file="cuffcompare_out5.gtf"/>
-        </test>
-    </tests>
-
-    <help>
-**Cuffcompare Overview**
-
-Cuffcompare is part of Cufflinks_. Cuffcompare helps you: (a) compare your assembled transcripts to a reference annotation and (b) track Cufflinks transcripts across multiple experiments (e.g. across a time course). Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
-
-.. _Cufflinks: http://cufflinks.cbcb.umd.edu/
-        
-------
-
-**Know what you are doing**
-
-.. class:: warningmark
-
-There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
-
-.. __: http://cufflinks.cbcb.umd.edu/manual.html#cuffcompare
-
-------
-
-**Input format**
-
-Cuffcompare takes Cufflinks' GTF output as input, and optionally can take a "reference" annotation (such as from Ensembl_)
-
-.. _Ensembl: http://www.ensembl.org 
-
-------
-
-**Outputs**
-
-Cuffcompare produces the following output files:
-
-Transcripts Accuracy File:
-
-Cuffcompare reports various statistics related to the "accuracy" of the transcripts in each sample when compared to the reference annotation data. The typical gene finding measures of "sensitivity" and "specificity" (as defined in Burset, M., Guigó, R. : Evaluation of gene structure prediction programs (1996) Genomics, 34 (3), pp. 353-367. doi: 10.1006/geno.1996.0298) are calculated at various levels (nucleotide, exon, intron, transcript, gene) for each input file and reported in this file. The Sn and Sp columns show specificity and sensitivity values at each level, while the fSn and fSp columns are "fuzzy" variants of these same accuracy calculations, allowing for a very small variation in exon boundaries to still be counted as a "match".
-
-Transcripts Combined File:
-
-Cuffcompare reports a GTF file containing the "union" of all transfrags in each sample. If a transfrag is present in both samples, it is thus reported once in the combined gtf. 
-
-Transcripts Tracking File:
-
-This file matches transcripts up between samples. Each row contains a transcript structure that is present in one or more input GTF files. Because the transcripts will generally have different IDs (unless you assembled your RNA-Seq reads against a reference transcriptome), cuffcompare examines the structure of each of the transcripts, matching transcripts that agree on the coordinates and order of all of their introns, as well as strand. Matching transcripts are allowed to differ on the length of the first and last exons, since these lengths will naturally vary from sample to sample due to the random nature of sequencing.
-If you ran cuffcompare with the -r option, the first and second columns contain the closest matching reference transcript to the one described by each row.
-
-Here's an example of a line from the tracking file::
-
-  TCONS_00000045 XLOC_000023 Tcea|uc007afj.1	j	\
-     q1:exp.115|exp.115.0|100|3.061355|0.350242|0.350207 \
-     q2:60hr.292|60hr.292.0|100|4.094084|0.000000|0.000000
-
-In this example, a transcript present in the two input files, called exp.115.0 in the first and 60hr.292.0 in the second, doesn't match any reference transcript exactly, but shares exons with uc007afj.1, an isoform of the gene Tcea, as indicated by the class code j. The first five columns are as follows::
-
-  Column number   Column name               Example          Description
-  -----------------------------------------------------------------------
-  1               Cufflinks transfrag id    TCONS_00000045   A unique internal id for the transfrag
-  2               Cufflinks locus id        XLOC_000023      A unique internal id for the locus
-  3               Reference gene id         Tcea             The gene_name attribute of the reference GTF record for this transcript, or '-' if no reference transcript overlaps this Cufflinks transcript
-  4               Reference transcript id   uc007afj.1       The transcript_id attribute of the reference GTF record for this transcript, or '-' if no reference transcript overlaps this Cufflinks transcript
-  5               Class code                c                The type of match between the Cufflinks transcripts in column 6 and the reference transcript. See class codes
-  
-Each of the columns after the fifth has the following format::
-
-  qJ:gene_id|transcript_id|FMI|FPKM|conf_lo|conf_hi
-
-A transcript does not need to be present in all samples to be reported in the tracking file. A sample not containing a transcript will have a "-" in its entry in the row for that transcript.
-
-Class Codes
-
-If you ran cuffcompare with the -r option, tracking rows will contain the following values. If you did not use -r, the rows will all contain "-" in their class code column::
-
-  Priority	 Code	   Description
-  ---------------------------------
-  1	         =	       Match
-  2	         c	       Contained	
-  3	         j	       New isoform	
-  4	         e	       A single exon transcript overlapping a reference exon and at least 10 bp of a reference intron, indicating a possible pre-mRNA fragment.	
-  5	         i	       A single exon transcript falling entirely within a reference intron	
-  6	         r	       Repeat. Currently determined by looking at the reference sequence and applied to transcripts where at least 50% of the bases are lower case	
-  7	         p	       Possible polymerase run-on fragment	
-  8	         u	       Unknown, intergenic transcript	
-  9	         o	       Unknown, generic overlap with reference	
-  10             .	       (.tracking file only, indicates multiple classifications)
-    
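-For programmatic filtering, the table above can be mirrored in a small lookup; this is only a sketch with hypothetical helper names::
-
-  # Class codes keyed to the priorities listed above.
-  CLASS_CODE_PRIORITY = { '=': 1, 'c': 2, 'j': 3, 'e': 4, 'i': 5,
-                          'r': 6, 'p': 7, 'u': 8, 'o': 9, '.': 10 }
-
-  def best_class_code( codes ):
-      # Return the highest-priority (lowest-numbered) code observed.
-      return min( codes, key=lambda c: CLASS_CODE_PRIORITY[c] )
-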
--------
-
-**Settings**
-
-All of the options have a default value. You can change any of them. Most of the options in Cuffcompare have been implemented here.
-
-------
-
-**Cuffcompare parameter list**
-
-This is a list of implemented Cuffcompare options::
-
-  -r    An optional "reference" annotation GTF. Each sample is matched against this file, and sample isoforms are tagged as overlapping, matching, or novel where appropriate. See the refmap and tmap output file descriptions below.
-  -R    If -r was specified, this option causes cuffcompare to ignore reference transcripts that are not overlapped by any transcript in one of cuff1.gtf,...,cuffN.gtf. Useful for ignoring annotated transcripts that are not present in your RNA-Seq samples and thus adjusting the "sensitivity" calculation in the accuracy report written in the transcripts_accuracy file
-    </help>
-</tool>
--- a/tools/ngs_rna/cuffdiff_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,233 +0,0 @@
-#!/usr/bin/env python
-
-import optparse, os, shutil, subprocess, sys, tempfile
-
-def group_callback( option, op_str, value, parser ):
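-    # Consume positional args up to the next option flag; a token
-    # beginning with "," ends the current replicate group and starts a
-    # new one, e.g. "rep1 rep2 , rep3 rep4" yields two groups.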
-    groups = []
-    flist = []
-    for arg in parser.rargs:
-        arg = arg.strip()
-        if arg[0] == "-":
-            break
-        elif arg[0] == ",":
-            groups.append(flist)
-            flist = []
-        else:
-            flist.append(arg)
-    groups.append(flist)
-
-    setattr(parser.values, option.dest, groups)
-    
-def label_callback( option, op_str, value, parser ):
-    labels = []
-    for arg in parser.rargs:
-        arg = arg.strip()
-        if arg[0] == "-":
-            break
-        else:
-            labels.append(arg)
-
-    setattr(parser.values, option.dest, labels)
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-    
-# Copied from sam_to_bam.py:
-def check_seq_file( dbkey, cached_seqs_pointer_file ):
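-    # Scan the tab-separated .loc index file for lines of the form
-    # "index<TAB>dbkey<TAB>path" and return the fasta path registered
-    # for this dbkey, or '' if none is found.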
-    seq_path = ''
-    for line in open( cached_seqs_pointer_file ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ) and line.startswith( 'index' ):
-            fields = line.split( '\t' )
-            if len( fields ) < 3:
-                continue
-            if fields[1] == dbkey:
-                seq_path = fields[2].strip()
-                break
-    return seq_path
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    
-    # Cuffdiff options.
-    parser.add_option( '-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.' )
-    parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
-    parser.add_option( '-m', '--inner-mean-dist', dest='inner_mean_dist', help='This is the expected (mean) inner distance between mate pairs. \
-                                                                                For example, for paired-end runs with fragments selected at 300bp, \
-                                                                                where each end is 50bp, you should set -m to be 200. The default is 45bp.')
-    parser.add_option( '-c', '--min-alignment-count', dest='min_alignment_count', help='The minimum number of alignments in a locus needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not significant, and the locus\' observed changes don\'t contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads).' )
-    parser.add_option( '--FDR', dest='FDR', help='The allowed false discovery rate. The default is 0.05.' )
-
-    # Advanced Options:	
-    parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' )
-    parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' )
-    
-    # Wrapper / Galaxy options.
-    parser.add_option( '-f', '--files', dest='groups', action="callback", callback=group_callback, help="Groups to be processed; groups are separated by spaces, replicates in a group are comma separated: group1_rep1,group1_rep2 group2_rep1,group2_rep2 ... groupN_rep1,groupN_rep2" )
-    parser.add_option( '-A', '--inputA', dest='inputA', help='A transcript GTF file produced by cufflinks, cuffcompare, or other source.')
-    parser.add_option( '-1', '--input1', dest='input1', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment format that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
-    parser.add_option( '-2', '--input2', dest='input2', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment format that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
-
-    # Label options
-    parser.add_option('-L', '--labels', dest='labels', action="callback", callback=label_callback, help="Labels for the groups the replicates are in.")
-    
-    # Normalization options.
-    parser.add_option( "-N", "--quartile-normalization", dest="do_normalization", action="store_true" )
-
-    # Bias correction options.
-    parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.')
-    parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' )
-    parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' )
-    parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
-
-    # Outputs.
-    parser.add_option( "--isoforms_fpkm_tracking_output", dest="isoforms_fpkm_tracking_output" )
-    parser.add_option( "--genes_fpkm_tracking_output", dest="genes_fpkm_tracking_output" )
-    parser.add_option( "--cds_fpkm_tracking_output", dest="cds_fpkm_tracking_output" )
-    parser.add_option( "--tss_groups_fpkm_tracking_output", dest="tss_groups_fpkm_tracking_output" )
-    parser.add_option( "--isoforms_exp_output", dest="isoforms_exp_output" )
-    parser.add_option( "--genes_exp_output", dest="genes_exp_output" )
-    parser.add_option( "--tss_groups_exp_output", dest="tss_groups_exp_output" )
-    parser.add_option( "--cds_exp_fpkm_tracking_output", dest="cds_exp_fpkm_tracking_output" )
-    parser.add_option( "--splicing_diff_output", dest="splicing_diff_output" )
-    parser.add_option( "--cds_diff_output", dest="cds_diff_output" )
-    parser.add_option( "--promoters_diff_output", dest="promoters_diff_output" )
-    
-    (options, args) = parser.parse_args()
-    
-    # output version # of tool
-    try:
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='cuffdiff --no-update-check 2>&1', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'cuffdiff v' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( '%s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine Cuffdiff version\n' )
-
-    # Make temp directory for output.
-    tmp_output_dir = tempfile.mkdtemp()
-    
-    # If doing bias correction, set/link to sequence file.
-    if options.do_bias_correction:
-        cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' )
-        if not os.path.exists( cached_seqs_pointer_file ):
-            stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file )
-        # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa,
-        # and the equCab2.fa file will contain fasta sequences.
-        seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file )
-        if options.ref_file != 'None':
-            # Create symbolic link to ref_file so that index will be created in working directory.
-            seq_path = os.path.join( tmp_output_dir, "ref.fa" )
-            os.symlink( options.ref_file, seq_path  )
-    
-    # Build command.
-    
-    # Base; always use quiet mode to avoid problems with storing log output.
-    cmd = "cuffdiff --no-update-check -q"
-    
-    # Add options.
-    if options.inner_dist_std_dev:
-        cmd += ( " -s %i" % int ( options.inner_dist_std_dev ) )
-    if options.num_threads:
-        cmd += ( " -p %i" % int ( options.num_threads ) )
-    if options.inner_mean_dist:
-        cmd += ( " -m %i" % int ( options.inner_mean_dist ) )
-    if options.min_alignment_count:
-        cmd += ( " -c %i" % int ( options.min_alignment_count ) )
-    if options.FDR:
-        cmd += ( " --FDR %f" % float( options.FDR ) )
-    if options.num_importance_samples:
-        cmd += ( " --num-importance-samples %i" % int ( options.num_importance_samples ) )
-    if options.max_mle_iterations:
-        cmd += ( " --max-mle-iterations %i" % int ( options.max_mle_iterations ) )
-    if options.do_normalization:
-        cmd += ( " -N" )
-    if options.do_bias_correction:
-        cmd += ( " -b %s" % seq_path )
-            
-    # Add inputs.
-    # For replicate analysis: group1_rep1,group1_rep2 groupN_rep1,groupN_rep2
-    if options.groups:
-        cmd += " --labels "
-        for label in options.labels:
-            cmd += label + ","
-        cmd = cmd[:-1]
-
-        cmd += " " + options.inputA + " "
-
-        for group in options.groups:
-            for filename in group:
-                cmd += filename + ","
-            cmd = cmd[:-1] + " "
-    else: 
-        cmd += " " + options.inputA + " " + options.input1 + " " + options.input2
-        
-    # Debugging.
-    print cmd
-
-    # Run command.
-    try:
-        tmp_name = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name
-        tmp_stderr = open( tmp_name, 'wb' )
-        proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_output_dir, stderr=tmp_stderr.fileno() )
-        returncode = proc.wait()
-        tmp_stderr.close()
-        
-        # Get stderr, allowing for case where it's very large.
-        tmp_stderr = open( tmp_name, 'rb' )
-        stderr = ''
-        buffsize = 1048576
-        try:
-            while True:
-                stderr += tmp_stderr.read( buffsize )
-                if not stderr or len( stderr ) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-        tmp_stderr.close()
-        
-        # Error checking.
-        if returncode != 0:
-            raise Exception, stderr
-            
-        # check that there are results in the output file
-        if len( open( os.path.join( tmp_output_dir, "isoforms.fpkm_tracking" ), 'rb' ).read().strip() ) == 0:
-            raise Exception, 'The main output file is empty, there may be an error with your input file or settings.'
-    except Exception, e:
-        stop_err( 'Error running cuffdiff. ' + str( e ) )
-
-        
-    # Copy output files from tmp directory to specified files.
-    try:
-        try:
-            shutil.copyfile( os.path.join( tmp_output_dir, "isoforms.fpkm_tracking" ), options.isoforms_fpkm_tracking_output )
-            shutil.copyfile( os.path.join( tmp_output_dir, "genes.fpkm_tracking" ), options.genes_fpkm_tracking_output )
-            shutil.copyfile( os.path.join( tmp_output_dir, "cds.fpkm_tracking" ), options.cds_fpkm_tracking_output )
-            shutil.copyfile( os.path.join( tmp_output_dir, "tss_groups.fpkm_tracking" ), options.tss_groups_fpkm_tracking_output )
-            shutil.copyfile( os.path.join( tmp_output_dir, "isoform_exp.diff" ), options.isoforms_exp_output )
-            shutil.copyfile( os.path.join( tmp_output_dir, "gene_exp.diff" ), options.genes_exp_output )
-            shutil.copyfile( os.path.join( tmp_output_dir, "tss_group_exp.diff" ), options.tss_groups_exp_output )
-            shutil.copyfile( os.path.join( tmp_output_dir, "splicing.diff" ), options.splicing_diff_output )
-            shutil.copyfile( os.path.join( tmp_output_dir, "cds.diff" ), options.cds_diff_output )
-            shutil.copyfile( os.path.join( tmp_output_dir, "cds_exp.diff" ), options.cds_exp_fpkm_tracking_output )
-            shutil.copyfile( os.path.join( tmp_output_dir, "promoters.diff" ), options.promoters_diff_output )    
-        except Exception, e:
-            stop_err( 'Error in cuffdiff:\n' + str( e ) ) 
-    finally:
-        # Clean up temp dirs
-        if os.path.exists( tmp_output_dir ):
-            shutil.rmtree( tmp_output_dir )
-
-if __name__=="__main__": __main__()
--- a/tools/ngs_rna/cuffdiff_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,238 +0,0 @@
-<tool id="cuffdiff" name="Cuffdiff" version="0.0.5">
-    <!-- Wrapper supports Cuffdiff versions v1.0.0-v1.0.3 -->
-    <description>find significant changes in transcript expression, splicing, and promoter use</description>
-    <requirements>
-        <requirement type="package">cufflinks</requirement>
-    </requirements>
-    <command interpreter="python">
-        cuffdiff_wrapper.py
-            --FDR=$fdr
-            --num-threads="4"
-            --min-alignment-count=$min_alignment_count
-
-            --isoforms_fpkm_tracking_output=$isoforms_fpkm_tracking
-            --genes_fpkm_tracking_output=$genes_fpkm_tracking
-            --cds_fpkm_tracking_output=$cds_fpkm_tracking
-            --tss_groups_fpkm_tracking_output=$tss_groups_fpkm_tracking
-            --isoforms_exp_output=$isoforms_exp
-            --genes_exp_output=$genes_exp
-            --tss_groups_exp_output=$tss_groups_exp
-            --cds_exp_fpkm_tracking_output=$cds_exp_fpkm_tracking
-            --splicing_diff_output=$splicing_diff
-            --cds_diff_output=$cds_diff
-            --promoters_diff_output=$promoters_diff
-            
-            ## Set paired-end data parameters?
-            #if $singlePaired.sPaired == "Yes":
-                -m $singlePaired.mean_inner_distance
-                -s $singlePaired.inner_distance_std_dev
-                #end if
-
-            ## Normalization?
-            #if str($do_normalization) == "Yes":
-            -N
-            #end if
-
-            
-            ## Bias correction?
-            #if $bias_correction.do_bias_correction == "Yes":
-                -b
-                #if $bias_correction.seq_source.index_source == "history":
-                    --ref_file=$bias_correction.seq_source.ref_file
-                #else:
-                    --ref_file="None"
-                #end if
-                --dbkey=${gtf_input.metadata.dbkey} 
-                --index_dir=${GALAXY_DATA_INDEX_DIR}
-            #end if
-                
-            ## Inputs.
-            --inputA=$gtf_input
-            #if $group_analysis.do_groups == "No":
-                --input1=$aligned_reads1
-                --input2=$aligned_reads2
-            #else:
-                ## Replicates.
-                --labels
-                #for $group in $group_analysis.groups
-                    ${group.group}
-                #end for
-                --files
-                #for $group in $group_analysis.groups
-                    #for $file in $group.files:
-                        ${file.file}
-                    #end for
-                    ,
-                #end for
-            #end if
-
-    </command>
-    <inputs>
-        <param format="gtf" name="gtf_input" type="data" label="Transcripts" help="A transcript GTF file produced by cufflinks, cuffcompare, or other source."/>
-        <conditional name="group_analysis"> 
-            <param name="do_groups" type="select" label="Perform replicate analysis" help="Perform cuffdiff with replicates in each group.">
-                <option value="No">No</option>
-                <option value="Yes">Yes</option>
-            </param>
-            <when value="Yes">
-                <repeat name="groups" title="Group">
-                    <param name="group" title="Group name" type="text" label="Group name (no spaces or commas)"/>
-                    <repeat name="files" title="Replicate">
-                        <param name="file" label="Add file" type="data" format="sam,bam"/>
-                    </repeat>
-                </repeat>
-            </when>
-            <when value="No">
-                <param format="sam,bam" name="aligned_reads1" type="data" label="SAM or BAM file of aligned RNA-Seq reads" help=""/>
-                <param format="sam,bam" name="aligned_reads2" type="data" label="SAM or BAM file of aligned RNA-Seq reads" help=""/>
-            </when>
-        </conditional>
-
-        <param name="fdr" type="float" value="0.05" label="False Discovery Rate" help="The allowed false discovery rate."/>
-        <param name="min_alignment_count" type="integer" value="1000" label="Min Alignment Count" help="The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples."/>
-        <param name="do_normalization" type="select" label="Perform quartile normalization" help="Removes top 25% of genes from FPKM denominator to improve accuracy of differential expression calls for low abundance transcripts.">
-            <option value="No">No</option>
-            <option value="Yes">Yes</option>
-        </param>
-        <conditional name="bias_correction">
-            <param name="do_bias_correction" type="select" label="Perform Bias Correction" help="Bias detection and correction can significantly improve accuracy of transcript abundance estimates.">
-                <option value="Yes">Yes</option>
-                <option value="No">No</option>
-            </param>
-            <when value="Yes">
-                <conditional name="seq_source">
-                  <param name="index_source" type="select" label="Reference sequence data">
-                    <option value="cached">Locally cached</option>
-                    <option value="history">History</option>
-                  </param>
-                  <when value="cached"></when>
-                  <when value="history">
-                      <param name="ref_file" type="data" format="fasta" label="Using reference file" />
-                  </when>
-                </conditional>
-            </when>
-            <when value="No"></when>
-        </conditional>
-        <conditional name="singlePaired">
-            <param name="sPaired" type="select" label="Set Parameters for Paired-end Reads? (not recommended)">
-                <option value="No">No</option>
-                <option value="Yes">Yes</option>
-            </param>
-            <when value="No"></when>
-            <when value="Yes">
-                <param name="mean_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs"/>
-                <param name="inner_distance_std_dev" type="integer" value="20" label="Standard Deviation for Inner Distance between Mate Pairs"/>
-            </when>
-        </conditional>
-    </inputs>
-
-    <outputs>
-        <data format="tabular" name="splicing_diff" label="${tool.name} on ${on_string}: splicing differential expression testing"/>
-        <data format="tabular" name="promoters_diff" label="${tool.name} on ${on_string}: promoters differential expression testing"/>
-        <data format="tabular" name="cds_diff" label="${tool.name} on ${on_string}: CDS overloading diffential expression testing"/>
-        <data format="tabular" name="cds_exp_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM differential expression testing"/>
-        <data format="tabular" name="cds_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM tracking"/>
-        <data format="tabular" name="tss_groups_exp" label="${tool.name} on ${on_string}: TSS groups differential expression testing"/>
-        <data format="tabular" name="tss_groups_fpkm_tracking" label="${tool.name} on ${on_string}: TSS groups FPKM tracking" />
-        <data format="tabular" name="genes_exp" label="${tool.name} on ${on_string}: gene differential expression testing"/>
-        <data format="tabular" name="genes_fpkm_tracking" label="${tool.name} on ${on_string}: gene FPKM tracking"/>
-        <data format="tabular" name="isoforms_exp" label="${tool.name} on ${on_string}: transcript differential expression testing"/>
-        <data format="tabular" name="isoforms_fpkm_tracking" label="${tool.name} on ${on_string}: transcript FPKM tracking"/>
-    </outputs>
-
-    <tests>
-        <test>
-                <!--
-                    cuffdiff cuffcompare_out5.gtf cuffdiff_in1.sam cuffdiff_in2.sam 
-                -->
-                <param name="gtf_input" value="cuffcompare_out5.gtf" ftype="gtf" />
-                <param name="do_groups" value="No" />
-                <param name="aligned_reads1" value="cuffdiff_in1.sam" ftype="sam" />
-                <param name="aligned_reads2" value="cuffdiff_in2.sam" ftype="sam" />
-                <!-- Defaults. -->
-                <param name="fdr" value="0.05" />
-                <param name="min_alignment_count" value="0" ftype="sam" />
-                <param name="do_bias_correction" value="No" />
-                <param name="do_normalization" value="No" />
-                <param name="sPaired" value="single" ftype="sam" />
-                <!-- 
-                    Line diffs are needed because cuffdiff does not produce deterministic output.
-                    TODO: can we find datasets that lead to deterministic behavior?
-                -->
-                <output name="splicing_diff" file="cuffdiff_out9.txt"/>
-                <output name="promoters_diff" file="cuffdiff_out10.txt"/>
-                <output name="cds_diff" file="cuffdiff_out11.txt"/>
-                <output name="cds_exp_fpkm_tracking" file="cuffdiff_out4.txt"/>
-                <output name="cds_fpkm_tracking" file="cuffdiff_out8.txt"/>
-                <output name="tss_groups_exp" file="cuffdiff_out3.txt"/>
-                <output name="tss_groups_fpkm_tracking" file="cuffdiff_out7.txt"/>
-                <output name="genes_exp" file="cuffdiff_out2.txt" lines_diff="200"/>
-                <output name="genes_fpkm_tracking" file="cuffdiff_out6.txt" lines_diff="200"/>
-                <output name="isoforms_exp" file="cuffdiff_out1.txt" lines_diff="200"/>
-                <output name="isoforms_fpkm_tracking" file="cuffdiff_out5.txt" lines_diff="200"/>
-        </test>
-    </tests>
-
-    <help>
-**Cuffdiff Overview**
-
-Cuffdiff is part of Cufflinks_. Cuffdiff finds significant changes in transcript expression, splicing, and promoter use. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
-
-.. _Cufflinks: http://cufflinks.cbcb.umd.edu/
-        
-------
-
-**Know what you are doing**
-
-.. class:: warningmark
-
-There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
-
-.. __: http://cufflinks.cbcb.umd.edu/manual.html#cuffdiff
-
-------
-
-**Input format**
-
-Cuffdiff takes Cufflinks or Cuffcompare GTF files as input along with two SAM files containing the fragment alignments for two or more samples.
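-
-For reference, a command line equivalent to what this wrapper builds looks like the following (file names are placeholders)::
-
-  cuffdiff --no-update-check -q --FDR 0.05 -c 1000 merged.gtf sample1.sam sample2.sam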
-
-------
-
-**Outputs**
-
-Cuffdiff produces many output files:
-
-1. Transcript FPKM expression tracking.
-2. Gene FPKM expression tracking; tracks the summed FPKM of transcripts sharing each gene_id
-3. Primary transcript FPKM tracking; tracks the summed FPKM of transcripts sharing each tss_id
-4. Coding sequence FPKM tracking; tracks the summed FPKM of transcripts sharing each p_id, independent of tss_id
-5. Transcript differential FPKM.
-6. Gene differential FPKM. Tests differences in the summed FPKM of transcripts sharing each gene_id (a parsing sketch for these testing files follows this list)
-7. Primary transcript differential FPKM. Tests differences in the summed FPKM of transcripts sharing each tss_id
-8. Coding sequence differential FPKM. Tests differences in the summed FPKM of transcripts sharing each p_id independent of tss_id
-9. Differential splicing tests: this tab delimited file lists, for each primary transcript, the amount of overloading detected among its isoforms, i.e. how much differential splicing exists between isoforms processed from a single primary transcript. Only primary transcripts from which two or more isoforms are spliced are listed in this file.
-10. Differential promoter tests: this tab delimited file lists, for each gene, the amount of overloading detected among its primary transcripts, i.e. how much differential promoter use exists between samples. Only genes producing two or more distinct primary transcripts (i.e. multi-promoter genes) are listed here.
-11. Differential CDS tests: this tab delimited file lists, for each gene, the amount of overloading detected among its coding sequences, i.e. how much differential CDS output exists between samples. Only genes producing two or more distinct CDS (i.e. multi-protein genes) are listed here.
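-
-As a downstream illustration (not something this wrapper runs), the differential expression testing files can be filtered for significant calls roughly as follows; the 'significant' header name is an assumption and may vary between Cuffdiff versions::
-
-  import csv
-
-  def significant_rows( path ):
-      # Yield rows whose test was called significant (header names assumed).
-      for row in csv.DictReader( open( path ), delimiter='\t' ):
-          if row.get( 'significant' ) == 'yes':
-              yield row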
-    
--------
-
-**Settings**
-
-All of the options have a default value. You can change any of them. Most of the options in Cuffdiff have been implemented here.
-
-------
-
-**Cuffdiff parameter list**
-
-This is a list of implemented Cuffdiff options::
-
-  -m INT                         This is the expected (mean) inner distance between mate pairs. For example, for paired-end runs with fragments selected at 300bp, where each end is 50bp, you should set -m to be 200. The default is 45bp.
-  -s INT                         The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.
-  -c INT                         The minimum number of alignments in a locus needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not significant, and the locus' observed changes don't contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads).
-  --FDR FLOAT                    The allowed false discovery rate. The default is 0.05.
-  --num-importance-samples INT   Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000
-  --max-mle-iterations INT       Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000
-  -N                             With this option, Cufflinks excludes the contribution of the top 25 percent most highly expressed genes from the number of mapped fragments used in the FPKM denominator. This can improve robustness of differential expression calls for less abundant genes and transcripts.
-  
-    </help>
-</tool>
--- a/tools/ngs_rna/cufflinks_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,155 +0,0 @@
-#!/usr/bin/env python
-
-import optparse, os, shutil, subprocess, sys, tempfile
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-    
-# Copied from sam_to_bam.py:
-def check_seq_file( dbkey, cached_seqs_pointer_file ):
-    seq_path = ''
-    for line in open( cached_seqs_pointer_file ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ) and line.startswith( 'index' ):
-            fields = line.split( '\t' )
-            if len( fields ) < 3:
-                continue
-            if fields[1] == dbkey:
-                seq_path = fields[2].strip()
-                break
-    return seq_path
-	
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option( '-1', '--input', dest='input', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment format that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
-    parser.add_option( '-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.' )
-    parser.add_option( '-I', '--max-intron-length', dest='max_intron_len', help='The maximum intron length. Cufflinks will not report transcripts with introns longer than this, and will ignore SAM alignments with REF_SKIP CIGAR operations longer than this. The default is 300,000.' )
-    parser.add_option( '-F', '--min-isoform-fraction', dest='min_isoform_fraction', help='After calculating isoform abundance for a gene, Cufflinks filters out transcripts that it believes are very low abundance, because isoforms expressed at extremely low levels often cannot reliably be assembled, and may even be artifacts of incompletely spliced precursors of processed transcripts. This parameter is also used to filter out introns that have far fewer spliced alignments supporting them. The default is 0.05, or 5% of the most abundant isoform (the major isoform) of the gene.' )
-    parser.add_option( '-j', '--pre-mrna-fraction', dest='pre_mrna_fraction', help='Some RNA-Seq protocols produce a significant amount of reads that originate from incompletely spliced transcripts, and these reads can confound the assembly of fully spliced mRNAs. Cufflinks uses this parameter to filter out alignments that lie within the intronic intervals implied by the spliced alignments. The minimum depth of coverage in the intronic region covered by the alignment is divided by the number of spliced reads, and if the result is lower than this parameter value, the intronic alignments are ignored. The default is 5%.' )
-    parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
-    parser.add_option( '-m', '--inner-mean-dist', dest='inner_mean_dist', help='This is the expected (mean) inner distance between mate pairs. \
-                                                                                For example, for paired-end runs with fragments selected at 300bp, \
-                                                                                where each end is 50bp, you should set -m to be 200. The default is 45bp.')
-    parser.add_option( '-G', '--GTF', dest='GTF', help='Tells Cufflinks to use the supplied reference annotation to estimate isoform expression. It will not assemble novel transcripts, and the program will ignore alignments not structurally compatible with any reference transcript.' )
-    parser.add_option( '-g', '--GTF-guide', dest='GTFguide', help='use reference transcript annotation to guide assembly' )
-    
-    # Normalization options.
-    parser.add_option( "-N", "--quartile-normalization", dest="do_normalization", action="store_true" )
-
-    # Wrapper / Galaxy options.
-    parser.add_option( '-A', '--assembled-isoforms-output', dest='assembled_isoforms_output_file', help='Assembled isoforms output file; format is GTF.' )
-
-    # Advanced Options:	
-    parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' )
-    parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' )
-
-    # Bias correction options.
-    parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.')
-    parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' )
-    parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' )
-    parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
-    
-    (options, args) = parser.parse_args()
-    
-    # output version # of tool
-    try:
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='cufflinks --no-update-check 2>&1', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'cufflinks v' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( '%s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine Cufflinks version\n' )
-    
-    # If doing bias correction, set/link to sequence file.
-    if options.do_bias_correction:
-        cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' )
-        if not os.path.exists( cached_seqs_pointer_file ):
-            stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file )
-        # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa,
-        # and the equCab2.fa file will contain fasta sequences.
-        seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file )
-        if options.ref_file != 'None':
-            # Create symbolic link to ref_file so that index will be created in working directory.
-            seq_path = "ref.fa"
-            os.symlink( options.ref_file, seq_path  )
-    
-    # Build command.
-    
-    # Base; always use quiet mode to avoid problems with storing log output.
-    cmd = "cufflinks -q --no-update-check"
-    
-    # Add options.
-    if options.inner_dist_std_dev:
-        cmd += ( " -s %i" % int ( options.inner_dist_std_dev ) )
-    if options.max_intron_len:
-        cmd += ( " -I %i" % int ( options.max_intron_len ) )
-    if options.min_isoform_fraction:
-        cmd += ( " -F %f" % float ( options.min_isoform_fraction ) )
-    if options.pre_mrna_fraction:
-        cmd += ( " -j %f" % float ( options.pre_mrna_fraction ) )    
-    if options.num_threads:
-        cmd += ( " -p %i" % int ( options.num_threads ) )
-    if options.inner_mean_dist:
-        cmd += ( " -m %i" % int ( options.inner_mean_dist ) )
-    if options.GTF:
-        cmd += ( " -G %s" % options.GTF )
-    if options.GTFguide:
-        cmd += ( " -g %s" % options.GTFguide )
-    if options.num_importance_samples:
-        cmd += ( " --num-importance-samples %i" % int ( options.num_importance_samples ) )
-    if options.max_mle_iterations:
-        cmd += ( " --max-mle-iterations %i" % int ( options.max_mle_iterations ) )
-    if options.do_normalization:
-        cmd += ( " -N" )
-    if options.do_bias_correction:
-        cmd += ( " -b %s" % seq_path )
-        
-    # Debugging.
-    print cmd
-        
-    # Add input files.
-    cmd += " " + options.input
-    
-    # Run command.
-    try:
-        tmp_name = tempfile.NamedTemporaryFile( dir="." ).name
-        tmp_stderr = open( tmp_name, 'wb' )
-        proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
-        returncode = proc.wait()
-        tmp_stderr.close()
-        
-        # Get stderr, allowing for case where it's very large.
-        tmp_stderr = open( tmp_name, 'rb' )
-        stderr = ''
-        buffsize = 1048576
-        try:
-            while True:
-                stderr += tmp_stderr.read( buffsize )
-                if not stderr or len( stderr ) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-        tmp_stderr.close()
-
-        # Copy outputs.
-        shutil.copyfile( "transcripts.gtf" , options.assembled_isoforms_output_file )
-        
-        # Error checking.
-        if returncode != 0:
-            raise Exception, stderr            
-    except Exception, e:
-        stop_err( 'Error running cufflinks. ' + str( e ) )
-
-if __name__=="__main__": __main__()
--- a/tools/ngs_rna/cufflinks_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,222 +0,0 @@
-<tool id="cufflinks" name="Cufflinks" version="0.0.5">
-    <!-- Wrapper supports Cufflinks versions v1.0.0-v1.0.3 -->
-    <description>transcript assembly and FPKM (RPKM) estimates for RNA-Seq data</description>
-    <requirements>
-        <requirement type="package">cufflinks</requirement>
-    </requirements>
-    <command interpreter="python">
-        cufflinks_wrapper.py 
-            --input=$input
-            --assembled-isoforms-output=$assembled_isoforms
-            --num-threads="4"
-            -I $max_intron_len
-            -F $min_isoform_fraction
-            -j $pre_mrna_fraction
-            
-            ## Include reference annotation?
-            #if $reference_annotation.use_ref == "Use reference annotation":
-                -G $reference_annotation.reference_annotation_file
-            #end if
-            #if $reference_annotation.use_ref == "Use reference annotation guide":
-                -g $reference_annotation.reference_annotation_guide_file
-            #end if
-            
-            ## Set paired-end parameters?
-            #if $singlePaired.sPaired == "Yes":
-                -m $singlePaired.mean_inner_distance
-                -s $singlePaired.inner_distance_std_dev
-                #end if
-
-            ## Normalization?
-            #if str($do_normalization) == "Yes":
-            -N
-            #end if
-            
-            ## Bias correction?
-            #if $bias_correction.do_bias_correction == "Yes":
-                -b
-                #if $bias_correction.seq_source.index_source == "history":
-                    --ref_file=$bias_correction.seq_source.ref_file
-                #else:
-                    --ref_file="None"
-                #end if
-                --dbkey=${input.metadata.dbkey} 
-                --index_dir=${GALAXY_DATA_INDEX_DIR}
-            #end if
-    </command>
-    <inputs>
-        <param format="sam,bam" name="input" type="data" label="SAM or BAM file of aligned RNA-Seq reads" help=""/>
-        <param name="max_intron_len" type="integer" value="300000" min="1" max="600000" label="Max Intron Length" help=""/>
-        <param name="min_isoform_fraction" type="float" value="0.05" min="0" max="1" label="Min Isoform Fraction" help=""/>
-        <param name="pre_mrna_fraction" type="float" value="0.05" min="0" max="1" label="Pre MRNA Fraction" help=""/>
-        <param name="do_normalization" type="select" label="Perform quartile normalization" help="Removes top 25% of genes from FPKM denominator to improve accuracy of differential expression calls for low abundance transcripts.">
-            <option value="No">No</option>
-            <option value="Yes">Yes</option>
-        </param>
-        <conditional name="reference_annotation">
-            <param name="use_ref" type="select" label="Use Reference Annotation">
-                <option value="No">No</option>
-                <option value="Use reference annotation">Use reference annotation</option>
-                <option value="Use reference annotation guide">Use reference annotation as guide</option>
-            </param>
-            <when value="No"></when>
-            <when value="Use reference annotation">
-                <param format="gff3,gtf" name="reference_annotation_file" type="data" label="Reference Aonnotation" help="Make sure your annotation file is in GTF format and that Galaxy knows that your file is GTF--not GFF."/>
-            	</when>
-	    <when value="Use reference annotation guide">
-                <param format="gff3,gtf" name="reference_annotation_guide_file" type="data" label="Reference Aonnotation" help="Make sure your annotation file is in GTF format and that Galaxy knows that your file is GTF--not GFF."/>
-                </when>
-        </conditional>
-        <conditional name="bias_correction">
-            <param name="do_bias_correction" type="select" label="Perform Bias Correction" help="Bias detection and correction can significantly improve accuracy of transcript abundance estimates.">
-                <option value="No">No</option>
-                <option value="Yes">Yes</option>
-            </param>
-            <when value="Yes">
-                <conditional name="seq_source">
-                  <param name="index_source" type="select" label="Reference sequence data">
-                    <option value="cached">Locally cached</option>
-                    <option value="history">History</option>
-                  </param>
-                  <when value="cached"></when>
-                  <when value="history">
-                      <param name="ref_file" type="data" format="fasta" label="Using reference file" />
-                  </when>
-                </conditional>
-            </when>
-            <when value="No"></when>
-        </conditional>
-        <conditional name="singlePaired">
-            <param name="sPaired" type="select" label="Set Parameters for Paired-end Reads? (not recommended)">
-                <option value="No">No</option>
-                <option value="Yes">Yes</option>
-            </param>
-            <when value="No"></when>
-            <when value="Yes">
-                <param name="mean_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs"/>
-                <param name="inner_distance_std_dev" type="integer" value="20" label="Standard Deviation for Inner Distance between Mate Pairs"/>
-            </when>
-        </conditional>
-    </inputs>
-
-    <outputs>
-        <data format="tabular" name="genes_expression" label="${tool.name} on ${on_string}: gene expression" from_work_dir="genes.fpkm_tracking"/>
-        <data format="tabular" name="transcripts_expression" label="${tool.name} on ${on_string}: transcript expression" from_work_dir="isoforms.fpkm_tracking"/>
-        <data format="gtf" name="assembled_isoforms" label="${tool.name} on ${on_string}: assembled transcripts"/>
-    </outputs>
-
-    <trackster_conf/>
-    <tests>
-        <!--
-            Simple test that uses test data included with cufflinks.
-        -->
-        <test>
-            <param name="sPaired" value="No"/>
-            <param name="input" value="cufflinks_in.bam"/>
-            <param name="max_intron_len" value="300000"/>
-            <param name="min_isoform_fraction" value="0.05"/>
-            <param name="pre_mrna_fraction" value="0.05"/>
-            <param name="use_ref" value="No"/>
-            <param name="do_normalization" value="No" />
-            <param name="do_bias_correction" value="No"/>
-            <output name="genes_expression" format="tabular" lines_diff="2" file="cufflinks_out3.fpkm_tracking"/>
-            <output name="transcripts_expression" format="tabular" lines_diff="2" file="cufflinks_out2.fpkm_tracking"/>
-            <output name="assembled_isoforms" file="cufflinks_out1.gtf"/>
-        </test>
-    </tests>
-
-    <help>
-**Cufflinks Overview**
-
-Cufflinks_ assembles transcripts, estimates their abundances, and tests for differential expression and regulation in RNA-Seq samples. It accepts aligned RNA-Seq reads and assembles the alignments into a parsimonious set of transcripts. Cufflinks then estimates the relative abundances of these transcripts based on how many reads support each one.  Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
-
-.. _Cufflinks: http://cufflinks.cbcb.umd.edu/
-        
-------
-
-**Know what you are doing**
-
-.. class:: warningmark
-
-There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
-
-.. __: http://cufflinks.cbcb.umd.edu/manual.html
-
-------
-
-**Input formats**
-
-Cufflinks takes a text file of SAM alignments as input. The RNA-Seq read mapper TopHat produces output in this format, and is recommended for use with Cufflinks. However, Cufflinks will accept SAM alignments generated by any read mapper. Here's an example of an alignment Cufflinks will accept::
-
-  s6.25mer.txt-913508	16	chr1 4482736 255 14M431N11M * 0 0 \
-     CAAGATGCTAGGCAAGTCTTGGAAG IIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 XS:A:-
-	
-Note the use of the custom tag XS. This attribute, which must have a value of "+" or "-", indicates which strand the RNA that produced this read came from. While this tag can be applied to any alignment, including unspliced ones, it must be present for all spliced alignment records (those with an 'N' operation in the CIGAR string).
-
-The SAM file supplied to Cufflinks must be sorted by reference position. If you aligned your reads with TopHat, your alignments will be properly sorted already. If you used another tool, you may want to make sure they are properly sorted as follows::
-
-  sort -k 3,3 -k 4,4n hits.sam > hits.sam.sorted
-
-NOTE: Cufflinks currently only supports SAM alignments with the CIGAR match ('M') and reference skip ('N') operations. Support for the other operations, such as insertions, deletions, and clipping, will be added in the future.
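-
-To illustrate the XS requirement (a hypothetical check, not something Cufflinks performs for you), a SAM record could be validated roughly as follows::
-
-  def spliced_read_has_xs( sam_line ):
-      # Field 6 (index 5) is the CIGAR string; optional tags start at
-      # field 12 (index 11). Only spliced records ('N' in the CIGAR)
-      # are required to carry an XS:A:+ or XS:A:- attribute.
-      fields = sam_line.rstrip( '\r\n' ).split( '\t' )
-      if 'N' not in fields[5]:
-          return True
-      for tag in fields[11:]:
-          if tag.startswith( 'XS:A:' ):
-              return True
-      return False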
-
-------
-
-**Outputs**
-
-Cufflinks produces three output files:
-
-Transcripts and Genes:
-
-This GTF file contains Cufflinks' assembled isoforms. The first 8 columns are standard GTF, and the last column contains attributes, some of which are also standardized (e.g. gene_id, transcript_id). There is one GTF record per row, and each record represents either a transcript or an exon within a transcript. The columns are defined as follows::
-
-  Column number   Column name   Example     Description
-  -----------------------------------------------------
-  1               seqname       chrX        Chromosome or contig name
-  2               source        Cufflinks   The name of the program that generated this file (always 'Cufflinks')
-  3               feature       exon        The type of record (always either "transcript" or "exon").
-  4               start         77696957    The leftmost coordinate of this record (where 1 is the leftmost possible coordinate)
-  5               end           77712009    The rightmost coordinate of this record, inclusive.
-  6               score         1000        The most abundant isoform for each gene is assigned a score of 1000. Minor isoforms are scored by the ratio (minor FPKM/major FPKM)
-  7               strand        +           Cufflinks' guess for which strand the isoform came from. Always one of '+', '-', '.'
-  8               frame         .           Cufflinks does not predict where the start and stop codons (if any) are located within each transcript, so this field is not used.
-  9               attributes    See below
-  
-Each GTF record is decorated with the following attributes::
-
-  Attribute       Example       Description
-  -----------------------------------------
-  gene_id         CUFF.1        Cufflinks gene id
-  transcript_id   CUFF.1.1      Cufflinks transcript id
-  FPKM            101.267       Isoform-level relative abundance in Fragments Per Kilobase of exon model per Million mapped fragments
-  frac            0.7647        Reserved. Please ignore, as this attribute may be deprecated in the future
-  conf_lo         0.07          Lower bound of the 95% confidence interval of the abundance of this isoform, as a fraction of the isoform abundance. That is, lower bound = FPKM * (1.0 - conf_lo)
-  conf_hi         0.1102        Upper bound of the 95% confidence interval of the abundance of this isoform, as a fraction of the isoform abundance. That is, upper bound = FPKM * (1.0 + conf_hi)
-  cov             100.765       Estimate for the absolute depth of read coverage across the whole transcript
-  
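-Using the example values above, the absolute bounds work out as follows (a worked illustration only)::
-
-  lower = 101.267 * ( 1.0 - 0.07 )    # = 94.18 FPKM
-  upper = 101.267 * ( 1.0 + 0.1102 )  # = 112.43 FPKM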
-
-Transcripts only:
-  This file is simply a tab-delimited file containing one row per transcript, with columns containing the attributes above. There are a few additional attributes not in the table above, but these are reserved for debugging, and may change or disappear in the future.
-    
-Genes only:
-  This file contains gene-level coordinates and expression values.
-    
--------
-
-**Cufflinks settings**
-
-All of the options have a default value. You can change any of them. Most of the options in Cufflinks have been implemented here.
-
-------
-
-**Cufflinks parameter list**
-
-This is a list of implemented Cufflinks options::
-
-  -m INT    This is the expected (mean) inner distance between mate pairs. For example, for paired-end runs with fragments selected at 300bp, where each end is 50bp, you should set -m to be 200. The default is 45bp.
-  -s INT    The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.
-  -I INT    The maximum intron length. Cufflinks will not report transcripts with introns longer than this, and will ignore SAM alignments with REF_SKIP CIGAR operations longer than this. The default is 300,000.
-  -F 	    After calculating isoform abundance for a gene, Cufflinks filters out transcripts that it believes are very low abundance, because isoforms expressed at extremely low levels often cannot reliably be assembled, and may even be artifacts of incompletely spliced precursors of processed transcripts. This parameter is also used to filter out introns that have far fewer spliced alignments supporting them. The default is 0.05, or 5% of the most abundant isoform (the major isoform) of the gene.
-  -j        Some RNA-Seq protocols produce a significant amount of reads that originate from incompletely spliced transcripts, and these reads can confound the assembly of fully spliced mRNAs. Cufflinks uses this parameter to filter out alignments that lie within the intronic intervals implied by the spliced alignments. The minimum depth of coverage in the intronic region covered by the alignment is divided by the number of spliced reads, and if the result is lower than this parameter value, the intronic alignments are ignored. The default is 5%.
-  -G	    Tells Cufflinks to use the supplied reference annotation to estimate isoform expression. It will not assemble novel transcripts, and the program will ignore alignments not structurally compatible with any reference transcript.  
-  -N        With this option, Cufflinks excludes the contribution of the top 25 percent most highly expressed genes from the number of mapped fragments used in the FPKM denominator. This can improve robustness of differential expression calls for less abundant genes and transcripts.
-    </help>
-</tool>
--- a/tools/ngs_rna/filter_transcripts_via_tracking.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,70 +0,0 @@
-#!/usr/bin/env python
-import os, sys, tempfile
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def __main__():
-    """
-    Utility script for analyzing Cufflinks data: uses a tracking file (produced by cuffcompare) to filter a GTF file of transcripts (usually the transcripts
-    produced by cufflinks). Filtering is done by extracting transcript IDs from the tracking file and then filtering the GTF so that the output GTF contains only
-    transcripts found in the tracking file. Because a tracking file has multiple samples, a sample number is used to filter transcripts for
-    a particular sample.
-    """
-    # Read parameters.
-    tracking_file_name = sys.argv[1]
-    transcripts_file_name = sys.argv[2]
-    output_file_name = sys.argv[3]
-    sample_number = int ( sys.argv[4] )
-
-    # Open files.
-    transcripts_file = open( transcripts_file_name, 'r' )
-    output_file = open( output_file_name, 'w' )
-    
-    # Read transcript IDs from tracking file.
-    transcript_ids = {}
-    for i, line in enumerate( file( tracking_file_name ) ) :
-        # Split line into elements. Line format is 
-        # [Transfrag ID] [Locus ID] [Ref Gene ID|Ref Transcript ID] [Class code] [qJ:<gene_id>|<transcript_id>|<FMI>|<FPKM>|<conf_lo>|<conf_hi>]
-        line = line.rstrip( '\r\n' )
-        elems = line.split( '\t' )
-        
-        # Get transcript info.
-        if sample_number == 1:
-            transcript_info = elems[4]
-        elif sample_number == 2:
-            transcript_info = elems[5]
-        else:
-            # Unsupported sample number; skip this line.
-            continue
-        if not transcript_info.startswith('q'):
-            # No transcript for this sample.
-            continue
-        
-        # Get and store transcript id.
-        transcript_id = transcript_info.split('|')[1]
-        transcript_id = transcript_id.strip('"')
-        transcript_ids[transcript_id] = ""
-        
-    # Filter transcripts file using transcript_ids
-    for i, line in enumerate( file( transcripts_file_name ) ):
-        # GTF format: chrom source, name, chromStart, chromEnd, score, strand, frame, attributes.
-        elems = line.split( '\t' )
-        
-        # Get attributes.
-        attributes_list = elems[8].split(";")
-        attributes = {}
-        for name_value_pair in attributes_list:
-            # Split into name and value on the first space only, so
-            # quoted values containing spaces are preserved.
-            pair = name_value_pair.strip().split(" ", 1)
-            name = pair[0].strip()
-            if name == '' or len(pair) < 2:
-                continue
-            # Need to strip double quotes from values
-            value = pair[1].strip(" \"")
-            attributes[name] = value
-            
-        # Get element's transcript id.
-        transcript_id = attributes['transcript_id']
-        if transcript_id in transcript_ids:
-            output_file.write(line)
-        
-    # Clean up.
-    output_file.close()
-    
-if __name__ == "__main__": __main__()
--- a/tools/ngs_rna/filter_transcripts_via_tracking.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-<tool id="filter_combined_via_tracking" name="Filter Combined Transcripts" version="0.1">
-    <description>using tracking file</description>
-    <command interpreter="python">
-        filter_transcripts_via_tracking.py 
-            $tracking_file
-            $transcripts_file
-            $filtered_transcripts
-            $sample_num
-    </command>
-    <inputs>
-        <param format="gtf" name="transcripts_file" type="data" label="Cufflinks assembled transcripts" help=""/>
-        <param format="tabular" name="tracking_file" type="data" label="Cuffcompare tracking file" help=""/>
-        <param name="sample_num" type="select" label="Sample Number">
-            <option value="1">1</option>
-            <option value="2">2</option>
-        </param>
-    </inputs>
-
-    <outputs>
-        <data format="gtf" name="filtered_transcripts"/>
-    </outputs>
-
-    <tests>
-    </tests>
-
-    <help>
-        Uses a tracking file (produced by cuffcompare) to filter a GTF file of transcripts (usually the transcripts produced by
-        cufflinks). Filtering is done by extracting transcript IDs from the tracking file and then filtering the
-        GTF so that the output GTF contains only transcripts found in the tracking file. Because a tracking file has multiple
-        samples, a sample number is used to filter transcripts for a particular sample.
-    </help>
-</tool>
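The GTF side of this filter hinges on parsing the attributes column (column 9; semicolon-separated key "value" pairs). A standalone sketch of that parsing, using a hypothetical GTF line::

  # Sketch of the GTF attribute parsing done by the filter script above.
  def parse_gtf_attributes(attr_column):
      attributes = {}
      for pair in attr_column.split(';'):
          pair = pair.strip()
          if not pair:
              continue
          name, _, value = pair.partition(' ')
          attributes[name] = value.strip(' "')  # drop surrounding quotes
      return attributes

  gtf_line = ('chrX\tCufflinks\ttranscript\t1\t100\t.\t+\t.\t'
              'gene_id "CUFF.1"; transcript_id "CUFF.1.1";\n')
  attrs = parse_gtf_attributes(gtf_line.split('\t')[8])
  assert attrs['transcript_id'] == 'CUFF.1.1'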
--- a/tools/ngs_rna/tophat_color_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,683 +0,0 @@
-<tool id="tophat_color" name="Tophat for SOLiD" version="1.0.0">
-    <description>Find splice junctions using RNA-seq data</description>
-    <requirements>
-        <requirement type="package">tophat</requirement>
-    </requirements>
-    <command interpreter="python">
-        tophat_wrapper.py
-            ## Change this to accommodate the number of threads you have available.
-            --num-threads="4"
-
-            ## base- or color-space
-            --color-space
-
-            ## Provide outputs.
-            --junctions-output=$junctions
-            --hits-output=$accepted_hits
-
-            ## Handle reference file.
-            #if $refGenomeSource.genomeSource == "history":
-                --own-file=$refGenomeSource.ownFile
-            #else:
-                --indexes-path="${ filter( lambda x: str( x[0] ) == str( $refGenomeSource.index ), $__app__.tool_data_tables[ 'tophat_indexes_color' ].get_fields() )[0][-1] }"
-            #end if
-
-            ## Are reads single-end or paired?
-            --single-paired=$singlePaired.sPaired
-
-            ## First input file always required.
-            --input1=$input1
-
-            ## Set params based on whether reads are single-end or paired.
-            #if $singlePaired.sPaired == "single":
-                --settings=$singlePaired.sParams.sSettingsType
-                #if $singlePaired.sParams.sSettingsType == "full":
-                    -a $singlePaired.sParams.anchor_length
-                    -m $singlePaired.sParams.splice_mismatches
-                    -i $singlePaired.sParams.min_intron_length
-                    -I $singlePaired.sParams.max_intron_length
-                    -F $singlePaired.sParams.junction_filter
-                    -g $singlePaired.sParams.max_multihits
-                    --min-segment-intron $singlePaired.sParams.min_segment_intron
-                    --max-segment-intron $singlePaired.sParams.max_segment_intron
-                    --seg-mismatches=$singlePaired.sParams.seg_mismatches
-                    --seg-length=$singlePaired.sParams.seg_length
-                    --library-type=$singlePaired.sParams.library_type
-                    
-                    ## Indel search.
-                    #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes":
-                        --allow-indels
-                        --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length
-                        --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length
-                    #end if
-
-                    ## Supplying junctions parameters.
-                    #if $singlePaired.sParams.own_junctions.use_junctions == "Yes":
-                        #if $singlePaired.sParams.own_junctions.gene_model_ann.use_annotations == "Yes":
-                            -G $singlePaired.sParams.own_junctions.gene_model_ann.gene_annotation_model
-                        #end if
-                        #if $singlePaired.sParams.own_junctions.raw_juncs.use_juncs == "Yes":
-                            -j $singlePaired.sParams.own_junctions.raw_juncs.raw_juncs
-                        #end if
-                        ## TODO: No idea why a string cast is necessary, but it is:
-                        #if str($singlePaired.sParams.own_junctions.no_novel_juncs) == "Yes":
-                            --no-novel-juncs
-                        #end if
-                    #end if
-
-                    #if $singlePaired.sParams.closure_search.use_search == "Yes":
-                        --closure-search
-                        --min-closure-exon $singlePaired.sParams.closure_search.min_closure_exon
-                        --min-closure-intron $singlePaired.sParams.closure_search.min_closure_intron
-                        --max-closure-intron $singlePaired.sParams.closure_search.max_closure_intron
-                    #else:
-                        --no-closure-search
-                    #end if
-                    #if $singlePaired.sParams.coverage_search.use_search == "Yes":
-                        --coverage-search
-                        --min-coverage-intron $singlePaired.sParams.coverage_search.min_coverage_intron
-                        --max-coverage-intron $singlePaired.sParams.coverage_search.max_coverage_intron
-                    #else:
-                        --no-coverage-search
-                    #end if
-                    ## TODO: No idea why the type conversion is necessary, but it seems to be.
-                    #if str($singlePaired.sParams.microexon_search) == "Yes":
-                        --microexon-search
-                    #end if
-                #end if
-            #else:
-                --input2=$singlePaired.input2
-                -r $singlePaired.mate_inner_distance
-                --settings=$singlePaired.pParams.pSettingsType
-                #if $singlePaired.pParams.pSettingsType == "full":
-                    --mate-std-dev=$singlePaired.pParams.mate_std_dev
-                    -a $singlePaired.pParams.anchor_length
-                    -m $singlePaired.pParams.splice_mismatches
-                    -i $singlePaired.pParams.min_intron_length
-                    -I $singlePaired.pParams.max_intron_length
-                    -F $singlePaired.pParams.junction_filter
-                    -g $singlePaired.pParams.max_multihits
-                    --min-segment-intron $singlePaired.pParams.min_segment_intron
-                    --max-segment-intron $singlePaired.pParams.max_segment_intron
-                    --seg-mismatches=$singlePaired.pParams.seg_mismatches
-                    --seg-length=$singlePaired.pParams.seg_length
-                    --library-type=$singlePaired.pParams.library_type
-                    
-                    ## Indel search.
-                    #if $singlePaired.pParams.indel_search.allow_indel_search == "Yes":
-                        --allow-indels
-                        --max-insertion-length $singlePaired.pParams.indel_search.max_insertion_length
-                        --max-deletion-length $singlePaired.pParams.indel_search.max_deletion_length
-                    #end if
-
-                    ## Supplying junctions parameters.
-                    #if $singlePaired.pParams.own_junctions.use_junctions == "Yes":
-                        #if $singlePaired.pParams.own_junctions.gene_model_ann.use_annotations == "Yes":
-                            -G $singlePaired.pParams.own_junctions.gene_model_ann.gene_annotation_model
-                        #end if
-                        #if $singlePaired.pParams.own_junctions.raw_juncs.use_juncs == "Yes":
-                            -j $singlePaired.pParams.own_junctions.raw_juncs.raw_juncs
-                        #end if
-                        ## TODO: No idea why type cast is necessary, but it is:
-                        #if str($singlePaired.pParams.own_junctions.no_novel_juncs) == "Yes":
-                            --no-novel-juncs
-                        #end if
-                    #end if
-
-                    #if $singlePaired.pParams.closure_search.use_search == "Yes":
-                        --closure-search
-                        --min-closure-exon $singlePaired.pParams.closure_search.min_closure_exon
-                        --min-closure-intron $singlePaired.pParams.closure_search.min_closure_intron
-                        --max-closure-intron $singlePaired.pParams.closure_search.max_closure_intron
-                    #else:
-                        --no-closure-search
-                    #end if
-                    #if $singlePaired.pParams.coverage_search.use_search == "Yes":
-                        --coverage-search
-                        --min-coverage-intron $singlePaired.pParams.coverage_search.min_coverage_intron
-                        --max-coverage-intron $singlePaired.pParams.coverage_search.max_coverage_intron
-                    #else:
-                        --no-coverage-search
-                    #end if
-                    ## TODO: No idea why the type conversion is necessary, but it seems to be.
-                    #if str($singlePaired.pParams.microexon_search) == "Yes":
-                        --microexon-search
-                    #end if
-                #end if
-            #end if
-    </command>
-    <inputs>
-        <param format="fastqcssanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Color-space: Must have Sanger-scaled quality values with ASCII offset 33" />
-        <conditional name="refGenomeSource">
-          <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-            <option value="indexed">Use a built-in index</option>
-            <option value="history">Use one from the history</option>
-          </param>
-          <when value="indexed">
-            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
-              <options from_data_table="tophat_indexes_color">
-                <filter type="sort_by" column="2"/>
-                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
-              </options>
-            </param>
-          </when>
-          <when value="history">
-            <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
-          </when>  <!-- history -->
-        </conditional>  <!-- refGenomeSource -->
-        <conditional name="singlePaired">
-            <param name="sPaired" type="select" label="Is this library mate-paired?">
-              <option value="single">Single-end</option>
-              <option value="paired">Paired-end</option>
-            </param>
-            <when value="single">
-              <conditional name="sParams">
-                <param name="sSettingsType" type="select" label="TopHat settings to use" help="You can use the default settings or set custom values for any of Tophat's parameters.">
-                  <option value="preSet">Use Defaults</option>
-                  <option value="full">Full parameter list</option>
-                </param>
-                <when value="preSet" />
-                <!-- Full/advanced params. -->
-                <when value="full">
-                  <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.">
-                      <option value="fr-unstranded">FR Unstranded</option>
-                      <option value="fr-firststrand">FR First Strand</option>
-                      <option value="fr-secondstrand">FR Second Strand</option>
-                  </param>
-                  <param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." />
-                  <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" />
-                  <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." />
-                  <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." />
-                  <conditional name="indel_search">
-                      <param name="allow_indel_search" type="select" label="Allow indel search">
-                          <option value="No">No</option>
-                          <option value="Yes">Yes</option>
-                      </param>
-                      <when value="No"/>
-                      <when value="Yes">
-                         <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." />
-                         <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." />
-                      </when>
-                  </conditional>
-                  <param name="junction_filter" type="float" value="0.15" label="Minimum isoform fraction: filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" />
-                  <param name="max_multihits" type="integer" value="40" label="Maximum number of alignments to be allowed" />
-                  <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
-                  <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" />
-                  <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" />
-                  <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />
-                  
-                  <!-- Options for supplying own junctions. -->
-                  <conditional name="own_junctions">
-                      <param name="use_junctions" type="select" label="Use Own Junctions">
-                        <option value="No">No</option>
-                        <option value="Yes">Yes</option>
-                      </param>
-                      <when value="Yes">
-                          <conditional name="gene_model_ann">
-                             <param name="use_annotations" type="select" label="Use Gene Annotation Model">
-                                <option value="No">No</option>
-                                <option value="Yes">Yes</option>
-                             </param>
-                             <when value="No" />
-                             <when value="Yes">
-                               <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/>
-                             </when>
-                          </conditional>
-                          <conditional name="raw_juncs">
-                             <param name="use_juncs" type="select" label="Use Raw Junctions">
-                                <option value="No">No</option>
-                                <option value="Yes">Yes</option>
-                             </param>
-                             <when value="No" />
-                             <when value="Yes">
-                               <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive."/>
-                             </when>
-                          </conditional>
-                          <param name="no_novel_juncs" type="select" label="Only look for supplied junctions">
-                            <option value="No">No</option>
-                            <option value="Yes">Yes</option>
-                          </param>
-                      </when>
-                      <when value="No" />
-                  </conditional> <!-- /own_junctions -->
-                  
-                  <!-- Closure search. -->
-                  <conditional name="closure_search">
-                    <param name="use_search" type="select" label="Use Closure Search">
-                      <option value="No">No</option>
-                      <option value="Yes">Yes</option>
-                    </param>
-                    <when value="Yes">
-                        <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." />
-                        <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" />
-                        <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" />
-                    </when>
-                    <when value="No" />
-                  </conditional>
-                  <!-- Coverage search. -->
-                  <conditional name="coverage_search">
-                    <param name="use_search" type="select" label="Use Coverage Search">
-                        <option selected="true" value="Yes">Yes</option>
-                        <option value="No">No</option>
-                    </param>
-                    <when value="Yes">
-                        <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" />
-                        <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" />
-                    </when>
-                    <when value="No" />
-                  </conditional>     
-                  <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.">
-                    <option value="No">No</option>
-                    <option value="Yes">Yes</option>
-                  </param>
-                </when>  <!-- full -->
-              </conditional>  <!-- sParams -->
-            </when>  <!--  single -->
-            <when value="paired">
-              <param format="fastqcssanger" name="input2" type="data" label="RNA-Seq FASTQ file" help="Color-space: Must have Sanger-scaled quality values with ASCII offset 33" />
-              <param name="mate_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs" />
-              <conditional name="pParams">
-                <param name="pSettingsType" type="select" label="TopHat settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
-                  <option value="preSet">Commonly used</option>
-                  <option value="full">Full parameter list</option>
-                </param>
-                <when value="preSet" />
-                <!-- Full/advanced params. -->
-                <when value="full">
-                    <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.">
-                        <option value="fr-unstranded">FR Unstranded</option>
-                        <option value="fr-firststrand">FR First Strand</option>
-                        <option value="fr-secondstrand">FR Second Strand</option>
-                    </param>
-                    <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs"  help="The standard deviation for the distribution on inner distances between mate pairs."/>
-                  <param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." />
-                  <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" />
-                  <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." />
-                  <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." />
-                  <conditional name="indel_search">
-                      <param name="allow_indel_search" type="select" label="Allow indel search">
-                          <option value="No">No</option>
-                          <option value="Yes">Yes</option>
-                      </param>
-                      <when value="No"/>
-                      <when value="Yes">
-                         <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." />
-                         <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." />
-                      </when>
-                  </conditional>
-                  <param name="junction_filter" type="float" value="0.15" label="Minimum isoform fraction: filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" />
-                  <param name="max_multihits" type="integer" value="40" label="Maximum number of alignments to be allowed" />
-                  <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
-                  <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" />
-                  <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" />
-                  <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />
-                  <!-- Options for supplying own junctions. -->
-                  <conditional name="own_junctions">
-                      <param name="use_junctions" type="select" label="Use Own Junctions">
-                        <option value="No">No</option>
-                        <option value="Yes">Yes</option>
-                      </param>
-                      <when value="Yes">
-                          <conditional name="gene_model_ann">
-                             <param name="use_annotations" type="select" label="Use Gene Annotation Model">
-                                <option value="No">No</option>
-                                <option value="Yes">Yes</option>
-                             </param>
-                             <when value="No" />
-                             <when value="Yes">
-                               <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/>
-                             </when>
-                          </conditional>
-                          <conditional name="raw_juncs">
-                             <param name="use_juncs" type="select" label="Use Raw Junctions">
-                                <option value="No">No</option>
-                                <option value="Yes">Yes</option>
-                             </param>
-                             <when value="No" />
-                             <when value="Yes">
-                               <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive."/>
-                             </when>
-                          </conditional>
-                          <param name="no_novel_juncs" type="select" label="Only look for supplied junctions">
-                            <option value="No">No</option>
-                            <option value="Yes">Yes</option>
-                          </param>
-                      </when>
-                      <when value="No" />
-                  </conditional> <!-- /own_junctions -->
-                  
-                  <!-- Closure search. -->
-                  <conditional name="closure_search">
-                    <param name="use_search" type="select" label="Use Closure Search">
-                      <option value="No">No</option>
-                      <option value="Yes">Yes</option>
-                    </param>
-                    <when value="Yes">
-                        <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." />
-                        <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" />
-                        <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" />
-                    </when>
-                    <when value="No" />
-                  </conditional>
-                  <!-- Coverage search. -->
-                  <conditional name="coverage_search">
-                    <param name="use_search" type="select" label="Use Coverage Search">
-                        <option selected="true" value="Yes">Yes</option>
-                        <option value="No">No</option>
-                    </param>
-                    <when value="Yes">
-                        <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" />
-                        <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" />
-                    </when>
-                    <when value="No" />
-                  </conditional>
-                  <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.">
-                    <option value="No">No</option>
-                    <option value="Yes">Yes</option>
-                  </param>
-                </when>  <!-- full -->
-              </conditional>  <!-- pParams -->
-            </when>  <!-- paired -->
-        </conditional>
-    </inputs>
-
-    <outputs>
-        <data format="bed" name="insertions" label="${tool.name} on ${on_string}: insertions" from_work_dir="tophat_out/insertions.bed">
-            <filter>
-                (
-                    ( ( 'sParams' in singlePaired ) and ( 'indel_search' in singlePaired['sParams'] ) and 
-                      ( singlePaired['sParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) or 
-                    ( ( 'pParams' in singlePaired ) and ( 'indel_search' in singlePaired['pParams'] ) and 
-                      ( singlePaired['pParams']['indel_search']['allow_indel_search'] == 'Yes' ) )
-                ) 
-            </filter>
-            <actions>
-              <conditional name="refGenomeSource.genomeSource">
-                <when value="indexed">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_data_table" name="tophat_indexes_color" column="1" offset="0">
-                      <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-                      <filter type="param_value" ref="refGenomeSource.index" column="0"/>
-                    </option>
-                  </action>
-                </when>
-                <when value="history">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
-                  </action>
-                </when>
-              </conditional>
-            </actions>
-        </data>
-        <data format="bed" name="deletions" label="${tool.name} on ${on_string}: deletions" from_work_dir="tophat_out/deletions.bed">
-            <filter>
-                (
-                    ( ( 'sParams' in singlePaired ) and ( 'indel_search' in singlePaired['sParams'] ) and 
-                      ( singlePaired['sParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) or 
-                    ( ( 'pParams' in singlePaired ) and ( 'indel_search' in singlePaired['pParams'] ) and 
-                      ( singlePaired['pParams']['indel_search']['allow_indel_search'] == 'Yes' ) )
-                )
-            </filter>
-            <actions>
-              <conditional name="refGenomeSource.genomeSource">
-                <when value="indexed">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_data_table" name="tophat_indexes_color" column="1" offset="0">
-                      <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-                      <filter type="param_value" ref="refGenomeSource.index" column="0"/>
-                    </option>
-                  </action>
-                </when>
-                <when value="history">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
-                  </action>
-                </when>
-              </conditional>
-            </actions>
-        </data>
-        <data format="bed" name="junctions" label="${tool.name} on ${on_string}: splice junctions">
-            <actions>
-              <conditional name="refGenomeSource.genomeSource">
-                <when value="indexed">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_data_table" name="tophat_indexes_color" column="1" offset="0">
-                      <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-                      <filter type="param_value" ref="refGenomeSource.index" column="0"/>
-                    </option>
-                  </action>
-                </when>
-                <when value="history">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
-                  </action>
-                </when>
-              </conditional>
-            </actions>
-        </data>
-        <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits">
-            <actions>
-              <conditional name="refGenomeSource.genomeSource">
-                <when value="indexed">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_data_table" name="tophat_indexes_color" column="1" offset="0">
-                      <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-                      <filter type="param_value" ref="refGenomeSource.index" column="0"/>
-                    </option>
-                  </action>
-                </when>
-                <when value="history">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
-                  </action>
-                </when>
-              </conditional>
-            </actions>
-        </data>
-    </outputs>
-    <tests>
-        <!-- Test color-space single-end reads with user-supplied reference fasta and preset parameters -->
-        <test>
-            <!-- TopHat commands:
-            cp test-data/tophat_in5.fasta tophat_in5.fa
-            bowtie-build -C -f tophat_in5.fasta tophat_in5
-            tophat -p 1 -C tophat_in5 test-data/tophat_in4.fastqcssanger
-            Rename the files in tmp_dir appropriately
-            -->
-            <param name="input1" ftype="fastqcssanger" value="tophat_in4.fastqcssanger" />
-            <param name="genomeSource" value="history" />
-            <param name="ownFile" ftype="fasta" value="tophat_in5.fasta"/>
-            <param name="sPaired" value="single" />
-            <param name="sSettingsType" value="preSet" />
-            <output name="junctions" file="tophat_out5j.bed" />
-            <output name="accepted_hits" file="tophat_out5h.bam" compare="sim_size" />
-        </test>
-        <!-- Test color-space single-end reads with pre-built index and full parameters -->
-        <test>
-            <!-- Tophat commands:
-            tophat -p 1 -C -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +allow-indels +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intron 5000 +microexon-search partialMm9chrX_random_cs test-data/tophat_in4.fastqcssanger
-            Replace the + with double-dash
-            Rename the files in tmp_dir appropriately
-            -->
-            <param name="input1" ftype="fastqcssanger" value="tophat_in4.fastqcssanger"/>
-            <param name="genomeSource" value="indexed"/>
-            <param name="index" value="partialMm9chrX_random_cs" />
-            <param name="sPaired" value="single"/>
-            <param name="sSettingsType" value="full"/>
-            <param name="library_type" value="FR Unstranded"/>
-            <param name="anchor_length" value="8"/>
-            <param name="splice_mismatches" value="0"/>
-            <param name="min_intron_length" value="70"/>
-            <param name="max_intron_length" value="500000"/>
-            <param name="junction_filter" value="0.15"/>
-            <param name="max_multihits" value="40"/>
-            <param name="min_segment_intron" value="50" />
-            <param name="max_segment_intron" value="500000" />
-            <param name="seg_mismatches" value="2"/>
-            <param name="seg_length" value="25"/>
-            <param name="allow_indel_search" value="Yes"/>
-            <param name="max_insertion_length" value="3"/>
-            <param name="max_deletion_length" value="3"/>
-            <param name="use_junctions" value="Yes" />
-            <param name="use_annotations" value="No" />
-            <param name="use_juncs" value="No" />
-            <param name="no_novel_juncs" value="No" />
-            <param name="use_search" value="Yes" />
-            <param name="min_closure_exon" value="50" />
-            <param name="min_closure_intron" value="50" />
-            <param name="max_closure_intron" value="5000" />
-            <param name="use_search" value="Yes" />
-            <param name="min_coverage_intron" value="50" />
-            <param name="max_coverage_intron" value="20000" />
-            <param name="microexon_search" value="Yes" />
-            <output name="insertions" file="tophat_out6i.bed" />
-            <output name="deletions" file="tophat_out6d.bed" />
-            <output name="junctions" file="tophat_out6j.bed" />
-            <output name="accepted_hits" file="tophat_out6h.bam" compare="sim_size" />
-        </test>
-        <!-- Test color-space paired-end reads with pre-built index and preset parameters -->
-        <test>
-            <!-- TopHat commands:
-            tophat -C -o tmp_dir -r 50 -p 1 partialMm9chrX_random_cs test-data/tophat_in6.fastqcssanger test-data/tophat_in7.fastqcssanger
-            Rename the files in tmp_dir appropriately
-            -->
-            <param name="input1" ftype="fastqcssanger" value="tophat_in6.fastqcssanger" />
-            <param name="genomeSource" value="indexed" />
-            <param name="index" value="partialMm9chrX_random_cs" />
-            <param name="sPaired" value="paired" />
-            <param name="input2" ftype="fastqcssanger" value="tophat_in7.fastqcssanger"/>
-            <param name="mate_inner_distance" value="50"/>
-            <param name="pSettingsType" value="preSet" />
-            <output name="junctions" file="tophat_out7j.bed" />
-            <output name="accepted_hits" file="tophat_out7h.bam" compare="sim_size" />
-        </test>
-        <!-- Test color-space paired-end reads with user-supplied reference fasta and full parameters -->
-        <test>
-            <!-- TopHat commands:
-            cp test-data/tophat_in5.fasta tophat_in5.fa
-            bowtie-build -C -f tophat_in5.fa tophat_in5
-            tophat -C -o tmp_dir -r 20 -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +library-type fr-unstranded +allow-indels +coverage-search +min-coverage-intron 50 +max-coverage-intron 15000 +mate-std-dev 20 +segment-mismatches 2 +segment-length 20 +min-segment-intron 50 +max-segment-intron 500000 tophat_in5 test-data/tophat_in6.fastqcssanger test-data/tophat_in7.fastqcssanger
-            Replace the + with double-dash
-            Rename the files in tmp_dir appropriately
-            -->
-            <param name="input1" ftype="fastqcssanger" value="tophat_in6.fastqcssanger"/>
-            <param name="genomeSource" value="history"/>
-            <param name="ownFile" ftype="fasta" value="tophat_in5.fasta"/>
-            <param name="sPaired" value="paired"/>
-            <param name="input2" ftype="fastqcssanger" value="tophat_in7.fastqcssanger"/>
-            <param name="mate_inner_distance" value="20"/>
-            <param name="pSettingsType" value="full"/>
-            <param name="library_type" value="FR Unstranded"/>
-            <param name="mate_std_dev" value="20"/>
-            <param name="anchor_length" value="8"/>
-            <param name="splice_mismatches" value="0"/>
-            <param name="min_intron_length" value="70"/>
-            <param name="max_intron_length" value="500000"/>
-            <param name="junction_filter" value="0.15"/>
-            <param name="max_multihits" value="40"/>
-            <param name="min_segment_intron" value="70" />
-            <param name="max_segment_intron" value="400000" />
-            <param name="seg_mismatches" value="2"/>
-            <param name="seg_length" value="20"/>
-            <param name="allow_indel_search" value="Yes"/>
-            <param name="max_insertion_length" value="3"/>
-            <param name="max_deletion_length" value="3"/>
-            <param name="use_junctions" value="No" />
-            <param name="use_search" value="No" />
-            <param name="use_search" value="Yes" />
-            <param name="min_coverage_intron" value="50" />
-            <param name="max_coverage_intron" value="20000" />
-            <param name="microexon_search" value="No" />
-            <output name="insertions" file="tophat_out8i.bed" />
-            <output name="deletions" file="tophat_out8d.bed" />
-            <output name="junctions" file="tophat_out8j.bed" />
-            <output name="accepted_hits" file="tophat_out8h.bam" compare="sim_size" />
-        </test>
-    </tests>
-
-    <help>
-**Tophat Overview**
-
-TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie, and then analyzes the mapping results to identify splice junctions between exons. Please cite: Trapnell, C., Pachter, L. and Salzberg, S.L. TopHat: discovering splice junctions with RNA-Seq. Bioinformatics 25, 1105-1111 (2009).        
-
-.. _Tophat: http://tophat.cbcb.umd.edu/
-        
-------
-
-**Know what you are doing**
-
-.. class:: warningmark
-
-There is no such thing (yet) as an automated gearshift in splice junction identification. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
-
-.. __: http://tophat.cbcb.umd.edu/manual.html
-
-------
-
-**Input formats**
-
-Tophat accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files.
-
-------
-
-**Outputs**
-
-Tophat produces two main output files:
-
-- junctions -- A UCSC BED_ track of junctions reported by TopHat. Each junction consists of two connected BED blocks, where each block is as long as the maximal overhang of any read spanning the junction. The score is the number of alignments spanning the junction.
-- accepted_hits -- A list of read alignments in BAM_ format.
-
-.. _BED: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
-.. _BAM: http://samtools.sourceforge.net/
-
-Two other possible outputs, depending on the options you choose, are insertions and deletions, both of which are in BED format.
-
--------
-
-**Tophat settings**
-
-All of the implemented options have default values, and you can change any of them. Note that only a subset of Tophat's options is implemented in this wrapper.
-
-------
-
-**Tophat parameter list**
-
-This is a list of implemented Tophat options::
-
-
-  -r                                This is the expected (mean) inner distance between mate pairs. For example, for paired end runs with fragments
-                                    selected at 300bp, where each end is 50bp, you should set -r to be 200. There is no default, and this parameter 
-                                    is required for paired end runs.
-  --mate-std-dev INT                The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.
-  -a/--min-anchor-length INT        The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction. Note that individual spliced     
-                                    alignments may span a junction with fewer than this many bases on one side. However, every junction involved in spliced alignments is supported by at least one 
-                                    read with this many bases on each side. This must be at least 3 and the default is 8.
-  -m/--splice-mismatches INT        The maximum number of mismatches that may appear in the "anchor" region of a spliced alignment. The default is 0.
-  -i/--min-intron-length INT        The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart. The default is 70.
-  -I/--max-intron-length INT        The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read. The default is 500000.
-  -F/--min-isoform-fraction 0.0-1.0 TopHat filters out junctions supported by too few alignments. Suppose a junction spanning two exons A and B is supported by S reads, and let D be the average
-                                    depth of coverage of the more deeply covered of the two exons. If S / D is less than the minimum isoform fraction, the junction is not reported. A value of zero
-                                    disables the filter. The default is 0.15.
-  -g/--max-multihits INT            Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many 
-                                    alignments. The default is 40.
-  -G/--GTF [GTF 2.2 file]           Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping.
-  -j/--raw-juncs [juncs file]       Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive.
-  --no-novel-juncs                  Only look for junctions indicated in the supplied GFF file (ignored without -G).
-  --no-closure-search               Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default.
-  --closure-search                  Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (about or less than 50bp)
-  --no-coverage-search              Disables the coverage based search for junctions.
-  --coverage-search                 Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.
-  --microexon-search                With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.
-  --butterfly-search                TopHat will use a slower but potentially more sensitive algorithm to find junctions in addition to its standard search. Consider using this if you expect that your experiment produced a lot of reads from pre-mRNA, that fall within the introns of your transcripts.
-  --segment-mismatches              Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2.
-  --segment-length                  Each read is cut up into segments, each at least this long. These segments are mapped independently. The default is 25.
-  --min-closure-exon                During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50.
-  --min-closure-intron              The minimum intron length that may be found during closure search. The default is 50.
-  --max-closure-intron              The maximum intron length that may be found during closure search. The default is 5000.
-  --min-coverage-intron             The minimum intron length that may be found during coverage search. The default is 50.
-  --max-coverage-intron             The maximum intron length that may be found during coverage search. The default is 20000.
-  --min-segment-intron              The minimum intron length that may be found during split-segment search. The default is 50.
-  --max-segment-intron              The maximum intron length that may be found during split-segment search. The default is 500000.
-    </help>
-</tool>
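Two quick notes on the parameters documented above. The -r arithmetic is simply fragment length minus both read lengths: for 300bp fragments with 50bp ends, -r = 300 - 2*50 = 200. And the raw junctions format ([chrom] [left] [right] [+/-], tab-delimited, zero-based inclusive coordinates) is easy to sanity-check before a run; a minimal sketch (file name hypothetical)::

  # Sketch: sanity-check a raw-junctions file before passing it to -j.
  # Expected per the help above: chrom<TAB>left<TAB>right<TAB>strand,
  # zero-based coordinates, strand '+' or '-'.
  def check_raw_juncs(path):
      for n, line in enumerate(open(path), start=1):
          fields = line.rstrip('\r\n').split('\t')
          if len(fields) != 4:
              raise ValueError('line %d: expected 4 fields, got %d' % (n, len(fields)))
          chrom, left, right, strand = fields
          if strand not in ('+', '-'):
              raise ValueError('line %d: bad strand %r' % (n, strand))
          if int(left) >= int(right):
              raise ValueError('line %d: left must be < right' % n)

  check_raw_juncs('junctions.juncs')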
--- a/tools/ngs_rna/tophat_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,236 +0,0 @@
-#!/usr/bin/env python
-
-import optparse, os, shutil, subprocess, sys, tempfile, fileinput
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
-    parser.add_option( '-C', '--color-space', dest='color_space', action='store_true', help='This indicates color-space data' )
-    parser.add_option( '-J', '--junctions-output', dest='junctions_output_file', help='Junctions output file; format is BED.' )
-    parser.add_option( '-H', '--hits-output', dest='accepted_hits_output_file', help='Accepted hits output file; format is BAM.' )
-    parser.add_option( '', '--own-file', dest='own_file', help='Reference genome fasta from the history; a bowtie index will be built from it.' )
-    parser.add_option( '-D', '--indexes-path', dest='index_path', help='Indexes directory; location of .ebwt and .fa files.' )
-    parser.add_option( '-r', '--mate-inner-dist', dest='mate_inner_dist', help='This is the expected (mean) inner distance between mate pairs. \
-                                                                                For example, for paired end runs with fragments selected at 300bp, \
-                                                                                where each end is 50bp, you should set -r to be 200. There is no default, \
-                                                                                and this parameter is required for paired end runs.')
-    parser.add_option( '', '--mate-std-dev', dest='mate_std_dev', help='Standard deviation of the distribution of inner distances between mate pairs.' )
-    parser.add_option( '-a', '--min-anchor-length', dest='min_anchor_length', 
-                        help='The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction.' )
-    parser.add_option( '-m', '--splice-mismatches', dest='splice_mismatches', help='The maximum number of mismatches that can appear in the anchor region of a spliced alignment.' )
-    parser.add_option( '-i', '--min-intron-length', dest='min_intron_length', 
-                        help='The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart.' )
-    parser.add_option( '-I', '--max-intron-length', dest='max_intron_length', 
-                        help='The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read.' )
-    parser.add_option( '-F', '--junction_filter', dest='junction_filter', help='Filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)' )
-    parser.add_option( '-g', '--max_multihits', dest='max_multihits', help='Maximum number of alignments to be allowed' )
-    parser.add_option( '', '--seg-mismatches', dest='seg_mismatches', help='Number of mismatches allowed in each segment alignment for reads mapped independently' )
-    parser.add_option( '', '--seg-length', dest='seg_length', help='Minimum length of read segments' )
-    parser.add_option( '', '--library-type', dest='library_type', help='TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.' )
-    parser.add_option( '', '--allow-indels', action="store_true", help='Allow indel search. Indel search is disabled by default.' )
-    parser.add_option( '', '--max-insertion-length', dest='max_insertion_length', help='The maximum insertion length. The default is 3.' )
-    parser.add_option( '', '--max-deletion-length', dest='max_deletion_length', help='The maximum deletion length. The default is 3.' )
-
-    # Options for supplying own junctions
-    parser.add_option( '-G', '--GTF', dest='gene_model_annotations', help='Supply TopHat with a list of gene model annotations. \
-                                                                           TopHat will use the exon records in this file to build \
-                                                                           a set of known splice junctions for each gene, and will \
-                                                                           attempt to align reads to these junctions even if they \
-                                                                           would not normally be covered by the initial mapping.')
-    parser.add_option( '-j', '--raw-juncs', dest='raw_juncs', help='Supply TopHat with a list of raw junctions. Junctions are \
-                                                                    specified one per line, in a tab-delimited format. Records \
-                                                                    look like: <chrom> <left> <right> <+/->; left and right are \
-                                                                    zero-based coordinates, and specify the last character of the \
-                                                                    left sequence to be spliced to the first character of the right \
-                                                                    sequence, inclusive.')
-    parser.add_option( '', '--no-novel-juncs', action="store_true", dest='no_novel_juncs', help="Only look for junctions indicated in the \
-                                                                                            supplied GFF file. (ignored without -G)")
-    # Types of search.
-    parser.add_option( '', '--microexon-search', action="store_true", dest='microexon_search', help='With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.')
-    parser.add_option( '', '--closure-search', action="store_true", dest='closure_search', help='Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (<= 50bp)')
-    parser.add_option( '', '--no-closure-search', action="store_false", dest='closure_search' )
-    parser.add_option( '', '--coverage-search', action="store_true", dest='coverage_search', help='Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.')
-    parser.add_option( '', '--no-coverage-search', action="store_false", dest='coverage_search' )
-    parser.add_option( '', '--min-segment-intron', dest='min_segment_intron', help='Minimum intron length that may be found during split-segment search' )
-    parser.add_option( '', '--max-segment-intron', dest='max_segment_intron', help='Maximum intron length that may be found during split-segment search' )
-    parser.add_option( '', '--min-closure-exon', dest='min_closure_exon', help='Minimum length for exonic hops in potential splice graph' )
-    parser.add_option( '', '--min-closure-intron', dest='min_closure_intron', help='Minimum intron length that may be found during closure search' )
-    parser.add_option( '', '--max-closure-intron', dest='max_closure_intron', help='Maximum intron length that may be found during closure search' )
-    parser.add_option( '', '--min-coverage-intron', dest='min_coverage_intron', help='Minimum intron length that may be found during coverage search' )
-    parser.add_option( '', '--max-coverage-intron', dest='max_coverage_intron', help='Maximum intron length that may be found during coverage search' )
-
-    # Wrapper options.
-    parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
-    parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
-    parser.add_option( '', '--single-paired', dest='single_paired', help='Whether the reads are single-end or paired-end' )
-    parser.add_option( '', '--settings', dest='settings', help='Whether to run with preset defaults (preSet) or the full parameter list' )
-
-    (options, args) = parser.parse_args()
-
-    # output version # of tool
-    try:
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='tophat -v', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = open( tmp_stdout.name, 'rb' ).readline().strip()
-        if stdout:
-            sys.stdout.write( '%s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine Tophat version\n' )
-
-    # Color or base space
-    space = ''
-    if options.color_space:
-        space = '-C'
-
-    # Create bowtie index if necessary.
-    tmp_index_dir = tempfile.mkdtemp()
-    if options.own_file:
-        index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.own_file )[1].split( '.' )[:-1] ) )
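-        # The basename above strips the extension: e.g. a (hypothetical) 'genome.fa'
-        # becomes '<tmp_index_dir>/genome'.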
-        try:
-            os.link( options.own_file, index_path + '.fa' )
-        except:
-            # TopHat prefers (but doesn't require) the fasta file to be in the same directory as the index, with a .fa extension
-            pass
-        cmd_index = 'bowtie-build %s -f %s %s' % ( space, options.own_file, index_path )
-        try:
-            tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
-            tmp_stderr = open( tmp, 'wb' )
-            proc = subprocess.Popen( args=cmd_index, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
-            returncode = proc.wait()
-            tmp_stderr.close()
-            # get stderr, allowing for case where it's very large
-            tmp_stderr = open( tmp, 'rb' )
-            stderr = ''
-            buffsize = 1048576
-            try:
-                # Read in chunks; an empty chunk signals EOF. Checking the chunk
-                # directly avoids looping forever when stderr's size is an exact
-                # multiple of the buffer size.
-                while True:
-                    chunk = tmp_stderr.read( buffsize )
-                    if not chunk:
-                        break
-                    stderr += chunk
-            except OverflowError:
-                pass
-            tmp_stderr.close()
-            if returncode != 0:
-                raise Exception, stderr
-        except Exception, e:
-            if os.path.exists( tmp_index_dir ):
-                shutil.rmtree( tmp_index_dir )
-            stop_err( 'Error indexing reference sequence\n' + str( e ) )
-    else:
-        index_path = options.index_path
-
-    # Build tophat command.
-    cmd = 'tophat %s %s %s'
-    reads = options.input1
-    if options.input2:
-        reads += ' ' + options.input2
-    opts = '-p %s %s' % ( options.num_threads, space )
-    if options.single_paired == 'paired':
-        opts += ' -r %s' % options.mate_inner_dist
-    if options.settings == 'preSet':
-        cmd = cmd % ( opts, index_path, reads )
-    else:
-        try:
-            if int( options.min_anchor_length ) >= 3:
-                opts += ' -a %s' % options.min_anchor_length
-            else:
-                raise Exception, 'Minimum anchor length must be 3 or greater'
-            opts += ' -m %s' % options.splice_mismatches
-            opts += ' -i %s' % options.min_intron_length
-            opts += ' -I %s' % options.max_intron_length
-            if float( options.junction_filter ) != 0.0:
-                opts += ' -F %s' % options.junction_filter
-            opts += ' -g %s' % options.max_multihits
-            # Custom junctions options.
-            if options.gene_model_annotations:
-                opts += ' -G %s' % options.gene_model_annotations
-            if options.raw_juncs:
-                opts += ' -j %s' % options.raw_juncs
-            if options.no_novel_juncs:
-                opts += ' --no-novel-juncs'
-            if options.library_type:
-                opts += ' --library-type %s' % options.library_type
-            if options.allow_indels:
-                # Max options do not work for Tophat v1.2.0, despite documentation to the contrary.
-                opts += ' --allow-indels'
-                #opts += ' --max-insertion-length %i --max-deletion-length %i' % ( int( options.max_insertion_length ), int( options.max_deletion_length ) )
-                # need to warn user of this fact
-                sys.stdout.write( "Max insertion length and max deletion length options don't work in Tophat v1.2.0\n" )
-
-            # Search type options.
-            if options.coverage_search:
-                opts += ' --coverage-search --min-coverage-intron %s --max-coverage-intron %s' % ( options.min_coverage_intron, options.max_coverage_intron )
-            else:
-                opts += ' --no-coverage-search'
-            if options.closure_search:
-                opts += ' --closure-search --min-closure-exon %s --min-closure-intron %s --max-closure-intron %s'  % ( options.min_closure_exon, options.min_closure_intron, options.max_closure_intron ) 
-            else:
-                opts += ' --no-closure-search'
-            if options.microexon_search:
-                opts += ' --microexon-search'
-            if options.single_paired == 'paired':
-                opts += ' --mate-std-dev %s' % options.mate_std_dev
-            if options.seg_mismatches:
-                opts += ' --segment-mismatches %d' % int( options.seg_mismatches )
-            if options.seg_length:
-                opts += ' --segment-length %d' % int( options.seg_length )
-            if options.min_segment_intron:
-                opts += ' --min-segment-intron %d' % int( options.min_segment_intron )
-            if options.max_segment_intron:
-                opts += ' --max-segment-intron %d' % int( options.max_segment_intron )
-            cmd = cmd % ( opts, index_path, reads )
-        except Exception, e:
-            # Clean up temp dirs
-            if os.path.exists( tmp_index_dir ):
-                shutil.rmtree( tmp_index_dir )
-            stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
-    #print cmd
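-    # For illustration only, with hypothetical values the fully assembled command
-    # resembles:
-    #   tophat -p 4 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 --no-closure-search --no-coverage-search /path/to/index reads_1.fastq reads_2.fastq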
-
-    # Run
-    try:
-        tmp_out = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp_out, 'wb' )
-        tmp_err = tempfile.NamedTemporaryFile().name
-        tmp_stderr = open( tmp_err, 'wb' )
-        proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )
-        returncode = proc.wait()
-        tmp_stderr.close()
-        # get stderr, allowing for case where it's very large
-        tmp_stderr = open( tmp_err, 'rb' )
-        stderr = ''
-        buffsize = 1048576
-        try:
-            # Read in chunks; an empty chunk signals EOF (see note above).
-            while True:
-                chunk = tmp_stderr.read( buffsize )
-                if not chunk:
-                    break
-                stderr += chunk
-        except OverflowError:
-            pass
-        tmp_stdout.close()
-        tmp_stderr.close()
-        if returncode != 0:
-            raise Exception, stderr
-            
-        # Copy output files from tmp directory to specified files.
-        shutil.copyfile( os.path.join( "tophat_out", "junctions.bed" ), options.junctions_output_file )
-        shutil.copyfile( os.path.join( "tophat_out", "accepted_hits.bam" ), options.accepted_hits_output_file )
-
-        # TODO: look for errors in program output.
-    except Exception, e:
-        stop_err( 'Error in tophat:\n' + str( e ) ) 
-
-    # Clean up temp dirs
-    if os.path.exists( tmp_index_dir ):
-        shutil.rmtree( tmp_index_dir )
-
-if __name__=="__main__": __main__()
--- a/tools/ngs_rna/tophat_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,680 +0,0 @@
-<tool id="tophat" name="Tophat for Illumina" version="1.5.0">
-    <description>Find splice junctions using RNA-seq data</description>
-    <version_command>tophat --version</version_command>
-    <requirements>
-        <requirement type="package">tophat</requirement>
-    </requirements>
-    <command interpreter="python">
-        tophat_wrapper.py
-            ## Change this to accommodate the number of threads you have available.
-            --num-threads="4"
-
-            ## Provide outputs.
-            --junctions-output=$junctions
-            --hits-output=$accepted_hits
-
-            ## Handle reference file.
-            #if $refGenomeSource.genomeSource == "history":
-                --own-file=$refGenomeSource.ownFile
-            #else:
-                --indexes-path="${ filter( lambda x: str( x[0] ) == str( $refGenomeSource.index ), $__app__.tool_data_tables[ 'tophat_indexes' ].get_fields() )[0][-1] }"
-            #end if
-
-            ## Are reads single-end or paired?
-            --single-paired=$singlePaired.sPaired
-
-            ## First input file always required.
-            --input1=$input1
-
-            ## Set params based on whether reads are single-end or paired.
-            #if $singlePaired.sPaired == "single":
-                --settings=$singlePaired.sParams.sSettingsType
-                #if $singlePaired.sParams.sSettingsType == "full":
-                    -a $singlePaired.sParams.anchor_length
-                    -m $singlePaired.sParams.splice_mismatches
-                    -i $singlePaired.sParams.min_intron_length
-                    -I $singlePaired.sParams.max_intron_length
-                    -F $singlePaired.sParams.junction_filter
-                    -g $singlePaired.sParams.max_multihits
-                    --min-segment-intron $singlePaired.sParams.min_segment_intron
-                    --max-segment-intron $singlePaired.sParams.max_segment_intron
-                    --seg-mismatches=$singlePaired.sParams.seg_mismatches
-                    --seg-length=$singlePaired.sParams.seg_length
-                    --library-type=$singlePaired.sParams.library_type
-                    
-                    ## Indel search.
-                    #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes":
-                        --allow-indels
-                        --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length
-                        --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length
-                    #end if
-
-                    ## Supplying junctions parameters.
-                    #if $singlePaired.sParams.own_junctions.use_junctions == "Yes":
-                        #if $singlePaired.sParams.own_junctions.gene_model_ann.use_annotations == "Yes":
-                            -G $singlePaired.sParams.own_junctions.gene_model_ann.gene_annotation_model
-                        #end if
-                        #if $singlePaired.sParams.own_junctions.raw_juncs.use_juncs == "Yes":
-                            -j $singlePaired.sParams.own_junctions.raw_juncs.raw_juncs
-                        #end if
-                        ## TODO: No idea why a string cast is necessary, but it is:
-                        #if str($singlePaired.sParams.own_junctions.no_novel_juncs) == "Yes":
-                            --no-novel-juncs
-                        #end if
-                    #end if
-
-                    #if $singlePaired.sParams.closure_search.use_search == "Yes":
-                        --closure-search
-                        --min-closure-exon $singlePaired.sParams.closure_search.min_closure_exon
-                        --min-closure-intron $singlePaired.sParams.closure_search.min_closure_intron
-                        --max-closure-intron $singlePaired.sParams.closure_search.max_closure_intron
-                    #else:
-                        --no-closure-search
-                    #end if
-                    #if $singlePaired.sParams.coverage_search.use_search == "Yes":
-                        --coverage-search
-                        --min-coverage-intron $singlePaired.sParams.coverage_search.min_coverage_intron
-                        --max-coverage-intron $singlePaired.sParams.coverage_search.max_coverage_intron
-                    #else:
-                        --no-coverage-search
-                    #end if
-                    ## TODO: No idea why the type conversion is necessary, but it seems to be.
-                    #if str($singlePaired.sParams.microexon_search) == "Yes":
-                        --microexon-search
-                    #end if
-                #end if
-            #else:
-                --input2=$singlePaired.input2
-                -r $singlePaired.mate_inner_distance
-                --settings=$singlePaired.pParams.pSettingsType
-                #if $singlePaired.pParams.pSettingsType == "full":
-                    --mate-std-dev=$singlePaired.pParams.mate_std_dev
-                    -a $singlePaired.pParams.anchor_length
-                    -m $singlePaired.pParams.splice_mismatches
-                    -i $singlePaired.pParams.min_intron_length
-                    -I $singlePaired.pParams.max_intron_length
-                    -F $singlePaired.pParams.junction_filter
-                    -g $singlePaired.pParams.max_multihits
-                    --min-segment-intron $singlePaired.pParams.min_segment_intron
-                    --max-segment-intron $singlePaired.pParams.max_segment_intron
-                    --seg-mismatches=$singlePaired.pParams.seg_mismatches
-                    --seg-length=$singlePaired.pParams.seg_length
-                    --library-type=$singlePaired.pParams.library_type
-                    
-                    ## Indel search.
-                    #if $singlePaired.pParams.indel_search.allow_indel_search == "Yes":
-                        --allow-indels
-                        --max-insertion-length $singlePaired.pParams.indel_search.max_insertion_length
-                        --max-deletion-length $singlePaired.pParams.indel_search.max_deletion_length
-                    #end if
-
-                    ## Supplying junctions parameters.
-                    #if $singlePaired.pParams.own_junctions.use_junctions == "Yes":
-                        #if $singlePaired.pParams.own_junctions.gene_model_ann.use_annotations == "Yes":
-                            -G $singlePaired.pParams.own_junctions.gene_model_ann.gene_annotation_model
-                        #end if
-                        #if $singlePaired.pParams.own_junctions.raw_juncs.use_juncs == "Yes":
-                            -j $singlePaired.pParams.own_junctions.raw_juncs.raw_juncs
-                        #end if
-                        ## TODO: No idea why type cast is necessary, but it is:
-                        #if str($singlePaired.pParams.own_junctions.no_novel_juncs) == "Yes":
-                            --no-novel-juncs
-                        #end if
-                    #end if
-
-                    #if $singlePaired.pParams.closure_search.use_search == "Yes":
-                        --closure-search
-                        --min-closure-exon $singlePaired.pParams.closure_search.min_closure_exon
-                        --min-closure-intron $singlePaired.pParams.closure_search.min_closure_intron
-                        --max-closure-intron $singlePaired.pParams.closure_search.max_closure_intron
-                    #else:
-                        --no-closure-search
-                    #end if
-                    #if $singlePaired.pParams.coverage_search.use_search == "Yes":
-                        --coverage-search
-                        --min-coverage-intron $singlePaired.pParams.coverage_search.min_coverage_intron
-                        --max-coverage-intron $singlePaired.pParams.coverage_search.max_coverage_intron
-                    #else:
-                        --no-coverage-search
-                    #end if
-                    ## TODO: No idea why the type conversion is necessary, but it seems to be.
-                    #if str($singlePaired.pParams.microexon_search) == "Yes":
-                        --microexon-search
-                    #end if
-                #end if
-            #end if
-    </command>
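-    <!-- The indexes-path expression above scans the 'tophat_indexes' data table for
-         the row whose first column equals the chosen index and takes that row's last
-         field (the index path). A minimal standalone sketch of the same lookup, with
-         hypothetical rows:
-             rows = [ ( 'hg18', 'hg18', 'Human (hg18)', '/data/hg18/hg18' ),
-                      ( 'mm9', 'mm9', 'Mouse (mm9)', '/data/mm9/mm9' ) ]
-             index_path = filter( lambda x: str( x[0] ) == 'mm9', rows )[0][-1]
-             # index_path is now '/data/mm9/mm9'
-    -->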
-    <inputs>
-        <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
-        <conditional name="refGenomeSource">
-          <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-            <option value="indexed">Use a built-in index</option>
-            <option value="history">Use one from the history</option>
-          </param>
-          <when value="indexed">
-            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
-              <options from_data_table="tophat_indexes">
-                <filter type="sort_by" column="2"/>
-                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
-              </options>
-            </param>
-          </when>
-          <when value="history">
-            <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
-          </when>  <!-- history -->
-        </conditional>  <!-- refGenomeSource -->
-        <conditional name="singlePaired">
-            <param name="sPaired" type="select" label="Is this library mate-paired?">
-              <option value="single">Single-end</option>
-              <option value="paired">Paired-end</option>
-            </param>
-            <when value="single">
-              <conditional name="sParams">
-                <param name="sSettingsType" type="select" label="TopHat settings to use" help="You can use the default settings or set custom values for any of Tophat's parameters.">
-                  <option value="preSet">Use Defaults</option>
-                  <option value="full">Full parameter list</option>
-                </param>
-                <when value="preSet" />
-                <!-- Full/advanced params. -->
-                <when value="full">
-                  <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.">
-                      <option value="fr-unstranded">FR Unstranded</option>
-                      <option value="fr-firststrand">FR First Strand</option>
-                      <option value="fr-secondstrand">FR Second Strand</option>
-                  </param>
-                  <param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." />
-                  <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" />
-                  <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." />
-                  <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." />
-                  <conditional name="indel_search">
-                      <param name="allow_indel_search" type="select" label="Allow indel search">
-                          <option value="No">No</option>
-                          <option value="Yes">Yes</option>
-                      </param>
-                      <when value="No"/>
-                      <when value="Yes">
-                         <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." />
-                         <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." />
-                      </when>
-                  </conditional>
-                  <param name="junction_filter" type="float" value="0.15" label="Minimum isoform fraction: filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" />
-                  <param name="max_multihits" type="integer" value="40" label="Maximum number of alignments to be allowed" />
-                  <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
-                  <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" />
-                  <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" />
-                  <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />
-                  
-                  <!-- Options for supplying own junctions. -->
-                  <conditional name="own_junctions">
-                      <param name="use_junctions" type="select" label="Use Own Junctions">
-                        <option value="No">No</option>
-                        <option value="Yes">Yes</option>
-                      </param>
-                      <when value="Yes">
-                          <conditional name="gene_model_ann">
-                             <param name="use_annotations" type="select" label="Use Gene Annotation Model">
-                                <option value="No">No</option>
-                                <option value="Yes">Yes</option>
-                             </param>
-                             <when value="No" />
-                             <when value="Yes">
-                               <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/>
-                             </when>
-                          </conditional>
-                          <conditional name="raw_juncs">
-                             <param name="use_juncs" type="select" label="Use Raw Junctions">
-                                <option value="No">No</option>
-                                <option value="Yes">Yes</option>
-                             </param>
-                             <when value="No" />
-                             <when value="Yes">
-                               <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], where left and right are zero-based coordinates, and specify the last character of the left sequence to be spliced to the first character of the right sequence, inclusive."/>
-                             </when>
-                          </conditional>
-                          <param name="no_novel_juncs" type="select" label="Only look for supplied junctions">
-                            <option value="No">No</option>
-                            <option value="Yes">Yes</option>
-                          </param>
-                      </when>
-                      <when value="No" />
-                  </conditional> <!-- /own_junctions -->
-                  
-                  <!-- Closure search. -->
-                  <conditional name="closure_search">
-                    <param name="use_search" type="select" label="Use Closure Search">
-                      <option value="No">No</option>
-                      <option value="Yes">Yes</option>
-                    </param>
-                    <when value="Yes">
-                        <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." />
-                        <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" />
-                        <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" />
-                    </when>
-                    <when value="No" />
-                  </conditional>
-                  <!-- Coverage search. -->
-                  <conditional name="coverage_search">
-                    <param name="use_search" type="select" label="Use Coverage Search">
-                        <option selected="true" value="Yes">Yes</option>
-                        <option value="No">No</option>
-                    </param>
-                    <when value="Yes">
-                        <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" />
-                        <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" />
-                    </when>
-                    <when value="No" />
-                  </conditional>
-                  <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.">
-                    <option value="No">No</option>
-                    <option value="Yes">Yes</option>
-                  </param>
-                </when>  <!-- full -->
-              </conditional>  <!-- sParams -->
-            </when>  <!--  single -->
-            <when value="paired">
-              <param format="fastqsanger" name="input2" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
-              <param name="mate_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs" />
-              <conditional name="pParams">
-                <param name="pSettingsType" type="select" label="TopHat settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
-                  <option value="preSet">Commonly used</option>
-                  <option value="full">Full parameter list</option>
-                </param>
-                <when value="preSet" />
-                <!-- Full/advanced params. -->
-                <when value="full">
-                    <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.">
-                        <option value="fr-unstranded">FR Unstranded</option>
-                        <option value="fr-firststrand">FR First Strand</option>
-                        <option value="fr-secondstrand">FR Second Strand</option>
-                    </param>
-                    <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs"  help="The standard deviation for the distribution on inner distances between mate pairs."/>
-                  <param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." />
-                  <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" />
-                  <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." />
-                  <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." />
-                  <conditional name="indel_search">
-                      <param name="allow_indel_search" type="select" label="Allow indel search">
-                          <option value="No">No</option>
-                          <option value="Yes">Yes</option>
-                      </param>
-                      <when value="No"/>
-                      <when value="Yes">
-                         <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." />
-                         <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." />
-                      </when>
-                  </conditional>
-                  <param name="junction_filter" type="float" value="0.15" label="Minimum isoform fraction: filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" />
-                  <param name="max_multihits" type="integer" value="40" label="Maximum number of alignments to be allowed" />
-                  <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
-                  <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" />
-                  <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" />
-                  <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />
-                  <!-- Options for supplying own junctions. -->
-                  <conditional name="own_junctions">
-                      <param name="use_junctions" type="select" label="Use Own Junctions">
-                        <option value="No">No</option>
-                        <option value="Yes">Yes</option>
-                      </param>
-                      <when value="Yes">
-                          <conditional name="gene_model_ann">
-                             <param name="use_annotations" type="select" label="Use Gene Annotation Model">
-                                <option value="No">No</option>
-                                <option value="Yes">Yes</option>
-                             </param>
-                             <when value="No" />
-                             <when value="Yes">
-                               <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/>
-                             </when>
-                          </conditional>
-                          <conditional name="raw_juncs">
-                             <param name="use_juncs" type="select" label="Use Raw Junctions">
-                                <option value="No">No</option>
-                                <option value="Yes">Yes</option>
-                             </param>
-                             <when value="No" />
-                             <when value="Yes">
-                               <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], where left and right are zero-based coordinates, and specify the last character of the left sequence to be spliced to the first character of the right sequence, inclusive."/>
-                             </when>
-                          </conditional>
-                          <param name="no_novel_juncs" type="select" label="Only look for supplied junctions">
-                            <option value="No">No</option>
-                            <option value="Yes">Yes</option>
-                          </param>
-                      </when>
-                      <when value="No" />
-                  </conditional> <!-- /own_junctions -->
-                  
-                  <!-- Closure search. -->
-                  <conditional name="closure_search">
-                    <param name="use_search" type="select" label="Use Closure Search">
-                      <option value="No">No</option>
-                      <option value="Yes">Yes</option>
-                    </param>
-                    <when value="Yes">
-                        <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." />
-                        <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" />
-                        <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" />
-                    </when>
-                    <when value="No" />
-                  </conditional>
-                  <!-- Coverage search. -->
-                  <conditional name="coverage_search">
-                    <param name="use_search" type="select" label="Use Coverage Search">
-                        <option selected="true" value="Yes">Yes</option>
-                        <option value="No">No</option>
-                    </param>
-                    <when value="Yes">
-                        <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" />
-                        <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" />
-                    </when>
-                    <when value="No" />
-                  </conditional>
-                  <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.">
-                    <option value="No">No</option>
-                    <option value="Yes">Yes</option>
-                  </param>
-                </when>  <!-- full -->
-              </conditional>  <!-- pParams -->
-            </when>  <!-- paired -->
-        </conditional>
-    </inputs>
-
-    <outputs>
-        <data format="bed" name="insertions" label="${tool.name} on ${on_string}: insertions" from_work_dir="tophat_out/insertions.bed">
-            <filter>
-                (
-                    ( ( 'sParams' in singlePaired ) and ( 'indel_search' in singlePaired['sParams'] ) and 
-                      ( singlePaired['sParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) or 
-                    ( ( 'pParams' in singlePaired ) and ( 'indel_search' in singlePaired['pParams'] ) and 
-                      ( singlePaired['pParams']['indel_search']['allow_indel_search'] == 'Yes' ) )
-                )
-            </filter>
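-            <!-- The filter above emits the insertions dataset only when indel search
-                 was enabled in whichever (single or paired) parameter branch was used. -->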
-            <actions>
-              <conditional name="refGenomeSource.genomeSource">
-                <when value="indexed">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_data_table" name="tophat_indexes" column="1" offset="0">
-                      <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-                      <filter type="param_value" ref="refGenomeSource.index" column="0"/>
-                    </option>
-                  </action>
-                </when>
-                <when value="history">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
-                  </action>
-                </when>
-              </conditional>
-            </actions>
-        </data>
-        <data format="bed" name="deletions" label="${tool.name} on ${on_string}: deletions" from_work_dir="tophat_out/deletions.bed">
-            <filter>
-                (
-                    ( ( 'sParams' in singlePaired ) and ( 'indel_search' in singlePaired['sParams'] ) and 
-                      ( singlePaired['sParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) or 
-                    ( ( 'pParams' in singlePaired ) and ( 'indel_search' in singlePaired['pParams'] ) and 
-                      ( singlePaired['pParams']['indel_search']['allow_indel_search'] == 'Yes' ) )
-                )
-            </filter>
-            <actions>
-              <conditional name="refGenomeSource.genomeSource">
-                <when value="indexed">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_data_table" name="tophat_indexes" column="1" offset="0">
-                      <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-                      <filter type="param_value" ref="refGenomeSource.index" column="0"/>
-                    </option>
-                  </action>
-                </when>
-                <when value="history">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
-                  </action>
-                </when>
-              </conditional>
-            </actions>
-        </data>
-        <data format="bed" name="junctions" label="${tool.name} on ${on_string}: splice junctions">
-            <actions>
-              <conditional name="refGenomeSource.genomeSource">
-                <when value="indexed">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_data_table" name="tophat_indexes" column="1" offset="0">
-                      <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-                      <filter type="param_value" ref="refGenomeSource.index" column="0"/>
-                    </option>
-                  </action>
-                </when>
-                <when value="history">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
-                  </action>
-                </when>
-              </conditional>
-            </actions>
-        </data>
-        <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits">
-            <actions>
-              <conditional name="refGenomeSource.genomeSource">
-                <when value="indexed">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_data_table" name="tophat_indexes" column="1" offset="0">
-                      <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-                      <filter type="param_value" ref="refGenomeSource.index" column="0"/>
-                    </option>
-                  </action>
-                </when>
-                <when value="history">
-                  <action type="metadata" name="dbkey">
-                    <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
-                  </action>
-                </when>
-              </conditional>
-            </actions>
-        </data>
-    </outputs>
-
-    <tests>
-        <!-- Test base-space single-end reads with pre-built index and preset parameters -->
-        <test>
-            <!-- TopHat commands:
-            tophat -o tmp_dir -p 1 tophat_in1 test-data/tophat_in2.fastqsanger
-            Rename the files in tmp_dir appropriately
-            -->
-            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" />
-            <param name="genomeSource" value="indexed" />
-            <param name="index" value="tophat_test" />
-            <param name="sPaired" value="single" />
-            <param name="sSettingsType" value="preSet" />
-            <output name="junctions" file="tophat_out1j.bed" />
-            <output name="accepted_hits" file="tophat_out1h.bam" compare="sim_size" />
-        </test>
-        <!-- Test using base-space test data: paired-end reads, index from history. -->
-        <test>
-            <!-- TopHat commands:
-            bowtie-build -f test-data/tophat_in1.fasta tophat_in1
-            tophat -o tmp_dir -p 1 -r 20 tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger
-            Rename the files in tmp_dir appropriately
-            -->
-            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" />
-            <param name="genomeSource" value="history" />
-            <param name="ownFile" ftype="fasta" value="tophat_in1.fasta" />
-            <param name="sPaired" value="paired" />
-            <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger" />
-            <param name="mate_inner_distance" value="20" />
-            <param name="pSettingsType" value="preSet" />
-            <output name="junctions" file="tophat_out2j.bed" />
-            <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" />
-        </test>
-        <!-- Test base-space single-end reads with user-supplied reference fasta and full parameters -->
-        <test>
-            <!-- Tophat commands:
-            bowtie-build -f test-data/tophat_in1.fasta tophat_in1
-            tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +allow-indels +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intron 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger
-            Replace the + with double-dash
-            Rename the files in tmp_dir appropriately
-            -->
-            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
-            <param name="genomeSource" value="history"/>
-            <param name="ownFile" value="tophat_in1.fasta"/>
-            <param name="sPaired" value="single"/>
-            <param name="sSettingsType" value="full"/>
-            <param name="library_type" value="FR Unstranded"/>
-            <param name="anchor_length" value="8"/>
-            <param name="splice_mismatches" value="0"/>
-            <param name="min_intron_length" value="70"/>
-            <param name="max_intron_length" value="500000"/>
-            <param name="junction_filter" value="0.15"/>
-            <param name="max_multihits" value="40"/>
-            <param name="min_segment_intron" value="50" />
-            <param name="max_segment_intron" value="500000" />
-            <param name="seg_mismatches" value="2"/>
-            <param name="seg_length" value="25"/>
-            <param name="allow_indel_search" value="Yes"/>
-            <param name="max_insertion_length" value="3"/>
-            <param name="max_deletion_length" value="3"/>
-            <param name="use_junctions" value="Yes" />
-            <param name="use_annotations" value="No" />
-            <param name="use_juncs" value="No" />
-            <param name="no_novel_juncs" value="No" />
-            <param name="use_search" value="Yes" />
-            <param name="min_closure_exon" value="50" />
-            <param name="min_closure_intron" value="50" />
-            <param name="max_closure_intron" value="5000" />
-            <param name="use_search" value="Yes" />
-            <param name="min_coverage_intron" value="50" />
-            <param name="max_coverage_intron" value="20000" />
-            <param name="microexon_search" value="Yes" />
-            <output name="insertions" file="tophat_out3i.bed" />
-            <output name="deletions" file="tophat_out3d.bed" />
-            <output name="junctions" file="tophat_out3j.bed" />
-            <output name="accepted_hits" file="tophat_out3h.bam" compare="sim_size" />
-        </test>
-        <!-- Test base-space paired-end reads with user-supplied reference fasta and full parameters -->
-        <test>
-            <!-- TopHat commands:
-            tophat -o tmp_dir -r 20 -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intron 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger
-            Replace the + with double-dash
-            Rename the files in tmp_dir appropriately
-            -->
-            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
-            <param name="genomeSource" value="indexed"/>
-            <param name="index" value="tophat_test"/>
-            <param name="sPaired" value="paired"/>
-            <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/>
-            <param name="mate_inner_distance" value="20"/>
-            <param name="pSettingsType" value="full"/>
-            <param name="library_type" value="FR Unstranded"/>
-            <param name="mate_std_dev" value="20"/>
-            <param name="anchor_length" value="8"/>
-            <param name="splice_mismatches" value="0"/>
-            <param name="min_intron_length" value="70"/>
-            <param name="max_intron_length" value="500000"/>
-            <param name="junction_filter" value="0.15"/>
-            <param name="max_multihits" value="40"/>
-            <param name="min_segment_intron" value="50" />
-            <param name="max_segment_intron" value="500000" />
-            <param name="seg_mismatches" value="2"/>
-            <param name="seg_length" value="25"/>
-            <param name="allow_indel_search" value="No"/>
-            <param name="use_junctions" value="Yes" />
-            <param name="use_annotations" value="No" />
-            <param name="use_juncs" value="No" />
-            <param name="no_novel_juncs" value="No" />
-            <param name="use_search" value="Yes" />
-            <param name="min_closure_exon" value="50" />
-            <param name="min_closure_intron" value="50" />
-            <param name="max_closure_intron" value="5000" />
-            <param name="use_search" value="Yes" />
-            <param name="min_coverage_intron" value="50" />
-            <param name="max_coverage_intron" value="20000" />
-            <param name="microexon_search" value="Yes" />
-            <output name="junctions" file="tophat_out4j.bed" />
-            <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" />
-        </test>
-    </tests>
-
-    <help>
-**Tophat Overview**
-
-TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie, and then analyzes the mapping results to identify splice junctions between exons. Please cite: Trapnell, C., Pachter, L. and Salzberg, S.L. TopHat: discovering splice junctions with RNA-Seq. Bioinformatics 25, 1105-1111 (2009).        
-
-.. _Tophat: http://tophat.cbcb.umd.edu/
-        
-------
-
-**Know what you are doing**
-
-.. class:: warningmark
-
-There is no such thing (yet) as an automated gearshift in splice junction identification. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
-
-.. __: http://tophat.cbcb.umd.edu/manual.html
-
-------
-
-**Input formats**
-
-Tophat accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files.
-
-------
-
-**Outputs**
-
-Tophat produces two output files:
-
-- junctions -- A UCSC BED_ track of junctions reported by TopHat. Each junction consists of two connected BED blocks, where each block is as long as the maximal overhang of any read spanning the junction. The score is the number of alignments spanning the junction.
-- accepted_hits -- A list of read alignments in BAM_ format.
-
-.. _BED: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
-.. _BAM: http://samtools.sourceforge.net/
-
-Two other possible outputs, depending on the options you choose, are insertions and deletions, both of which are in BED format.
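-
-For illustration, a single junctions record with hypothetical values looks like this (columns are tab-delimited in the actual file); the two block sizes (50,50) are the maximal overhangs and the score (5) is the number of spanning alignments::
-
-  chr1  100  400  JUNC00000001  5  +  100  400  255,0,0  2  50,50  0,250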
-
--------
-
-**Tophat settings**
-
-A subset of Tophat's options is implemented here. Every option has a default value, and you can change any of them.
-
-------
-
-**Tophat parameter list**
-
-This is a list of implemented Tophat options::
-
-  -r                                This is the expected (mean) inner distance between mate pairs. For example, for paired-end runs with fragments 
-                                    selected at 300bp, where each end is 50bp, you should set -r to be 200. There is no default, and this parameter 
-                                    is required for paired-end runs.
-  --mate-std-dev INT                The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.
-  -a/--min-anchor-length INT        The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction. Note that individual spliced     
-                                    alignments may span a junction with fewer than this many bases on one side. However, every junction involved in spliced alignments is supported by at least one 
-                                    read with this many bases on each side. This must be at least 3 and the default is 8.
-  -m/--splice-mismatches INT        The maximum number of mismatches that may appear in the "anchor" region of a spliced alignment. The default is 0.
-  -i/--min-intron-length INT        The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart. The default is 70.
-  -I/--max-intron-length INT        The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read. The default is 500000.
-  -F/--min-isoform-fraction 0.0-1.0 TopHat filters out junctions supported by too few alignments. Suppose a junction spanning two exons is supported by S reads. Let the average depth of coverage of 
-                                    exon A be D, and assume that it is higher than that of exon B. If S / D is less than the minimum isoform fraction, the junction is not reported. A value of zero 
-                                    disables the filter. The default is 0.15. A worked example follows this list.
-  -g/--max-multihits INT            Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many 
-                                    alignments. The default is 40.
-  -G/--GTF [GTF 2.2 file]           Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping.
-  -j/--raw-juncs [juncs file]       Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], where left and right are zero-based coordinates, and specify the last character of the left sequence to be spliced to the first character of the right sequence, inclusive.
-  --no-novel-juncs                  Only look for junctions indicated in the supplied GFF file. (ignored without -G)
-  --no-closure-search               Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default.
-  --closure-search                  Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (about or less than 50bp)
-  --no-coverage-search              Disables the coverage based search for junctions.
-  --coverage-search                 Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.
-  --microexon-search                With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.
-  --butterfly-search                TopHat will use a slower but potentially more sensitive algorithm to find junctions in addition to its standard search. Consider using this if you expect that your experiment produced a lot of reads from pre-mRNA that fall within the introns of your transcripts.
-  --segment-mismatches              Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2.
-  --segment-length                  Each read is cut up into segments, each at least this long. These segments are mapped independently. The default is 25.
-  --min-closure-exon                During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50.
-  --min-closure-intron              The minimum intron length that may be found during closure search. The default is 50.
-  --max-closure-intron              The maximum intron length that may be found during closure search. The default is 5000.
-  --min-coverage-intron             The minimum intron length that may be found during coverage search. The default is 50.
-  --max-coverage-intron             The maximum intron length that may be found during coverage search. The default is 20000.
-  --min-segment-intron              The minimum intron length that may be found during split-segment search. The default is 50.
-  --max-segment-intron              The maximum intron length that may be found during split-segment search. The default is 500000.
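-
-As a worked example of the -F/--min-isoform-fraction filter with hypothetical numbers: a junction supported by S = 6 reads, whose higher-coverage flanking exon has an average depth of D = 50, gives S / D = 0.12. That is below the default of 0.15, so the junction would not be reported::
-
-  S = 6; D = 50
-  S / float( D )   # 0.12, below the 0.15 default, so the junction is filtered out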
-    </help>
-</tool>
--- a/tools/ngs_rna/trinity_all.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-<tool id="trinity_all" name="Trinity" version="0.0.1">
-    <!-- Run all steps of Trinity (Inchworm, Chrysalis, and Butterfly) in a single step. Wrapper status is alpha. -->
-    <description>De novo assembly of RNA-Seq data</description>
-    <requirements>
-        <requirement type="package">trinity</requirement>
-    </requirements>
-    <command>
-        Trinity.pl 
-        
-        ## Additional parameters.
-        #if $additional_params.use_additional == "yes":
-            --min_contig_length $additional_params.min_contig_length
-        #end if
-        
-        ## Inputs.
-        #if $inputs.paired_or_single == "paired":
-            --left $inputs.left_input --right $inputs.right_input
-            #if  $inputs.left_input.ext == 'fa':
-                --seqType fa
-            #else:
-                --seqType fq
-            #end if
-            #if $inputs.library_type != 'None':
-                --SS_lib_type $inputs.library_type
-            #end if
-        #else:
-            --single $inputs.input
-            #if  $inputs.input.ext == 'fa':
-                --seqType fa
-            #else:
-                --seqType fq
-            #end if
-            #if $inputs.library_type != 'None':
-                --SS_lib_type $inputs.library_type
-            #end if
-        #end if
-        
-        ## CPU and butterfly options.
-        --CPU 4 --run_butterfly --bfly_opts "-V 10 --stderr" > $trinity_log 2>&amp;1 
-    </command>
-    <inputs>
-        <conditional name="inputs">
-            <param name="paired_or_single" type="select" label="Paired or Single-end data?">
-                <option value="paired">Paired</option>
-                <option value="single">Single</option>
-            </param>
-            <when value="paired">
-                <param format="fasta,fastq" name="left_input" type="data" label="Left/Forward strand reads" help=""/>
-                <param format="fasta,fastq" name="right_input" type="data" label="Right/Reverse strand reads" help=""/>
-                <param name="library_type" type="select" label="Strand-specific Library Type">
-                    <option value="None">None</option>
-                    <option value="FR">FR</option>
-                    <option value="RF">RF</option>
-                </param>
-                <param name="paired_fragment_length" type="integer" value="300" min="1" label="Paired Fragment Length" help="Maximum length expected between fragment pairs"/>
-            </when>
-            <when value="single">
-                <param format="fasta,fastq" name="input" type="data" label="Single-end reads" help=""/>
-                <param name="library_type" type="select" label="Strand-specific Library Type">
-                    <option value="None">None</option>
-                    <option value="F">F</option>
-                    <option value="R">R</option>
-                </param>
-            </when>
-        </conditional>
-        <conditional name="additional_params">
-            <param name="use_additional" type="select" label="Use Additional Params?">
-                <option value="no">No</option>
-                <option value="yes">Yes</option>
-            </param>
-            <when value="no">
-            </when>
-            <when value="yes">            
-                <param name="min_contig_length" type="integer" value="200" min="1" label="Minimum Contig Length" help=""/>
-            </when>
-        </conditional>
-    </inputs>
-    <outputs>
-        <data format="txt" name="trinity_log" label="${tool.name} on ${on_string}: log" />
-        <data format="fasta" name="assembled_transcripts" label="${tool.name} on ${on_string}: Assembled Transcripts" from_work_dir="trinity_out_dir/Trinity.fasta"/>
-    </outputs>
-    <tests>
-    </tests>
-    <help>
-        Trinity is a de novo transcript assembler that uses RNA-seq data as input. This tool runs all Trinity_ commands--Inchworm, Chrysalis, and Butterfly--in a single pass.
-        
-        .. _Trinity: http://trinityrnaseq.sourceforge.net
-    </help>
-</tool>
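
For readers unfamiliar with Cheetah, the &lt;command&gt; template above assembles a single Trinity.pl invocation from the form state. The Python sketch below mirrors that conditional logic for the paired-end branch; the function name, file names, and parameter values are illustrative assumptions, not part of the wrapper::

    # Sketch of the command line the Cheetah template would emit for
    # paired-end input; paths and the contig-length value are made up.
    def trinity_cmd(left, right, seq_type="fq", lib_type=None, min_contig=None):
        parts = ["Trinity.pl"]
        if min_contig is not None:                  # additional_params branch
            parts.append("--min_contig_length %d" % min_contig)
        parts.append("--left %s --right %s" % (left, right))
        parts.append("--seqType %s" % seq_type)     # 'fa' for FASTA, 'fq' for FASTQ
        if lib_type:                                # strand-specific library (FR/RF)
            parts.append("--SS_lib_type %s" % lib_type)
        parts.append('--CPU 4 --run_butterfly --bfly_opts "-V 10 --stderr"')
        return " ".join(parts)

    print(trinity_cmd("left.fq", "right.fq", lib_type="RF", min_contig=200))
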
--- a/tools/ngs_simulation/ngs_simulation.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,280 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Runs Ben's simulation.
-
-usage: %prog [options]
-   -i, --input=i: Input genome (FASTA format)
-   -g, --genome=g: If built-in, the genome being used
-   -l, --read_len=l: Read length
-   -c, --avg_coverage=c: Average coverage
-   -e, --error_rate=e: Error rate (0-1)
-   -n, --num_sims=n: Number of simulations to run
-   -p, --polymorphism=p: Frequency/ies for minor allele (comma-separated list of values in 0-1)
-   -d, --detection_thresh=d: Detection thresholds (comma-separated list of values in 0-1)
-   -x, --output_png=x: Plot output
-   -s, --summary_out=s: Whether or not to output a file with summary of all simulations
-   -m, --output_summary=m: File name for output summary of all simulations
-   -f, --new_file_path=f: Directory for summary output files
-
-"""
-# removed output of all simulation results on request (not working)
-#   -r, --sim_results=r: Output all tabular simulation results (number of polymorphisms times number of detection thresholds)
-#   -o, --output=o: Base name for summary output for each run
-
-from rpy import *
-import os
-import random, sys, tempfile
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-def __main__():
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    # validate parameters
-    error = ''
-    try:
-        read_len = int( options.read_len )
-        if read_len <= 0:
-            raise Exception, ' greater than 0'
-    except TypeError, e:
-        error = ': %s' % str( e )
-    if error:
-        stop_err( 'Make sure your read length is an integer value%s' % error )
-    error = ''
-    try:
-        avg_coverage = int( options.avg_coverage )
-        if avg_coverage <= 0:
-            raise Exception, ' greater than 0'
-    except Exception, e:
-        error = ': %s' % str( e )
-    if error:
-        stop_err( 'Make sure your average coverage is an integer value%s' % error )
-    error = ''
-    try:
-        error_rate = float( options.error_rate )
-        if error_rate >= 1.0:
-            error_rate = 10 ** ( -error_rate / 10.0 )
-        elif error_rate < 0:
-            raise Exception, ' between 0 and 1'
-    except Exception, e:
-        error = ': %s' % str( e )
-    if error:
-        stop_err( 'Make sure the error rate is a decimal value%s or the quality score is at least 1' % error )
-    try:
-        num_sims = int( options.num_sims )
-    except TypeError, e:
-        stop_err( 'Make sure the number of simulations is an integer value: %s' % str( e ) )
-    if len( options.polymorphism ) > 0:
-        polymorphisms = [ float( p ) for p in options.polymorphism.split( ',' ) ]
-    else:
-        stop_err( 'Select at least one polymorphism value to use' )
-    if len( options.detection_thresh ) > 0:
-        detection_threshes = [ float( dt ) for dt in options.detection_thresh.split( ',' ) ]
-    else:
-        stop_err( 'Select at least one detection threshold to use' )
-
-    # mutation dictionaries
-    hp_dict = { 'A':'G', 'G':'A', 'C':'T', 'T':'C', 'N':'N' } # heteroplasmy dictionary
-    mt_dict = { 'A':'C', 'C':'A', 'G':'T', 'T':'G', 'N':'N'} # misread dictionary
-
-    # read fasta file to seq string
-    all_lines = open( options.input, 'rb' ).readlines()
-    seq = ''
-    for line in all_lines:
-        line = line.rstrip() 
-        if line.startswith('>'):
-            pass
-        else:
-            seq += line.upper()
-    seq_len = len( seq )
-
-    # output file name template
-# removed output of all simulation results on request (not working)
-#    if options.sim_results == "true":
-#        out_name_template = os.path.join( options.new_file_path, 'primary_output%s_' + options.output + '_visible_tabular' )
-#    else:
-#        out_name_template = tempfile.NamedTemporaryFile().name + '_%s'
-    out_name_template = tempfile.NamedTemporaryFile().name + '_%s'
-    print 'out_name_template:', out_name_template
-
-    # set up output files
-    outputs = {}
-    i = 1
-    for p in polymorphisms:
-        outputs[ p ] = {}
-        for d in detection_threshes:
-            outputs[ p ][ d ] = out_name_template % i
-            i += 1
-
-    # run sims
-    for polymorphism in polymorphisms:
-        for detection_thresh in detection_threshes:
-            output = open( outputs[ polymorphism ][ detection_thresh ], 'wb' )
-            output.write( 'FP\tFN\tGENOMESIZE=%s\n' % seq_len )
-            sim_count = 0
-            while sim_count < num_sims:
-                # randomly pick heteroplasmic base index
-                hbase = random.choice( range( 0, seq_len ) )
-                #hbase = seq_len/2#random.randrange( 0, seq_len )
-                # create 2D quasispecies list
-                qspec = map( lambda x: [], [0] * seq_len )
-                # simulate read indices and assign to quasispecies
-                i = 0
-                while i < ( avg_coverage * ( seq_len / read_len ) ): # number of reads (approximates coverage)
-                    start = random.choice( range( 0, seq_len ) )
-                    #start = seq_len/2#random.randrange( 0, seq_len ) # assign read start
-                    if random.random() < 0.5: # positive sense read
-                        end = start + read_len # assign read end
-                        if end > seq_len: # overshooting origin
-                            read = range( start, seq_len ) + range( 0, ( end - seq_len ) )
-                        else: # regular read
-                            read = range( start, end )
-                    else: # negative sense read
-                        end = start - read_len # assign read end
-                        if end < -1: # overshooting origin
-                            read = range( start, -1, -1) + range( ( seq_len - 1 ), ( seq_len + end ), -1 )
-                        else: # regular read
-                            read = range( start, end, -1 )
-                    # assign read to quasispecies list by index
-                    for j in read:
-                        if j == hbase and random.random() < polymorphism: # heteroplasmic base is variant with p = het
-                            ref = hp_dict[ seq[ j ] ]
-                        else: # ref is the verbatim reference nucleotide (all positions)
-                            ref = seq[ j ]
-                        if random.random() < error_rate: # base in read is misread with p = err
-                            qspec[ j ].append( mt_dict[ ref ] )
-                        else: # otherwise we carry ref through to the end
-                            qspec[ j ].append(ref)
-                    # last but not least
-                    i += 1
-                bases, fpos, fneg = {}, 0, 0 # last two will be outputted to summary file later
-                for i, nuc in enumerate( seq ):
-                    cov = len( qspec[ i ] )
-                    bases[ 'A' ] = qspec[ i ].count( 'A' )
-                    bases[ 'C' ] = qspec[ i ].count( 'C' )
-                    bases[ 'G' ] = qspec[ i ].count( 'G' )
-                    bases[ 'T' ] = qspec[ i ].count( 'T' )
-                    # calculate max NON-REF deviation
-                    del bases[ nuc ]
-                    maxdev = float( max( bases.values() ) ) / cov
-                    # deal with non-het sites
-                    if i != hbase:
-                        if maxdev >= detection_thresh: # greater than detection threshold = false positive
-                            fpos += 1
-                    # deal with het sites
-                    if i == hbase:
-                        hnuc = hp_dict[ nuc ] # let's recover het variant
-                        if ( float( bases[ hnuc ] ) / cov ) < detection_thresh: # less than detection threshold = false negative
-                            fneg += 1
-                        del bases[ hnuc ] # ignore het variant
-                        maxdev = float( max( bases.values() ) ) / cov # check other non-ref bases at het site
-                        if maxdev >= detection_thresh: # greater than detection threshold = false positive (possible)
-                            fpos += 1
-                # output error sums and genome size to summary file
-                output.write( '%d\t%d\n' % ( fpos, fneg ) )
-                sim_count += 1
-            # close output up
-            output.close()
-
-    # Parameters (heteroplasmy, error threshold, colours)
-    r( '''
-    het=c(%s)
-    err=c(%s)
-    grade = (0:32)/32
-    hues = rev(gray(grade))
-    ''' % ( ','.join( [ str( p ) for p in polymorphisms ] ), ','.join( [ str( d ) for d in detection_threshes ] ) ) )
-
-    # Suppress warnings
-    r( 'options(warn=-1)' )
-
-    # Create allsum (for FP) and allneg (for FN) objects
-    r( 'allsum <- data.frame()' )
-    for polymorphism in polymorphisms:
-        for detection_thresh in detection_threshes:
-            output = outputs[ polymorphism ][ detection_thresh ]
-            cmd = '''
-                  ngsum = read.delim('%s', header=T)
-                  ngsum$fprate <- ngsum$FP/%s
-                  ngsum$hetcol <- %s
-                  ngsum$errcol <- %s
-                  allsum <- rbind(allsum, ngsum)
-                  ''' % ( output, seq_len, polymorphism, detection_thresh )
-            r( cmd )
-
-    if os.path.getsize( output ) == 0:
-        for p in outputs.keys():
-            for d in outputs[ p ].keys():
-                sys.stderr.write(outputs[ p ][ d ] + ' '+str( os.path.getsize( outputs[ p ][ d ] ) )+'\n')
-
-    if options.summary_out == "true":
-        r( 'write.table(summary(ngsum), file="%s", quote=FALSE, sep="\t", row.names=FALSE)' % options.output_summary )
-
-    # Summary objects (these could be printed)
-    r( '''
-    tr_pos <- tapply(allsum$fprate,list(allsum$hetcol,allsum$errcol), mean)
-    tr_neg <- tapply(allsum$FN,list(allsum$hetcol,allsum$errcol), mean)
-    cat('\nFalse Positive Rate Summary\n\t', file='%s', append=T, sep='\t')
-    write.table(format(tr_pos, digits=4), file='%s', append=T, quote=F, sep='\t')
-    cat('\nFalse Negative Rate Summary\n\t', file='%s', append=T, sep='\t')
-    write.table(format(tr_neg, digits=4), file='%s', append=T, quote=F, sep='\t')
-    ''' % tuple( [ options.output_summary ] * 4 ) )
-
-    # Setup graphs
-    #pdf(paste(prefix,'_jointgraph.pdf',sep=''), 15, 10)
-    r( '''
-    png('%s', width=800, height=500, units='px', res=250)
-    layout(matrix(data=c(1,2,1,3,1,4), nrow=2, ncol=3), widths=c(4,6,2), heights=c(1,10,10))
-    ''' % options.output_png )
-
-    # Main title
-    genome = ''
-    if options.genome:
-        genome = '%s: ' % options.genome
-    r( '''
-    par(mar=c(0,0,0,0))
-    plot(1, type='n', axes=F, xlab='', ylab='')
-    text(1,1,paste('%sVariation in False Positives and Negatives (', %s, ' simulations, coverage ', %s,')', sep=''), font=2, family='sans', cex=0.7)
-    ''' % ( genome, options.num_sims, options.avg_coverage ) )
-
-    # False positive boxplot
-    r( '''
-    par(mar=c(5,4,2,2), las=1, cex=0.35)
-    boxplot(allsum$fprate ~ allsum$errcol, horizontal=T, ylim=rev(range(allsum$fprate)), cex.axis=0.85)
-    title(main='False Positives', xlab='false positive rate', ylab='')
-    ''' )
-
-    # False negative heatmap (note zlim command!)
-    num_polys = len( polymorphisms )
-    num_dets = len( detection_threshes )
-    r( '''
-    par(mar=c(5,4,2,1), las=1, cex=0.35)
-    image(1:%s, 1:%s, tr_neg, zlim=c(0,1), col=hues, xlab='', ylab='', axes=F, border=1)
-    axis(1, at=1:%s, labels=rownames(tr_neg), lwd=1, cex.axis=0.85, axs='i')
-    axis(2, at=1:%s, labels=colnames(tr_neg), lwd=1, cex.axis=0.85)
-    title(main='False Negatives', xlab='minor allele frequency', ylab='detection threshold')
-    ''' % ( num_polys, num_dets, num_polys, num_dets ) )
-
-    # Scale alongside
-    r( '''
-    par(mar=c(2,2,2,3), las=1)
-    image(1, grade, matrix(grade, ncol=length(grade), nrow=1), col=hues, xlab='', ylab='', xaxt='n', las=1, cex.axis=0.85)
-    title(main='Key', cex=0.35)
-    mtext('false negative rate', side=1, cex=0.35)
-    ''' )
-
-    # Close graphics
-    r( '''
-    layout(1)
-    dev.off()
-    ''' )
-
-    # Tidy up
-#    r( 'rm(folder,prefix,sim,cov,het,err,grade,hues,i,j,ngsum)' )
-
-if __name__ == "__main__" : __main__()
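
The heart of the simulation above is the circular-genome read placement inside the inner while loop: reads that run past either end of the sequence wrap around the origin. A Python 3 re-sketch of just that index arithmetic is shown below (the original is Python 2 and depends on rpy); the function name and parameters are ours, not the script's::

    # Python 3 re-sketch of the read-placement logic from the loop above.
    import random

    def read_indices(seq_len, read_len, rng=random):
        """Return genome indices covered by one simulated read on a
        circular genome, wrapping past the origin when it overshoots."""
        start = rng.randrange(seq_len)
        if rng.random() < 0.5:                      # positive-sense read
            end = start + read_len
            if end > seq_len:                       # wrap past the origin
                return list(range(start, seq_len)) + list(range(end - seq_len))
            return list(range(start, end))
        end = start - read_len                      # negative-sense read
        if end < -1:                                # wrap past the origin
            return list(range(start, -1, -1)) + \
                   list(range(seq_len - 1, seq_len + end, -1))
        return list(range(start, end, -1))

    assert len(read_indices(seq_len=100, read_len=10)) == 10
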
--- a/tools/ngs_simulation/ngs_simulation.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,217 +0,0 @@
-<tool id="ngs_simulation" name="Simulate" version="1.0.0">
-<!--<tool id="ngs_simulation" name="Simulate" force_history_refresh="True" version="1.0.0">-->
-  <description>Illumina runs</description>
-  <command interpreter="python">
-    ngs_simulation.py
-      #if $in_type.input_type == "built-in"
-        --input="${ filter( lambda x: str( x[0] ) == str( $in_type.genome ), $__app__.tool_data_tables[ 'ngs_sim_fasta' ].get_fields() )[0][-1] }"
-        --genome=$in_type.genome
-      #else
-        --input=$in_type.input1
-      #end if
-      --read_len=$read_len
-      --avg_coverage=$avg_coverage
-      --error_rate=$error_rate
-      --num_sims=$num_sims
-      --polymorphism=$polymorphism
-      --detection_thresh=$detection_thresh
-      --output_png=$output_png
-      --summary_out=$summary_out
-      --output_summary=$output_summary
-      --new_file_path=$__new_file_path__
-  </command>
-<!-- If want to include all simulation results file
-        sim_results=$sim_results
-        output=$output.id
--->
-  <inputs>
-    <conditional name="in_type">
-      <param name="input_type" type="select" label="Use a built-in FASTA file or one from the history?">
-        <option value="built-in">Built-in</option>
-        <option value="history">History file</option>
-      </param>
-      <when value="built-in">
-        <param name="genome" type="select" label="Select a built-in genome" help="if your genome of interest is not listed - contact Galaxy team">
-          <options from_data_table="ngs_sim_fasta" />
-        </param>
-      </when>
-      <when value="history">
-        <param name="input1" type="data" format="fasta" label="Input genome (FASTA format)" />
-      </when>
-    </conditional>
-    <param name="read_len" type="integer" value="76" label="Read length" />
-    <param name="avg_coverage" type="integer" value="200" label="Average coverage" />
-    <param name="error_rate" type="float" value="0.001" label="Error rate or quality score" help="Quality score if integer 1 or greater; error rate if between 0 and 1" />
-    <param name="num_sims" type="integer" value="100" label="The number of simulations to run" />
-    <param name="polymorphism" type="select" multiple="true" label="Frequency/ies for minor allele">
-      <option value="0.001">0.001</option>
-      <option value="0.002">0.002</option>
-      <option value="0.003">0.003</option>
-      <option value="0.004">0.004</option>
-      <option value="0.005">0.005</option>
-      <option value="0.006">0.006</option>
-      <option value="0.007">0.007</option>
-      <option value="0.008">0.008</option>
-      <option value="0.009">0.009</option>
-      <option value="0.01">0.01</option>
-      <option value="0.02">0.02</option>
-      <option value="0.03">0.03</option>
-      <option value="0.04">0.04</option>
-      <option value="0.05">0.05</option>
-      <option value="0.06">0.06</option>
-      <option value="0.07">0.07</option>
-      <option value="0.08">0.08</option>
-      <option value="0.09">0.09</option>
-      <option value="0.1">0.1</option>
-      <option value="0.2">0.2</option>
-      <option value="0.3">0.3</option>
-      <option value="0.4">0.4</option>
-      <option value="0.5">0.5</option>
-      <option value="0.6">0.6</option>
-      <option value="0.7">0.7</option>
-      <option value="0.8">0.8</option>
-      <option value="0.9">0.9</option>
-      <option value="1.0">1.0</option>
-    </param>
-    <param name="detection_thresh" type="select" multiple="true" label="Detection thresholds">
-      <option value="0.001">0.001</option>
-      <option value="0.002">0.002</option>
-      <option value="0.003">0.003</option>
-      <option value="0.004">0.004</option>
-      <option value="0.005">0.005</option>
-      <option value="0.006">0.006</option>
-      <option value="0.007">0.007</option>
-      <option value="0.008">0.008</option>
-      <option value="0.009">0.009</option>
-      <option value="0.01">0.01</option>
-      <option value="0.02">0.02</option>
-      <option value="0.03">0.03</option>
-      <option value="0.04">0.04</option>
-      <option value="0.05">0.05</option>
-      <option value="0.06">0.06</option>
-      <option value="0.07">0.07</option>
-      <option value="0.08">0.08</option>
-      <option value="0.09">0.09</option>
-      <option value="0.1">0.1</option>
-      <option value="0.2">0.2</option>
-      <option value="0.3">0.3</option>
-      <option value="0.4">0.4</option>
-      <option value="0.5">0.5</option>
-      <option value="0.6">0.6</option>
-      <option value="0.7">0.7</option>
-      <option value="0.8">0.8</option>
-      <option value="0.9">0.9</option>
-      <option value="1.0">1.0</option>
-    </param>
-    <param name="summary_out" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Include a (text) summary file for all the simulations" />
-<!--    <param name="sim_results" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output all tabular simulation results" help="Number of polymorphisms times number of detection thresholds"/>
--->
-  </inputs>
-  <outputs>
-    <data format="png" name="output_png" />
-    <data format="tabular" name="output_summary">
-      <filter>summary_out == True</filter>
-    </data>
-<!--
-    <data format="tabular" name="output">
-      <filter>sim_files_out</filter>
-    </data>
--->
-  </outputs>
-  <tests>
-    <!--
-      Tests cannot be run because of the non-deterministic element of the simulation.
-      But if you run the following "tests" manually in the browser and check against
-      the output files, they should be very similar to the listed output files.
-    -->
-    <!--
-    <test>
-      <param name="input_type" value="history" />
-      <param name="input1" value="ngs_simulation_in1.fasta" ftype="fasta" />
-      <param name="read_len" value="76" />
-      <param name="avg_coverage" value="200" />
-      <param name="error_rate" value="0.001" />
-      <param name="num_sims" value="25" />
-      <param name="polymorphism" value="0.02,0.04,0.1" />
-      <param name="detection_thresh" value="0.01,0.02" />
-      <param name="summary_out" value="true" />
-      <output name="output_png" file="ngs_simulation_out1.png" />
-      <output name="output_summary" file="ngs_simulation_out2.tabular" />
-    </test>
-    <test>
-      <param name="input_type" value="built-in" />
-      <param name="genome" value="pUC18" />
-      <param name="read_len" value="50" />
-      <param name="avg_coverage" value="150" />
-      <param name="error_rate" value="0.005" />
-      <param name="num_sims" value="25" />
-      <param name="polymorphism" value="0.001,0.005" />
-      <param name="detection_thresh" value="0.001,0.002" />
-      <param name="summary_out" value="false" />
-      <output name="output_png" file="ngs_simulation_out3.png" />
-    </test>
-    -->
-  </tests>
-  <help>
-
-**What it does**
-
-This tool simulates an Illumina run and provides plots of false positives and false negatives. It allows for a range of simulation parameters to be set. Note that this simulation sets only one (randomly chosen) position in the genome as polymorphic, at the minor allele frequency specified. Superimposed on this are "sequencing errors", which are uniformly (and randomly) distributed. Polymorphisms are called using the detection threshold, so if the detection threshold equals the minor allele frequency, the expected false negative rate is about 50% (a quick numeric check follows this tool definition).
-
-**Parameter list**
-
-These are the parameters that should be set for the simulation::
-
-  Read length (which is the same for all reads)
-  Average Coverage
-  Frequency for Minor Allele
-  Sequencing Error Rate
-  Detection Threshold
-  Number of Simulations
-
-You also should choose to use either a built-in genome or supply your own FASTA file.
-
-**Output**
-
-There are one or two output files. The first is a PNG that contains two different plots and is always generated. The second is optional: a text file with summary information about the simulations that were run. Below are some example outputs for a 10-simulation run on phiX with the settings listed::
-
-  Read length                    76
-  Average coverage               200
-  Error rate/quality score       0.001
-  Number of simulations          10
-  Frequencies for minor allele   0.002
-                                 0.004
-  Detection thresholds           0.003
-                                 0.005
-                                 0.007
-  Include summary file           Yes
-
-Plot output (png):
-
-.. image:: ./static/images/ngs_simulation.png
-
-Summary output (txt)::
-
-        FP              FN       GENOMESIZE.5386      fprate          hetcol          errcol
-  Min.   : 71.0   Min.   :0.0    Mode:logical     Min.   :0.01318         Min.   :0.004   Min.   :0.007
-  1st Qu.:86.0    1st Qu.:1.0    NA's:10          1st Qu.:0.01597         1st Qu.:0.004   1st Qu.:0.007
-  Median :92.5    Median :1.0    NA       Median :0.01717         Median :0.004   Median :0.007
-  Mean   :93.6    Mean   :0.9    NA       Mean   :0.01738         Mean   :0.004   Mean   :0.007
-  3rd Qu.:100.8   3rd Qu.:1.0    NA       3rd Qu.:0.01871         3rd Qu.:0.004   3rd Qu.:0.007
-  Max.   :123.0   Max.   :1.0    NA       Max.   :0.02284         Max.   :0.004   Max.   :0.007
-  
-  False Positive Rate Summary
-          0.003   0.005   0.007
-  0.001   0.17711 0.10854 0.01673
-  0.009   0.18049 0.10791 0.01738
-
-  False Negative Rate Summary
-          0.003   0.005     0.007
-  0.001   1.0     0.8       1.0
-  0.009   0.4     0.7       0.9
-
-
-  </help>
-</tool>
-
-
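
The help above claims an expected false negative rate of about 50% when the detection threshold equals the minor allele frequency. A quick, self-contained check makes the reason visible: the observed variant fraction at the het site lands below its own expected value roughly half the time. The coverage and frequency values here are illustrative, not tool defaults::

    # Empirical check of the ~50% false-negative claim when
    # threshold == minor allele frequency.
    import random

    def fn_rate(freq=0.05, coverage=200, trials=20000, seed=0):
        rng = random.Random(seed)
        misses = 0
        for _ in range(trials):
            variant_reads = sum(rng.random() < freq for _ in range(coverage))
            if variant_reads / coverage < freq:    # below threshold: false negative
                misses += 1
        return misses / trials

    print(round(fn_rate(), 3))   # close to 0.5 (binomial median sits near its mean)
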
--- a/tools/peak_calling/ccat_2_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
-<tool id="peakcalling_ccat2" name="CCAT" version="0.0.1">
-  <description>Control-based ChIP-seq Analysis Tool</description>
-  <command interpreter="python">ccat_wrapper.py '$input_tag_file' '$input_control_file' '$chromInfo' 
-  #if str( $options_type[ 'options_type_selector' ] ) == 'advanced':
-  '$input_advanced_config_file' 
-  #else:
-  '${ options_type.input_config_file.fields.path }'
-  #end if
-  'CCAT in Galaxy' 
-  '$output_peak_file' '$output_region_file' '$output_top_file' '$output_log_file'</command>
-  <requirements>
-    <requirement type="binary">CCAT</requirement>
-  </requirements>
-  <inputs>
-    <param name="input_tag_file" type="data" format="bed" label="ChIP-Seq Tag File" >
-      <validator type="unspecified_build" />
-    </param>
-    <param name="input_control_file" type="data" format="bed" label="ChIP-Seq Control File" >
-      <validator type="unspecified_build" />
-    </param>
-    <conditional name="options_type">
-      <param name="options_type_selector" type="select" label="Advanced Options">
-        <option value="basic" selected="True">Hide Advanced Options</option>
-        <option value="advanced">Show Advanced Options</option>
-      </param>
-      <when value="basic">
-        <param name="input_config_file" type="select" label="Select a pre-defined configuration file">
-          <options from_data_table="ccat_configurations">
-            <validator type="no_options" message="No configurations are available"/>
-          </options>
-        </param>
-      </when>
-      <when value="advanced">
-        <param name="fragment_size" type="integer" label="Length of DNA fragment" value="200"/>
-        <param name="sliding_window_size" type="integer" label="Sliding window size" value="500" help="transcription factor binding default: 300; histone modifications default: 500"/>
-        <param name="moving_step" type="integer" label="Step of sliding window" value="50" help="transcription factor binding default: 10; histone modifications default: 50"/>
-        <param name="is_strand_sensitive_mode" type="select" label="isStrandSensitiveMode" >
-          <option value="1">Transition from sense strand to anti-sense strand</option>
-          <option value="0" selected="True">Local maximum of read-enrichment profile</option>
-        </param>
-        <param name="min_count" type="integer" label="Minimum number of read counts at the peak" value="4"/>
-        <param name="output_num" type="integer" label="Number of peaks reported in top peak file" value="100000"/>
-        <param name="random_seed" type="integer" label="Random Seed" value="123456"/>
-        <param name="min_score" type="float" label="Minimum score of normalized difference" value="3.0"/>
-        <param name="bootstrap_pass" type="integer" label="Number of passes in the bootstrapping process" value="50"/>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data name="output_peak_file" format="interval" label="${tool.name} on ${on_string} (peaks)">
-      <actions>
-        <action type="metadata" name="chromCol" default="1"/>
-        <action type="metadata" name="startCol" default="3"/>
-        <action type="metadata" name="endCol" default="4"/>
-      </actions>
-    </data>
-    <data name="output_region_file" format="interval" label="${tool.name} on ${on_string} (regions)">
-      <actions>
-        <action type="metadata" name="chromCol" default="1"/>
-        <action type="metadata" name="startCol" default="3"/>
-        <action type="metadata" name="endCol" default="4"/>
-      </actions>
-    </data>
-    <data name="output_top_file" format="interval" label="${tool.name} on ${on_string} (top peaks)">
-      <actions>
-        <action type="metadata" name="chromCol" default="1"/>
-        <action type="metadata" name="startCol" default="3"/>
-        <action type="metadata" name="endCol" default="4"/>
-      </actions>
-    </data>
-    <data name="output_log_file" format="txt" label="${tool.name} on ${on_string} (log)"/>
-  </outputs>
-  <configfiles>
-    <configfile name="input_advanced_config_file">#if str( $options_type['options_type_selector' ] ) == 'advanced':
-fragmentSize	${options_type[ 'fragment_size' ]}
-slidingWinSize	${options_type[ 'sliding_window_size' ]}
-movingStep	${options_type[ 'moving_step' ]}
-isStrandSensitiveMode	${options_type[ 'is_strand_sensitive_mode' ]}
-minCount	${options_type[ 'min_count' ]}
-outputNum	${options_type[ 'output_num' ]}
-randomSeed	${options_type[ 'random_seed' ]}
-minScore	${options_type[ 'min_score' ]}	
-bootstrapPass	${options_type[ 'bootstrap_pass' ]}
-#end if</configfile>
-  </configfiles>
-  <tests>
-    <test>
-      <param name="input_tag_file" value="chipseq_enriched.bed.gz" ftype="bed" dbkey="hg18" />
-      <param name="input_control_file" value="chipseq_input.bed.gz" ftype="bed" dbkey="hg18" />
-      <param name="options_type_selector" value="basic" />
-      <param name="input_config_file" value="ccat_2.0_histone_config" />
-      <output name="output_peak_file" file="peakcalling_ccat2/ccat2_test_peak_out_1.interval" />
-      <output name="output_region_file" file="peakcalling_ccat2/ccat2_test_region_out_1.interval" />
-      <output name="output_top_file" file="peakcalling_ccat2/ccat2_test_top_out_1.interval" />
-      <output name="output_log_file" file="peakcalling_ccat2/ccat2_test_log_out_1.interval" />
-    </test>
-    <test>
-      <param name="input_tag_file" value="chipseq_enriched.bed.gz" ftype="bed" dbkey="hg18" />
-      <param name="input_control_file" value="chipseq_input.bed.gz" ftype="bed" dbkey="hg18" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="fragment_size" value="200" />
-      <param name="sliding_window_size" value="500" />
-      <param name="moving_step" value="50" />
-      <param name="is_strand_sensitive_mode" value="0" />
-      <param name="min_count" value="4" />
-      <param name="output_num" value="100000" />
-      <param name="random_seed" value="123456" />
-      <param name="min_score" value="3.0" />
-      <param name="bootstrap_pass" value="50" />
-      <output name="output_peak_file" file="peakcalling_ccat2/ccat2_test_peak_out_1.interval" />
-      <output name="output_region_file" file="peakcalling_ccat2/ccat2_test_region_out_1.interval" />
-      <output name="output_top_file" file="peakcalling_ccat2/ccat2_test_top_out_1.interval" />
-      <output name="output_log_file" file="peakcalling_ccat2/ccat2_test_log_out_1.interval" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-This tool allows ChIP-seq peak/region calling using CCAT.
-
-View the original CCAT documentation: http://cmb.gis.a-star.edu.sg/ChIPSeq/paperCCAT.htm.
-  </help>
-</tool>
--- a/tools/peak_calling/ccat_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-import sys, subprocess, tempfile, shutil, os.path
-
-CCAT_BINARY = "CCAT"
-
-def get_top_count( filename ):
-    for line in open( filename ):
-        if line.startswith( 'outputNum' ):
-            return int( line.split()[-1].strip() ) 
-
-def stop_err( tmp_dir, exception ):
-    print >> sys.stderr, "Error running CCAT."
-    shutil.rmtree( tmp_dir ) #some error has occurred, provide info and remove possibly non-empty temp directory
-    raise exception
-
-def main():
-    input_tag_file = sys.argv[1]
-    input_control_file = sys.argv[2]
-    chrom_info_file = sys.argv[3]
-    input_config_file = sys.argv[4]
-    project_name = sys.argv[5]
-    output_peak_file = sys.argv[6]
-    output_region_file = sys.argv[7]
-    output_top_file = sys.argv[8]
-    output_log_file = sys.argv[9]
-    
-    tmp_dir = tempfile.mkdtemp()
-    try:
-        proc = subprocess.Popen( args="%s %s > %s" % ( CCAT_BINARY, " ".join( map( lambda x: "'%s'" % x, [ input_tag_file, input_control_file, chrom_info_file, input_config_file, project_name ] ) ), output_log_file ), shell=True, cwd=tmp_dir )
-        proc.wait()
-        if proc.returncode:
-            raise Exception( "Error code: %i" % proc.returncode )
-        output_num = get_top_count( input_config_file )
-        shutil.move( os.path.join( tmp_dir, "%s.significant.peak" % project_name ), output_peak_file )
-        shutil.move( os.path.join( tmp_dir, "%s.significant.region" % project_name ), output_region_file )
-        shutil.move( os.path.join( tmp_dir, "%s.top%i.peak" % ( project_name, output_num ) ), output_top_file )
-    except Exception, e:
-        return stop_err( tmp_dir, e )
-    os.rmdir( tmp_dir ) #clean up empty temp working directory
-
-if __name__ == "__main__": main()
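
get_top_count() above depends on the tab-delimited key/value layout that the wrapper's &lt;configfile&gt; emits. The sketch below shows that layout (values are the advanced-option defaults from the tool form) together with a slightly generalized parser; the parser is illustrative, not part of the wrapper::

    # Sample CCAT configuration in the layout the <configfile> produces.
    SAMPLE_CONFIG = (
        "fragmentSize\t200\n"
        "slidingWinSize\t500\n"
        "movingStep\t50\n"
        "isStrandSensitiveMode\t0\n"
        "minCount\t4\n"
        "outputNum\t100000\n"
        "randomSeed\t123456\n"
        "minScore\t3.0\n"
        "bootstrapPass\t50\n"
    )

    def parse_config(text):
        """Return a {key: value} dict from whitespace-delimited lines."""
        conf = {}
        for line in text.splitlines():
            fields = line.split()
            if len(fields) >= 2:
                conf[fields[0]] = fields[1]
        return conf

    # outputNum is the value get_top_count() extracts to locate the
    # "<project>.top<N>.peak" file.
    assert int(parse_config(SAMPLE_CONFIG)["outputNum"]) == 100000
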
--- a/tools/peak_calling/ccat_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,139 +0,0 @@
-<tool id="peakcalling_ccat" name="CCAT" version="0.0.1">
-  <description>Control-based ChIP-seq Analysis Tool</description>
-  <command interpreter="python">ccat_wrapper.py '$input_tag_file' '$input_control_file' '$chromInfo' 
-  #if str( $options_type[ 'options_type_selector' ] ) == 'advanced':
-  '$input_advanced_config_file' 
-  #else:
-  '${ options_type.input_config_file.fields.path }'
-  #end if
-  'CCAT in Galaxy' 
-  '$output_peak_file' '$output_region_file' '$output_top_file' '$output_log_file'</command>
-  <requirements>
-    <requirement type="binary" version="3.0">CCAT</requirement>
-  </requirements>
-  <inputs>
-    <param name="input_tag_file" type="data" format="bed" label="ChIP-Seq Tag File" >
-      <validator type="unspecified_build" />
-    </param>
-    <param name="input_control_file" type="data" format="bed" label="ChIP-Seq Control File" >
-      <validator type="unspecified_build" />
-    </param>
-    <conditional name="options_type">
-      <param name="options_type_selector" type="select" label="Advanced Options">
-        <option value="basic" selected="True">Hide Advanced Options</option>
-        <option value="advanced">Show Advanced Options</option>
-      </param>
-      <when value="basic">
-        <param name="input_config_file" type="select" label="Select a pre-defined configuration file">
-          <options from_data_table="ccat_configurations">
-            <validator type="no_options" message="No configurations are available"/>
-          </options>
-        </param>
-      </when>
-      <when value="advanced">
-        <param name="fragment_size" type="integer" label="Length of DNA fragment" value="200"/>
-        <param name="sliding_window_size" type="integer" label="Sliding window size" value="500" help="transcription factor binding default: 300; histone modifications default: 500"/>
-        <param name="moving_step" type="integer" label="Step of sliding window" value="50" help="transcription factor binding default: 10; histone modifications default: 50"/>
-        <param name="is_strand_sensitive_mode" type="select" label="isStrandSensitiveMode" >
-          <option value="1">Transition from sense strand to anti-sense strand</option>
-          <option value="0" selected="True">Local maximum of read-enrichment profile</option>
-        </param>
-        <param name="min_count" type="integer" label="Minimum number of read counts at the peak" value="4"/>
-        <param name="output_num" type="integer" label="Number of peaks reported in top peak file" value="100000"/>
-        <param name="random_seed" type="integer" label="Random Seed" value="123456"/>
-        <param name="min_score" type="float" label="Minimum score of normalized difference" value="3.0"/>
-        <param name="bootstrap_pass" type="integer" label="Number of passes in the bootstrapping process" value="50"/>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data name="output_peak_file" format="interval" label="${tool.name} on ${on_string} (peaks)">
-      <actions>
-        <action type="metadata" name="chromCol" default="1"/>
-        <action type="metadata" name="startCol" default="3"/>
-        <action type="metadata" name="endCol" default="4"/>
-      </actions>
-    </data>
-    <data name="output_region_file" format="interval" label="${tool.name} on ${on_string} (regions)">
-      <actions>
-        <action type="metadata" name="chromCol" default="1"/>
-        <action type="metadata" name="startCol" default="3"/>
-        <action type="metadata" name="endCol" default="4"/>
-      </actions>
-    </data>
-    <data name="output_top_file" format="interval" label="${tool.name} on ${on_string} (top peaks)">
-      <actions>
-        <action type="metadata" name="chromCol" default="1"/>
-        <action type="metadata" name="startCol" default="3"/>
-        <action type="metadata" name="endCol" default="4"/>
-      </actions>
-    </data>
-    <data name="output_log_file" format="txt" label="${tool.name} on ${on_string} (log)"/>
-  </outputs>
-  <configfiles>
-    <configfile name="input_advanced_config_file">#if str( $options_type['options_type_selector' ] ) == 'advanced':
-fragmentSize	${options_type[ 'fragment_size' ]}
-slidingWinSize	${options_type[ 'sliding_window_size' ]}
-movingStep	${options_type[ 'moving_step' ]}
-isStrandSensitiveMode	${options_type[ 'is_strand_sensitive_mode' ]}
-minCount	${options_type[ 'min_count' ]}
-outputNum	${options_type[ 'output_num' ]}
-randomSeed	${options_type[ 'random_seed' ]}
-minScore	${options_type[ 'min_score' ]}	
-bootstrapPass	${options_type[ 'bootstrap_pass' ]}
-#end if</configfile>
-  </configfiles>
-  <tests>
-    <test>
-      <param name="input_tag_file" value="chipseq_enriched.bed.gz" ftype="bed" dbkey="hg18" />
-      <param name="input_control_file" value="chipseq_input.bed.gz" ftype="bed" dbkey="hg18" />
-      <param name="options_type_selector" value="advanced" />
-      <param name="fragment_size" value="200" />
-      <param name="sliding_window_size" value="500" />
-      <param name="moving_step" value="50" />
-      <param name="is_strand_sensitive_mode" value="0" />
-      <param name="min_count" value="4" />
-      <param name="output_num" value="100000" />
-      <param name="random_seed" value="123456" />
-      <param name="min_score" value="5.0" />
-      <param name="bootstrap_pass" value="50" />
-      <output name="output_peak_file" file="peakcalling_ccat/3.0/ccat_test_peak_out_1.interval.re_match" compare="re_match" />
-      <output name="output_region_file" file="peakcalling_ccat/3.0/ccat_test_region_out_1.interval.re_match" compare="re_match" />
-      <output name="output_top_file" file="peakcalling_ccat/3.0/ccat_test_top_out_1.interval.sorted.re_match" compare="re_match" sort="True" />
-      <output name="output_log_file" file="peakcalling_ccat/3.0/ccat_test_log_out_1.txt" />
-    </test>
-    <test>
-      <param name="input_tag_file" value="chipseq_enriched.bed.gz" ftype="bed" dbkey="hg18" />
-      <param name="input_control_file" value="chipseq_input.bed.gz" ftype="bed" dbkey="hg18" />
-      <param name="options_type_selector" value="basic" />
-      <param name="input_config_file" value="ccat_3.0_histone_config" />
-      <output name="output_peak_file" file="peakcalling_ccat/3.0/ccat_test_peak_out_1.interval.re_match" compare="re_match" />
-      <output name="output_region_file" file="peakcalling_ccat/3.0/ccat_test_region_out_1.interval.re_match" compare="re_match" />
-      <output name="output_top_file" file="peakcalling_ccat/3.0/ccat_test_top_out_1.interval.sorted.re_match" compare="re_match" sort="true" />
-      <output name="output_log_file" file="peakcalling_ccat/3.0/ccat_test_log_out_1.txt" />
-    </test>
-    <!-- Test below gives different results on different architectures, 
-    e.g.: x86_64 GNU/Linux gave an extra line (additional peak called) when compared to the version running on 10.6.0 Darwin i386 
-    slidingWinSize was fixed to be 1000, default as per readme.txt
-    -->
-    <!--
-    <test>
-      <param name="input_tag_file" value="chipseq_enriched.bed.gz" ftype="bed" dbkey="hg18" />
-      <param name="input_control_file" value="chipseq_input.bed.gz" ftype="bed" dbkey="hg18" />
-      <param name="options_type_selector" value="basic" />
-      <param name="input_config_file" value="ccat_3.0_histone_config_readme" />
-      <output name="output_peak_file" file="peakcalling_ccat/3.0/ccat_test_peak_out_2.interval.re_match" compare="re_match" />
-      <output name="output_region_file" file="peakcalling_ccat/3.0/ccat_test_region_out_2.interval.re_match" compare="re_match" />
-      <output name="output_top_file" file="peakcalling_ccat/3.0/ccat_test_top_out_2.interval.sorted.re_match" compare="re_match" sort="true" />
-      <output name="output_log_file" file="peakcalling_ccat/3.0/ccat_test_log_out_2.txt" />
-    </test>
-  -->
-  </tests>
-  <help>
-**What it does**
-
-This tool allows ChIP-seq peak/region calling using CCAT.
-
-View the original CCAT documentation: http://cmb.gis.a-star.edu.sg/ChIPSeq/paperCCAT.htm.
-  </help>
-</tool>
--- a/tools/peak_calling/macs_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,135 +0,0 @@
-import sys, subprocess, tempfile, shutil, glob, os, os.path, gzip
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "simplejson" )
-import simplejson
-
-CHUNK_SIZE = 1024
-
-def gunzip_cat_glob_path( glob_path, target_filename, delete = False ):
-    out = open( target_filename, 'wb' )
-    for filename in glob.glob( glob_path ):
-        fh = gzip.open( filename, 'rb' )
-        while True:
-            data = fh.read( CHUNK_SIZE )
-            if data:
-                out.write( data )
-            else:
-                break
-        fh.close()
-        if delete:
-            os.unlink( filename )
-    out.close()
-
-def xls_to_interval( xls_file, interval_file, header = None ):
-    out = open( interval_file, 'wb' )
-    if header:
-        out.write( '#%s\n' % header )
-    wrote_header = False
-    #From macs readme: Coordinates in XLS is 1-based which is different with BED format.
-    for line in open( xls_file ):
-        #keep all existing comment lines
-        if line.startswith( '#' ):
-            out.write( line )
-        elif not wrote_header:
-            out.write( '#%s' % line )
-            wrote_header = True
-        else:
-            fields = line.split( '\t' )
-            if len( fields ) > 1:
-                fields[1] = str( int( fields[1] ) - 1 )
-            out.write( '\t'.join( fields ) )
-    out.close()
-
-def main():
-    options = simplejson.load( open( sys.argv[1] ) )
-    output_bed = sys.argv[2]
-    output_extra_html = sys.argv[3]
-    output_extra_path = sys.argv[4]
-    
-    experiment_name = '_'.join( options['experiment_name'].split() ) #save experiment name here; macs uses it for filenames, and gzip of wig files will fail with spaces (macs doesn't properly escape them), so replace all whitespace; split makes this easier
-    cmdline = "macs -t %s" % ",".join( options['input_chipseq'] )
-    if options['input_control']:
-        cmdline = "%s -c %s" % ( cmdline, ",".join( options['input_control'] ) )
-    cmdline = "%s --format='%s' --name='%s' --gsize='%s' --tsize='%s' --bw='%s' --pvalue='%s' --mfold='%s' %s --lambdaset='%s' %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['tsize'], options['bw'], options['pvalue'], options['mfold'], options['nolambda'], options['lambdaset'], options['futurefdr'] )
-    if 'wig' in options:
-        wigextend = int( options['wig']['wigextend']  )
-        if wigextend >= 0:
-            wigextend = "--wigextend='%s'" % wigextend
-        else:
-            wigextend = ''
-        cmdline = "%s --wig %s --space='%s'" % ( cmdline, wigextend, options['wig']['space'] )
-    if 'nomodel' in options:
-        cmdline = "%s --nomodel --shiftsize='%s'" % ( cmdline, options['nomodel'] )
-    if 'diag' in options:
-        cmdline = "%s --diag --fe-min='%s' --fe-max='%s' --fe-step='%s'" % ( cmdline, options['diag']['fe-min'], options['diag']['fe-max'], options['diag']['fe-step'] )
-    
-    tmp_dir = tempfile.mkdtemp() #macs makes very messy output, need to contain it into a temp dir, then provide to user
-    stderr_name = tempfile.NamedTemporaryFile().name # redirect stderr here, macs provides lots of info via stderr, make it into a report
-    proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=open( stderr_name, 'wb' ) )
-    proc.wait()
-    #We don't want to set tool run to error state if only warnings or info, e.g. mfold could be decreased to improve model, but let user view macs log
-    #Do not terminate if error code, allow dataset (e.g. log) creation and cleanup
-    if proc.returncode:
-        stderr_f = open( stderr_name )
-        while True:
-            chunk = stderr_f.read( CHUNK_SIZE )
-            if not chunk:
-                stderr_f.close()
-                break
-            sys.stderr.write( chunk )
-    
-    #run R to create pdf from model script
-    if os.path.exists( os.path.join( tmp_dir, "%s_model.r" % experiment_name ) ):
-        cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % ( experiment_name, experiment_name )
-        proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir )
-        proc.wait()
-    
-    
-    #move bed out to proper output file
-    created_bed_name =  os.path.join( tmp_dir, "%s_peaks.bed" % experiment_name )
-    if os.path.exists( created_bed_name ):
-        shutil.move( created_bed_name, output_bed )
-    
-    #parse xls files to interval files as needed
-    if options['xls_to_interval']:
-        create_peak_xls_file = os.path.join( tmp_dir, '%s_peaks.xls' % experiment_name )
-        if os.path.exists( create_peak_xls_file ):
-            xls_to_interval( create_peak_xls_file, options['xls_to_interval']['peaks_file'], header = 'peaks file' )
-        create_peak_xls_file = os.path.join( tmp_dir, '%s_negative_peaks.xls' % experiment_name )
-        if os.path.exists( create_peak_xls_file ):
-            xls_to_interval( create_peak_xls_file, options['xls_to_interval']['negative_peaks_file'], header = 'negative peaks file' )
-    
-    #merge and move wig files as needed, delete gz'd files and remove emptied dirs
-    if 'wig' in options:
-        wig_base_dir = os.path.join( tmp_dir, "%s_MACS_wiggle" % experiment_name )
-        if os.path.exists( wig_base_dir ):
-            #treatment
-            treatment_dir = os.path.join( wig_base_dir, "treat" )
-            if os.path.exists( treatment_dir ):
-                gunzip_cat_glob_path( os.path.join( treatment_dir, "*.wig.gz" ), options['wig']['output_treatment_file'], delete = True )
-                os.rmdir( treatment_dir )
-                #control
-                if options['input_control']:
-                    control_dir = os.path.join( wig_base_dir, "control" )
-                    if os.path.exists( control_dir ):
-                        gunzip_cat_glob_path( os.path.join( control_dir, "*.wig.gz" ), options['wig']['output_control_file'], delete = True )
-                        os.rmdir( control_dir )
-            os.rmdir( wig_base_dir )
-    
-    #move all remaining files to extra files path of html file output to allow user download
-    out_html = open( output_extra_html, 'wb' )
-    out_html.write( '<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\n' % experiment_name )
-    os.mkdir( output_extra_path )
-    for filename in sorted( os.listdir( tmp_dir ) ):
-        shutil.move( os.path.join( tmp_dir, filename ), os.path.join( output_extra_path, filename ) )
-        out_html.write( '<li><a href="%s">%s</a></li>\n' % ( filename, filename ) )
-    out_html.write( '</ul></p>\n' )
-    out_html.write( '<h3>Messages from MACS:</h3>\n<p><pre>%s</pre></p>\n' % open( stderr_name, 'rb' ).read() )
-    out_html.write( '</body></html>\n' )
-    out_html.close()
-    
-    os.unlink( stderr_name )
-    os.rmdir( tmp_dir )
-
-if __name__ == "__main__": main()
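
xls_to_interval() above shifts the second column down by one because, as the MACS readme notes, coordinates in the XLS output are 1-based while interval starts are 0-based. A tiny illustration with a made-up peaks.xls row (the row contents are hypothetical)::

    # 1-based XLS start -> 0-based interval start, as in xls_to_interval().
    def shift_start(line):
        fields = line.split("\t")
        if len(fields) > 1:
            fields[1] = str(int(fields[1]) - 1)    # only the start shifts
        return "\t".join(fields)

    print(shift_start("chr1\t1001\t1500\t500\t1200\t153.2"))
    # -> "chr1\t1000\t1500\t500\t1200\t153.2"
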
--- a/tools/peak_calling/macs_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,230 +0,0 @@
-<tool id="peakcalling_macs" name="MACS" version="1.0.1">
-  <description>Model-based Analysis of ChIP-Seq</description>
-  <command interpreter="python">macs_wrapper.py $options_file $output_bed_file $output_extra_files $output_extra_files.files_path</command>
-  <requirements>
-    <requirement type="python-module">macs</requirement>
-    <requirement type="package">macs</requirement>
-  </requirements>
-  <inputs>
-    <param name="experiment_name" type="text" value="MACS in Galaxy" size="50" label="Experiment Name"/>
-    <conditional name="input_type">
-      <param name="input_type_selector" type="select" label="Paired End Sequencing">
-        <option value="paired_end">Paired End (requires elandmulti format)</option>
-        <option value="single_end" selected="true">Single End</option>
-      </param>
-      <when value="paired_end">
-        <param name="input_chipseq_file1" type="data" format="elandmulti" label="ChIP-Seq Tag File 1" />
-        <param name="input_chipseq_file2" type="data" format="elandmulti" label="ChIP-Seq Tag File 2" />
-        <param name="input_control_file1" type="data" format="elandmulti" optional="True" label="ChIP-Seq Control File 1" />
-        <param name="input_control_file2" type="data" format="elandmulti" optional="True" label="ChIP-Seq Control File 2" />
-        <param name="petdist" type="integer" label="Best distance between Pair-End Tags" value="200"/>
-      </when>
-      <when value="single_end">
-        <param name="input_chipseq_file1" type="data" format="bed,sam,bam,eland,elandmulti" label="ChIP-Seq Tag File" />
-        <param name="input_control_file1" type="data" format="bed,sam,bam,eland,elandmulti" optional="True" label="ChIP-Seq Control File" />
-      </when>
-    </conditional>
-    <param name="gsize" type="float" label="Effective genome size" value="2.7e+9" help="default: 2.7e+9"/>
-    <param name="tsize" type="integer" label="Tag size" value="25"/>
-    <param name="bw" type="integer" label="Band width" value="300"/>
-    <param name="pvalue" type="float" label="Pvalue cutoff for peak detection" value="1e-5" help="default: 1e-5"/>
-    <param name="mfold" type="integer" label="Select the regions with MFOLD high-confidence enrichment ratio against background to build model" value="32"/>
-    <param name="xls_to_interval" label="Parse xls files into into distinct interval files" type="boolean" truevalue="create" falsevalue="do_not_create" checked="False"/>
-    <conditional name="wig_type">
-      <param name="wig_type_selector" type="select" label="Save shifted raw tag count at every bp into a wiggle file">
-        <option value="wig">Save</option>
-        <option value="no_wig" selected="true">Do not create wig file (faster)</option>
-      </param>
-      <when value="wig">
-        <param name="wigextend" type="integer" label="Extend tag from its middle point to a wigextend size fragment." value="-1" help="Use value less than 0 for default (modeled d)"/>
-        <param name="space" type="integer" label="Resolution for saving wiggle files" value="10"/>
-      </when>
-      <when value="no_wig">
-        <!-- do nothing here -->
-      </when>
-    </conditional>
-    <param name="nolambda" label="Use fixed background lambda as local lambda for every peak region" type="boolean" truevalue="--nolambda" falsevalue="" checked="False" help="up to 9X more time consuming"/>
-    <param name="lambdaset" type="text" label="3 levels of regions around the peak region to calculate the maximum lambda as local lambda" value="1000,5000,10000" size="50"/>
-    <conditional name="nomodel_type">
-      <param name="nomodel_type_selector" type="select" label="Build Model">
-        <option value="nomodel">Do not build the shifting model</option>
-        <option value="create_model" selected="true">Build the shifting model</option>
-      </param>
-      <when value="nomodel">
-        <param name="shiftsize" type="integer" label="Arbitrary shift size in bp" value="100"/>
-      </when>
-      <when value="create_model">
-        <!-- do nothing here -->
-      </when>
-    </conditional>
-    <conditional name="diag_type">
-      <param name="diag_type_selector" type="select" label="Diagnosis report" help="up to 9X more time consuming">
-        <option value="diag">Produce a diagnosis report</option>
-        <option value="no_diag" selected="true">Do not produce report (faster)</option>
-      </param>
-      <when value="diag">
-        <param name="fe-min" type="integer" label="Min fold enrichment to consider" value="0"/>
-        <param name="fe-max" type="integer" label="Max fold enrichment to consider" value="32"/>
-        <param name="fe-step" type="integer" label="Fold enrichment step" value="20"/>
-      </when>
-      <when value="no_diag">
-        <!-- do nothing here -->
-      </when>
-    </conditional>
-    <param name="futurefdr" label="Perform the new peak detection method (futurefdr)" type="boolean" truevalue="--futurefdr" falsevalue="" checked="False" help="The default method only consider the peak location, 1k, 5k, and 10k regions in the control data; whereas the new future method also consider the 5k, 10k regions in treatment data to calculate local bias."/>
-  </inputs>
-  <outputs>
-    <data name="output_bed_file" format="bed" label="${tool.name} on ${on_string} (peaks: bed)"/>
-    <data name="output_xls_to_interval_peaks_file" format="interval" label="${tool.name} on ${on_string} (peaks: interval)">
-      <filter>xls_to_interval is True</filter>
-    </data>
-    <data name="output_xls_to_interval_negative_peaks_file" format="interval" label="${tool.name} on ${on_string} (negative peaks: interval)">
-      <filter>xls_to_interval is True</filter>
-      <filter>input_type['input_control_file1'] is not None</filter>
-    </data>
-    <data name="output_treatment_wig_file" format="wig" label="${tool.name} on ${on_string} (treatment: wig)">
-      <filter>wig_type['wig_type_selector']=='wig'</filter>
-    </data>
-    <data name="output_control_wig_file" format="wig" label="${tool.name} on ${on_string} (control: wig)">
-      <filter>wig_type['wig_type_selector'] == 'wig'</filter>
-      <filter>input_type['input_control_file1'] is not None</filter>
-    </data>
-    <data name="output_extra_files" format="html" label="${tool.name} on ${on_string} (html report)"/>
-  </outputs>
-  <configfiles>
-    <configfile name="options_file">&lt;%
-import simplejson
-%&gt;
-#set $__options = { 'experiment_name':str( $experiment_name ), 'gsize':int( float( str( $gsize ) ) ), 'tsize':str( $tsize ), 'bw':str( $bw ), 'pvalue':str( $pvalue ), 'mfold':str( $mfold ), 'nolambda':str( $nolambda ), 'lambdaset': str( $lambdaset ), 'futurefdr':str( $futurefdr ) }
-#if str( $xls_to_interval ) == 'create':
-#set $__options['xls_to_interval'] = { 'peaks_file': str( $output_xls_to_interval_peaks_file ), 'negative_peaks_file': str( $output_xls_to_interval_negative_peaks_file ) }
-#else:
-#set $__options['xls_to_interval'] = False
-#end if
-##treatment/tag input files and format
-#set $__options['input_chipseq'] = [ str( $input_type['input_chipseq_file1'] ) ]
-#if  $input_type['input_type_selector'] == 'paired_end':
-#set $_hole = __options['input_chipseq'].append( str( $input_type['input_chipseq_file2'] ) )
-#set $__options['format'] = 'ELANDMULTIPET'
-#else:
-#set $__options['format'] = $input_type['input_chipseq_file1'].extension.upper()
-#end if
-##control/input files
-#set $__options['input_control'] = []
-#if str( $input_type['input_control_file1'] ) != 'None':
-#set $_hole = __options['input_control'].append( str( $input_type['input_control_file1'] ) )
-#end if
-#if $input_type['input_type_selector'] == 'paired_end' and str( $input_type['input_control_file2'] ) != 'None':
-#set $_hole = __options['input_control'].append( str( $input_type['input_control_file2'] ) )
-#end if
-##wig options
-#if $wig_type['wig_type_selector'] == 'wig':
-#set $__options['wig'] = {}
-#set $__options['wig']['wigextend'] = str( $wig_type['wigextend'] )
-#set $__options['wig']['space'] = str( $wig_type['space'] )
-#set  $__options['wig']['output_treatment_file'] = str( $output_treatment_wig_file )
-#if $input_type['input_control_file1'] is not None:
-#set  $__options['wig']['output_control_file'] = str( $output_control_wig_file )
-#end if
-#end if
-##model options
-#if $nomodel_type['nomodel_type_selector'] == 'nomodel':
-#set $__options['nomodel'] = str( $nomodel_type['shiftsize'] )
-#end if
-##diag options
-#if $diag_type['diag_type_selector'] == 'diag':
-#set $__options['diag'] = { 'fe-min':str( $diag_type['fe-min'] ), 'fe-max':str( $diag_type['fe-max'] ), 'fe-step':str( $diag_type['fe-step'] ) }
-#end if
-${ simplejson.dumps( __options ) }
-    </configfile>
-  </configfiles>
-  <tests>
-    <test>
-      <param name="input_type_selector" value="single_end" />
-      <param name="input_chipseq_file1" value="chipseq_enriched.bed.gz" ftype="bed" />
-      <param name="input_control_file1" value="chipseq_input.bed.gz" ftype="bed" />
-      <param name="experiment_name" value="Galaxy Test Run" />
-      <param name="tsize" value="36" />
-      <param name="mfold" value="13" />
-      <param name="gsize" value="2.7e+9" />
-      <param name="bw" value="300" />
-      <param name="pvalue" value="1e-5" />
-      <param name="xls_to_interval" />
-      <param name="wig_type_selector" value="no_wig" />
-      <param name="nolambda"/>
-      <param name="lambdaset" value="1000,5000,10000"/>
-      <param name="nomodel_type_selector" value="create_model" />
-      <param name="diag_type_selector" value="no_diag" />
-      <param name="futurefdr"/>
-      <output name="output_bed_file" file="peakcalling_macs/macs_test_1_out.bed" />
-      <output name="output_html_file" file="peakcalling_macs/macs_test_1_out.html" compare="re_match" >
-        <extra_files type="file" name="Galaxy_Test_Run_model.pdf" value="peakcalling_macs/test2/Galaxy_Test_Run_model.pdf" compare="re_match"/>
-        <extra_files type="file" name="Galaxy_Test_Run_model.r" value="peakcalling_macs/test2/Galaxy_Test_Run_model.r" compare="re_match"/>
-        <extra_files type="file" name="Galaxy_Test_Run_model.r.log" value="peakcalling_macs/test2/Galaxy_Test_Run_model.r.log"/>
-        <extra_files type="file" name="Galaxy_Test_Run_negative_peaks.xls" value="peakcalling_macs/test2/Galaxy_Test_Run_negative_peaks.xls" compare="re_match"/>
-        <extra_files type="file" name="Galaxy_Test_Run_peaks.xls" value="peakcalling_macs/test2/Galaxy_Test_Run_peaks.xls" compare="re_match"/>
-      </output>
-    </test>
-    <test>
-      <param name="input_type_selector" value="single_end" />
-      <param name="input_chipseq_file1" value="chipseq_enriched.bed.gz" ftype="bed" />
-      <param name="input_control_file1" value="chipseq_input.bed.gz" ftype="bed" />
-      <param name="experiment_name" value="Galaxy Test Run" />
-      <param name="tsize" value="36" />
-      <param name="mfold" value="13" />
-      <param name="gsize" value="2.7e+9" />
-      <param name="bw" value="300" />
-      <param name="pvalue" value="1e-5" />
-      <param name="xls_to_interval" value="true" />
-      <param name="wig_type_selector" value="no_wig" />
-      <param name="nolambda"/>
-      <param name="lambdaset" value="1000,5000,10000"/>
-      <param name="nomodel_type_selector" value="create_model" />
-      <param name="diag_type_selector" value="no_diag" />
-      <param name="futurefdr"/>
-      <output name="output_bed_file" file="peakcalling_macs/macs_test_1_out.bed" />
-      <output name="output_xls_to_interval_peaks_file" file="peakcalling_macs/macs_test_2_peaks_out.interval" lines_diff="4" />
-      <output name="output_xls_to_interval_negative_peaks_file" file="peakcalling_macs/macs_test_2_neg_peaks_out.interval" />
-      <output name="output_html_file" file="peakcalling_macs/macs_test_1_out.html" compare="re_match" >
-        <extra_files type="directory" value="peakcalling_macs/test2/" compare="re_match"/>
-      </output>
-    </test>
-    <!-- <test>
-      <param name="input_type_selector" value="single_end" />
-      <param name="input_chipseq_file1" value="chipseq_enriched.bed.gz" ftype="bed" />
-      <param name="input_control_file1" value="chipseq_input.bed.gz" ftype="bed" />
-      <param name="experiment_name" value="Galaxy Test Run" />
-      <param name="tsize" value="36" />
-      <param name="mfold" value="13" />
-      <param name="gsize" value="2.7e+9" />
-      <param name="bw" value="300" />
-      <param name="pvalue" value="1e-5" />
-      <param name="xls_to_interval" value="true" />
-      <param name="wig_type_selector" value="wig" />
-      <param name="wigextend" value="-1" />
-      <param name="space" value="10" />
-      <param name="nolambda"/>
-      <param name="lambdaset" value="1000,5000,10000"/>
-      <param name="nomodel_type_selector" value="create_model" />
-      <param name="diag_type_selector" value="no_diag" />
-      <param name="futurefdr"/>
-      <output name="output_bed_file" file="peakcalling_macs/macs_test_1_out.bed" />
-      <output name="output_xls_to_interval_peaks_file" file="peakcalling_macs/macs_test_2_peaks_out.interval" lines_diff="4" />
-      <output name="output_xls_to_interval_negative_peaks_file" file="macs_test_2_neg_peaks_out.interval" />
-      <output name="output_treatment_wig_file" file="peakcalling_macs/macs_test_3_treatment_out.wig" />
-      <output name="output_control_wig_file" file="peakcalling_macs/macs_test_3_control_out.wig" />
-      <output name="output_html_file" file="peakcalling_macs/macs_test_3_out.html" compare="re_match" >
-        <extra_files type="directory" value="peakcalling_macs/test2/" compare="re_match"/>
-      </output>
-    </test> -->
-  </tests>
-  <help>
-**What it does**
-
-This tool allows ChIP-seq peak calling using MACS.
-
-Depending upon the selected options, 2 to 6 history items will be created; the first output will be a standard BED file and the last will be an HTML report containing links to download additional files generated by MACS. Up to two each of wig and interval files can be optionally created; the interval files are parsed from the xls output.
-
-View the original MACS documentation: http://liulab.dfci.harvard.edu/MACS/00README.html.
-  </help>
-</tool>
--- a/tools/peak_calling/sicer_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,156 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-
-"""
-A wrapper script for running SICER (spatial clustering approach for the identification of ChIP-enriched regions) region caller.
-"""
-
-import sys, optparse, os, tempfile, subprocess, shutil
-
-CHUNK_SIZE = 2**20 #1mb
-
-VALID_BUILDS = [ 'mm8', 'mm9', 'hg18', 'hg19', 'dm2', 'dm3', 'sacCer1', 'pombe', 'rn4', 'tair8' ] #HACK! FIXME: allow using all specified builds; this would currently require hacking SICER's "GenomeData.py" on the fly.
-
-def cleanup_before_exit( tmp_dir ):
-    if tmp_dir and os.path.exists( tmp_dir ):
-        shutil.rmtree( tmp_dir )
-
-
-def open_file_from_option( filename, mode = 'rb' ):
-    if filename:
-        return open( filename, mode = mode )
-    return None
-
-def add_one_to_file_column( filename, column, split_char = "\t", startswith_skip = None ):
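-    """Increment the given (0-based) integer column by one on every data line of filename, in place; lines starting with startswith_skip, or with too few fields, are left unchanged."""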
-    tmp_out = tempfile.TemporaryFile( mode='w+b' )
-    tmp_in = open( filename )
-    for line in tmp_in:
-        if startswith_skip and line.startswith( startswith_skip ):
-            tmp_out.write( line )
-        else:
-            fields = line.rstrip( '\n\r' ).split( split_char )
-            if len( fields ) <= column:
-                tmp_out.write( line )
-            else:
-                fields[ column ] = str( int( fields[ column ] ) + 1 )
-                tmp_out.write( "%s\n" % ( split_char.join( fields )  ) )
-    tmp_in.close()
-    tmp_out.seek( 0 )
-    tmp_in = open( filename, 'wb' )
-    while True:
-        chunk = tmp_out.read( CHUNK_SIZE )
-        if chunk:
-            tmp_in.write( chunk )
-        else:
-            break
-    tmp_in.close()
-    tmp_out.close()
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    #stdout/err
-    parser.add_option( '', '--stdout', dest='stdout', action='store', type="string", default=None, help='If specified, the output of stdout will be written to this file.' )
-    parser.add_option( '', '--stderr', dest='stderr', action='store', type="string", default=None, help='If specified, the output of stderr will be written to this file.' )
-    parser.add_option( '', '--fix_off_by_one_errors', dest='fix_off_by_one_errors', action='store_true', default=False, help='If specified, fix off-by-one errors in output files' )
-    #inputs
-    parser.add_option( '-b', '--bed_file', dest='bed_file', action='store', type="string", default=None, help='Input ChIP BED file.' )
-    parser.add_option( '-c', '--control_file', dest='control_file', action='store', type="string", default=None, help='Input control BED file.' )
-    parser.add_option( '-d', '--dbkey', dest='dbkey', action='store', type="string", default=None, help='Input dbkey.' )
-    parser.add_option( '-r', '--redundancy_threshold', dest='redundancy_threshold', action='store', type="int", default=1, help='Redundancy Threshold: The number of copies of identical reads allowed in a library.' )
-    parser.add_option( '-w', '--window_size', dest='window_size', action='store', type="int", default=200, help='Window size: resolution of SICER algorithm. For histone modifications, one can use 200 bp' )
-    parser.add_option( '-f', '--fragment_size', dest='fragment_size', action='store', type="int", default=150, help='Fragment size: determines the amount of shift from the beginning of a read to the center of the DNA fragment represented by the read. FRAGMENT_SIZE=150 means the shift is 75.' )
-    parser.add_option( '-e', '--effective_genome_fraction', dest='effective_genome_fraction', action='store', type="float", default=0.74, help='Effective genome fraction: Effective Genome as fraction of the genome size. It depends on read length.' )
-    parser.add_option( '-g', '--gap_size', dest='gap_size', action='store', type="int", default=600, help='Gap size: must be a multiple of the window size. For example, if the window size is 200, the gap size should be 0, 200, 400, 600, ... .' )
-    parser.add_option( '-o', '--error_cut_off', dest='error_cut_off', action='store', type="string", default="0.1", help='Error Cut off: FDR or E-value' ) #read as string to construct names properly
-    #outputs
-    parser.add_option( '', '--redundancy_removed_test_bed_output_file', dest='redundancy_removed_test_bed_output_file', action='store', type="string", default=None, help='test-1-removed.bed: redundancy_removed test bed file' )
-    parser.add_option( '', '--redundancy_removed_control_bed_output_file', dest='redundancy_removed_control_bed_output_file', action='store', type="string", default=None, help='control-1-removed.bed: redundancy_removed control bed file' )
-    parser.add_option( '', '--summary_graph_output_file', dest='summary_graph_output_file', action='store', type="string", default=None, help='test-W200.graph: summary graph file for test-1-removed.bed with window size 200, in bedGraph format.' )
-    parser.add_option( '', '--test_normalized_wig_output_file', dest='test_normalized_wig_output_file', action='store', type="string", default=None, help='test-W200-normalized.wig: the above file normalized by library size per million and converted into wig format. This file can be uploaded to the UCSC genome browser' )
-    parser.add_option( '', '--score_island_output_file', dest='score_island_output_file', action='store', type="string", default=None, help='test-W200-G600.scoreisland: an intermediate file for debugging usage.' )
-    parser.add_option( '', '--islands_summary_output_file', dest='islands_summary_output_file', action='store', type="string", default=None, help='test-W200-G600-islands-summary: summary of all candidate islands with their statistical significance.' )
-    parser.add_option( '', '--significant_islands_summary_output_file', dest='significant_islands_summary_output_file', action='store', type="string", default=None, help='test-W200-G600-islands-summary-FDR.01: summary file of significant islands with requirement of FDR=0.01.' )
-    parser.add_option( '', '--significant_islands_output_file', dest='significant_islands_output_file', action='store', type="string", default=None, help='test-W200-G600-FDR.01-island.bed: delineation of significant islands in "chrom start end read-count-from-redundancy_removed-test.bed" format' )
-    parser.add_option( '', '--island_filtered_output_file', dest='island_filtered_output_file', action='store', type="string", default=None, help='test-W200-G600-FDR.01-islandfiltered.bed: library of raw redundancy_removed reads on significant islands.' )
-    parser.add_option( '', '--island_filtered_normalized_wig_output_file', dest='island_filtered_normalized_wig_output_file', action='store', type="string", default=None, help='test-W200-G600-FDR.01-islandfiltered-normalized.wig: wig file for the island-filtered redundancy_removed reads.' )
-    (options, args) = parser.parse_args()
-    
-    #check if valid build
-    if options.dbkey not in VALID_BUILDS:
-        raise ValueError( "The specified build ('%s') is not available for this tool." % options.dbkey )
-    
-    #everything will occur in this temp directory
-    tmp_dir = tempfile.mkdtemp()
-    
-    #link input files into tmp_dir and build command line
-    bed_base_filename = 'input_bed_file'
-    bed_filename = '%s.bed' % bed_base_filename
-    os.symlink( options.bed_file, os.path.join( tmp_dir, bed_filename ) )
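-    #choose the SICER script: SICER.sh compares ChIP against a control library, while SICER-rb.sh ("rb" = random background) runs without one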
-    if options.control_file is not None:
-        cmd = "SICER.sh"
-    else:
-        cmd = "SICER-rb.sh"
-    cmd = '%s "%s" "%s"' % ( cmd, tmp_dir, bed_filename )
-    if options.control_file is not None:
-        control_base_filename = 'input_control_file'
-        control_filename = '%s.bed' % control_base_filename
-        os.symlink( options.control_file, os.path.join( tmp_dir, control_filename ) )
-        cmd = '%s "%s"' % ( cmd, control_filename )
-    cmd = '%s "%s" "%s" "%i" "%i" "%i" "%f" "%i" "%s"' % ( cmd, tmp_dir, options.dbkey, options.redundancy_threshold, options.window_size, options.fragment_size, options.effective_genome_fraction, options.gap_size, options.error_cut_off )
-    
-    #set up stdout and stderr output options
-    stdout = open_file_from_option( options.stdout, mode = 'wb' )
-    stderr = open_file_from_option( options.stderr, mode = 'wb' )
-    #if no stderr file is specified, we'll use our own
-    if stderr is None:
-        stderr = tempfile.NamedTemporaryFile( dir=tmp_dir )
-        stderr.close()
-        stderr = open( stderr.name, 'w+b' )
-    
-    proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir )
-    return_code = proc.wait()
-    
-    if return_code:
-        stderr_target = sys.stderr
-    else:
-        stderr_target = stdout or sys.stdout #fall back to sys.stdout when no --stdout file was given
-        stderr_target.write( "\nAdditionally, these warnings were reported:\n" )
-    stderr.flush()
-    stderr.seek(0)
-    while True:
-        chunk = stderr.read( CHUNK_SIZE )
-        if chunk:
-            stderr_target.write( chunk )
-        else:
-            break
-    stderr.close()
-    
-    try:
-        #move files to where they belong
-        shutil.move(  os.path.join( tmp_dir,'%s-%i-removed.bed' % ( bed_base_filename, options.redundancy_threshold ) ), options.redundancy_removed_test_bed_output_file )
-        shutil.move(  os.path.join( tmp_dir,'%s-W%i.graph' % ( bed_base_filename, options.window_size ) ), options.summary_graph_output_file )
-        if options.fix_off_by_one_errors: add_one_to_file_column( options.summary_graph_output_file, 2 )
-        shutil.move(  os.path.join( tmp_dir,'%s-W%i-normalized.wig' % ( bed_base_filename, options.window_size ) ), options.test_normalized_wig_output_file )
-        if options.control_file is not None:
-            shutil.move(  os.path.join( tmp_dir,'%s-%i-removed.bed' % ( control_base_filename, options.redundancy_threshold ) ), options.redundancy_removed_control_bed_output_file )
-            shutil.move(  os.path.join( tmp_dir,'%s-W%i-G%i.scoreisland' % ( bed_base_filename, options.window_size, options.gap_size ) ), options.score_island_output_file )
-            if options.fix_off_by_one_errors: add_one_to_file_column( options.score_island_output_file, 2 )
-            shutil.move(  os.path.join( tmp_dir,'%s-W%i-G%i-islands-summary' % ( bed_base_filename, options.window_size, options.gap_size ) ), options.islands_summary_output_file )
-            if options.fix_off_by_one_errors: add_one_to_file_column( options.islands_summary_output_file, 2 )
-            shutil.move(  os.path.join( tmp_dir,'%s-W%i-G%i-islands-summary-FDR%s' % ( bed_base_filename, options.window_size, options.gap_size, options.error_cut_off ) ), options.significant_islands_summary_output_file )
-            if options.fix_off_by_one_errors: add_one_to_file_column( options.significant_islands_summary_output_file, 2 )
-            shutil.move(  os.path.join( tmp_dir,'%s-W%i-G%i-FDR%s-island.bed' % ( bed_base_filename, options.window_size, options.gap_size, options.error_cut_off ) ), options.significant_islands_output_file )
-            if options.fix_off_by_one_errors: add_one_to_file_column( options.significant_islands_output_file, 2 )
-            shutil.move(  os.path.join( tmp_dir,'%s-W%i-G%i-FDR%s-islandfiltered.bed' % ( bed_base_filename, options.window_size, options.gap_size, options.error_cut_off ) ), options.island_filtered_output_file )
-            shutil.move(  os.path.join( tmp_dir,'%s-W%i-G%i-FDR%s-islandfiltered-normalized.wig' % ( bed_base_filename, options.window_size, options.gap_size, options.error_cut_off ) ), options.island_filtered_normalized_wig_output_file )
-        else:
-            shutil.move(  os.path.join( tmp_dir,'%s-W%i-G%i-E%s.scoreisland' % ( bed_base_filename, options.window_size, options.gap_size, options.error_cut_off ) ), options.score_island_output_file )
-            if options.fix_off_by_one_errors: add_one_to_file_column( options.score_island_output_file, 2 )
-            shutil.move(  os.path.join( tmp_dir,'%s-W%i-G%i-E%s-islandfiltered.bed' % ( bed_base_filename, options.window_size, options.gap_size, options.error_cut_off ) ), options.island_filtered_output_file )
-            shutil.move(  os.path.join( tmp_dir,'%s-W%i-G%i-E%s-islandfiltered-normalized.wig' % ( bed_base_filename, options.window_size, options.gap_size, options.error_cut_off ) ), options.island_filtered_normalized_wig_output_file )
-    finally:
-        cleanup_before_exit( tmp_dir )
-
-if __name__=="__main__": __main__()
--- a/tools/peak_calling/sicer_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,176 +0,0 @@
-<tool id="peakcalling_sicer" name="SICER" version="0.0.1">
-  <description>Statistical approach for the Identification of ChIP-Enriched Regions</description>
-  <command interpreter="python">sicer_wrapper.py 
-  --bed_file '${input_bed_file}' 
-  #if str( $input_control_file ) != 'None':
-      --control_file '${input_control_file}'
-      --significant_islands_output_file "${significant_islands_output_file}"
-      --islands_summary_output_file "${islands_summary_output_file}"
-      --significant_islands_summary_output_file "${significant_islands_summary_output_file}"
-  #end if
-  ${fix_off_by_one_errors}
-  --dbkey '${input_bed_file.dbkey}'
-  --redundancy_threshold '${redundancy_threshold}'
-  --window_size '${window_size}'
-  --fragment_size '${fragment_size}'
-  --effective_genome_fraction '${effective_genome_fraction}'
-  --gap_size '${gap_size}'
-  --error_cut_off '${error_cut_off}'
-  ##output files
-  --stdout "${output_log_file}"
-  --redundancy_removed_test_bed_output_file "${redundancy_removed_test_bed_output_file}"
-  --redundancy_removed_control_bed_output_file "${redundancy_removed_control_bed_output_file}"
-  --score_island_output_file "${score_island_output_file}"
-  --summary_graph_output_file "${summary_graph_output_file}"
-  --test_normalized_wig_output_file "${test_normalized_wig_output_file}"
-  --island_filtered_output_file "${island_filtered_output_file}"
-  --island_filtered_normalized_wig_output_file "${island_filtered_normalized_wig_output_file}"
-  </command>
-  <requirements>
-    <requirement type="package" version="1.1">SICER</requirement>
-  </requirements>
-  <inputs>
-    <param name="input_bed_file" type="data" format="bed" label="ChIP-Seq Tag File" >
-      <validator type="expression" message="SICER is not available for the genome.">value.dbkey in [ 'mm8', 'mm9', 'hg18', 'hg19', 'dm2', 'dm3', 'sacCer1', 'pombe', 'rn4', 'tair8' ]</validator>
-    </param>
-    <param name="input_control_file" type="data" format="bed" label="ChIP-Seq Control File" optional="True"> <!-- fix me, add filter to match dbkeys -->
-      <options>
-        <filter type="data_meta" ref="input_bed_file" key="dbkey" />
-      </options>
-    </param>
-    <param name="fix_off_by_one_errors" type="boolean" truevalue="--fix_off_by_one_errors" falsevalue="" checked="True" label="Fix off-by-one errors in output files" help="SICER creates non-standard output files, this option will fix these coordinates"/> 
-    <param name="redundancy_threshold" type="integer" label="Redundancy Threshold" value="1" help="The number of copies of identical reads allowed in a library" />
-    <param name="window_size" type="integer" label="Window size" value="200" help="Resolution of SICER algorithm. For histone modifications, one can use 200 bp" />
-    <param name="fragment_size" type="integer" label="Fragment size" value="150" help="for determination of the amount of shift from the beginning of a read to the center of the DNA fragment represented by the read. FRAGMENT_SIZE=150 means the shift is 75." />
-    <param name="effective_genome_fraction" type="float" label="Effective genome fraction" value="0.74" help="Effective Genome as fraction of the genome size. It depends on read length." />
-    <param name="gap_size" type="integer" label="Gap size" value="600" help="Needs to be multiples of window size. Namely if the window size is 200, the gap size should be 0, 200, 400, 600, ..." />
-    <param name="error_cut_off" type="float" label="Statistic threshold value" value="0.01" help="FDR (with control) or E-value (without control)" />
-  </inputs>
-  <outputs>
-    <data name="redundancy_removed_test_bed_output_file" format="bed" label="${tool.name} on ${on_string} (test-${redundancy_threshold}-removed.bed)"/>
-    <data name="redundancy_removed_control_bed_output_file" format="bed" label="${tool.name} on ${on_string} (control-${redundancy_threshold}-removed.bed)">
-      <filter>input_control_file is not None</filter>
-    </data>
-    <data name="summary_graph_output_file" format="bedgraph" label="${tool.name} on ${on_string} (test-W${window_size}.graph)"/>
-    <data name="test_normalized_wig_output_file" format="wig" label="${tool.name} on ${on_string} (test-W${window_size}-normalized.wig)"/>
-    <data name="significant_islands_output_file" format="interval" label="${tool.name} on ${on_string} (test-W${window_size}-G${gap_size}-FDR${error_cut_off}-island.bed)">
-      <filter>input_control_file is not None</filter>
-    </data>
-    <data name="island_filtered_output_file" format="bed" label="${tool.name} on ${on_string} (#if str( $input_control_file ) != 'None' then ''.join( map( str, [ 'test-W', $window_size, '-G',$gap_size, '-FDR', $error_cut_off, '-islandfiltered.bed' ] ) ) else ''.join( map( str, [ 'test-W', $window_size, '-G', $gap_size, '-E', $error_cut_off, '-islandfiltered.bed' ] ) ) #)"/>
-    <data name="island_filtered_normalized_wig_output_file" format="wig" label="${tool.name} on ${on_string} (#if str( $input_control_file ) != 'None' then ''.join( map( str, [ 'test-W', $window_size, '-G',$gap_size, '-FDR', $error_cut_off, '-islandfiltered-normalized.wig' ] ) ) else ''.join( map( str, [ 'test-W', $window_size, '-G', $gap_size, '-E', $error_cut_off, '-islandfiltered-normalized.wig' ] ) ) #)"/>
-    <data name="score_island_output_file" format="interval" label="${tool.name} on ${on_string} (#if str( $input_control_file ) != 'None' then ''.join( map( str, [ 'test-W', $window_size, '-G',$gap_size, '.scoreisland' ] ) ) else ''.join( map( str, [ 'test-W', $window_size, '-G', $gap_size, '-E', $error_cut_off, '.scoreisland' ] ) ) #)"/>
-    <data name="islands_summary_output_file" format="interval" label="${tool.name} on ${on_string} (test-W${window_size}-G${gap_size}-islands-summary)">
-      <filter>input_control_file is not None</filter>
-    </data>
-    <data name="significant_islands_summary_output_file" format="interval" label="${tool.name} on ${on_string} (test-W${window_size}-G${gap_size}-islands-summary-FDR${error_cut_off})">
-      <filter>input_control_file is not None</filter>
-    </data>
-    <data name="output_log_file" format="txt" label="${tool.name} on ${on_string} (log)"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input_bed_file" value="chipseq_enriched.bed.gz" ftype="bed" dbkey="mm8" />
-      <param name="input_control_file" />
-      <param name="fix_off_by_one_errors" />
-      <param name="redundancy_threshold" value="1" />
-      <param name="window_size" value="200" />
-      <param name="fragment_size" value="150" />
-      <param name="effective_genome_fraction" value="0.74" />
-      <param name="gap_size" value="600" />
-      <param name="error_cut_off" value="0.01" />
-      <output name="redundancy_removed_test_bed_output_file" file="peakcalling_sicer/test_1/test-1-removed.bed" />
-      <output name="summary_graph_output_file" file="peakcalling_sicer/test_1/test-W200.graph" />
-      <output name="test_normalized_wig_output_file" file="peakcalling_sicer/test_1/test-W200-normalized.wig" />
-      <output name="island_filtered_output_file" file="peakcalling_sicer/test_1/test-W200-G600-E0.01-islandfiltered.bed" />
-      <output name="island_filtered_normalized_wig_output_file" file="peakcalling_sicer/test_1/test-W200-G600-E0.01-islandfiltered-normalized.wig" />
-      <output name="score_island_output_file" file="peakcalling_sicer/test_1/test-W200-G600-E0.01.scoreisland" />
-      <output name="output_log_file" file="peakcalling_sicer/test_1/output_log_file.contains" compare="contains"/>
-    </test>
-    <test>
-      <param name="input_bed_file" value="chipseq_enriched.bed.gz" ftype="bed" dbkey="mm8" />
-      <param name="input_control_file" value="chipseq_input.bed.gz" ftype="bed" dbkey="mm8" />
-      <param name="fix_off_by_one_errors" />
-      <param name="redundancy_threshold" value="1" />
-      <param name="window_size" value="200" />
-      <param name="fragment_size" value="150" />
-      <param name="effective_genome_fraction" value="0.74" />
-      <param name="gap_size" value="600" />
-      <param name="error_cut_off" value="0.01" />
-      <output name="redundancy_removed_test_bed_output_file" file="peakcalling_sicer/test_2/test-1-removed.bed" />
-      <output name="redundancy_removed_control_bed_output_file" file="peakcalling_sicer/test_2/control-1-removed.bed" />
-      <output name="summary_graph_output_file" file="peakcalling_sicer/test_2/test-W200.graph" />
-      <output name="test_normalized_wig_output_file" file="peakcalling_sicer/test_2/test-W200-normalized.wig" />
-      <output name="significant_islands_output_file" file="peakcalling_sicer/test_2/test-W200-G600-FDR0.01-island.bed" />
-      <output name="island_filtered_output_file" file="peakcalling_sicer/test_2/test-W200-G600-FDR0.01-islandfiltered.bed" />
-      <output name="island_filtered_normalized_wig_output_file" file="peakcalling_sicer/test_2/test-W200-G600-FDR0.01-islandfiltered-normalized.wig" />
-      <output name="score_island_output_file" file="peakcalling_sicer/test_2/test-W200-G600.scoreisland" />
-      <output name="islands_summary_output_file" file="peakcalling_sicer/test_2/test-W200-G600-islands-summary" />
-      <output name="significant_islands_summary_output_file" file="peakcalling_sicer/test_2/test-W200-G600-islands-summary-FDR0.01" />
-      <output name="output_log_file" file="peakcalling_sicer/test_2/output_log_file.contains" compare="contains"/>
-    </test>
-    <test>
-      <param name="input_bed_file" value="chipseq_enriched.bed.gz" ftype="bed" dbkey="mm8" />
-      <param name="input_control_file" value="chipseq_input.bed.gz" ftype="bed" dbkey="mm8" />
-      <param name="fix_off_by_one_errors" value="True" />
-      <param name="redundancy_threshold" value="1" />
-      <param name="window_size" value="200" />
-      <param name="fragment_size" value="150" />
-      <param name="effective_genome_fraction" value="0.74" />
-      <param name="gap_size" value="600" />
-      <param name="error_cut_off" value="0.01" />
-      <output name="redundancy_removed_test_bed_output_file" file="peakcalling_sicer/test_2/test-1-removed.bed" />
-      <output name="redundancy_removed_control_bed_output_file" file="peakcalling_sicer/test_2/control-1-removed.bed" />
-      <output name="summary_graph_output_file" file="peakcalling_sicer/test_3/test-W200.graph" />
-      <output name="test_normalized_wig_output_file" file="peakcalling_sicer/test_2/test-W200-normalized.wig" />
-      <output name="significant_islands_output_file" file="peakcalling_sicer/test_3/test-W200-G600-FDR0.01-island.bed" />
-      <output name="island_filtered_output_file" file="peakcalling_sicer/test_2/test-W200-G600-FDR0.01-islandfiltered.bed" />
-      <output name="island_filtered_normalized_wig_output_file" file="peakcalling_sicer/test_2/test-W200-G600-FDR0.01-islandfiltered-normalized.wig" />
-      <output name="score_island_output_file" file="peakcalling_sicer/test_3/test-W200-G600.scoreisland" />
-      <output name="islands_summary_output_file" file="peakcalling_sicer/test_3/test-W200-G600-islands-summary" />
-      <output name="significant_islands_summary_output_file" file="peakcalling_sicer/test_3/test-W200-G600-islands-summary-FDR0.01" />
-      <output name="output_log_file" file="peakcalling_sicer/test_2/output_log_file.contains" compare="contains"/>
-    </test>
-    <test>
-      <param name="input_bed_file" value="chipseq_enriched.bed.gz" ftype="bed" dbkey="mm8" />
-      <param name="input_control_file" />
-      <param name="fix_off_by_one_errors" value="True" />
-      <param name="redundancy_threshold" value="1" />
-      <param name="window_size" value="200" />
-      <param name="fragment_size" value="150" />
-      <param name="effective_genome_fraction" value="0.74" />
-      <param name="gap_size" value="600" />
-      <param name="error_cut_off" value="0.01" />
-      <output name="redundancy_removed_test_bed_output_file" file="peakcalling_sicer/test_1/test-1-removed.bed" />
-      <output name="summary_graph_output_file" file="peakcalling_sicer/test_4/test-W200.graph" />
-      <output name="test_normalized_wig_output_file" file="peakcalling_sicer/test_1/test-W200-normalized.wig" />
-      <output name="island_filtered_output_file" file="peakcalling_sicer/test_1/test-W200-G600-E0.01-islandfiltered.bed" />
-      <output name="island_filtered_normalized_wig_output_file" file="peakcalling_sicer/test_1/test-W200-G600-E0.01-islandfiltered-normalized.wig" />
-      <output name="score_island_output_file" file="peakcalling_sicer/test_4/test-W200-G600-E0.01.scoreisland" />
-      <output name="output_log_file" file="peakcalling_sicer/test_1/output_log_file.contains" compare="contains"/>
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-SICER is, first and foremost, a filtering tool. Its main functions are::
-  
-  1. Delineation of the significantly ChIP-enriched regions, which can be used to associate with other genomic landmarks. 
-  2. Identification of reads on the ChIP-enriched regions, which can be used for profiling and other quantitative analysis.
-
-View the original SICER documentation: http://home.gwu.edu/~wpeng/Software.htm.
-
-------
-
-.. class:: warningmark
-
-  By default, SICER creates files that do not conform to standards (e.g. BED files are closed, not half-open). This could have implications for downstream analysis.
-  To force the output of SICER to be formatted properly to standard file formats, check the **"Fix off-by-one errors in output files"** option.
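-
-  As a minimal sketch (with made-up numbers) of what the fix does, the wrapper's add_one_to_file_column shifts the closed end coordinate in column 2 by one to make the interval half-open::
-
-    line = "chr1\t999\t2000\t15"           # chrom, start, end, read count
-    fields = line.split("\t")
-    fields[2] = str(int(fields[2]) + 1)    # closed end 2000 becomes half-open end 2001
-    line = "\t".join(fields)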
-
-------
-
-**Citation**
-
-For the underlying tool, please cite `Zang C, Schones DE, Zeng C, Cui K, Zhao K, Peng W. A clustering approach for identification of enriched domains from histone modification ChIP-Seq data. Bioinformatics. 2009 Aug 1;25(15):1952-8. &lt;http://www.ncbi.nlm.nih.gov/pubmed/19505939&gt;`_
-
-  </help>
-</tool>
--- a/tools/picard/picard_AddOrReplaceReadGroups.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,204 +0,0 @@
-<tool name="Add or Replace Groups" id="picard_ARRG" version="0.2.0">
-  <requirements><requirement type="package">picard</requirement></requirements>
-  <command interpreter="python">
-    picard_wrapper.py
-      --input="$inputFile"
-      --rg-lb="$rglb"
-      --rg-pl="$rgpl"
-      --rg-pu="$rgpu"
-      --rg-sm="$rgsm"
-      --rg-id="$rgid"
-      --rg-opts=${readGroupOpts.rgOpts}
-      #if $readGroupOpts.rgOpts == "full"
-        --rg-cn="$readGroupOpts.rgcn"
-        --rg-ds="$readGroupOpts.rgds"
-      #end if
-      --output-format=$outputFormat
-      --output=$outFile
-      -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/AddOrReplaceReadGroups.jar"
-  </command>
-  <inputs>
-    <param format="bam,sam" name="inputFile" type="data" label="SAM/BAM dataset to add or replace read groups in"
-      help="If empty, upload or import a SAM/BAM dataset." />
-    <param name="rgid" value="1" type="text" label="Read group ID (ID tag)" help="The most important read group tag. Galaxy will use a value of '1' if nothing provided." />
-    <param name="rgsm" value="" type="text" label="Read group sample name (SM tag)" />
-    <param name="rglb" value="" type="text" label="Read group library (LB tag)" />
-    <param name="rgpl" value="" type="text" label="Read group platform (PL tag)" help="illumina, solid, 454, pacbio, helicos" />
-    <param name="rgpu" value="" type="text" label="Read group platform unit" help="like run barcode, etc." />
-    <conditional name="readGroupOpts">
-      <param name="rgOpts" type="select" label="Specify additional (optional) arguments" help="Allows you to set RGCN and RGDS.">
-        <option value="preSet">Use pre-set defaults</option>
-        <option value="full">Set optional arguments</option>
-      </param>
-      <when value="preSet" />
-      <when value="full">
-        <param name="rgcn" value="" type="text" label="Read group sequencing center name" help="Leave set to &lt;null&gt; for default (none)" />
-        <param name="rgds" value="" type="text" label="Read group description" help="Leave set to &lt;null&gt; for default (none)" />
-      </when>
-    </conditional>
-    <param name="outputFormat" type="boolean" checked="True" truevalue="bam" falsevalue="sam" label="Output bam instead of sam" help="Uncheck for sam output" />
-  </inputs>
-  <outputs>
-    <data name="outFile" format="bam" label="${tool.name} on ${on_string}: ${outputFormat} with read groups replaced">
-      <change_format>
-        <when input="outputFormat" value="sam" format="sam" />
-      </change_format>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <!-- Command for replacing read groups in bam:
-      java -jar AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_ARRG_input1.bam O=picard_ARRG_output1.sam RGID=one RGLB=lib RGPL=illumina RGPU=peaewe RGSM=sam1
-      -->
-      <param name="inputFile" value="picard_ARRG_input1.bam" />
-      <param name="rglb" value="lib" />
-      <param name="rgpl" value="illumina" />
-      <param name="rgpu" value="peaewe" />
-      <param name="rgsm" value="sam1" />
-      <param name="rgid" value="one" />
-      <param name="rgOpts" value="preSet" />
-      <param name="outputFormat" value="False" />
-      <output name="outFile" file="picard_ARRG_output1.sam" ftype="sam" />
-    </test>
-    <test>
-      <!-- Command for replacing read groups in sam:
-      java -jar AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_ARRG_input1.sam O=picard_ARRG_output2.sam RGLB=LIB RGPL=IL RGPU=PLAT RGSM=smp RGID=M5 RGCN=FamousCenter RGDS="description with spaces"
-      picard_ARRG_input1.bam can be created from picard_ARRG_input1.sam
-      -->
-      <param name="inputFile" value="picard_ARRG_input1.sam" />
-      <param name="rglb" value="LIB" />
-      <param name="rgpl" value="IL" />
-      <param name="rgpu" value="PLAT" />
-      <param name="rgsm" value="smp" />
-      <param name="rgid" value="M5" />
-      <param name="rgOpts" value="full" />
-      <param name="rgcn" value="FamousCenter" />
-      <param name="rgds" value="description with spaces" />
-      <param name="outputFormat" value="False" />
-      <output name="outFile" file="picard_ARRG_output2.sam" ftype="sam" />
-    </test>
-    <test>
-      <!-- Command for adding read groups in sam:
-      java -jar AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_ARRG_input2.sam O=picard_ARRG_output3.bam RGID=M6 RGLB=LIB RGPL=IL RGPU=PLAT RGSM=smp1
-      -->
-      <param name="inputFile" value="picard_ARRG_input2.sam" />
-      <param name="rglb" value="LIB" />
-      <param name="rgpl" value="IL" />
-      <param name="rgpu" value="PLAT" />
-      <param name="rgsm" value="smp1" />
-      <param name="rgid" value="M6" />
-      <param name="rgOpts" value="preSet" />
-      <param name="outputFormat" value="True" />
-      <output name="outFile" file="picard_ARRG_output3.bam" ftype="bam" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**Purpose**
-
-Add or Replace Read Groups in an input BAM or SAM file.
-
-**Read Groups are Important!**
-
-Many downstream analysis tools (such as GATK, for example) require BAM datasets to contain read groups. Even if you are not going to use GATK, setting read groups correctly from the start will simplify your life greatly. Below we provide an explanation of read group fields, taken from the GATK FAQ webpage:
-
-.. csv-table::
-   :header-rows: 1
-
-   Tag,Importance,Definition,Meaning
-   "ID","Required","Read group identifier. Each @RG line must have a unique ID. The value of ID is used in the RG tags of alignment records. Must be unique among all read groups in header section. Read group IDs may be modified when merging SAM files in order to handle collisions.","Ideally, this should be a globally unique identifier across all sequencing data in the world, such as the Illumina flowcell + lane name and number.  Will be referenced by each read with the RG:Z field, allowing tools to determine the read group information associated with each read, including the sample from which the read came.  Also, a read group is effectively treated as a separate run of the NGS instrument in tools like base quality score recalibration (a GATK component) -- all reads within a read group are assumed to come from the same instrument run and to therefore share the same error model."
-   "SM","Required.  As important as ID.","Sample. Use pool name where a pool is being sequenced.","The name of the sample sequenced in this read group.  GATK tools treat all read groups with the same SM value as containing sequencing data for the same sample.  Therefore it's critical that the SM field be correctly specified, especially when using multi-sample tools like the Unified Genotyper (a GATK component)."
-   "PL","Important.  Not currently used in the GATK, but was in the past, and may return.","Platform/technology used to produce the read. Valid values: ILLUMINA, SOLID, LS454, HELICOS and PACBIO.","The only way to know the sequencing technology used to generate the sequencing data.  It's a good idea to use this field."
-   "LB","Essential for MarkDuplicates","DNA preparation library identifier","MarkDuplicates uses the LB field to determine which read groups might contain molecular duplicates, in case the same DNA library was sequenced on multiple lanes."
-
-**Example of Read Group usage**
-
-Suppose we have a trio of samples: MOM, DAD, and KID.  Each has two DNA libraries prepared, one with 400 bp inserts and another with 200 bp inserts.  Each of these libraries is run on two lanes of an illumina hiseq, requiring 3 x 2 x 2 = 12 lanes of data.  When the data come off the sequencer, we would create 12 BAM files, with the following @RG fields in the header::
-
- Dad's data:
- @RG     ID:FLOWCELL1.LANE1      PL:illumina     LB:LIB-DAD-1 SM:DAD      PI:200
- @RG     ID:FLOWCELL1.LANE2      PL:illumina     LB:LIB-DAD-1 SM:DAD      PI:200
- @RG     ID:FLOWCELL1.LANE3      PL:illumina     LB:LIB-DAD-2 SM:DAD      PI:400
- @RG     ID:FLOWCELL1.LANE4      PL:illumina     LB:LIB-DAD-2 SM:DAD      PI:400
-  
- Mom's data:
- @RG     ID:FLOWCELL1.LANE5      PL:illumina     LB:LIB-MOM-1 SM:MOM      PI:200
- @RG     ID:FLOWCELL1.LANE6      PL:illumina     LB:LIB-MOM-1 SM:MOM      PI:200
- @RG     ID:FLOWCELL1.LANE7      PL:illumina     LB:LIB-MOM-2 SM:MOM      PI:400
- @RG     ID:FLOWCELL1.LANE8      PL:illumina     LB:LIB-MOM-2 SM:MOM      PI:400
- 
- Kid's data:
- @RG     ID:FLOWCELL2.LANE1      PL:illumina     LB:LIB-KID-1 SM:KID      PI:200
- @RG     ID:FLOWCELL2.LANE2      PL:illumina     LB:LIB-KID-1 SM:KID      PI:200
- @RG     ID:FLOWCELL2.LANE3      PL:illumina     LB:LIB-KID-2 SM:KID      PI:400
- @RG     ID:FLOWCELL2.LANE4      PL:illumina     LB:LIB-KID-2 SM:KID      PI:400
-
-Note the hierarchical relationship between read groups (unique for each lane), libraries (each sequenced on two lanes), and samples (across four lanes, two for each library).
-
-**Picard documentation**
-
-This is a Galaxy wrapper for AddOrReplaceReadGroups, a part of the external package Picard-tools_.
-
- .. _Picard-tools: http://www.google.com/search?q=picard+samtools
-
-------
-
-.. class:: infomark
-
-**Inputs, outputs, and parameters**
-
-Either a sam file or a bam file must be supplied. If a bam file is used, it must
-be coordinate-sorted. Galaxy currently coordinate-sorts all bam files.
-
-The output file is either bam (the default) or sam, according to user selection,
-and contains the same information as the input file except for the appropriate
-additional (or modified) read group tags. Bam is recommended since it is smaller.
-
-The parameter descriptions below are taken from the Picard documentation.
-
-AddOrReplaceReadGroups REQUIRED parameters::
-
-  Option (Type)    Description
-  
-  RGLB=String      Read Group Library
-  RGPL=String      Read Group platform (e.g. illumina, solid)
-  RGPU=String      Read Group platform unit (eg. run barcode)
-  RGSM=String      Read Group sample name
-  RGID=String      Read Group ID; Default value: null (empty)
-
-AddOrReplaceReadGroups OPTIONAL parameters::
-
-  Option (Type)    Description
-  
-  RGCN=String      Read Group sequencing center name; Default value: null (empty)
-  RGDS=String      Read Group description Default value: null (empty)
-
-One parameter that Picard's AddOrReplaceReadGroups offers that is automatically
-set by Galaxy is the SORT_ORDER, which is set to coordinate.
-
-.. class:: warningmark
-
-**Warning on SAM/BAM quality**
-
-Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
-flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
-to be the only way to deal with SAM/BAM that cannot be parsed.
-
-
-
-  </help>
-</tool>
-
-
-
-
-
-
-
-
-
-
-
-
--- a/tools/picard/picard_BamIndexStats.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-<tool name="BAM Index Statistics" id="picard_BamIndexStats" version="0.2.0">
-  <requirements><requirement type="package">picard</requirement></requirements>
-  <command interpreter="python">
-    picard_wrapper.py
-      --input "$input_file"
-      --bai-file "$input_file.metadata.bam_index"
-      -t "$htmlfile"
-      -d "$htmlfile.files_path"
-      -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/BamIndexStats.jar"
-  </command>
-  <inputs>
-    <param format="bam" name="input_file" type="data"  label="BAM dataset to generate statistics for"
-      help="If empty, upload or import a BAM dataset" />
-  </inputs>
-  <outputs>
-    <data format="html" name="htmlfile" label="${tool.name}_on_${on_string}.html" />
-  </outputs>
-  <tests>
-    <test>
-      <!-- Command
-      java -jar BamIndexStats.jar I=test-data/picard_input_tiny_coord.bam > picard_BIS_output1.txt
-      picard_input_tiny_coord.bam can be created from picard_input_tiny_coord.sam
-      -->
-      <param name="input_file" value="picard_input_tiny_coord.bam" ftype="bam" />
-      <output name="htmlfile" file="picard_BIS_output1.txt" ftype="html" compare="contains" lines_diff="12"/>
-    </test>
-    <test>
-      <!-- Command
-      java -jar BamIndexStats.jar I=test-data/picard_BIS_input1.bam > picard_BIS_output2.txt
-      picard_BIS_input1.bam can be created from picard_BIS_input1.sam
-      -->
-      <param name="input_file" value="picard_BIS_input1.bam" ftype="bam" />
-      <output name="htmlfile" file="picard_BIS_output2.txt" ftype="html" compare="contains" lines_diff="12" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**Purpose**
-
-Generate Bam Index Stats for a provided BAM file.
-
-**Picard documentation**
-
-This is a Galaxy wrapper for BamIndexStats, a part of the external package Picard-tools_.
-
- .. _Picard-tools: http://www.google.com/search?q=picard+samtools
-
-------
-
-.. class:: infomark
-
-**Inputs and outputs**
-
-The only input is the BAM file you wish to obtain statistics for, which is required.
-Note that it must be coordinate-sorted. Galaxy currently coordinate-sorts all BAM files.
-
-This tool outputs an HTML file that contains links to the actual metrics results, as well
-as a log file with info on the exact command run.
-
-.. class:: warningmark
-
-**Warning on SAM/BAM quality**
-
-Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
-flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
-to be the only way to deal with SAM/BAM that cannot be parsed.
-
-------
-
-**Example**
-
-Given a BAM file created from the following::
-
-  @HD    VN:1.0     SO:coordinate
-  @SQ    SN:chr1    LN:101
-  @SQ    SN:chr7    LN:404
-  @SQ    SN:chr8    LN:202
-  @SQ    SN:chr10   LN:303
-  @SQ    SN:chr14   LN:505
-  @RG    ID:0       SM:Hi,Mom!
-  @RG    ID:1       SM:samplesample    DS:ClearDescription
-  @PG    ID:1       PN:Hey!   VN:2.0
-  @CO    Just a generic comment to make the header longer
-  read1     83    chr7      1    255    101M             =       302     201    CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN    )'.*.+2,))II'I*/)-I*-)I.-)I)I),/-II..)./.,.).*II,I.II-)III0*IIIIIIII/32/,01460II/6/*0*/2/283//36868/I    RG:Z:0
-  read2     89    chr7      1    255    101M             *         0       0    CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN    )'.*.+2,))II'I*/)-I*-)I.-)I)I),/-II..)./.,.).*II,I.II-)III0*IIIIIIII/32/,01460II/6/*0*/2/283//36868/I    RG:Z:0
-  read3     83    chr7      1    255    101M             =       302     201    CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN    )'.*.+2,))II'I*/)-I*-)I.-)I)I),/-II..)./.,.).*II,I.II-)III0*IIIIIIII/32/,01460II/6/*0*/2/283//36868/I    RG:Z:0
-  read4    147    chr7     16    255    101M             =        21     -96    CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN    )'.*.+2,))II'I*/)-I*-)I.-)I)I),/-II..)./.,.).*II,I.II-)III0*IIIIIIII/32/,01460II/6/*0*/2/283//36868/I    RG:Z:0
-  read5     99    chr7     21    255    101M             =        16      96    CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN    )'.*.+2,))II'I*/)-I*-)I.-)I)I),/-II..)./.,.).*II,I.II-)III0*IIIIIIII/32/,01460II/6/*0*/2/283//36868/I    RG:Z:0
-  read6    163    chr7    302    255    101M             =         1    -201    NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA    I/15445666651/566666553+2/14/I/555512+3/)-'/-I-'*+))*''13+3)'//++''/'))/3+I*5++)I'2+I+/*I-II*)I-./1'1    RG:Z:0
-  read7    163    chr7    302    255    10M1D10M5I76M    =         1    -201    NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA    I/15445666651/566666553+2/14/I/555512+3/)-'/-I-'*+))*''13+3)'//++''/'))/3+I*5++)I'2+I+/*I-II*)I-./1'1    RG:Z:0
-  read8    165       *      0      0    *                chr7      1       0    NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA    I/15445666651/566666553+2/14/I/555512+3/)-'/-I-'*+))*''13+3)'//++''/'))/3+I*5++)I'2+I+/*I-II*)I-./1'1    RG:Z:0
-
-The following metrics file will be produced::
-
-  chr1 length=    101    Aligned= 0    Unaligned= 0
-  chr7 length=    404    Aligned= 7    Unaligned= 0
-  chr8 length=    202    Aligned= 0    Unaligned= 0
-  chr10 length=   303    Aligned= 0    Unaligned= 0
-  chr14 length=   505    Aligned= 0    Unaligned= 0
-  NoCoordinateCount= 1
-
-  </help>
-</tool>
-
-
-
-
-
-
-
-
-
-
-
-
--- a/tools/picard/picard_MarkDuplicates.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,150 +0,0 @@
-<tool name="Mark Duplicates" id="picard_MarkDuplicates" version="0.01">
-  <command interpreter="python">
-    picard_wrapper.py
-      --input="$input_file"
-      --remove-dups="$remDups"
-      --read-regex="$readRegex"
-      --opt-dup-dist="$optDupeDist"
-      --output-format=$outputFormat
-      --output-txt=$outMetrics
-      #if str( $outputFormat ) == "sam"
-        #if str( $remDups ) == "true"
-          --output-sam=$outFileSamRemoved
-        #else
-          --output-sam=$outFileSamMarked
-        #end if
-      #else if str( $outputFormat ) == "bam"
-        #if str( $remDups ) == "true"
-          --output-sam=$outFileBamRemoved
-        #else
-          --output-sam=$outFileBamMarked
-        #end if
-      #end if
-      -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/"
-      --picard-cmd="MarkDuplicates"
-  </command>
-  <inputs>
-    <param format="bam,sam" name="input_file" type="data" label="SAM/BAM dataset to mark duplicates in"
-      help="If the select list is empty, you need to upload or import some aligned short read data from a shared library"/>
-    <param name="remDups" type="boolean" label="Remove duplicates from output file" truevalue="true" falsevalue="false" checked="False" 
-      help="If true do not write duplicates to the output file instead of writing them with appropriate flags set" />
-    <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" type="text" size="80"
-      label="Regular expression that can be used to parse read names in the incoming SAM file" 
-      help="Names are parsed to extract: tile/region, x coordinate and y coordinate, to estimate optical duplication rate" >
-      <sanitizer>
-        <valid initial="string.printable">
-         <remove value="&apos;"/>
-        </valid>
-        <mapping initial="none">
-          <add source="&apos;" target="__sq__"/>
-        </mapping>
-      </sanitizer>
-    </param>
-    <param name="optDupeDist" value="100" type="text"
-      label="The maximum offset between two duplicate clusters in order to consider them optical duplicates" size="5" 
-      help="Common range 5-10 pixels. Later Illumina software versions multiply pixel values by 10, in which case 50-100" />
-    <param name="outputFormat" type="boolean" checked="True" truevalue="bam" falsevalue="sam" label="Output bam instead of sam" help="Uncheck for sam output" />
-  </inputs>
-  <outputs>
-    <data format="txt" name="outMetrics" label="${tool.name} on ${on_string}: metrics" />
-    <data format="sam" name="outFileSamMarked" label="${tool.name} on ${on_string}: duplicates marked sam">
-      <filter>outputFormat is False</filter>
-      <filter>remDups is False</filter>
-    </data>
-    <data format="sam" name="outFileSamRemoved" label="${tool.name} on ${on_string}: duplicates removed sam">
-      <filter>outputFormat is False</filter>
-      <filter>remDups is True</filter>
-    </data>
-    <data format="bam" name="outFileBamMarked" label="${tool.name} on ${on_string}: duplicates marked bam">
-      <filter>outputFormat is True</filter>
-      <filter>remDups is False</filter>
-    </data>
-    <data format="bam" name="outFileBamRemoved" label="${tool.name} on ${on_string}: duplicates removed bam">
-      <filter>outputFormat is True</filter>
-      <filter>remDups is True</filter>
-    </data>
-  </outputs>
-  <tests>
-    <!-- Functional tests with Picard bam outputs currently aren't working
-    <test>
-    -->
-      <!-- Command to run:
-      java -jar MarkDuplicates.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_input_tiny_coord.bam METRICS_FILE=picard_MD_output1.txt OUTPUT=picard_MD_output2.bam REMOVE_DUPLICATES=false ASSUME_SORTED=true READ_NAME_REGEX="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" OPTICAL_DUPLICATE_PIXEL_DISTANCE=100
-      -->
-    <!--
-      <param name="input_file" value="picard_input_tiny_coord.bam" />
-      <param name="remDups" value="false" />
-      <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
-      <param name="optDupeDist" value="100" />
-      <param name="outputFormat" value="bam" />
-      <output name="outMetrics" file="picard_MD_output1.txt" ftype="txt" lines_diff="4" />
-      <output name="outFileBamMarked" file="picard_MD_output2.bam" ftype="bam" />
-    </test>
-    -->
-    <test>
-      <!-- Command to run:
-      java -jar MarkDuplicates.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_input_tiny_coord.sam METRICS_FILE=picard_MD_output3.txt O=picard_MD_output4.sam REMOVE_DUPLICATES=true ASSUME_SORTED=true READ_NAME_REGEX="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" OPTICAL_DUPLICATE_PIXEL_DISTANCE=100
-      -->
-      <param name="input_file" value="picard_input_tiny_coord.sam" />
-      <param name="remDups" value="true" />
-      <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
-      <param name="optDupeDist" value="100" />
-      <param name="outputFormat" value="sam" />
-      <output name="outMetrics" file="picard_MD_output3.txt" ftype="txt" lines_diff="4" />
-      <output name="outFileSamRemoved" file="picard_MD_output4.sam" ftype="sam" />
-    </test>
-  </tests>
-  
-  <help>
-
-.. class:: infomark
-
-**Purpose**
-
-MarkDuplicates examines aligned records in the supplied sam or bam file to identify duplicate molecules.
-
-**Picard documentation**
-
-This is a Galaxy interface for MarkDuplicates, a part of Picard-tools_, which is closely related to SAMTools_.
-
- .. _Picard-tools: http://picard.sourceforge.net/index.shtml
- .. _SAMTools: http://samtools.sourceforge.net/
-
-------
-
-**Input**
-
-Either a sam file or a bam file is required. If a bam file is used, it must be coordinate-sorted.
-
-**Outputs**
-
-This tool provides two outputs. The first contains the marked (or kept) records and is either bam (the default) or sam, according to user selection. Bam is recommended since it is smaller. The second output is the metrics file, which is a text file containing information about the duplicates. 
-
-**MarkDuplicates parameters**
-
-The two main parameters to be concerned with are the flag for removing duplicates and the regular expression needed to identify reads. If it is set to remove duplicates, they will not be written to the output file; otherwise they will appear in the output but will be flagged appropriately. The read name regular expression is used to parse read names from the input sam file. Read names are parsed to extract three variables: tile/region, x coordinate, and y coordinate. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. The regular expression should contain three capture groups for the three variables, in order (capture groups are enclosed in parentheses). Default value: [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*. 
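-
-As a minimal sketch (in Python, with an invented read name used purely for illustration), the default expression extracts the three values like this::
-
-    import re
-    # the default READ_NAME_REGEX, with three capture groups for tile, x and y
-    regex = r"[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*"
-    match = re.match(regex, "EAS54:3:22:84:316")  # hypothetical read name
-    tile, x, y = match.groups()                   # ('22', '84', '316')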
-
-One other parameter that can be set is the maximum offset between two duplicate clusters in order for them to be considered optical duplicates. Later versions of the Illumina pipeline that multiply pixel values by 10 should generally use 50-100 pixels; otherwise 5-10 is normal. The default is set to 100. 
-
-One parameter that Picard's MarkDuplicates offers, ASSUME_SORTED, is set automatically by Galaxy: it is always true, because Galaxy bam datasets should always be coordinate-sorted.
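-
-For reference, the wrapper assembles a Picard command line roughly of the following form (the file names here are placeholders, not the wrapper's actual temporary paths)::
-
-    java -jar MarkDuplicates.jar VALIDATION_STRINGENCY=LENIENT INPUT=input.bam \
-      OUTPUT=marked.bam METRICS_FILE=metrics.txt REMOVE_DUPLICATES=false \
-      ASSUME_SORTED=true READ_NAME_REGEX="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" \
-      OPTICAL_DUPLICATE_PIXEL_DISTANCE=100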
-
-**Note on the use of regular expressions for read name parsing**
-
-The regular expression (regex) is used to parse the read names, so it's important to get it exactly right (you probably don't want to edit it unless you know exactly what you're doing). The three parts of the read name identified are tile/region, x coordinate, and y coordinate, which are used to estimate the optical duplication rate and, in turn, a more accurate library size.
-
-
-
-  </help>
-</tool>
-
-
-
-
-
-
-
-
-
-
-
-
--- a/tools/picard/picard_ReorderSam.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,165 +0,0 @@
-<tool name="Reorder SAM/BAM" id="picard_ReorderSam" version="0.3.0">
-  <requirements><requirement type="package">picard</requirement></requirements>
-  <command interpreter="python">
-    picard_wrapper.py
-      --input=$inputFile
-      #if $source.indexSource == "built-in"
-        --ref="${ filter( lambda x: str( x[0] ) == str( $source.ref ), $__app__.tool_data_tables[ 'picard_indexes' ].get_fields() )[0][-1] }"
-      #else
-        --ref-file=$refFile
-        --species-name=$source.speciesName
-        --build-name=$source.buildName
-        --trunc-names=$source.truncateSeqNames
-      #end if
-      --allow-inc-dict-concord=$allowIncDictConcord
-      --allow-contig-len-discord=$allowContigLenDiscord
-      --output-format=$outputFormat
-      --output=$outFile
-      -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/ReorderSam.jar"
-  </command>
-  <inputs>
-    <param format="bam,sam" name="inputFile" type="data" label="SAM/BAM dataset to be reordered"
-           help="If empty, upload or import a SAM/BAM dataset." />
-    <conditional name="source">
-      <param name="indexSource" type="select" label="Select Reference Genome" help="This tool will re-order SAM/BAM in the same order as reference selected below.">
-        <option value="built-in">Locally cached</option>
-        <option value="history">History</option>
-      </param>
-      <when value="built-in">
-        <param name="ref" type="select" label="Select a reference genome">
-          <options from_data_table="picard_indexes" />
-        </param>
-      </when>
-      <when value="history">
-        <param name="refFile" type="data" format="fasta" metadata_name="dbkey" label="Using reference file" />
-        <param name="speciesName" type="text" value="" label="Species name" />
-        <param name="buildName" type="text" value="" label="Build name" />
-        <param name="truncateSeqNames" type="boolean" checked="False" truevalue="true" falsevalue="false" label="Truncate sequence names after first whitespace" />
-      </when>
-    </conditional>
-    <param name="allowIncDictConcord" type="boolean" checked="False" truevalue="true" falsevalue="false" label="Allow incomplete dict concordance?" help="Allows a partial overlap of the BAM contigs with the new reference sequence contigs." />
-    <param name="allowContigLenDiscord" type="boolean" checked="False" truevalue="true" falsevalue="false" label="Allow contig length discordance?" help="This is dangerous--don't check it unless you know exactly what you're doing!" />
-    <param name="outputFormat" type="boolean" checked="True" truevalue="bam" falsevalue="sam" label="Output BAM instead of SAM" help="Uncheck for SAM output" />
-  </inputs>
-  <outputs>
-    <data name="outFile" format="bam" label="${tool.name} on ${on_string}: reordered ${outputFormat}">
-      <change_format>
-        <when input="outputFormat" value="sam" format="sam" />
-      </change_format>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <!-- Commands:
-      cp test-data/phiX.fasta .
-      samtools faidx phiX.fasta
-      java -jar CreateSequenceDictionary.jar R=phiX.fasta O=phiX.dict URI=phiX.fasta TRUNCATE_NAMES_AT_WHITESPACE=false SPECIES=phiX174
-      java -jar ReorderSam.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_RS_input1.bam O=picard_RS_output1.bam REFERENCE=phiX.fasta ALLOW_INCOMPLETE_DICT_CONCORDANCE=false ALLOW_CONTIG_LENGTH_DISCORDANCE=false
-    -->
-      <param name="inputFile" value="picard_RS_input1.bam" />
-      <param name="indexSource" value="history" />
-      <param name="refFile" value="phiX.fasta" />
-      <param name="speciesName" value="phiX174" />
-      <param name="buildName" value="" />
-      <param name="truncateSeqNames" value="false" />
-      <param name="allowIncDictConcord" value="false" />
-      <param name="allowContigLenDiscord" value="false" />
-      <param name="outputFormat" value="True" />
-      <output name="outFile" file="picard_RS_output1.bam" ftype="bam" lines_diff="4" compare="contains" />
-    </test>
-    <test>
-      <!-- Command:
-      java -jar ReorderSam.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_RS_input2.sam O=picard_RS_output2.sam REFERENCE=/path/to/phiX/picard_index/phiX.fa ALLOW_INCOMPLETE_DICT_CONCORDANCE=false ALLOW_CONTIG_LENGTH_DISCORDANCE=false
-      /path/to/phiX/srma_index/phiX.fa is path to phiX.fa, phiX.fa.fai, and phiX.dict
-      -->
-      <param name="inputFile" value="picard_RS_input2.sam" />
-      <param name="indexSource" value="built-in" />
-      <param name="ref" value="phiX" />
-      <param name="allowIncDictConcord" value="false" />
-      <param name="allowContigLenDiscord" value="false" />
-      <param name="outputFormat" value="False" />
-      <output name="outFile" file="picard_RS_output2.sam" ftype="sam" lines_diff="4" sort="True" />
-    </test>
-    <test>
-      <!-- Commands:
-      cp test-data/picard_RS_input4.fasta .
-      samtools faidx picard_RS_input4.fasta
-      java -jar CreateSequenceDictionary.jar R=picard_RS_input4.fasta O=picard_RS_input4.dict URI=picard_RS_input4.fasta TRUNCATE_NAMES_AT_WHITESPACE=true SPECIES=phiX174 GENOME_ASSEMBLY=phiX_buildBlah1.1
-      java -jar ReorderSam.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_RS_input3.bam O=picard_RS_output3.sam REFERENCE=picard_RS_input4.fasta ALLOW_INCOMPLETE_DICT_CONCORDANCE=true ALLOW_CONTIG_LENGTH_DISCORDANCE=false
-      picard_RS_input3.bam can be made from picard_RS_input3.sam
-      -->
-      <param name="inputFile" value="picard_RS_input3.bam" />
-      <param name="indexSource" value="history" />
-      <param name="refFile" value="picard_RS_input4.fasta" />
-      <param name="speciesName" value="phiX174" />
-      <param name="buildName" value="phiX_buildBlah1.1" />
-      <param name="truncateSeqNames" value="true" />
-      <param name="allowIncDictConcord" value="true" />
-      <param name="allowContigLenDiscord" value="false" />
-      <param name="outputFormat" value="False" />
-      <output name="outFile" file="picard_RS_output3.sam" ftype="sam" lines_diff="12" sort="True" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**Purpose**
-
-Reorder SAM/BAM to match contig ordering in a particular reference file. Note that this is
-not the same as sorting as done by the SortSam tool, which sorts by either coordinate
-values or query name. The ordering in ReorderSam is based on exact name matching of
-contigs/chromosomes. Reads that are mapped to a contig that is not in the new reference file are
-not included in the output.
-
-**Picard documentation**
-
-This is a Galaxy wrapper for ReorderSam, a part of the external package Picard-tools_.
-
- .. _Picard-tools: http://www.google.com/search?q=picard+samtools
-
-------
-
-.. class:: infomark 
-
-**Inputs, outputs, and parameters**
-
-For the file that needs to be reordered, either a sam file or a bam file must be supplied.
-If a bam file is used, it must be coordinate-sorted. A reference file is also required,
-so either a fasta file should be supplied or a built-in reference can be selected.
-
-The output contains the same reads as the input file but the reads have been rearranged so
-they appear in the same order as the provided reference file. The tool will output either
-bam (the default) or sam, according to user selection. Bam is recommended since it is smaller.
-
-The only extra parameters that can be set are flags for allowing incomplete dict concordance
-and allowing contig length discordance. If incomplete dict concordance is allowed, only a
-partial overlap of the bam contigs with the new reference sequence contigs is required. By
-default it is off, requiring a corresponding contig in the new reference for each read contig.
-If contig length discordance is allowed, contig names that are the same between a read and the
-new reference contig are allowed even if they have different lengths. This is usually not a
-good idea, unless you know exactly what you're doing. It's off by default.
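-
-The underlying Picard call assembled by this wrapper looks roughly like this (file names are placeholders)::
-
-    java -jar ReorderSam.jar VALIDATION_STRINGENCY=LENIENT INPUT=input.bam \
-      OUTPUT=reordered.bam REFERENCE=ref.fasta \
-      ALLOW_INCOMPLETE_DICT_CONCORDANCE=false ALLOW_CONTIG_LENGTH_DISCORDANCE=false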
-
-.. class:: warningmark
-
-**Warning on SAM/BAM quality**
-
-Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
-flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
-to be the only way to deal with SAM/BAM that cannot be parsed.
-
-
-  </help>
-</tool>
-
-
-
-
-
-
-
-
-
-
-
-
--- a/tools/picard/picard_ReplaceSamHeader.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-<tool name="Replace SAM/BAM Header" id="picard_ReplaceSamHeader" version="0.2.0">
-  <requirements><requirement type="package">picard</requirement></requirements>
-  <command interpreter="python">
-    picard_wrapper.py
-      --input "$inputFile"
-      -o $outFile
-      --header-file $headerFile
-      --output-format $outputFormat
-      -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/ReplaceSamHeader.jar"
-      --tmpdir "${__new_file_path__}" 
-  </command>
-  <inputs>
-    <param format="bam,sam" name="inputFile" type="data" label="SAM/BAM dataset to replace header in (TARGET)"
-      help="If empty, upload or import a SAM/BAM dataset." />
-    <param format="bam,sam" name="headerFile" type="data" label="SAM/BAM to reader header from (SOURCE)"
-      help="If empty, upload or import a SAM/BAM dataset." />
-    <param name="outputFormat" type="boolean" checked="True" truevalue="bam" falsevalue="sam" label="Output BAM instead of SAM" help="Uncheck for SAM output" />
-  </inputs>
-  <outputs>
-    <data name="outFile" format="bam" label="${tool.name} on ${on_string}: ${outputFormat} with replaced header">
-      <change_format>
-        <when input="outputFormat" value="sam" format="sam" />
-      </change_format>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <!-- Command:
-      java -jar ReplaceSamHeader.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_input_tiny_coord.bam HEADER=test-data/picard_RSH_input1.bam O=picard_RSH_output1.sam
-      picard_RSH_input1.bam can be made from picard_RSH_input1.sam
-      -->
-      <param name="inputFile" value="picard_input_tiny_coord.bam" ftype="bam" />
-      <param name="headerFile" value="picard_RSH_input1.bam" ftype="bam" />
-      <param name="outputFormat" value="False" />
-      <output name="outFile" file="picard_RSH_output1.sam" ftype="sam" />
-    </test>
-    <test>
-      <!-- Command:
-      java -jar ReplaceSamHeader.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_input_tiny_coord.sam HEADER=test-data/picard_RSH_input1.bam O=picard_RSH_output2.sam
-      picard_RSH_input1.bam can be made from picard_RSH_input1.sam
-      -->
-      <param name="inputFile" value="picard_input_tiny_coord.sam" ftype="sam" />
-      <param name="headerFile" value="picard_RSH_input1.bam" ftype="bam" />
-      <param name="outputFormat" value="False" />
-      <output name="outFile" file="picard_RSH_output2.sam" ftype="sam" />
-    </test>
-    <test>
-      <!-- Command:
-      java -jar ReplaceSamHeader.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_input_tiny_coord.sam HEADER=test-data/picard_RSH_input1.sam O=picard_RSH_output2.bam
-      -->
-      <param name="inputFile" value="picard_input_tiny_coord.sam" ftype="sam" />
-      <param name="headerFile" value="picard_RSH_input1.sam" ftype="sam" />
-      <param name="outputFormat" value="True" />
-      <output name="outFile" file="picard_RSH_output2.bam" ftype="bam" />
-    </test>
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**Purpose**
-
-Replace Sam Header with the header from another sam file. The tool does not do any
-significant validation, so it's up to the user to make sure that the elements in
-the header are relevant and that the new header contains everything the records require.
-
-Replace the SAMFileHeader in a SAM file with the given header. Validation is
-minimal. It is up to the user to ensure that all the elements referred to in the
-SAMRecords are present in the new header. Sort order of the two input files must
-be the same.
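-
-Under the hood the wrapper issues a Picard call roughly of this shape (file names are placeholders)::
-
-    java -jar ReplaceSamHeader.jar VALIDATION_STRINGENCY=LENIENT INPUT=target.bam \
-      HEADER=source.bam OUTPUT=out.bam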
-
-**Picard documentation**
-
-This is a Galaxy wrapper for ReplaceSamHeader, a part of the external package Picard-tools_.
-
- .. _Picard-tools: http://www.google.com/search?q=picard+samtools
-
-------
-
-.. class:: infomark
-
-**Inputs and outputs**
-
-Either a sam file or a bam file is required as the file whose header will be replaced.
-The header file is also required and can also be either sam or bam (it does not have
-to be the same type as the other file). In both cases, if a bam file is used, it must
-be coordinate-sorted. Galaxy currently coordinate-sorts all bam files.
-
-The tool will output either bam (the default) or sam. Bam is recommended since it is smaller.
-
-.. class:: warningmark
-
-**Warning on SAM/BAM quality**
-
-Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
-flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
-to be the only way to deal with SAM/BAM that cannot be parsed.
-
-
-
-  </help>
-</tool>
-
-
-
-
-
-
-
-
-
-
-
-
--- a/tools/picard/picard_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,734 +0,0 @@
-#!/usr/bin/env python
-"""
-Originally written by Kelly Vincent
-pretty output and additional picard wrappers by Ross Lazarus for rgenetics
-Runs all available wrapped Picard tools.
-usage: picard_wrapper.py [options]
-code Ross wrote licensed under the LGPL
-see http://www.gnu.org/copyleft/lesser.html
-"""
-
-import optparse, os, sys, subprocess, tempfile, shutil, time, logging
-
-galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-<meta name="generator" content="Galaxy %s tool output - see http://getgalaxy.org/" />
-<title></title>
-<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
-</head>
-<body>
-<div class="document">
-"""
-galhtmlattr = """Galaxy tool %s run at %s</b><br/>"""
-galhtmlpostfix = """</div></body></html>\n"""
-
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-    
-
-def timenow():
-    """return current time as a string
-    """
-    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
-
-
-class PicardBase():
-    """
-    simple base class with some utilities for Picard
-    adapted and merged with Kelly Vincent's code april 2011 Ross
-    lots of changes...
-    """
-    
-    def __init__(self, opts=None,arg0=None):
-        """ common stuff needed at init for a picard tool
-        """
-        assert opts <> None, 'PicardBase needs opts at init'
-        self.opts = opts
-        if self.opts.outdir == None:
-             self.opts.outdir = os.getcwd() # some tools (eg fixmate) produce no html file, so default to the current directory
-        assert self.opts.outdir <> None,'## PicardBase needs a temp directory if no output directory passed in'
-        self.picname = self.baseName(opts.jar)
-        if self.picname.startswith('picard'):
-            self.picname = opts.picard_cmd # special case for some tools like replaceheader?
-        self.progname = self.baseName(arg0)
-        self.version = '0.002'
-        self.delme = [] # list of files to destroy
-        self.title = opts.title
-        self.inputfile = opts.input
-        try:
-            os.makedirs(opts.outdir)
-        except:
-            pass
-        try:
-            os.makedirs(opts.tmpdir)
-        except:
-            pass
-        self.log_filename = os.path.join(self.opts.outdir,'%s.log' % self.picname)
-        self.metricsOut =  os.path.join(opts.outdir,'%s.metrics.txt' % self.picname)
-        self.setLogging(logfname=self.log_filename)
- 
-    def baseName(self,name=None):
-        return os.path.splitext(os.path.basename(name))[0]
-
-    def setLogging(self,logfname="picard_wrapper.log"):
-        """setup a logger
-        """
-        logging.basicConfig(level=logging.INFO,
-                    filename=logfname,
-                    filemode='a')
-
-
-    def readLarge(self,fname=None):
-        """ read a potentially huge file.
-        """
-        try:
-            # get stderr, allowing for case where it's very large
-            tmp = open( fname, 'rb' )
-            s = ''
-            buffsize = 1048576
-            try:
-                while True:
-                    more = tmp.read( buffsize )
-                    if len(more) > 0:
-                        s += more
-                    else:
-                        break
-            except OverflowError:
-                pass
-            tmp.close()
-        except Exception, e:
-            stop_err( 'Error : %s' % str( e ) )   
-        return s
-    
-    def runCL(self,cl=None,output_dir=None):
-        """ construct and run a command line
-        we have galaxy's temp path as opt.temp_dir so don't really need isolation
-        sometimes stdout is needed as the output - ugly hacks to deal with potentially vast artifacts
-        """
-        assert cl <> None, 'PicardBase runCL needs a command line as cl'
-        if output_dir == None:
-            output_dir = self.opts.outdir
-        if type(cl) == type([]):
-            cl = ' '.join(cl)
-        fd,templog = tempfile.mkstemp(dir=output_dir,suffix='rgtempRun.txt')
-        tlf = open(templog,'wb')
-        fd,temperr = tempfile.mkstemp(dir=output_dir,suffix='rgtempErr.txt')
-        tef = open(temperr,'wb')
-        process = subprocess.Popen(cl, shell=True, stderr=tef, stdout=tlf, cwd=output_dir)
-        rval = process.wait()
-        tlf.close()
-        tef.close()
-        stderrs = self.readLarge(temperr)
-        stdouts = self.readLarge(templog)        
-        if len(stderrs) > 0:
-            s = '## executing %s returned status %d and stderr: \n%s\n' % (cl,rval,stderrs)
-        else:
-            s = '## executing %s returned status %d and nothing on stderr\n' % (cl,rval)
-        logging.info(s)
-        os.unlink(templog) # always
-        os.unlink(temperr) # always
-        return s, stdouts # sometimes this is an output
-    
-    def runPic(self, jar, cl):
-        """
-        cl should be everything after the jar file name in the command
-        """
-        runme = ['java -Xmx%s' % self.opts.maxjheap]
-        runme.append('-jar %s' % jar)
-        runme += cl
-        s,stdout = self.runCL(cl=runme, output_dir=self.opts.outdir)
-        return stdout
-
-    def samToBam(self,infile=None,outdir=None):
-        """
-        use samtools view to convert sam to bam
-        """
-        fd,tempbam = tempfile.mkstemp(dir=outdir,suffix='rgutilsTemp.bam')
-        cl = ['samtools view -h -b -S -o ',tempbam,infile]
-        tlog,stdouts = self.runCL(cl,outdir)
-        return tlog,tempbam
-
-    #def bamToSam(self,infile=None,outdir=None):
-    #    """
-    #    use samtools view to convert bam to sam
-    #    """
-    #    fd,tempsam = tempfile.mkstemp(dir=outdir,suffix='rgutilsTemp.sam')
-    #    cl = ['samtools view -h -o ',tempsam,infile]
-    #    tlog,stdouts = self.runCL(cl,outdir)
-    #    return tlog,tempsam
-
-    def sortSam(self, infile=None,outfile=None,outdir=None):
-        """
-        """
-        print '## sortSam got infile=%s,outfile=%s,outdir=%s' % (infile,outfile,outdir)
-        cl = ['samtools sort',infile,outfile]
-        tlog,stdouts = self.runCL(cl,outdir)
-        return tlog
-
-    def cleanup(self):
-        for fname in self.delme:
-            try:
-                os.unlink(fname)
-            except:
-                pass
-                    
-    def prettyPicout(self,transpose,maxrows):
-        """organize picard outpouts into a report html page
-        """
-        res = []
-        try:
-            r = open(self.metricsOut,'r').readlines()
-        except:
-            r = []        
-        if len(r) > 0:
-            res.append('<b>Picard on line resources</b><ul>\n')
-            res.append('<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li>\n')
-            res.append('<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/>\n')
-            if transpose:
-                res.append('<b>Picard output (transposed to make it easier to see)</b><hr/>\n')       
-            else:
-                res.append('<b>Picard output</b><hr/>\n')  
-            res.append('<table cellpadding="3" >\n')
-            dat = []
-            heads = []
-            lastr = len(r) - 1
-            # special case for estimate library complexity hist
-            thist = False
-            for i,row in enumerate(r):
-                if row.strip() > '':
-                    srow = row.split('\t')
-                    if row.startswith('#'):
-                        heads.append(row.strip()) # want strings
-                    else:
-                        dat.append(srow) # want lists
-                    if row.startswith('## HISTOGRAM'):
-                        thist = True
-            if len(heads) > 0:
-                hres = ['<tr class="d%d"><td colspan="2">%s</td></tr>' % (i % 2,x) for i,x in enumerate(heads)]
-                res += hres
-                heads = []
-            if len(dat) > 0:
-                if transpose and not thist:
-                    tdat = map(None,*dat) # transpose an arbitrary list of lists
-                    tdat = ['<tr class="d%d"><td>%s</td><td>%s&nbsp;</td></tr>\n' % ((i+len(heads)) % 2,x[0],x[1]) for i,x in enumerate(tdat)] 
-                else:
-                    tdat = ['\t'.join(x).strip() for x in dat] # back to strings :(
-                    tdat = ['<tr class="d%d"><td colspan="2">%s</td></tr>\n' % ((i+len(heads)) % 2,x) for i,x in enumerate(tdat)]
-                res += tdat
-                dat = []
-            res.append('</table>\n')   
-        return res
-
-    def fixPicardOutputs(self,transpose,maxloglines):
-        """
-        picard produces long hard to read tab header files
-        make them available but present them transposed for readability
-        """
-        logging.shutdown()
-        self.cleanup() # remove temp files stored in delme
-        rstyle="""<style type="text/css">
-        tr.d0 td {background-color: oldlace; color: black;}
-        tr.d1 td {background-color: aliceblue; color: black;}
-        </style>"""    
-        res = [rstyle,]
-        res.append(galhtmlprefix % self.progname)   
-        res.append(galhtmlattr % (self.picname,timenow()))
-        flist = [x for x in os.listdir(self.opts.outdir) if not x.startswith('.')] 
-        pdflist = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']
-        if len(pdflist) > 0: # assumes all pdfs come with thumbnail .jpgs
-            for p in pdflist:
-                imghref = '%s.jpg' % os.path.splitext(p)[0] # removes .pdf
-                res.append('<table cellpadding="10"><tr><td>\n')
-                res.append('<a href="%s"><img src="%s" title="Click image preview for a print quality PDF version" hspace="10" align="middle"></a>\n' % (p,imghref)) 
-                res.append('</tr></td></table>\n')   
-        if len(flist) > 0:
-            res.append('<b>The following output files were created (click the filename to view/download a copy):</b><hr/>')
-            res.append('<table>\n')
-            for i,f in enumerate(flist):
-                fn = os.path.split(f)[-1]
-                res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fn))
-            res.append('</table><p/>\n') 
-        pres = self.prettyPicout(transpose,maxloglines)
-        if len(pres) > 0:
-            res += pres
-        l = open(self.log_filename,'r').readlines()
-        llen = len(l)
-        if llen > 0: 
-            res.append('<b>Picard Tool Run Log</b><hr/>\n') 
-            rlog = ['<pre>',]
-            if llen > maxloglines:
-                n = min(50,int(maxloglines/2))
-                rlog += l[:n]
-                rlog.append('------------ ## %d rows deleted ## --------------\n' % (llen-maxloglines))
-                rlog += l[-n:]
-            else:
-                rlog += l
-            rlog.append('</pre>')
-            if llen > maxloglines:
-                rlog.append('\n<b>## WARNING - %d log lines truncated - <a href="%s">%s</a> contains entire output</b>' % (llen - maxloglines,self.log_filename,self.log_filename))
-            res += rlog
-        else:
-            res.append("### Odd, Picard left no log file %s - must have really barfed badly?\n" % self.log_filename)
-        res.append('<hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n') 
-        res.append( 'generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool')   
-        res.append(galhtmlpostfix) 
-        outf = open(self.opts.htmlout,'w')
-        outf.write(''.join(res))   
-        outf.write('\n')
-        outf.close()
-
-    def makePicInterval(self,inbed=None,outf=None):
-        """
-        picard wants bait and target files to have the same header length as the incoming bam/sam 
-        a meaningful (ie accurate) representation will fail because of this - so this hack
-        it would be far better to be able to supply the original bed untouched
-        """
-        assert inbed <> None
-        bed = open(inbed,'r').readlines()
-        thead = os.path.join(self.opts.outdir,'tempSamHead.txt')
-        if self.opts.datatype == 'sam':
-            cl = ['samtools view -H -S',self.opts.input,'>',thead]
-        else:
-            cl = ['samtools view -H',self.opts.input,'>',thead]
-        self.runCL(cl=cl,output_dir=self.opts.outdir)
-        head = open(thead,'r').readlines()
-        s = '## got %d rows of header\n' % (len(head))
-        logging.info(s)
-        o = open(outf,'w')
-        o.write(''.join(head))
-        o.write(''.join(bed))
-        o.close()
-        return outf
-
-    def cleanSam(self, insam=None, newsam=None, picardErrors=[],outformat=None):
-        """
-        interesting problem - if paired, must remove mate pair of errors too or we have a new set of errors after cleaning - missing mate pairs!
-        Do the work of removing all the error sequences
-        pysam is cool
-        infile = pysam.Samfile( "-", "r" )
-        outfile = pysam.Samfile( "-", "w", template = infile )
-        for s in infile: outfile.write(s)
-
-        errors from ValidateSamFile.jar look like
-        WARNING: Record 32, Read name SRR006041.1202260, NM tag (nucleotide differences) is missing
-        ERROR: Record 33, Read name SRR006041.1042721, Empty sequence dictionary.
-        ERROR: Record 33, Read name SRR006041.1042721, RG ID on SAMRecord not found in header: SRR006041
-
-        """
-        assert os.path.isfile(insam), 'rgPicardValidate cleansam needs an input sam file - cannot find %s' % insam
-        assert newsam <> None, 'rgPicardValidate cleansam needs an output new sam file path'
-        removeNames = [x.split(',')[1].replace(' Read name ','') for x in picardErrors if len(x.split(',')) > 2]
-        remDict = dict(zip(removeNames,range(len(removeNames))))
-        infile = pysam.Samfile(insam,'rb')
-        info = 'found %d error sequences in picardErrors, %d unique' % (len(removeNames),len(remDict))
-        if len(removeNames) > 0:
-            outfile = pysam.Samfile(newsam,'wb',template=infile) # template must be an open file
-            i = 0
-            j = 0
-            for row in infile:
-                dropme = remDict.get(row.qname,None) # keep if None
-                if dropme is None: # note: 'if not dropme' would wrongly keep the first error read (index 0)
-                    outfile.write(row)
-                    j += 1
-                else: # discard
-                    i += 1
-            info = '%s\n%s' % (info, 'Discarded %d lines writing %d to %s from %s' % (i,j,newsam,insam))
-            outfile.close()
-            infile.close()
-        else: # we really want a nullop or a simple pointer copy
-            infile.close()
-            if newsam:
-                shutil.copy(insam,newsam)
-        logging.info(info)
-                
-
-
-def __main__():
-    doFix = False # tools returning htmlfile don't need this
-    doTranspose = True # default
-    maxloglines = 100 # default 
-    #Parse Command Line
-    op = optparse.OptionParser()
-    # All tools
-    op.add_option('-i', '--input', dest='input', help='Input SAM or BAM file' )
-    op.add_option('-e', '--inputext', default=None)
-    op.add_option('-o', '--output', default=None)
-    op.add_option('-n', '--title', default="Pick a Picard Tool")
-    op.add_option('-t', '--htmlout', default=None)
-    op.add_option('-d', '--outdir', default=None)
-    op.add_option('-x', '--maxjheap', default='4g')
-    op.add_option('-b', '--bisulphite', default='false')
-    op.add_option('-s', '--sortorder', default='query')     
-    op.add_option('','--tmpdir', default='/tmp')
-    op.add_option('-j','--jar',default='')    
-    op.add_option('','--picard-cmd',default=None)    
-    # Many tools
-    op.add_option( '', '--output-format', dest='output_format', help='Output format' )
-    op.add_option( '', '--bai-file', dest='bai_file', help='The path to the index file for the input bam file' )
-    op.add_option( '', '--ref', dest='ref', help='Built-in reference with fasta and dict file', default=None )
-    # CreateSequenceDictionary
-    op.add_option( '', '--ref-file', dest='ref_file', help='Fasta to use as reference', default=None )
-    op.add_option( '', '--species-name', dest='species_name', help='Species name to use in creating dict file from fasta file' )
-    op.add_option( '', '--build-name', dest='build_name', help='Name of genome assembly to use in creating dict file from fasta file' )
-    op.add_option( '', '--trunc-names', dest='trunc_names', help='Truncate sequence names at first whitespace from fasta file' )
-    # MarkDuplicates
-    op.add_option( '', '--remdups', default='true', help='Remove duplicates from output file' )
-    op.add_option( '', '--optdupdist', default="100", help='Maximum pixels between two identical sequences in order to consider them optical duplicates.' )
-    # CollectInsertSizeMetrics
-    op.add_option('', '--taillimit', default="0")
-    op.add_option('', '--histwidth', default="0")
-    op.add_option('', '--minpct', default="0.01")
-    # CollectAlignmentSummaryMetrics
-    op.add_option('', '--maxinsert', default="20")
-    op.add_option('', '--adaptors', action='append', type="string")
-    # FixMateInformation and validate
-    # CollectGcBiasMetrics
-    op.add_option('', '--windowsize', default='100')
-    op.add_option('', '--mingenomefrac', default='0.00001')    
-    # AddOrReplaceReadGroups
-    op.add_option( '', '--rg-opts', dest='rg_opts', help='Specify extra (optional) arguments with full, otherwise preSet' )
-    op.add_option( '', '--rg-lb', dest='rg_library', help='Read Group Library' )
-    op.add_option( '', '--rg-pl', dest='rg_platform', help='Read Group platform (e.g. illumina, solid)' )
-    op.add_option( '', '--rg-pu', dest='rg_plat_unit', help='Read Group platform unit (eg. run barcode) ' )
-    op.add_option( '', '--rg-sm', dest='rg_sample', help='Read Group sample name' )
-    op.add_option( '', '--rg-id', dest='rg_id', help='Read Group ID' )
-    op.add_option( '', '--rg-cn', dest='rg_seq_center', help='Read Group sequencing center name' )
-    op.add_option( '', '--rg-ds', dest='rg_desc', help='Read Group description' )
-    # ReorderSam
-    op.add_option( '', '--allow-inc-dict-concord', dest='allow_inc_dict_concord', help='Allow incomplete dict concordance' )
-    op.add_option( '', '--allow-contig-len-discord', dest='allow_contig_len_discord', help='Allow contig length discordance' )
-    # ReplaceSamHeader
-    op.add_option( '', '--header-file', dest='header_file', help='sam or bam file from which header will be read' )
-
-    op.add_option('','--assumesorted', default='true') 
-    op.add_option('','--readregex', default="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*")
-    #estimatelibrarycomplexity
-    op.add_option('','--minid', default="5")
-    op.add_option('','--maxdiff', default="0.03")
-    op.add_option('','--minmeanq', default="20")
-    #hsmetrics
-    op.add_option('','--baitbed', default=None)
-    op.add_option('','--targetbed', default=None)
-    #validate
-    op.add_option('','--ignoreflags', action='append', type="string")
-    op.add_option('','--maxerrors', default=None)
-    op.add_option('','--datatype', default=None)
-    op.add_option('','--bamout', default=None)
-    op.add_option('','--samout', default=None)
-
-    opts, args = op.parse_args()
-    opts.sortme = opts.assumesorted == 'false'
-    assert opts.input <> None
-    # need to add
-    # instance that does all the work
-    pic = PicardBase(opts,sys.argv[0])
-
-    tmp_dir = opts.outdir
-    haveTempout = False # we use this where sam output is an option
-
-    # set ref and dict files to use (create if necessary)
-    ref_file_name = opts.ref
-    if opts.ref_file <> None:
-        csd = 'CreateSequenceDictionary'
-        realjarpath = os.path.split(opts.jar)[0]
-        jarpath = os.path.join(realjarpath,'%s.jar' % csd) # for refseq
-        tmp_ref_fd, tmp_ref_name = tempfile.mkstemp( dir=opts.tmpdir , prefix = pic.picname)
-        ref_file_name = '%s.fasta' % tmp_ref_name
-        # build dict
-        dict_file_name = '%s.dict' % tmp_ref_name
-        os.symlink( opts.ref_file, ref_file_name )
-        cl = ['REFERENCE=%s' % ref_file_name]
-        cl.append('OUTPUT=%s' % dict_file_name)
-        cl.append('URI=%s' % os.path.basename( opts.ref_file ))
-        cl.append('TRUNCATE_NAMES_AT_WHITESPACE=%s' % opts.trunc_names)
-        if opts.species_name:
-            cl.append('SPECIES=%s' % opts.species_name)
-        if opts.build_name:
-            cl.append('GENOME_ASSEMBLY=%s' % opts.build_name)
-        pic.delme.append(dict_file_name)
-        pic.delme.append(ref_file_name)
-        pic.delme.append(tmp_ref_name)
-        s = pic.runPic(jarpath, cl)
-        # run relevant command(s)
-
-    # define temporary output
-    # if output is sam, it must have that extension, otherwise bam will be produced
-    # specify sam or bam file with extension
-    if opts.output_format == 'sam':
-        suff = '.sam'
-    else:
-        suff = ''
-    tmp_fd, tempout = tempfile.mkstemp( dir=opts.tmpdir, suffix=suff )
-
-    cl = ['VALIDATION_STRINGENCY=LENIENT',]
-
-    if pic.picname == 'AddOrReplaceReadGroups':
-        # sort order to match Galaxy's default
-        cl.append('SORT_ORDER=coordinate')
-        # input
-        cl.append('INPUT=%s' % opts.input)
-        # outputs
-        cl.append('OUTPUT=%s' % tempout)
-        # required read groups
-        cl.append('RGLB="%s"' % opts.rg_library)
-        cl.append('RGPL="%s"' % opts.rg_platform)
-        cl.append('RGPU="%s"' % opts.rg_plat_unit)
-        cl.append('RGSM="%s"' % opts.rg_sample)
-        if opts.rg_id:
-            cl.append('RGID="%s"' % opts.rg_id)
-        # optional read groups
-        if opts.rg_seq_center:
-            cl.append('RGCN="%s"' % opts.rg_seq_center)
-        if opts.rg_desc:
-            cl.append('RGDS="%s"' % opts.rg_desc)
-        pic.runPic(opts.jar, cl)
-        haveTempout = True
-
-    elif pic.picname == 'BamIndexStats':
-        tmp_fd, tmp_name = tempfile.mkstemp( dir=tmp_dir )
-        tmp_bam_name = '%s.bam' % tmp_name
-        tmp_bai_name = '%s.bai' % tmp_bam_name
-        os.symlink( opts.input, tmp_bam_name )
-        os.symlink( opts.bai_file, tmp_bai_name )
-        cl.append('INPUT=%s' % ( tmp_bam_name ))
-        pic.delme.append(tmp_bam_name)
-        pic.delme.append(tmp_bai_name)
-        pic.delme.append(tmp_name)
-        s = pic.runPic( opts.jar, cl )
-        f = open(pic.metricsOut,'a')
-        f.write(s) # got this on stdout from runCl
-        f.write('\n')
-        f.close()
-        doTranspose = False # but not transposed
-
-    elif pic.picname == 'EstimateLibraryComplexity':
-        cl.append('I=%s' % opts.input)
-        cl.append('O=%s' % pic.metricsOut)
-        if float(opts.minid) > 0:
-            cl.append('MIN_IDENTICAL_BASES=%s' % opts.minid)
-        if float(opts.maxdiff) > 0.0:
-            cl.append('MAX_DIFF_RATE=%s' % opts.maxdiff)
-        if float(opts.minmeanq) > 0:
-            cl.append('MIN_MEAN_QUALITY=%s' % opts.minmeanq)
-        if opts.readregex > '':
-            cl.append('READ_NAME_REGEX="%s"' % opts.readregex)
-        if float(opts.optdupdist) > 0:
-            cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist)
-        pic.runPic(opts.jar,cl)
-
-    elif pic.picname == 'CollectAlignmentSummaryMetrics':
-        # Why do we do this fakefasta thing? Because we need NO .fai to be available - picard barfs unless it has the same length as the input data.
-        # Why? Dunno. Seems to work without complaining if the .bai file is AWOL...
-        fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name))
-        try:
-            os.symlink(ref_file_name,fakefasta)
-        except:
-            s = '## unable to symlink %s to %s - different devices? Falling back to shutil.copy' % (ref_file_name,fakefasta)
-            info = s
-            shutil.copy(ref_file_name,fakefasta)
-        pic.delme.append(fakefasta)
-        cl.append('ASSUME_SORTED=%s' % opts.assumesorted)
-        adaptorseqs = ''.join([' ADAPTER_SEQUENCE=%s' % x for x in opts.adaptors])
-        cl.append(adaptorseqs)
-        cl.append('IS_BISULFITE_SEQUENCED=%s' % opts.bisulphite)
-        cl.append('MAX_INSERT_SIZE=%s' % opts.maxinsert)
-        cl.append('OUTPUT=%s' % pic.metricsOut)
-        cl.append('R=%s' % fakefasta)
-        cl.append('TMP_DIR=%s' % opts.tmpdir)
-        if not opts.assumesorted.lower() == 'true': # we need to sort input
-            fakeinput = '%s.sorted' % opts.input
-            s = pic.sortSam(opts.input, fakeinput, opts.outdir)
-            pic.delme.append(fakeinput)
-            cl.append('INPUT=%s' % fakeinput)
-        else:
-            cl.append('INPUT=%s' % os.path.abspath(opts.input)) 
-        pic.runPic(opts.jar,cl)
-       
-        
-    elif pic.picname == 'CollectGcBiasMetrics':
-        assert os.path.isfile(ref_file_name),'PicardGC needs a reference sequence - cannot read %s' % ref_file_name
-        # sigh. Why do we do this fakefasta thing? Because we need NO .fai to be available - picard barfs unless it has the same length as the input data.
-        # Why? Dunno.
-        fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name))
-        try:
-            os.symlink(ref_file_name,fakefasta)
-        except:
-            s = '## unable to symlink %s to %s - different devices? Falling back to shutil.copy' % (ref_file_name,fakefasta)
-            info = s
-            shutil.copy(ref_file_name,fakefasta)
-        pic.delme.append(fakefasta)
-        x = 'rgPicardGCBiasMetrics'
-        pdfname = '%s.pdf' % x
-        jpgname = '%s.jpg' % x
-        tempout = os.path.join(opts.outdir,'rgPicardGCBiasMetrics.out')
-        temppdf = os.path.join(opts.outdir,pdfname)
-        cl.append('R=%s' % fakefasta)
-        cl.append('WINDOW_SIZE=%s' % opts.windowsize)
-        cl.append('MINIMUM_GENOME_FRACTION=%s' % opts.mingenomefrac)
-        cl.append('INPUT=%s' % opts.input)
-        cl.append('OUTPUT=%s' % tempout)
-        cl.append('TMP_DIR=%s' % opts.tmpdir)
-        cl.append('CHART_OUTPUT=%s' % temppdf)
-        cl.append('SUMMARY_OUTPUT=%s' % pic.metricsOut)
-        pic.runPic(opts.jar,cl)
-        if os.path.isfile(temppdf):
-            cl2 = ['convert','-resize x400',temppdf,os.path.join(opts.outdir,jpgname)] # make the jpg for fixPicardOutputs to find
-            s,stdouts = pic.runCL(cl=cl2,output_dir=opts.outdir)
-        else:
-            s='### runGC: Unable to find pdf %s - please check the log for the causal problem\n' % temppdf
-        lf = open(pic.log_filename,'a')
-        lf.write(s)
-        lf.write('\n')
-        lf.close()
-        
-    elif pic.picname == 'CollectInsertSizeMetrics':
-        histpdf = 'InsertSizeHist.pdf'
-        pdfpath = os.path.join(opts.outdir,histpdf)
-        cl.append('I=%s' % opts.input)
-        cl.append('O=%s' % pic.metricsOut)
-        cl.append('HISTOGRAM_FILE=%s' % histpdf)
-        if opts.taillimit <> '0':
-            cl.append('TAIL_LIMIT=%s' % opts.taillimit)
-        if  opts.histwidth <> '0':
-            cl.append('HISTOGRAM_WIDTH=%s' % opts.histwidth)
-        if float( opts.minpct) > 0.0:
-            cl.append('MINIMUM_PCT=%s' % opts.minpct)
-        pic.runPic(opts.jar,cl)   
-        if os.path.exists(pdfpath): # automake thumbnail - will be added to html 
-            cl2 = ['mogrify', '-format jpg -resize x400 %s' % pdfpath]
-            s,stdouts = pic.runCL(cl=cl2,output_dir=opts.outdir)
-        else:
-            s = 'Unable to find expected pdf file %s<br/>\n' % pdfpath
-            s += 'This <b>always happens if single ended data was provided</b> to this tool,\n'
-            s += 'so please double check that your input data really is paired-end NGS data.<br/>\n'
-            s += 'If your input was paired data this may be a bug worth reporting to the galaxy-bugs list\n<br/>'
-            stdouts = ''
-        logging.info(s)
-        if len(stdouts) > 0:
-           logging.info(stdouts)
-        
-    elif pic.picname == 'MarkDuplicates':
-        # assume sorted even if header says otherwise
-        cl.append('ASSUME_SORTED=%s' % (opts.assumesorted))
-        # input
-        cl.append('INPUT=%s' % opts.input)
-        # outputs
-        cl.append('OUTPUT=%s' % opts.output) 
-        cl.append('METRICS_FILE=%s' % pic.metricsOut )
-        # remove or mark duplicates
-        cl.append('REMOVE_DUPLICATES=%s' % opts.remdups)
-        # the regular expression to be used to parse reads in incoming SAM file
-        cl.append('READ_NAME_REGEX="%s"' % opts.readregex)
-        # maximum offset between two duplicate clusters
-        cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist)
-        pic.runPic(opts.jar, cl)
-
-    elif pic.picname == 'FixMateInformation':
-        cl.append('I=%s' % opts.input)
-        cl.append('O=%s' % tempout)
-        cl.append('SORT_ORDER=%s' % opts.sortorder)
-        pic.runPic(opts.jar,cl)
-        haveTempout = True
-        
-    elif pic.picname == 'ReorderSam':
-        # input
-        cl.append('INPUT=%s' % opts.input)
-        # output
-        cl.append('OUTPUT=%s' % tempout)
-        # reference
-        cl.append('REFERENCE=%s' % ref_file_name)
-        # incomplete dict concordance
-        if opts.allow_inc_dict_concord == 'true':
-            cl.append('ALLOW_INCOMPLETE_DICT_CONCORDANCE=true')
-        # contig length discordance
-        if opts.allow_contig_len_discord == 'true':
-            cl.append('ALLOW_CONTIG_LENGTH_DISCORDANCE=true')
-        pic.runPic(opts.jar, cl)
-        haveTempout = True
-
-    elif pic.picname == 'ReplaceSamHeader':
-        cl.append('INPUT=%s' % opts.input)
-        cl.append('OUTPUT=%s' % tempout)
-        cl.append('HEADER=%s' % opts.header_file)
-        pic.runPic(opts.jar, cl)
-        haveTempout = True
-
-    elif pic.picname == 'CalculateHsMetrics':
-        maxloglines = 100
-        baitfname = os.path.join(opts.outdir,'rgPicardHsMetrics.bait')
-        targetfname = os.path.join(opts.outdir,'rgPicardHsMetrics.target')
-        baitf = pic.makePicInterval(opts.baitbed,baitfname)
-        if opts.targetbed == opts.baitbed: # same file sometimes
-            targetf = baitf
-        else:
-            targetf = pic.makePicInterval(opts.targetbed,targetfname)   
-        cl.append('BAIT_INTERVALS=%s' % baitf)
-        cl.append('TARGET_INTERVALS=%s' % targetf)
-        cl.append('INPUT=%s' % os.path.abspath(opts.input))
-        cl.append('OUTPUT=%s' % pic.metricsOut)
-        cl.append('TMP_DIR=%s' % opts.tmpdir)
-        pic.runPic(opts.jar,cl)
-           
-    elif pic.picname == 'ValidateSamFile':
-        import pysam
-        doTranspose = False
-        sortedfile = os.path.join(opts.outdir,'rgValidate.sorted')
-        stf = open(pic.log_filename,'w')
-        tlog = None
-        if opts.datatype == 'sam': # need to work with a bam 
-            tlog,tempbam = pic.samToBam(opts.input,opts.outdir)
-            try:
-                tlog = pic.sortSam(tempbam,sortedfile,opts.outdir)
-            except:
-                print '## exception on sorting sam file %s' % opts.input
-        else: # is already bam
-            try:
-                tlog = pic.sortSam(opts.input,sortedfile,opts.outdir)
-            except: # bug - [bam_sort_core] not being ignored - TODO fixme
-                print '## exception on sorting bam file %s' % opts.input
-        if tlog:
-            print '##tlog=',tlog
-            stf.write(tlog)
-            stf.write('\n')
-        sortedfile = '%s.bam' % sortedfile # samtools does that      
-        cl.append('O=%s' % pic.metricsOut)
-        cl.append('TMP_DIR=%s' % opts.tmpdir)
-        cl.append('I=%s' % sortedfile)
-        opts.maxerrors = '99999999'
-        cl.append('MAX_OUTPUT=%s' % opts.maxerrors)
-        if opts.ignoreflags[0] <> 'None': # picard error values to ignore
-            igs = ['IGNORE=%s' % x for x in opts.ignoreflags if x <> 'None']
-            cl.append(' '.join(igs))
-        if opts.bisulphite.lower() <> 'false':
-            cl.append('IS_BISULFITE_SEQUENCED=true')
-        if opts.ref <> None or opts.ref_file <> None:
-            cl.append('R=%s' %  ref_file_name)
-        pic.runPic(opts.jar,cl)
-        if opts.datatype == 'sam':
-            pic.delme.append(tempbam)
-        newsam = opts.output
-        outformat = 'bam'
-        pe = open(pic.metricsOut,'r').readlines()
-        pic.cleanSam(insam=sortedfile, newsam=newsam, picardErrors=pe,outformat=outformat)
-        pic.delme.append(sortedfile) # not wanted
-        stf.close()
-        pic.cleanup()
-    else:
-        print >> sys.stderr,'picard.py got an unknown tool name - %s' % pic.picname
-        sys.exit(1)
-    if haveTempout:
-        # Some Picard tools produced a potentially intermediate bam file. 
-        # Either just move to final location or create sam
-        shutil.move(tempout, os.path.abspath(opts.output))
-
-    if opts.htmlout <> None or doFix: # return a pretty html page
-        pic.fixPicardOutputs(transpose=doTranspose,maxloglines=maxloglines)
-
-if __name__=="__main__": __main__()
-
--- a/tools/picard/rgPicardASMetrics.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,162 +0,0 @@
-<tool name="SAM/BAM Alignment Summary Metrics" id="PicardASMetrics" version="0.03">
-  <command interpreter="python">
-    picard_wrapper.py -i "$input_file" -d "$html_file.files_path" -t "$html_file"
-    --assumesorted "$sorted" -b "$bisulphite" --adaptors "$adaptors" --maxinsert "$maxinsert" -n "$out_prefix"
-    -j ${GALAXY_DATA_INDEX_DIR}/shared/jars/CollectAlignmentSummaryMetrics.jar
-#if $genomeSource.refGenomeSource == "history":
-    --ref-file "$genomeSource.ownFile"
-#else
-    --ref "${ filter( lambda x: str( x[0] ) == str( $genomeSource.index ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }"
-#end if
-  </command>
-  <requirements><requirement type="package">picard</requirement></requirements>
-  <inputs>
-    <param format="sam,bam" name="input_file" type="data" label="SAM/BAM dataset to generate statistics for"
-      help="If empty, upload or import a SAM/BAM dataset."/>
-    <param name="out_prefix" value="Picard Alignment Summary Metrics" type="text"
-      label="Title for the output file" help="Use this remind you what the job was for." size="80" />
-
-      <conditional name="genomeSource">
-    
-      <param name="refGenomeSource" type="select" label="Select Reference Genome">
-        <option value="default" selected="true">Use the assigned data genome/build</option>
-        <option value="indexed">Select a different built-in genome</option>
-        <option value="history">Use a genome (fasta format) from my history</option>
-      </param>
-      <when value="default">
-        <param name="index" type="select" label="Check the assigned reference genome" help="Galaxy thinks that the reads in you dataset were aligned against this reference. If this is not correct, use the 'Select a build-in reference genome' option of the 'Select Reference Genome' dropdown to select approprtiate Reference.">
-          <options from_data_table="all_fasta">
-          <filter type="data_meta" ref="input_file" key="dbkey" column="dbkey" multiple="True" separator="," />
-          <validator type="no_options" message="No reference build available for selected input" /> 
-          </options>
-        </param>
-      </when>
-      <when value="indexed">
-        <param name="index" type="select" label="Select a built-in reference genome" help="This list contains genomes cached at this Galaxy instance. If your genome of interest is not present here request it by using 'Help' link at the top of Galaxy interface or use the 'Use a genome (fasta format) from my history' option of the 'Select Reference Genome' dropdown.">
-          <options from_data_table="all_fasta">
-          </options>
-        </param>
-      </when>
-      <when value="history">
-        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome from history" help="This option works best for relatively small genomes. If you are working with large human-sized genomes, send request to Galaxy team for adding your reference to this Galaxy instance by using 'Help' link at the top of Galaxy interface."/>
-      </when>
-    </conditional>
-    <param name="sorted" type="boolean" label="Assume the input file is already sorted" checked="true" truevalue="true" falsevalue="false"/>
-    <param name="bisulphite" type="boolean" label="Input file contains Bisulphite sequenced reads" checked="false" falsevalue="false" truevalue="true" />
-    <param name="adaptors" value="" type="text" area="true" label="Adapter sequences" help="One per line if multiple" size="5x120" />
-    <param name="maxinsert" value="100000" type="integer" label="Larger paired end reads and inter-chromosomal pairs considered chimeric " size="20" />
-  </inputs>
-  <outputs>
-    <data format="html" name="html_file"  label="${out_prefix}.html" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="out_prefix" value="AsMetrics" />
-      <param name="bisulphite" value="false" />
-      <param name="sorted" value="true" />
-      <param name="adaptors" value="" />
-      <param name="maxinsert" value="100000" />
-      <param name="refGenomeSource" value="history" />
-      <param name="ownFile" value="picard_input_hg18.trimmed.fasta" />
-      <param name="input_file" value="picard_input_tiny.sam" dbkey="hg18" />
-      <output name="html_file" file="picard_output_alignment_summary_metrics.html" ftype="html" lines_diff="55"/>
-    </test>
-    <test>
-      <param name="out_prefix" value="AsMetricsIndexed" />
-      <param name="bisulphite" value="false" />
-      <param name="sorted" value="true" />
-      <param name="adaptors" value="" />
-      <param name="maxinsert" value="100000" />
-      <param name="refGenomeSource" value="indexed" />
-      <param name="index" value="hg19" />
-      <param name="input_file" value="picard_input_sorted_pair.sam" dbkey="hg19" />
-      <output name="html_file" file="picard_output_AsMetrics_indexed_hg18_sorted_pair.html" ftype="html" lines_diff="50"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**Summary**
-
-This Galaxy tool uses Picard to report high-level measures of alignment based on a provided sam or bam file.
-
-**Picard documentation**
-
-This is a Galaxy wrapper for CollectAlignmentSummaryMetrics, a part of the external package Picard-tools_.
-
- .. _Picard-tools: http://www.google.com/search?q=picard+samtools
-
------
-
-.. class:: infomark
-
-**Syntax**
-
-- **Input** - SAM/BAM format aligned short read data in your current history
-- **Title** - the title to use for all output files from this job - use it for high level metadata
-- **Reference Genome** - Galaxy (and Picard) needs to know which genomic reference was used to generate alignments within the input SAM/BAM dataset. Here you have three choices:
-
-  - *Assigned data genome/build* - a genome specified for this dataset. If your SAM/BAM dataset has an assigned reference genome, it will be displayed below this dropdown. If it does not, use one of the following two options.
-  - *Select a different built-in genome* - this option will list all reference genomes presently cached at this instance of Galaxy.
-  - *Select a reference genome from history* - alternatively, you can upload your own version of the reference genome into your history and use it with this option. This is, however, not advisable with large human-sized genomes. If your genome is large, contact the Galaxy team using the "Help" link at the top of the interface and provide exact details on where we can download the sequences you would like to use as the reference. We will then install them as a part of the locally cached genomic references.
-  
-- **Assume Sorted** - saves sorting time - but only if true!
-- **Bisulphite data** - see Picard documentation http://picard.sourceforge.net/command-line-overview.shtml#CollectAlignmentSummaryMetrics
-- **Maximum acceptable insertion length** - see Picard documentation at http://picard.sourceforge.net/command-line-overview.shtml#CollectAlignmentSummaryMetrics
-
------
-
-.. class:: infomark
-
-**Inputs, outputs, and parameters**
-
-The Picard documentation (reformatted for Galaxy) says:
-
-.. csv-table:: 
-   :header-rows: 1
-
-    Option,Description
-    "INPUT=File","SAM or BAM file Required."
-    "OUTPUT=File","File to write insert size metrics to Required."
-    "REFERENCE_SEQUENCE=File","Reference sequence file Required."
-    "ASSUME_SORTED=Boolean","If true (default), unsorted SAM/BAM files will be considerd coordinate sorted "
-    "MAX_INSERT_SIZE=Integer","Paired end reads above this insert size will be considered chimeric along with inter-chromosomal pairs. Default value: 100000."
-    "ADAPTER_SEQUENCE=String","This option may be specified 0 or more times. "
-    "IS_BISULFITE_SEQUENCED=Boolean","Whether the SAM or BAM file consists of bisulfite sequenced reads. Default value: false. "
-    "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created."
-
-The output produced by the tool has the following columns::
-
-  1. CATEGORY: One of either UNPAIRED (for a fragment run), FIRST_OF_PAIR when metrics are for only the first read in a paired run, SECOND_OF_PAIR when the metrics are for only the second read in a paired run or PAIR when the metrics are aggregated for both first and second reads in a pair.
-  2. TOTAL_READS: The total number of reads including all PF and non-PF reads. When CATEGORY equals PAIR this value will be 2x the number of clusters.
-  3. PF_READS: The number of PF reads where PF is defined as passing Illumina's filter.
-  4. PCT_PF_READS: The percentage of reads that are PF (PF_READS / TOTAL_READS)
-  5. PF_NOISE_READS: The number of PF reads that are marked as noise reads. A noise read is one which is composed entirely of A bases and/or N bases. These reads are marked as they are usually artifactual and are of no use in downstream analysis.
-  6. PF_READS_ALIGNED: The number of PF reads that were aligned to the reference sequence. This includes reads that aligned with low quality (i.e. their alignments are ambiguous).
-  7. PCT_PF_READS_ALIGNED: The percentage of PF reads that aligned to the reference sequence. PF_READS_ALIGNED / PF_READS
-  8. PF_HQ_ALIGNED_READS: The number of PF reads that were aligned to the reference sequence with a mapping quality of Q20 or higher signifying that the aligner estimates a 1/100 (or smaller) chance that the alignment is wrong.
-  9. PF_HQ_ALIGNED_BASES: The number of bases aligned to the reference sequence in reads that were mapped at high quality. Will usually approximate PF_HQ_ALIGNED_READS * READ_LENGTH but may differ when either mixed read lengths are present or many reads are aligned with gaps.
- 10. PF_HQ_ALIGNED_Q20_BASES: The subset of PF_HQ_ALIGNED_BASES where the base call quality was Q20 or higher.
- 11. PF_HQ_MEDIAN_MISMATCHES: The median number of mismatches versus the reference sequence in reads that were aligned to the reference at high quality (i.e. PF_HQ_ALIGNED_READS).
- 12. PF_HQ_ERROR_RATE: The percentage of bases that mismatch the reference in PF HQ aligned reads.
- 13. MEAN_READ_LENGTH: The mean read length of the set of reads examined. When looking at the data for a single lane with equal length reads this number is just the read length. When looking at data for merged lanes with differing read lengths this is the mean read length of all reads.
- 14. READS_ALIGNED_IN_PAIRS: The number of aligned reads whose mate pair was also aligned to the reference.
- 15. PCT_READS_ALIGNED_IN_PAIRS: The percentage of reads whose mate pair was also aligned to the reference. READS_ALIGNED_IN_PAIRS / PF_READS_ALIGNED
- 16. BAD_CYCLES: The number of instrument cycles in which 80% or more of base calls were no-calls.
- 17. STRAND_BALANCE: The number of PF reads aligned to the positive strand of the genome divided by the number of PF reads aligned to the genome.
- 18. PCT_CHIMERAS: The percentage of reads that map outside of a maximum insert size (usually 100kb) or that have the two ends mapping to different chromosomes.
- 19. PCT_ADAPTER: The percentage of PF reads that are unaligned and match to a known adapter sequence right from the start of the read.
-
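-Several of these columns are simple ratios of the raw counts above. As an illustrative sketch only (not Picard's code; the counts and variable names are hypothetical), the derived percentages follow directly from the definitions::
-
-  # Hypothetical counts; the formulas are the column definitions above.
-  total_reads, pf_reads, pf_reads_aligned = 1000, 980, 950
-  reads_aligned_in_pairs, pf_pos_strand_reads = 900, 470
-  pct_pf_reads = pf_reads / float(total_reads)                # column 4, 0.98
-  pct_pf_reads_aligned = pf_reads_aligned / float(pf_reads)   # column 7, ~0.97
-  pct_reads_aligned_in_pairs = reads_aligned_in_pairs / float(pf_reads_aligned)  # column 15
-  strand_balance = pf_pos_strand_reads / float(pf_reads_aligned)                 # column 17
-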
-.. class:: warningmark
-
-**Warning on SAM/BAM quality**
-
-Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
-flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
-to be the only way to deal with SAM/BAM that cannot be parsed.
-
-
-  </help>
-</tool>
-
--- a/tools/picard/rgPicardFixMate.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,107 +0,0 @@
-<tool name="Paired Read Mate Fixer" id="rgPicFixMate" version="0.2.0">
-  <description>for paired data</description>
-  <command interpreter="python">
-   picard_wrapper.py -i "$input_file" -o "$out_file" --tmpdir "${__new_file_path__}" -n "$out_prefix" 
-   --output-format "$outputFormat" -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/FixMateInformation.jar" --sortorder "$sortOrder"
-  </command>
-  <requirements><requirement type="package">picard</requirement></requirements>
-  <inputs>
-    <param format="bam,sam" name="input_file" type="data" label="SAM/BAM dataset to fix"
-      help="If empty, upload or import a SAM/BAM dataset."/>
-      <param name="sortOrder" type="select" help="If in doubt, leave as default and read Picard/Samtools documentation"
-         label="Sort order">
-        <option value="coordinate" selected ="true">Coordinate sort</option>
-        <option value="queryname">Query name sort</option>
-        <option value="unsorted">Unsorted - docs not clear if this means unchanged or not</option>
-      </param>
-    <param name="out_prefix" value="Fix Mate" type="text"
-      label="Title for the output file" help="Use this remind you what the job was for." size="80" />
-    <param name="outputFormat" type="boolean" checked="True" truevalue="bam" falsevalue="sam" label="Output BAM instead of SAM" help="Uncheck for SAM output" />
-  </inputs>
-  <outputs>
-    <data format="bam" name="out_file" label="${tool.name} on ${on_string}: ${outputFormat} with fixed mates">
-    <change_format>
-     <when input="outputFormat" value="sam" format="sam" />
-    </change_format> 
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input_file" value="picard_input_sorted_pair.sam" />
-      <param name="sortOrder" value="coordinate" />
-      <param name="outputFormat" value="True" />
-      <param name="out_prefix" value="Test FixMate" />
-      <output name="out_file" file="picard_output_fixmate_sorted_pair.bam" ftype="bam" />
-    </test>
-    <test>
-      <param name="input_file" value="picard_input_sorted_pair.sam" />
-      <param name="sortOrder" value="coordinate" />
-      <param name="outputFormat" value="False" />
-      <param name="out_prefix" value="Test FixMate" />
-      <output name="out_file" file="picard_output_fixmate_sorted_pair.sam" ftype="sam" />
-    </test>
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**Purpose**
-
-Ensure that all mate-pair information is in sync between each read and its mate pair.
-
-**Picard documentation**
-
-This is a Galaxy wrapper for FixMateInformation, a part of the external package Picard-tools_.
-
- .. _Picard-tools: http://www.google.com/search?q=picard+samtools
-
-.. class:: warningmark
-
-**Useful for paired data only**
-
-This tool likely won't do anything helpful for single-end sequence data.
-Currently, Galaxy doesn't distinguish paired from single-ended SAM/BAM, so make sure
-the data you choose are valid (paired-end) SAM or BAM data - unless you trust this
-tool not to harm your data.
-
------
-
-.. class:: infomark
-
-**Syntax**
-
-- **Input** - paired-read SAM/BAM format aligned short read data in your current history
-- **Sort order** - can be used to adjust the ordering of reads 
-- **Title** - the title to use for all output files from this job - use it for high level metadata
-- **Output Format** - either SAM or compressed as BAM
-
------
-
-.. class:: infomark
-
-**Inputs, outputs, and parameters**
-
-.. csv-table::
-   :header-rows: 1
-
-  Option,Description
-  "INPUT=File","The input file to fix. This option may be specified 0 or more times."
-  "OUTPUT=File","The output file to write to"
-  "SORT_ORDER=SortOrder","Optional sort order if the OUTPUT file should be sorted differently than the INPUT file. Default value: null. Possible values: {unsorted, queryname, coordinate}"
-  "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created. Default value: false"
-
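-For orientation only, here is a minimal sketch of a roughly equivalent direct Picard call, using the option names from the table above (paths and values are hypothetical; the LENIENT setting matches the wrapper behaviour described below)::
-
-  import subprocess
-  # Hypothetical direct invocation of the jar wrapped by this tool.
-  subprocess.check_call(["java", "-jar", "FixMateInformation.jar",
-                         "INPUT=pairs.sam", "OUTPUT=fixed.bam",
-                         "SORT_ORDER=coordinate",
-                         "VALIDATION_STRINGENCY=LENIENT"])
-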
-.. class:: warningmark
-
-**Warning on SAM/BAM quality**
-
-Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
-flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
-to be the only way to deal with SAM/BAM that cannot be parsed.
-
-
-  </help>
-</tool>
-
-
--- a/tools/picard/rgPicardGCBiasMetrics.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,148 +0,0 @@
-<tool name="SAM/BAM GC Bias Metrics" id="PicardGCBiasMetrics" version="0.01">
-  <command interpreter="python">
-    picard_wrapper.py -i "$input_file" -d "$html_file.files_path" -t "$html_file"
-    --windowsize "$windowsize" --mingenomefrac "$mingenomefrac" -n "$out_prefix" --tmpdir "${__new_file_path__}"
-    -j ${GALAXY_DATA_INDEX_DIR}/shared/jars/CollectGcBiasMetrics.jar
-#if $genomeSource.refGenomeSource == "history":
- --ref-file "$genomeSource.ownFile"
-#else:
- --ref "${ filter( lambda x: str( x[0] ) == str( $genomeSource.index ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }"
-#end if
-  </command>
-  <requirements><requirement type="package">picard</requirement></requirements>
-  <inputs>
-    <param format="sam,bam" name="input_file" type="data" label="SAM/BAM dataset to generateGC bias metrics"
-      help="If empty, upload or import a SAM/BAM dataset."/>
-    <param name="out_prefix" value="Short Read GC Bias Metrics" type="text"
-      label="Title for the output file" help="Use this remind you what the job was for." size="80" />
-    <conditional name="genomeSource"> 
-      <param name="refGenomeSource" type="select" label="Select Reference Genome">
-        <option value="default" selected="true">Use the assigned data genome/build</option>
-        <option value="indexed">Select a different built-in genome</option>
-        <option value="history">Use a genome (fasta format) from my history</option>
-      </param>
-      <when value="default">
-        <param name="index" type="select" label="Check the assigned reference genome" help="Galaxy thinks that the reads in you dataset were aligned against this reference. If this is not correct, use the 'Select a build-in reference genome' option of the 'Select Reference Genome' dropdown to select approprtiate Reference.">
-          <options from_data_table="all_fasta">
-          <filter type="data_meta" ref="input_file" key="dbkey" column="dbkey" multiple="True" separator=","/>
-          <validator type="no_options" message="No reference build available for the selected input data" />
-          </options>
-        </param>
-      </when>
-      <when value="indexed">
-        <param name="index" type="select" label="Select a built-in reference genome" help="This list contains genomes cached at this Galaxy instance. If your genome of interest is not present here request it by using 'Help' link at the top of Galaxy interface or use the 'Use a genome (fasta format) from my history' option of the 'Select Reference Genome' dropdown.">
-          <options from_data_table="all_fasta"/>
-        </param>
-      </when>
-      <when value="history">
-        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome from history" help="This option works best for relatively small genomes. If you are working with large human-sized genomes, send request to Galaxy team for adding your reference to this Galaxy instance by using 'Help' link at the top of Galaxy interface."/>
-      </when>
-    </conditional>
-    <param name="windowsize" type="integer" label="GC minimum window size" value="100"
-    help="The size of windows on the genome that are used to bin reads. Default value: 100."/>
-    <param name="mingenomefrac" value="0.00001" type="float" label="Minimum Genome Fraction"
-    help="For summary metrics, exclude GC windows that include less than this fraction of the genome. Default value: 1.0E-5." />
-    <!--
-    
-    Users can be enabled to set Java heap size by uncommenting this option and adding '-x "$maxheap"' to the <command> tag.
-    If commented out the heapsize defaults to the value specified within picard_wrapper.py
-    
-    <param name="maxheap" type="select" help="If in doubt, choose 8G and read Picard documentation please"
-     label="Java heap size">
-    <option value="1G">1GB: very small data</option>
-    <option value="2G" selected="true">2GB</option>
-    <option value="4G">4GB for larger datasets </option>
-    <option value="8G" >8GB use if 4GB fails</option>
-    <option value="16G">16GB - try this if 8GB fails</option>
-    </param>
-    
-    -->
-
-  </inputs>
-  <outputs>
-    <data format="html" name="html_file"  label="${out_prefix}.html"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="maxheap" value="8G" />
-      <param name="out_prefix" value="CollectGCBias" />
-      <param name="windowsize" value="100" />
-      <param name="mingenomefrac" value="0.00001" />
-      <param name="refGenomeSource" value="history" />
-      <param name="ownFile" value="picard_input_hg18.trimmed.fasta" dbkey="hg18" />      
-      <param name="input_file" value="picard_input_summary_alignment_stats.sam" ftype="sam" dbkey="hg18"/>
-      <output name="html_file" file="picard_output_GcBias_uploaded_hg18_summary_alignment_stats.html" ftype="html" lines_diff="50"/>
-    </test>
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**Summary**
-
-This Galaxy tool uses Picard to report detailed metrics about reads that fall within windows of a certain GC bin on the reference genome.
-
-**Picard documentation**
-
-This is a Galaxy wrapper for CollectGcBiasMetrics, a part of the external package Picard-tools_.
-
- .. _Picard-tools: http://www.google.com/search?q=picard+samtools
-
------
-
-.. class:: infomark
-
-**Syntax**
-
-- **Input** - SAM/BAM format aligned short read data in your current history
-- **Title** - the title to use for all output files from this job - use it for high level metadata
-- **Reference Genome** - Galaxy (and Picard) needs to know which genomic reference was used to generate alignments within the input SAM/BAM dataset. Here you have three choices:
-
-  - *Assigned data genome/build* - the genome specified for this dataset. If your SAM/BAM dataset has an assigned reference genome, it will be displayed below this dropdown; if it does not, use one of the following two options.
-  - *Select a different built-in genome* - this option will list all reference genomes presently cached at this instance of Galaxy.
-  - *Select a reference genome from history* - alternatively you can upload your own version of the reference genome into your history and use it with this option. This is, however, not advisable with large human-sized genomes. If your genome is large, contact the Galaxy team using the "Help" link at the top of the interface and provide exact details on where we can download the sequences you would like to use as the reference. We will then install them as a part of locally cached genomic references.
-  
-- **Window Size** - see Picard documentation at http://picard.sourceforge.net/command-line-overview.shtml#CollectGCBiasMetrics
-- **Minimum Genome Fraction** - see Picard documentation at http://picard.sourceforge.net/command-line-overview.shtml#CollectGCBiasMetrics
-
------
-
-.. class:: infomark
-
-**Inputs, outputs, and parameters**
-
-The Picard documentation (reformatted for Galaxy) says:
-
-.. csv-table::
-   :header-rows: 1
-
-    Option,Description
-    "REFERENCE_SEQUENCE=File","The reference sequence fasta file. Required."
-    "INPUT=File","The BAM or SAM file containing aligned reads. Required."
-    "OUTPUT=File","The text file to write the metrics table to. Required."
-    "CHART_OUTPUT=File","The PDF file to render the chart to. Required."
-    "SUMMARY_OUTPUT=File","The text file to write summary metrics to. Default value: null."
-    "WINDOW_SIZE=Integer","The size of windows on the genome that are used to bin reads. Default value: 100."
-    "MINIMUM_GENOME_FRACTION=Double","For summary metrics, exclude GC windows that include less than this fraction of the genome. Default value: 1.0E-5."
-    "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created. Default value: false."
-
-The output produced by the tool has the following columns::
-
- 1. GC: The G+C content of the reference sequence represented by this bin. Values are from 0% to 100%
- 2. WINDOWS: The number of windows on the reference genome that have this G+C content.
- 3. READ_STARTS: The number of reads whose start position is at the start of a window of this GC.
- 4. MEAN_BASE_QUALITY: The mean quality (determined via the error rate) of all bases of all reads that are assigned to windows of this GC.
- 5. NORMALIZED_COVERAGE: The ratio of "coverage" in this GC bin vs. the mean coverage of all GC bins. A value of 1 represents mean coverage, a value less than one represents lower than mean coverage (e.g. 0.5 means half as much coverage as average) while a value greater than one represents higher than mean coverage (e.g. 3.1 means this GC bin has 3.1 times more reads per window than average).
- 6. ERROR_BAR_WIDTH: The radius of error bars in this bin based on the number of observations made. For example if the normalized coverage is 0.75 and the error bar width is 0.1 then the error bars would be drawn from 0.65 to 0.85.
-
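-As a sketch of how NORMALIZED_COVERAGE behaves (illustrative only; the per-bin read counts below are made up)::
-
-  # reads_per_window maps a GC bin (0-100) to mean reads per window in that bin.
-  reads_per_window = {40: 95.0, 50: 100.0, 60: 310.0}   # hypothetical values
-  mean_cov = sum(reads_per_window.values()) / len(reads_per_window)
-  normalized = dict((gc, cov / mean_cov) for gc, cov in reads_per_window.items())
-  # normalized[60] ~= 1.84: GC-60 windows attract ~1.8x the average reads.
-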
-.. class:: warningmark
-
-**Warning on SAM/BAM quality**
-
-Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
-flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
-to be the only way to deal with SAM/BAM that cannot be parsed.
-
-  </help>
-</tool>
--- a/tools/picard/rgPicardHsMetrics.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,131 +0,0 @@
-<tool name="SAM/BAM Hybrid Selection Metrics" id="PicardHsMetrics" version="0.01">
-  <description>for targeted resequencing data</description>
-  <command interpreter="python">
-
-    picard_wrapper.py -i "$input_file" -d "$html_file.files_path" -t "$html_file" --datatype "$input_file.ext"
-    --baitbed "$bait_bed" --targetbed "$target_bed" -n "$out_prefix" --tmpdir "${__new_file_path__}"
-    -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/CalculateHsMetrics.jar"
-
-  </command>
-  <requirements><requirement type="package">picard</requirement></requirements>
-  <inputs>
-    <param format="sam,bam" name="input_file" type="data" label="SAM/BAM dataset to generate statistics for" />
-    <param name="out_prefix" value="Picard HS Metrics" type="text" label="Title for the output file" help="Use to remind you what the job was for." size="80" />
-    <param name="bait_bed" type="data" format="interval" label="Bait intervals: Sequences for bait in the design" help="In UCSC BED format" size="80" />
-    <param name="target_bed" type="data" format="interval" label="Target intervals: Sequences for targets in the design" help="In UCSC BED format" size="80" />
-    <!--
-    
-    Users can be enabled to set Java heap size by uncommenting this option and adding '-x "$maxheap"' to the <command> tag.
-    If commented out the heapsize defaults to the value specified within picard_wrapper.py
-    
-    <param name="maxheap" type="select" 
-       help="If in doubt, try the default. If it fails with a complaint about java heap size, try increasing it please - larger jobs will require your own hardware."
-     label="Java heap size">
-    <option value="4G" selected = "true">4GB default </option>
-    <option value="8G" >8GB use if 4GB fails</option>
-    <option value="16G">16GB - try this if 8GB fails</option>
-    </param>
-    
-    -->
-  </inputs>
-  <outputs>
-    <data format="html" name="html_file" label="${out_prefix}.html" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="out_prefix" value="HSMetrics" />
-      <param name="input_file" value="picard_input_summary_alignment_stats.sam" ftype="sam" />
-      <param name="bait_bed" value="picard_input_bait.bed" />
-      <param name="target_bed" value="picard_input_bait.bed"  />
-      <param name="maxheap" value="8G"  />
-      <output name="html_file" file="picard_output_hs_transposed_summary_alignment_stats.html" ftype="html" lines_diff="212"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**Summary**
-
-Calculates a set of Hybrid Selection specific metrics from an aligned SAM or BAM file.
-
-**Picard documentation**
-
-This is a Galaxy wrapper for CalculateHsMetrics, a part of the external package Picard-tools_.
-
- .. _Picard-tools: http://www.google.com/search?q=picard+samtools
-
------
-
-.. class:: infomark
-
-**Inputs, outputs, and parameters**
-
-Picard documentation says (reformatted for Galaxy):
-
-Calculates a set of Hybrid Selection specific metrics from an aligned SAM or BAM file.
-
-.. csv-table::
-   :header-rows: 1
-
-   "Option", "Description"
-   "BAIT_INTERVALS=File","An interval list file that contains the locations of the baits used. Required."
-   "TARGET_INTERVALS=File","An interval list file that contains the locations of the targets. Required."
-   "INPUT=File","An aligned SAM or BAM file. Required."
-   "OUTPUT=File","The output file to write the metrics to. Required. Cannot be used in conjuction with option(s) METRICS_FILE (M)"
-   "METRICS_FILE=File","Legacy synonym for OUTPUT, should not be used. Required. Cannot be used in conjuction with option(s) OUTPUT (O)"
-   "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created. Default value: false"
-
-HsMetrics
-
- The set of metrics captured that are specific to a hybrid selection analysis.
-
-Output Column Definitions::
-
-  1. BAIT_SET: The name of the bait set used in the hybrid selection.
-  2. GENOME_SIZE: The number of bases in the reference genome used for alignment.
-  3. BAIT_TERRITORY: The number of bases which have one or more baits on top of them.
- 4. TARGET_TERRITORY: The unique number of target bases in the experiment, where the target is usually exons etc.
- 5. BAIT_DESIGN_EFFICIENCY: Target territory / bait territory. 1 == perfectly efficient; 0.5 == half of baited bases are not on target.
- 6. TOTAL_READS: The total number of reads in the SAM or BAM file examined.
-  7. PF_READS: The number of reads that pass the vendor's filter.
-  8. PF_UNIQUE_READS: The number of PF reads that are not marked as duplicates.
-  9. PCT_PF_READS: PF reads / total reads. The percent of reads passing filter.
- 10. PCT_PF_UQ_READS: PF Unique Reads / Total Reads.
- 11. PF_UQ_READS_ALIGNED: The number of PF unique reads that are aligned with mapping score > 0 to the reference genome.
- 12. PCT_PF_UQ_READS_ALIGNED: PF Reads Aligned / PF Reads.
- 13. PF_UQ_BASES_ALIGNED: The number of bases in the PF aligned reads that are mapped to a reference base. Accounts for clipping and gaps.
- 14. ON_BAIT_BASES: The number of PF aligned bases that mapped to a baited region of the genome.
- 15. NEAR_BAIT_BASES: The number of PF aligned bases that mapped to within a fixed interval of a baited region, but not on a baited region.
- 16. OFF_BAIT_BASES: The number of PF aligned bases that mapped neither on nor near a bait.
- 17. ON_TARGET_BASES: The number of PF aligned bases that mapped to a targeted region of the genome.
- 18. PCT_SELECTED_BASES: On+Near Bait Bases / PF Bases Aligned.
- 19. PCT_OFF_BAIT: The percentage of aligned PF bases that mapped neither on nor near a bait.
- 20. ON_BAIT_VS_SELECTED: The percentage of on+near bait bases that are on as opposed to near.
- 21. MEAN_BAIT_COVERAGE: The mean coverage of all baits in the experiment.
- 22. MEAN_TARGET_COVERAGE: The mean coverage of targets that received at least coverage depth = 2 at one base.
- 23. PCT_USABLE_BASES_ON_BAIT: The number of aligned, de-duped, on-bait bases out of the PF bases available.
- 24. PCT_USABLE_BASES_ON_TARGET: The number of aligned, de-duped, on-target bases out of the PF bases available.
- 25. FOLD_ENRICHMENT: The fold by which the baited region has been amplified above genomic background.
- 26. ZERO_CVG_TARGETS_PCT: The number of targets that did not reach coverage=2 over any base.
- 27. FOLD_80_BASE_PENALTY: The fold over-coverage necessary to raise 80% of bases in "non-zero-cvg" targets to the mean coverage level in those targets.
- 28. PCT_TARGET_BASES_2X: The percentage of ALL target bases achieving 2X or greater coverage.
- 29. PCT_TARGET_BASES_10X: The percentage of ALL target bases achieving 10X or greater coverage.
- 30. PCT_TARGET_BASES_20X: The percentage of ALL target bases achieving 20X or greater coverage.
- 31. PCT_TARGET_BASES_30X: The percentage of ALL target bases achieving 30X or greater coverage.
- 32. HS_LIBRARY_SIZE: The estimated number of unique molecules in the selected part of the library.
- 33. HS_PENALTY_10X: The "hybrid selection penalty" incurred to get 80% of target bases to 10X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 10X coverage I need to sequence until PF_ALIGNED_BASES = 10^6 * 10 * HS_PENALTY_10X.
- 34. HS_PENALTY_20X: The "hybrid selection penalty" incurred to get 80% of target bases to 20X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 20X coverage I need to sequence until PF_ALIGNED_BASES = 10^6 * 20 * HS_PENALTY_20X.
- 35. HS_PENALTY_30X: The "hybrid selection penalty" incurred to get 80% of target bases to 30X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 30X coverage I need to sequence until PF_ALIGNED_BASES = 10^6 * 30 * HS_PENALTY_30X.
-
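-As a worked example of the HS_PENALTY arithmetic quoted above (the penalty value is made up)::
-
-  # 10 Mb design at 10X, plugging into the formula given for HS_PENALTY_10X:
-  hs_penalty_10x = 5.0                                    # hypothetical value
-  pf_aligned_bases_needed = 10**6 * 10 * hs_penalty_10x   # = 5.0e7
-  # i.e. sequence until ~50 million PF aligned bases have been produced.
-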
-.. class:: warningmark
-
-**Warning on SAM/BAM quality**
-
-Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
-flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
-to be the only way to deal with SAM/BAM that cannot be parsed.
-
-
-  </help>
-</tool>
--- a/tools/picard/rgPicardInsertSize.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-<tool name="Insertion size metrics" id="PicardInsertSize" version="0.3.0">
-  <description>for PAIRED data</description>
-  <requirements><requirement type="package">picard</requirement></requirements>
-  <command interpreter="python">
-   picard_wrapper.py -i "$input_file" -n "$out_prefix" --tmpdir "${__new_file_path__}" --taillimit "$tailLimit"
-   --histwidth "$histWidth" --minpct "$minPct"
-   -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/CollectInsertSizeMetrics.jar" -d "$html_file.files_path" -t "$html_file"
-  </command>
-  <inputs>
-    <param format="bam,sam" name="input_file" type="data" label="SAM/BAM dataset to generate statistics for"
-      help="If empty, upload or import a SAM/BAM dataset."/>
-    <param name="out_prefix" value="Insertion size metrics" type="text"
-      label="Title for the output file" help="Use this remind you what the job was for" size="120" />
-    <param name="tailLimit" value="10000" type="integer"
-      label="Tail limit" size="5" 
-      help="When calculating mean and stdev stop when the bins in the tail of the distribution contain fewer than mode/TAIL_LIMIT items" />
-     <param name="histWidth" value="0" type="integer"
-      label="Histogram width" size="5" 
-      help="Explicitly sets the histogram width, overriding the TAIL_LIMIT option - leave 0 to ignore" />
-     <param name="minPct" value="0.01" type="float"
-      label="Minimum percentage" size="5" 
-      help="Discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads" />
-  </inputs>
-  <outputs>
-    <data format="html" name="html_file" label="InsertSize_${out_prefix}.html"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input_file" value="picard_input_tiny.sam" />
-      <param name="out_prefix" value="Insertion size metrics" />
-      <param name="tailLimit" value="10000" />
-      <param name="histWidth" value="0" />
-      <param name="minPct" value="0.01" />
-      <output name="html_file" file="picard_output_insertsize_tinysam.html" ftype="html" compare="contains" lines_diff="40" />
-    </test>
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**Purpose**
-
-Reads a SAM or BAM file and describes the distribution 
-of insert size (excluding duplicates) with metrics and a histogram plot.
-
-**Picard documentation**
-
-This is a Galaxy wrapper for CollectInsertSizeMetrics, a part of the external package Picard-tools_.
-
- .. _Picard-tools: http://www.google.com/search?q=picard+samtools
-
-.. class:: warningmark
-
-**Useful for paired data only**
-
-This tool works for paired data only and can be expected to fail for single end data.
-
------
-
-.. class:: infomark
-
-**Inputs, outputs, and parameters**
-
-Picard documentation says (reformatted for Galaxy):
-
-.. csv-table::
-   :header-rows: 1
-
-    Option,Description
-    "INPUT=File","SAM or BAM file Required."
-    "OUTPUT=File","File to write insert size metrics to Required."
-    "HISTOGRAM_FILE=File","File to write insert size histogram chart to Required."
-    "TAIL_LIMIT=Integer","When calculating mean and stdev stop when the bins in the tail of the distribution contain fewer than mode/TAIL_LIMIT items. This also limits how much data goes into each data category of the histogram."
-    "HISTOGRAM_WIDTH=Integer","Explicitly sets the histogram width, overriding the TAIL_LIMIT option. Also, when calculating mean and stdev, only bins LE HISTOGRAM_WIDTH will be included. "
-    "MINIMUM_PCT=Float","When generating the histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads. (Range: 0 to 1) Default value: 0.01."
-    "STOP_AFTER=Integer","Stop after processing N reads, mainly for debugging. Default value: 0."
-    "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created. Default value: false."
-
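-As a rough sketch of the TAIL_LIMIT trimming described above (not Picard's implementation; the histogram values are hypothetical)::
-
-  # hist maps insert size -> read-pair count.
-  hist = {200: 5, 210: 40, 220: 100, 230: 35, 240: 3}
-  tail_limit = 10
-  cutoff = max(hist.values()) / float(tail_limit)   # mode/TAIL_LIMIT = 10.0
-  kept = dict((size, n) for size, n in hist.items() if n >= cutoff)
-  # Bins 200 and 240 fall below the cutoff and are excluded before the
-  # mean and stdev are computed.
-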
-.. class:: warningmark
-
-**Warning on SAM/BAM quality**
-
-Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
-flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
-to be the only way to deal with SAM/BAM that cannot be parsed.
-
-  </help>
-</tool>
--- a/tools/picard/rgPicardLibComplexity.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,122 +0,0 @@
-<tool name="Estimate Library Complexity" id="rgEstLibComp" version="0.01">
-  <command interpreter="python">
-   picard_wrapper.py -i "$input_file" -n "$out_prefix" --tmpdir "${__new_file_path__}" --minid "$minIDbases"
-   --maxdiff "$maxDiff" --minmeanq "$minMeanQ" --readregex "$readRegex" --optdupdist "$optDupeDist"
-   -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/EstimateLibraryComplexity.jar" -d "$html_file.files_path" -t "$html_file"
-  </command>
-  <inputs>
-    <param format="bam,sam" name="input_file" type="data" label="SAM/BAM dataset"
-      help="If empty, upload or import a SAM/BAM dataset."/>
-    <param name="out_prefix" value="Library Complexity" type="text"
-      label="Title for the output file" help="Use this remind you what the job was for." size="80" />
-    <param name="minIDbases" value="5" type="integer"  label="Minimum identical bases at starts of reads for grouping" size="5" 
-      help="Total_reads / 4^max_id_bases reads will be compared at a time. Lower numbers = more accurate results and exponentially more time/memory." />
-     <param name="maxDiff" value="0.03" type="float"
-      label="Maximum difference rate for identical reads" size="5" 
-      help="The maximum rate of differences between two reads to call them identical" />
-     <param name="minMeanQ" value="20" type="integer"
-      label="Minimum percentage" size="5" 
-      help="The minimum mean quality of bases in a read pair. Lower average quality reads filtered out from all calculations" />
-     <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" type="text" size="120"
-      label="Regular expression that can be used to parse read names in the incoming SAM file" 
-      help="Names are parsed to extract: tile/region, x coordinate and y coordinate, to estimate optical duplication rate" >
-      <sanitizer>
-        <valid initial="string.printable">
-         <remove value="&apos;"/>
-        </valid>
-        <mapping initial="none">
-          <add source="&apos;" target="__sq__"/>
-        </mapping>
-      </sanitizer>
-     </param>
-     <param name="optDupeDist" value="100" type="text"
-      label="The maximum offset between two duplicte clusters in order to consider them optical duplicates." size="5" 
-      help="e.g. 5-10 pixels. Later Illumina software versions multiply pixel values by 10, in which case 50-100" />
-
-  </inputs>
-  <outputs>
-    <data format="html" name="html_file" label="${out_prefix}_lib_complexity.html"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input_file" value="picard_input_tiny.sam" />
-      <param name="out_prefix" value="Library Complexity" />
-      <param name="minIDbases" value="5" />
-      <param name="maxDiff" value="0.03" />
-      <param name="minMeanQ" value="20" />
-      <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
-      <param name="optDupeDist" value="100" />      
-      <output name="html_file" file="picard_output_estlibcomplexity_tinysam.html" ftype="html" lines_diff="30" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**Purpose**
-
-Attempts to estimate library complexity from sequence alone. 
-Does so by sorting all reads by the first N bases (5 by default) of each read and then 
-comparing reads with the first N bases identical to each other for duplicates. Reads are considered to be 
-duplicates if they match each other with no gaps and an overall mismatch rate less than or equal to MAX_DIFF_RATE (0.03 by default).
-
-Reads of poor quality are filtered out so as to provide a more accurate estimate. 
-The filtering removes reads with any no-calls in the first N bases or with a mean base quality lower than 
-MIN_MEAN_QUALITY across either the first or second read.
-
-The algorithm attempts to detect optical duplicates separately from PCR duplicates and excludes these in the 
-calculation of library size. Also, since there is no alignment to screen out technical reads one 
-further filter is applied on the data. After examining all reads a histogram is built of 
-[#reads in duplicate set -> #of duplicate sets]; all bins that contain exactly one duplicate set are
-then removed from the histogram as outliers before library size is estimated.
-
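-A toy sketch of the grouping-and-comparison step (illustrative only; real reads, pairing and quality filtering are omitted)::
-
-  from collections import defaultdict
-
-  MIN_IDENTICAL_BASES = 5   # the N above
-  MAX_DIFF_RATE = 0.03
-
-  def mismatch_rate(a, b):
-      n = min(len(a), len(b))
-      return sum(1 for x, y in zip(a[:n], b[:n]) if x != y) / float(n)
-
-  # Group reads (hypothetical sequences) by their first N bases.
-  groups = defaultdict(list)
-  for read in ["ACGTAACCGGTT", "ACGTAACCGGTT", "TTGCAACCGGTT"]:
-      groups[read[:MIN_IDENTICAL_BASES]].append(read)
-
-  # Within each group, call a pair duplicates if the reads are similar enough.
-  for prefix, reads in groups.items():
-      for i in range(len(reads)):
-          for j in range(i + 1, len(reads)):
-              if mismatch_rate(reads[i], reads[j]) <= MAX_DIFF_RATE:
-                  print("duplicate pair in group %s" % prefix)
-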
-**Picard documentation**
-
-This is a Galaxy wrapper for EstimateLibraryComplexity, a part of the external package Picard-tools_.
-
- .. _Picard-tools: http://www.google.com/search?q=picard+samtools
-
------
-
-.. class:: infomark
-
-**Inputs, outputs, and parameters**
-
-Picard documentation says (reformatted for Galaxy):
-
-.. csv-table::
-   :header-rows: 1
-
-    Option,Description
-    "INPUT=File","One or more files to combine and estimate library complexity from. Reads can be mapped or unmapped. This option may be specified 0 or more times."
-    "OUTPUT=File","Output file to writes per-library metrics to. Required."
-    "MIN_IDENTICAL_BASES=Integer","The minimum number of bases at the starts of reads that must be identical for reads to be grouped together for duplicate detection. In effect total_reads / 4^max_id_bases reads will be compared at a time, so lower numbers will produce more accurate results but consume exponentially more memory and CPU. Default value: 5."
-    "MAX_DIFF_RATE=Double","The maximum rate of differences between two reads to call them identical. Default value: 0.03. "
-    "MIN_MEAN_QUALITY=Integer","The minimum mean quality of the bases in a read pair for the read to be analyzed. Reads with lower average quality are filtered out and not considered in any calculations. Default value: 20."
-    "READ_NAME_REGEX=String","Regular expression that can be used to parse read names in the incoming SAM file. Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. The regular expression should contain three capture groups for the three variables, in order. Default value: [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*. This option can be set to 'null' to clear the default value."
-    "OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer","The maximum offset between two duplicte clusters in order to consider them optical duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels) unless using later versions of the Illumina pipeline that multiply pixel values by 10, in which case 50-100 is more normal. Default value: 100"
-    "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created. Default value: false. This option can be set to 'null' to clear the default value. "
-
-.. class:: warningmark
-
-**Warning on SAM/BAM quality**
-
-Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
-flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
-to be the only way to deal with SAM/BAM that cannot be parsed.
-
-.. class:: infomark
-
-**Note on the Regular Expression**
-
-(from the Picard docs)
-This tool requires a valid regular expression to parse out the read names in the incoming SAM or BAM file. 
-These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. 
-The regular expression should contain three capture groups for the three variables, in order. 
-Default value: [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*.
-
-
-  </help>
-</tool>
-
-
--- a/tools/picard/rgPicardMarkDups.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,130 +0,0 @@
-<tool name="Mark Duplicate reads" id="rgPicardMarkDups" version="0.01">
-  <command interpreter="python">
-   picard_wrapper.py -i "$input_file" -n "$out_prefix" --tmpdir "${__new_file_path__}" -o "$out_file"
-   --remdups "$remDups" --assumesorted "$assumeSorted" --readregex "$readRegex" --optdupdist "$optDupeDist"
-   -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/MarkDuplicates.jar" -d "$html_file.files_path" -t "$html_file" -e "$input_file.ext"
-  </command>
-  <requirements><requirement type="package">picard</requirement></requirements>
-  <inputs>
-    <param format="bam,sam" name="input_file" type="data" label="SAM/BAM dataset to mark duplicates in"
-      help="If empty, upload or import a SAM/BAM dataset."/>
-    <param name="out_prefix" value="Dupes Marked" type="text"
-      label="Title for the output file" help="Use this remind you what the job was for" size="80" />
-    <param name="remDups" value="false" type="boolean"  label="Remove duplicates from output file"
-      truevalue="true" falsevalue="false" checked="yes" 
-      help="If true do not write duplicates to the output file instead of writing them with appropriate flags set." />
-    <param name="assumeSorted" value="true" type="boolean"  label="Assume reads are already ordered"
-      truevalue="true" falsevalue="false" checked="yes" 
-      help="If true assume input data are already sorted (most Galaxy SAM/BAM should be)." />
-     <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" type="text" size="80"
-      label="Regular expression that can be used to parse read names in the incoming SAM file" 
-      help="Names are parsed to extract: tile/region, x coordinate and y coordinate, to estimate optical duplication rate" >
-      <sanitizer>
-        <valid initial="string.printable">
-         <remove value="&apos;"/>
-        </valid>
-        <mapping initial="none">
-          <add source="&apos;" target="__sq__"/>
-        </mapping>
-      </sanitizer>
-     </param>
-     <param name="optDupeDist" value="100" type="integer"
-      label="The maximum offset between two duplicate clusters in order to consider them optical duplicates." size="5" 
-      help="e.g. 5-10 pixels. Later Illumina software versions multiply pixel values by 10, in which case 50-100." >
-      <validator type="in_range" message="Minimum optical dupe distance must be positive" min="0" />    
-     </param>
-
-  </inputs>
-  <outputs>
-    <data format="bam" name="out_file" label="MarkDups_${out_prefix}.bam"/>
-    <data format="html" name="html_file" label="MarkDups_${out_prefix}.html"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input_file" value="picard_input_tiny_coord.bam" ftype="bam" />
-      <param name="out_prefix" value="Dupes Marked" />
-      <param name="remDups" value="false" />
-      <param name="assumeSorted" value="true" />
-      <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
-      <param name="optDupeDist" value="100" />      
-      <output name="out_file" file="picard_output_markdups_sortedpairsam.bam" ftype="bam" compare="diff" />
-      <output name="html_file" file="picard_output_markdups_sortedpairsam.html" ftype="html" lines_diff="75" />
-    </test>
-    <test>
-      <param name="input_file" value="picard_input_tiny_coord.sam" ftype="sam" />
-      <param name="out_prefix" value="Dupes Marked" />
-      <param name="remDups" value="true" />
-      <param name="assumeSorted" value="true" />
-      <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
-      <param name="optDupeDist" value="100" />
-      <output name="out_file" file="picard_output_markdups_remdupes.bam" ftype="bam" compare="diff" />
-      <output name="html_file" file="picard_output_markdups_sortedpairsam.html" ftype="html" lines_diff="75" />
-    </test>
-  </tests>
-  
-  <help>
-
-.. class:: infomark
-
-**Purpose**
-
-Marks all duplicate reads in a provided SAM or BAM file and either removes them or flags them.
-
-**Picard documentation**
-
-This is a Galaxy wrapper for MarkDuplicates, a part of the external package Picard-tools_.
-
- .. _Picard-tools: http://www.google.com/search?q=picard+samtools
-
------
-
-.. class:: infomark
-
-**Inputs, outputs, and parameters**
-
-Picard documentation says (reformatted for Galaxy):
-
-.. csv-table:: Mark Duplicates docs
-   :header-rows: 1
-
-    Option,Description
-    "INPUT=File","The input SAM or BAM file to analyze. Must be coordinate sorted. Required."
-    "OUTPUT=File","The output file to right marked records to Required."
-    "METRICS_FILE=File","File to write duplication metrics to Required."
-    "REMOVE_DUPLICATES=Boolean","If true do not write duplicates to the output file instead of writing them with appropriate flags set. Default value: false."
-    "ASSUME_SORTED=Boolean","If true, assume that the input file is coordinate sorted, even if the header says otherwise. Default value: false."
-    "MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=Integer","This option is obsolete. ReadEnds will always be spilled to disk. Default value: 50000."
-    "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=Integer","Maximum number of file handles to keep open when spilling read ends to disk."
-    "READ_NAME_REGEX=String","Regular expression that can be used to parse read names in the incoming SAM file. Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. "
-    "OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer","The maximum offset between two duplicte clusters in order to consider them optical duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels) unless using later versions of the Illumina pipeline that multiply pixel values by 10, in which case 50-100 is more normal. Default value: 100"
-
-.. class:: warningmark
-
-**Warning on SAM/BAM quality**
-
-Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
-flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
-to be the only way to deal with SAM/BAM that cannot be parsed.
-
-.. class:: infomark
-
-**Note on the Regular Expression**
-
-(from the Picard docs)
-This tool requires a valid regular expression to parse out the read names in the incoming SAM or BAM file. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. The regular expression should contain three capture groups for the three variables, in order. Default value: [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*.
-
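-For example, applying the default expression to a hypothetical Illumina-style read name (sketch only)::
-
-  import re
-  regex = r"[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*"
-  m = re.match(regex, "MACHINE1:7:2204:10241:55242")   # hypothetical name
-  tile, x, y = m.groups()   # ('2204', '10241', '55242')
-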
-Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. All records are then written to the output file with the duplicate records flagged unless the remove duplicates option is selected. In some cases you may want to do this, but please only do this if you really understand what you are doing.
-
-  </help>
-</tool>
-
-
-
-
-
-
-
-
-
-
-
-
--- a/tools/plotting/bar_chart.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,144 +0,0 @@
-#!/usr/bin/env python
-
-
-"""
-bar_chart.py <datafile> <xtic column> <column_list> <title> <ylabel> <yrange_min> <yrange_max> <graph_file> <img_size>
-a generic histogram builder based on gnuplot backend
-
-   data_file    - tab delimited file with data
-   xtic_column  - column containing labels for x ticks [integer, 0 means no ticks]
-   column_list  - comma separated list of columns to plot
-   title        - title for the entire histogram
-   ylabel       - y axis label
-   yrange_min   - minimal value at the y axis (integer)
-   yrange_max   - maximal value at the y axis (integer)
-                  to set yrange to autoscaling assign 0 to yrange_min and yrange_max
-   graph_file   - file to write histogram image to
-   img_size     - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.)
-   
-   
-   This tool requires gnuplot and gnuplot.py
-
-anton nekrutenko | anton@bx.psu.edu
-
-"""
-
-import Gnuplot, Gnuplot.funcutils
-import sys, string, tempfile, os
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-def main(tmpFileName):
-    skipped_lines_count = 0
-    skipped_lines_index = []
-    gf = open(tmpFileName, 'w')
-    
-    
-    try:
-        in_file   = open( sys.argv[1], 'r' )
-        xtic      = int( sys.argv[2] )
-        col_list  = string.split( sys.argv[3],"," )
-        title     = 'set title "' + sys.argv[4] + '"'
-        ylabel    = 'set ylabel "' + sys.argv[5] + '"'
-        ymin      = sys.argv[6]
-        ymax      = sys.argv[7]
-        img_file  = sys.argv[8]
-        img_size  = sys.argv[9]
-    except:
-        stop_err("Check arguments\n")
-        
-    try:
-        int( col_list[0] )
-    except:
-        stop_err('You forgot to set columns for plotting\n')    
-    
-       
-    for i, line in enumerate( in_file ):
-        valid = True
-        line = line.rstrip('\r\n')
-        if line and not line.startswith( '#' ):
-            row = []
-            try:
-                fields = line.split( '\t' )
-                for col in col_list:
-                    row.append( str( float( fields[int( col )-1] ) ) )
-                    
-            except:
-                valid = False
-                skipped_lines_count += 1
-                skipped_lines_index.append(i)
-                    
-        else:
-            valid = False
-            skipped_lines_count += 1
-            skipped_lines_index.append(i)
-            
-        if valid and xtic > 0:
-            row.append( fields[xtic-1] )
-        elif valid and xtic == 0:
-            row.append( str( i ) )    
-            
-        if valid:
-            gf.write( '\t'.join( row ) )
-            gf.write( '\n' )  
-             
-    if skipped_lines_count < i:
-        
-        #prepare 'using' clause of plot statement
-        
-        g_plot_command = ' '
-        
-        #set the first column
-        if xtic > 0:
-            g_plot_command = "'%s' using 1:xticlabels(%s) ti 'Column %s', " % ( tmpFileName, str( len( row ) ), col_list[0] )
-        else:
-            g_plot_command = "'%s' using 1 ti 'Column %s', " % ( tmpFileName, col_list[0] )
-        
-        #set subsequent columns
-        
-        for i in range(1,len(col_list)):
-            g_plot_command += "'%s' using %s t 'Column %s', " % ( tmpFileName, str( i+1 ), col_list[i] )
-        
-        g_plot_command = g_plot_command.rstrip( ', ' )
-        
-        yrange = 'set yrange [' + ymin + ":" + ymax + ']'
-                    
-        try:
-            g = Gnuplot.Gnuplot()
-            g('reset')
-            g('set boxwidth 0.9 absolute')
-            g('set style fill  solid 1.00 border -1')
-            g('set style histogram clustered gap 5 title  offset character 0, 0, 0')
-            g('set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0')
-            g('set key invert reverse Left outside')
-            if xtic == 0:  g('unset xtics')
-            g(title) 
-            g(ylabel)
-            g_term = 'set terminal png tiny size ' + img_size
-            g(g_term)
-            g_out = 'set output "' + img_file + '"'
-            if ymin != ymax:
-                g(yrange)
-            g(g_out)
-            g('set style data histograms')
-            g.plot(g_plot_command)
-        except:
-            stop_err("Gnuplot error: Data cannot be plotted")
-    else:
-        sys.stderr.write('Column(s) %s of your dataset do not contain valid numeric data' %sys.argv[3] )
-        
-    if skipped_lines_count > 0:
-        sys.stdout.write('\nWARNING. Your dataset contains %d invalid line(s) starting with line #%d.  These lines were skipped while building the graph.\n' % ( skipped_lines_count, skipped_lines_index[0]+1 ) )
-    
-
-if __name__ == "__main__":
-    # The tempfile initialization is here because while inside the main() it seems to create a condition
-    # when the file is removed before gnuplot has a chance of accessing it
-    gp_data_file = tempfile.NamedTemporaryFile('w')
-    Gnuplot.gp.GnuplotOpts.default_term = 'png'
-    main(gp_data_file.name)
-    
--- a/tools/plotting/bar_chart.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-<tool id="barchart_gnuplot" name="Bar chart">
-  <description>for multiple columns</description>
-  <command interpreter="python">
-    #if $xtic.userSpecified == "Yes" #bar_chart.py $input $xtic.xticColumn $colList "$title" "$ylabel" $ymin $ymax $out_file1 "$pdf_size"
-    #else                            #bar_chart.py $input 0 $colList "$title" "$ylabel" $ymin $ymax $out_file1 "$pdf_size"
-    #end if
-  </command>
-  <inputs>
-    <param name="input" type="data" format="tabular" label="Dataset" help="Dataset missing? See TIP below"/>
-    <conditional name="xtic">
-        <param name="userSpecified" type="select" label="Use X Tick labels?" help="see example below">
-            <option value="Yes">Yes</option>
-            <option value="No">No</option>
-        </param>
-        <when value="Yes">
-            <param name="xticColumn" type="data_column" data_ref="input" numerical="False" label="Use this column for X Tick labels" />
-        </when>
-        <when value="No">
-        </when>
-    </conditional>           
-    <param name="colList" label="Numerical columns" type="data_column" numerical="True" multiple="True" data_ref="input" help="Multi-select list - hold the appropriate key while clicking to select multiple columns" />
-    <param name="title" type="text" size="30" value="Bar Chart" label="Plot title"/>
-    <param name="ylabel" type="text" size="30" value="V1" label="Label for Y axis"/>
-    <param name="ymin" type="integer" size="4" value="0" label="Minimal value on Y axis" help="set to 0 for autoscaling"/>
-    <param name="ymax" type="integer" size="4" value="0" label="Maximal value on Y axis" help="set to 0 for autoscaling"/>
-    <param name="pdf_size" type="select" label="Choose chart size (pixels)">
-        <option value="800,600">Normal: 800 by 600</option>
-        <option value="640,480">Small: 640 by 480</option>
-        <option value="1480,800">Large: 1480 by 800</option>
-        <option value="600,800">Normal Flipped: 600 by 800</option>
-        <option value="480,640">Small Flipped: 480 by 640</option>
-        <option value="800,1480">Large Flipped: 800 by 1480</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <requirements>
-    <requirement type="python-module">Gnuplot</requirement>
-    <requirement type="python-module">Numeric</requirement>
-  </requirements>
-  <help>
-
-**What it does**
-
-This tool builds a bar chart on one or more columns. Suppose you have dataset like this one::
-
-  Gene1	10	15
-  Gene2	20	14
-  Gene3	67	45
-  Gene4	55	12
-
-Graphing columns 2 and 3 while using column 1 for X Tick Labels will produce the following plot:
-
-.. image:: ./static/images/bar_chart.png 
-   :height: 324 
-   :width: 540 
-    
-</help>
-</tool>
--- a/tools/plotting/boxplot.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,102 +0,0 @@
-<tool id="qual_stats_boxplot" name="Boxplot" version="1.0.0">
-  <description>of quality statistics</description>
-  <command>gnuplot &lt; '$gnuplot_commands' 2&gt;&amp;1 || echo "Error running gnuplot." >&amp;2</command>
-  <requirements>
-    <requirement type="binary" version="gnuplot 4.2 patchlevel 2">gnuplot</requirement>
-  </requirements>
-  <inputs>
-    <param name="input_file" type="data" format="tabular" label="Quality Statistics File"/>
-    <param name="title" type="text" value="Box plot in Galaxy" label="Title for plot" size="50"/>
-    <param name="graph_size" type="text" value="2048,768" label="Dimensions of Graph"/>
-    <param name="xlabel" type="text" value="X Axis Label" label="X axis label" size="50"/>
-    <param name="ylabel" type="text" value="Score Value" label="Y axis label" size="50"/>
-    <param name="xcol" type="data_column" data_ref="input_file" label="Column for X axis position" default_value="1" help="A unique number; c1 if plotting output of FASTQ summary"/>
-    <param name="q1col" type="data_column" data_ref="input_file" label="Column for Q1" default_value="7" help="c7 if plotting output of FASTQ summary"/>
-    <param name="medcol" type="data_column" data_ref="input_file" label="Column for Median" default_value="8" help="c8 if plotting output of FASTQ summary"/>
-    <param name="q3col" type="data_column" data_ref="input_file" label="Column for Q3" default_value="9" help="c9 if plotting output of FASTQ summary"/>
-    <param name="lwcol" type="data_column" data_ref="input_file" label="Column for left whisker" default_value="11" help="c11 if plotting output of FASTQ summary"/>
-    <param name="rwcol" type="data_column" data_ref="input_file" label="Column for right whisker" default_value="12" help="c12 if plotting output of FASTQ summary"/>
-    <conditional name="use_outliers">
-      <param name="use_outliers_type" type="select" label="Plot Outliers">
-        <option value="use_outliers" selected="true">Plot Outliers</option>
-        <option value="dont_use_outliers">Don't Plot Outliers</option>
-      </param>
-      <when value="use_outliers">
-        <param name="outliercol" type="data_column" data_ref="input_file" label="Column for Outliers" default_value="13" help="c13 if plotting output of FASTQ summary"/>
-      </when>
-      <when value="dont_use_outliers">
-      </when>
-    </conditional>
-  </inputs>
-  <configfiles>
-    <configfile name="gnuplot_commands">
-set output '$output_file'
-set term png size ${graph_size}
-set boxwidth 0.8 
-set key right tmargin
-set xlabel "${xlabel}"
-set ylabel "${ylabel}"
-set title  "${title}"
-set xtics 1 
-set ytics 1
-set grid ytics
-set offsets 1, 1, 1, 1
-plot '${input_file}' using ${xcol}:${q1col}:${lwcol}:${rwcol}:${q3col} with candlesticks lt 1  lw 1 title 'Quartiles' whiskerbars, \
-      ''         using ${xcol}:${medcol}:${medcol}:${medcol}:${medcol} with candlesticks lt -1 lw 2 title 'Medians'\
-#if str( $use_outliers['use_outliers_type'] ) == 'use_outliers':
-,      "&lt; python -c \"for xval, yvals in [ ( fields[${xcol} - 1], fields[${use_outliers['outliercol']} - 1].split( ',' ) ) for fields in [ line.rstrip( '\\n\\r' ).split( '\\t' ) for line in open( '${input_file}' ) if not line.startswith( '#' ) ] if len( fields ) &gt; max( ${xcol} - 1, ${use_outliers['outliercol']} - 1 ) ]: print '\\n'.join( [ '%s\\t%s' % ( xval, yval ) for yval in yvals if yval ] )\"" using 1:2 with points pt 29 title 'Outliers'
-#end if
-    </configfile>
-  </configfiles>
-  <outputs>
-    <data name="output_file" format="png" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input_file" value="fastq_stats_1_out.tabular" ftype="tabular" />
-      <param name="title" value="Boxplot of Summary Statistics for Sanger Reads" />
-      <param name="graph_size" value="2048,768" />
-      <param name="xlabel" value="Read Column" />
-      <param name="ylabel" value="Quality Score Value" />
-      <param name="xcol" value="1" />
-      <param name="q1col" value="7" />
-      <param name="medcol" value="8" />
-      <param name="q3col" value="9" />
-      <param name="lwcol" value="11" />
-      <param name="rwcol" value="12" />
-      <param name="use_outliers_type" value="use_outliers" />
-      <param name="outliercol" value="13" />
-      <output name="output_file" file="boxplot_summary_statistics_out.png" />
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-Creates a boxplot graph. Its main purpose is to display the distribution of quality scores produced by the *NGS: QC and manipulation -> FASTQ Summary Statistics* tool.
-
-.. class:: warningmark
-
-**TIP:** If you want to display a distribution of quality scores produced by the *NGS: QC and manipulation -> FASTQ Summary Statistics* tool and the column assignments within this tool's interface are not set automatically (they will all read "c1" in that case), set the columns manually to the following values::
-
-  Column for X axis           c1
-  Column for Q1               c7
-  Column for Median           c8
-  Column for Q3               c9
-  Column for left whisker     c11
-  Column for right whisker    c12
-  Column for Outliers         c13
-
------
-
-**Output Example**
-
-* Black horizontal lines are medians
-* Rectangular red boxes show the Inter-quartile Range (IQR) (top value is Q3, bottom value is Q1)
-* Whiskers extend to at most 1.5*IQR beyond the box; points beyond the whiskers are outliers
-
-.. image:: ./static/images/solid_qual.png
-
-
-  </help>
-</tool>
--- a/tools/plotting/histogram.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-#!/usr/bin/env python
-#Greg Von Kuster
-
-import sys
-from rpy import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-def main():
-
-    # Handle input params
-    in_fname = sys.argv[1]
-    out_fname = sys.argv[2] 
-    try:
-        column = int( sys.argv[3] ) - 1
-    except:
-        stop_err( "Column not specified, your query does not contain a column of numerical data." )
-    title = sys.argv[4]
-    xlab = sys.argv[5]
-    breaks = int( sys.argv[6] )
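-    # a value of 0 means "auto": fall back to R's Sturges rule below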
-    if breaks == 0:
-        breaks = "Sturges"
-    if sys.argv[7] == "true":
-        density = True
-    else: density = False
-    if len( sys.argv ) >= 9 and sys.argv[8] == "true":
-        frequency = True
-    else: frequency = False
-
-    matrix = []
-    skipped_lines = 0
-    first_invalid_line = 0
-    invalid_value = ''
-    i = 0
-    for i, line in enumerate( file( in_fname ) ):
-        valid = True
-        line = line.rstrip('\r\n')
-        # Skip comments
-        if line and not line.startswith( '#' ): 
-            # Extract values and convert to floats
-            row = []
-            try:
-                fields = line.split( "\t" )
-                val = fields[column]
-                if val.lower() == "na":
-                    # treat NA as missing: append nan instead of failing below
-                    row.append( float( "nan" ) )
-                else:
-                    row.append( float( val ) )
-            except IndexError:
-                valid = False
-                skipped_lines += 1
-                if not first_invalid_line:
-                    first_invalid_line = i+1
-            except ValueError:
-                valid = False
-                skipped_lines += 1
-                if not first_invalid_line:
-                    first_invalid_line = i+1
-                    invalid_value = fields[column]
-        else:
-            valid = False
-            skipped_lines += 1
-            if not first_invalid_line:
-                first_invalid_line = i+1
-
-        if valid:
-            matrix += row
-
-    if skipped_lines < i:
-        try:
-            a = r.array( matrix )
-            r.pdf( out_fname, 8, 8 )
-            histogram = r.hist( a, probability=not frequency, main=title, xlab=xlab, breaks=breaks )
-            if density:
-                density = r.density( a )
-                if frequency:
-                    scale_factor = len( matrix ) * ( histogram['mids'][1] - histogram['mids'][0] ) #uniform bandwidth taken from first 2 midpoints
-                    density[ 'y' ] = map( lambda x: x * scale_factor, density[ 'y' ] )
-                r.lines( density )
-            r.dev_off()
-        except Exception, exc:
-            stop_err( "%s" %str( exc ) )
-    else:
-        if i == 0:
-            stop_err("Input dataset is empty.")
-        else:
-            stop_err( "All values in column %s are non-numeric." %sys.argv[3] )
-
-    print "Histogram of column %s. " %sys.argv[3]
-    if skipped_lines > 0:
-        print "Skipped %d invalid lines starting with line #%d, '%s'." % ( skipped_lines, first_invalid_line, invalid_value )
-
-    r.quit( save="no" )
-    
-if __name__ == "__main__":
-    main()
--- a/tools/plotting/histogram2.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-<tool id="histogram_rpy" name="Histogram" version="1.0.3">
-  <description>of a numeric column</description>
-  <command interpreter="python">histogram.py $input $out_file1 $numerical_column "$title" "$xlab" $breaks $density $frequency</command>
-  <inputs>
-    <param name="input" type="data" format="tabular" label="Dataset" help="Dataset missing? See TIP below"/>
-    <param name="numerical_column" type="data_column" data_ref="input" numerical="True" label="Numerical column for x axis" />
-    <param name="breaks" type="integer" size="4" value="0" label="Number of breaks (bars)"/>
-    <param name="title" type="text" size="30" value="Histogram" label="Plot title"/>
-    <param name="xlab" type="text" size="30" value="V1" label="Label for x axis"/>
-    <param name="density" type="boolean" checked="yes" label="Include smoothed density"/>
-    <param name="frequency" type="boolean" checked="no" label="Plot as frequency (counts)"/>
-  </inputs>
-  <outputs>
-    <data format="pdf" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="histogram_in1.tabular" ftype="tabular"/>
-      <param name="numerical_column" value="2"/>
-      <param name="breaks" value="0"/>
-      <param name="title" value="Histogram"/>
-      <param name="xlab" value="V1"/>
-      <param name="density" value="true"/>
-      <param name="frequency" value="false"/>
-      <output name="out_file1" file="histogram_out1.pdf"/>
-    </test>
-  </tests>
-  <requirements>
-    <requirement type="python-module">rpy</requirement>
-  </requirements>
-  <help>
-
-.. class:: infomark
-
-**TIP:** To remove comment lines that do not begin with a *#* character, use *Text Manipulation-&gt;Remove beginning*
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
------
-
-**Syntax**
-
-This tool computes a histogram of the numerical values in a column of a dataset.
-
-- All invalid, blank and comment lines in the dataset are skipped.  The number of skipped lines is displayed in the resulting history item.
-- **Column for x axis** - only numerical columns are possible.
-- **Number of breaks (bars)** - breakpoints between histogram cells. A value of '0' determines the breaks automatically.
-- **Plot title** - the histogram title.
-- **Label for x axis** - the label of the x axis for the histogram.
-- **Include smoothed density** - if checked, a smoothed density curve is overlaid on the histogram.
-- **Plot as frequency** - if checked, the y axis shows counts rather than probability density.
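-
-For example, with **Number of breaks** set to 0 on the 10-row dataset below, R's Sturges rule would suggest about 5 bins (1 + log2(10) ≈ 4.3, rounded up).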
-
------
-
-**Example**
-
-- Input file::
-
-    1	68	4.1
-    2	71	4.6
-    3	62	3.8
-    4	75	4.4
-    5	58	3.2
-    6	60	3.1
-    7	67	3.8
-    8	68	4.1
-    9	71	4.3
-    10	69	3.7 
-
-- Create a histogram on column 2 of the above dataset. 
-
-.. image:: ./static/images/histogram2.png
-
-</help>
-</tool>
--- a/tools/plotting/plot_filter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-
-def validate(incoming):
-    """Validator for the plotting program"""
-    
-    bins = incoming.get("bins","")
-    col  = incoming.get("col","")
-
-    if not bins or not col:
-        raise Exception, "You need to specify a number for bins and columns"
-
-    try:
-        bins = int(bins)
-        col  = int(col)
-    except:
-        raise Exception, "Parameters are not valid numbers, columns:%s, bins:%s" % (col, bins)
-
-    if not 1<bins<100:
-        raise Exception, "The number of bins %s must be a number between 1 and 100" % bins
-
--- a/tools/plotting/plotter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-#!/usr/bin/env python
-
-# usage: python plotter.py input_file column bins output_file style
-import sys, os
-import matplotlib; matplotlib.use('Agg')
-
-from pylab import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-if __name__ == '__main__':
-    # parse the arguments
-    
-    if len(sys.argv) != 6:
-        stop_err('Usage: python plotter.py input_file column bins output_file style')
-
-    mode = sys.argv[5]
-    HIST = mode == 'hist'
-    try:
-        col =  int(float(sys.argv[2]))
-        if HIST:
-            bin = int(float(sys.argv[3]))
-        else:
-            # hack, this parameter is the plotting style for scatter plots
-            if sys.argv[3] == 'P':
-                style = 'o'
-            elif sys.argv[3] == 'LP':
-                style = 'o-'
-            else:
-                style = '-'
-
-    except:
-        msg = 'Parameters were not numbers: %s, %s' % (sys.argv[2], sys.argv[3])
-        stop_err(msg)
-
-    # validate arguments
-    inp_file = sys.argv[1]
-    out_file = sys.argv[4]
-
-    if HIST:
-        print "Histogram on column %s (%s bins)" % (col, bin)
-    else:
-        print "Scatterplot on column %s" % (col)
-
-    xcol= col -1
-    # read the file
-    values = []
-    try:
-        count = 0
-        for line in file(inp_file):
-            count += 1
-            line = line.strip()
-            if line and line[0] != '#':
-                values.append(float(line.split()[xcol]))
-    except Exception, e:
-        stop_err("Non numerical data at line %d, column %d (%s)" % (count, col, e))
-
-    # plot the data
-
-    if HIST:
-        n, bins, patches = hist(values, bins=bin, normed=0)
-    else:
-        plot(values, style)
-    
-    xlabel('values')
-    ylabel('counts')
-
-    if HIST:
-        title('Histogram of values over column %s (%s bins)' % (col, len(bins)) )
-    else:
-        title('Scatterplot over column %s' % col )        
-    grid(True)
-    
-    # the plotter detects types by file extension
-    png_out = out_file + '.png' # force it to png
-    savefig(png_out)
-
-    # shuffle it back and clean up
-    data = file(png_out, 'rb').read() 
-    fp = open(out_file, 'wb')
-    fp.write(data)
-    fp.close()
-    os.remove(png_out)
--- a/tools/plotting/r_wrapper.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-#!/bin/sh
-
-### Run R providing the R script in $1 as standard input and passing 
-### the remaining arguments on the command line
-
-# Function that writes a message to stderr and exits
-function fail
-{
-    echo "$@" >&2
-    exit 1
-}
-
-# Ensure R executable is found
-which R > /dev/null || fail "'R' is required by this tool but was not found on path" 
-
-# Extract first argument
-infile=$1; shift
-
-# Ensure the file exists
-test -f "$infile" || fail "R input file '$infile' does not exist"
-
-# Invoke R passing file named by first argument to stdin
-R --vanilla --slave "$@" < "$infile"
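-
-# Example (hypothetical) invocation, e.g. from a Galaxy command tag:
-#   sh r_wrapper.sh my_script.R 10 output.pdf
-# where my_script.R reads the extra arguments via commandArgs()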
--- a/tools/plotting/scatterplot.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-#!/usr/bin/env python
-#Greg Von Kuster
-
-import sys
-from rpy import *
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-def main():
-
-    in_fname = sys.argv[1]
-    out_fname = sys.argv[2]
-    try:
-        columns = int( sys.argv[3] ) - 1, int( sys.argv[4] ) - 1
-    except:
-        stop_err( "Columns not specified, your query does not contain a column of numerical data." )
-    title = sys.argv[5]
-    xlab = sys.argv[6]
-    ylab = sys.argv[7]
-
-    matrix = []
-    skipped_lines = 0
-    first_invalid_line = 0
-    invalid_value = ''
-    invalid_column = 0
-    i = 0
-    for i, line in enumerate( file( in_fname ) ):
-        valid = True
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ): 
-            row = []
-            fields = line.split( "\t" )
-            for column in columns:
-                try:
-                    val = fields[column]
-                    if val.lower() == "na": 
-                        row.append( float( "nan" ) )
-                    else:
-                        row.append( float( fields[column] ) )
-                except:
-                    valid = False
-                    skipped_lines += 1
-                    if not first_invalid_line:
-                        first_invalid_line = i + 1
-                        try:
-                            invalid_value = fields[column]
-                        except:
-                            invalid_value = ''
-                        invalid_column = column + 1
-                    break
-        else:
-            valid = False
-            skipped_lines += 1
-            if not first_invalid_line:
-                first_invalid_line = i+1
-
-        if valid:
-            matrix.append( row )
-
-    if skipped_lines < i:
-        try:
-            r.pdf( out_fname, 8, 8 )
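-            # matrix rows are (x, y) pairs; R's plot() on an n x 2 matrix uses column 1 as x, column 2 as y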
-            r.plot( array( matrix ), type="p", main=title, xlab=xlab, ylab=ylab, col="blue", pch=19 )
-            r.dev_off()
-        except Exception, exc:
-            stop_err( "%s" %str( exc ) )
-    else:
-        stop_err( "All values in both columns %s and %s are non-numeric or empty." % ( sys.argv[3], sys.argv[4] ) )
-
-    print "Scatter plot on columns %s, %s. " % ( sys.argv[3], sys.argv[4] )
-    if skipped_lines > 0:
-        print "Skipped %d lines starting with line #%d, value '%s' in column %d is not numeric." % ( skipped_lines, first_invalid_line, invalid_value, invalid_column )
-
-    r.quit( save="no" )
-
-if __name__ == "__main__":
-    main()
--- a/tools/plotting/scatterplot.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-<tool id="scatterplot_rpy" name="Scatterplot">
-  <description>of two numeric columns</description>
-  <command interpreter="python">scatterplot.py $input $out_file1 $col1 $col2 "$title" "$xlab" "$ylab"</command>
-  <inputs>
-    <param name="input" type="data" format="tabular" label="Dataset" help="Dataset missing? See TIP below"/>
-    <param name="col1" type="data_column" data_ref="input" numerical="True" label="Numerical column for x axis" />
-    <param name="col2" type="data_column" data_ref="input" numerical="True" label="Numerical column for y axis" />
-    <param name="title" size="30" type="text" value="Scatterplot" label="Plot title"/>
-    <param name="xlab" size="30" type="text" value="V1" label="Label for x axis"/>
-    <param name="ylab" size="30" type="text" value="V2" label="Label for y axis"/>
-  </inputs>
-  <outputs>
-    <data format="pdf" name="out_file1" />
-  </outputs>
-  <requirements>
-    <requirement type="python-module">rpy</requirement>
-  </requirements>
-  <!-- TODO: uncomment the following test when we have tools.update_state() working for 
-       multiple dependents with the same dependency.
-  <tests>
-    <test>
-      <param name="input" value="scatterplot_in1.tabular" ftype="tabular"/>
-      <param name="col1" value="2"/>
-      <param name="col2" value="3"/>
-      <param name="title" value="Scatterplot"/>
-      <param name="xlab" value="V1"/>
-      <param name="ylab" value="V2"/>
-      <output name="out_file1" file="scatterplot_out1.pdf" />
-    </test>
-  </tests>
-  -->
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
------
-
-**Syntax**
-
-This tool creates a simple scatter plot between two numeric columns of a selected dataset.
-
-- All invalid, blank and comment lines in the dataset are skipped.  The number of skipped lines is displayed in the resulting history item.
-
-- **Plot title** - the scatterplot title.
-- **Label for x axis** and **Label for y axis** - the labels for the x and y axes of the scatterplot.
-
------
-
-**Example**
-
-- Input file::
-
-    1   68  4.1
-    2   71  4.6
-    3   62  3.8
-    4   75  4.4
-    5   58  3.2
-    6   60  3.1
-    7   67  3.8
-    8   68  4.1
-    9   71  4.3
-    10  69  3.7 
-
-- Create a simple scatterplot between the variables in column 2 and column 3 of the above dataset.
-
-.. image:: ./static/images/scatterplot.png
-
-</help>
-</tool>
--- a/tools/plotting/xy_plot.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,148 +0,0 @@
-<tool id="XY_Plot_1" name="Plotting tool" version="1.0.1">
-  <description>for multiple series and graph types</description>
-  <command interpreter="bash">r_wrapper.sh $script_file</command>
-
-  <inputs>
-    <param name="main" type="text" value="" size="30" label="Plot Title"/>
-    <param name="xlab" type="text" value="" size="30" label="Label for x axis"/>
-    <param name="ylab" type="text" value="" size="30" label="Label for y axis"/>
-    <repeat name="series" title="Series">
-      <param name="input" type="data" format="tabular" label="Dataset"/>
-      <param name="xcol" type="data_column" data_ref="input" label="Column for x axis"/>
-      <param name="ycol" type="data_column" data_ref="input" label="Column for y axis"/>
-      <conditional name="series_type">
-        <param name="type" type="select" label="Series Type">
-          <option value="line" selected="true">Line</option>
-          <option value="points">Points</option>
-        </param>
-        <when value="line">
-          <param name="lty" type="select" label="Line Type">
-            <option value="1">Solid</option>
-            <option value="2">Dashed</option>
-            <option value="3">Dotted</option>
-          </param>
-          <param name="col" type="select" label="Line Color">
-            <option value="1">Black</option>
-            <option value="2">Red</option>
-            <option value="3">Green</option>
-            <option value="4">Blue</option>
-            <option value="5">Cyan</option>
-            <option value="6">Magenta</option>
-            <option value="7">Yellow</option>
-            <option value="8">Gray</option>
-          </param>
-          <param name="lwd" type="float" label="Line Width" value="1.0"/>
-        </when>
-        <when value="points">
-          <param name="pch" type="select" label="Point Type">
-            <option value="1">Circle (hollow)</option>
-            <option value="2">Triangle (hollow)</option>
-            <option value="3">Cross</option>
-            <option value="4">Diamond (hollow)</option>
-            <option value="15">Square (filled)</option>
-            <option value="16">Circle (filled)</option>
-            <option value="17">Triangle (filled)</option>  
-          </param>
-          <param name="col" type="select" label="Point Color">
-            <option value="1">Black</option>
-            <option value="2">Red</option>
-            <option value="3">Green</option>
-            <option value="4">Blue</option>
-            <option value="5">Cyan</option>
-            <option value="6">Magenta</option>
-            <option value="7">Yellow</option>
-            <option value="8">Gray</option>
-          </param>
-          <param name="cex" type="float" label="Point Scale" value="1.0"/>
-        </when>
-      </conditional>
-    </repeat>       
-  </inputs>
-
-  <configfiles>
-    <configfile name="script_file">
-      ## Setup R error handling to go to stderr
-      options( show.error.messages=F, 
-               error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
-      ## Determine range of all series in the plot
-      xrange = c( NULL, NULL )
-      yrange = c( NULL, NULL )
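-      ## the ranges start empty (NULL); range() below extends them with each series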
-      #for $i, $s in enumerate( $series )
-        s${i} = read.table( "${s.input.file_name}" )
-        x${i} = s${i}[,${s.xcol}]
-        y${i} = s${i}[,${s.ycol}]
-        xrange = range( x${i}, xrange )
-        yrange = range( y${i}, yrange )
-      #end for
-      ## Open output PDF file
-      pdf( "${out_file1}" )
-      ## Dummy plot for axis / labels
-      plot( NULL, type="n", xlim=xrange, ylim=yrange, main="${main}", xlab="${xlab}", ylab="${ylab}" )
-      ## Plot each series
-      #for $i, $s in enumerate( $series )
-        #if $s.series_type['type'] == "line"
-          lines( x${i}, y${i}, lty=${s.series_type.lty}, lwd=${s.series_type.lwd}, col=${s.series_type.col} )
-        #elif $s.series_type.type == "points"
-          points( x${i}, y${i}, pch=${s.series_type.pch}, cex=${s.series_type.cex}, col=${s.series_type.col} )
-        #end if
-      #end for    
-      ## Close the PDF file
-      devname = dev.off() 
-    </configfile>
-  </configfiles>
-
-  <outputs>
-    <data format="pdf" name="out_file1" />
-  </outputs>
-
-    <tests>
-        <test>
-            <param name="main" value="Example XY Plot"/>
-            <param name="xlab" value="Column 1"/>
-            <param name="ylab" value="Column 2"/>
-            <param name="input" value="2.tabular" ftype="tabular"/>
-            <param name="xcol" value="1"/>
-            <param name="ycol" value="2"/>
-            <param name="type" value="line"/>
-            <param name="lty" value="2"/>
-            <param name="col" value="2"/>
-            <param name="lwd" value="1.0"/>
-            <output name="out_file1" file="XY_Plot_1_out.pdf"/>
-        </test>
-    </tests>
-<help>
-.. class:: infomark
-
-This tool plots values from the columns of a dataset against each other. It also lets you draw several series, from the same or different datasets, in one plot.
-
------
-
-.. class:: warningmark
-
-This tool throws an error if the columns selected for plotting are absent or non-numeric, or if the lengths of the selected columns differ.
-
------
-
-**Example**
-
-Input file::
-
-    1   68  4.1
-    2   71  4.6
-    3   62  3.8
-    4   75  4.4
-    5   58  3.2
-    6   60  3.1
-    7   67  3.8
-    8   68  4.1
-    9   71  4.3
-    10  69  3.7 
-
-Create a two series XY plot on the above data:
-
-- Series 1: Red Dashed-Line plot between columns 1 and 2
-- Series 2: Blue Circular-Point plot between columns 3 and 2 
-
-.. image:: ./static/images/xy_example.jpg
-</help>
-</tool>
--- a/tools/regVariation/best_regression_subsets.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-#!/usr/bin/env python
-
-from galaxy import eggs
-
-import sys, string
-from rpy import *
-import numpy
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-infile = sys.argv[1]
-y_col = int(sys.argv[2])-1
-x_cols = sys.argv[3].split(',')
-outfile = sys.argv[4]
-outfile2 = sys.argv[5]
-print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
-fout = open(outfile,'w')
-
-for i, line in enumerate( file ( infile )):
-    line = line.rstrip('\r\n')
-    if len( line )>0 and not line.startswith( '#' ):
-        elems = line.split( '\t' )
-        break 
-    if i == 30:
-        break # Hopefully we'll never get here...
-
-if len( elems )<1:
-    stop_err( "The data in your input dataset is either missing or not formatted properly." )
-
-y_vals = []
-x_vals = []
-
-for k,col in enumerate(x_cols):
-    x_cols[k] = int(col)-1
-    x_vals.append([])
-    
-NA = 'NA'
-for ind,line in enumerate( file( infile )):
-    if line and not line.startswith( '#' ):
-        try:
-            fields = line.split("\t")
-            try:
-                yval = float(fields[y_col])
-            except Exception, ey:
-                yval = r('NA')
-            y_vals.append(yval)
-            for k,col in enumerate(x_cols):
-                try:
-                    xval = float(fields[col])
-                except Exception, ex:
-                    xval = r('NA')
-                x_vals[k].append(xval)
-        except:
-            pass
-
-response_term = ""
-
-x_vals1 = numpy.asarray(x_vals).transpose()
-
-dat= r.list(x=array(x_vals1), y=y_vals)
-
-r.library("leaps")
- 
-set_default_mode(NO_CONVERSION)
-try:
-    leaps = r.regsubsets(r("y ~ x"), data= r.na_exclude(dat))
-except RException, rex:
-    stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain no numeric values.")
-set_default_mode(BASIC_CONVERSION)
-
-summary = r.summary(leaps)
-tot = len(x_vals)
-pattern = "["
-for i in range(tot):
-    pattern = pattern + 'c' + str(int(x_cols[int(i)]) + 1) + ' '
-pattern = pattern.strip() + ']'  
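-# e.g. with predictor columns 2, 3 and 4 selected, the header pattern becomes "[c2 c3 c4]"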
-print >>fout, "#Vars\t%s\tR-sq\tAdj. R-sq\tC-p\tbic" %(pattern)
-for ind,item in enumerate(summary['outmat']):
-    print >>fout, "%s\t%s\t%s\t%s\t%s\t%s" %(str(item).count('*'), item, summary['rsq'][ind], summary['adjr2'][ind], summary['cp'][ind], summary['bic'][ind])
-
-
-r.pdf( outfile2, 8, 8 )
-r.plot(leaps, scale="Cp", main="Best subsets using Cp Criterion")
-r.plot(leaps, scale="r2", main="Best subsets using R-sq Criterion")
-r.plot(leaps, scale="adjr2", main="Best subsets using Adjusted R-sq Criterion")
-r.plot(leaps, scale="bic", main="Best subsets using bic Criterion")
-
-r.dev_off()
--- a/tools/regVariation/best_regression_subsets.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-<tool id="BestSubsetsRegression1" name="Perform Best-subsets Regression">
-  <description> </description>
-  <command interpreter="python">
-    best_regression_subsets.py 
-      $input1
-      $response_col
-      $predictor_cols
-      $out_file1
-      $out_file2
-      1>/dev/null
-      2>/dev/null
-  </command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
-    <param name="response_col" label="Response column (Y)" type="data_column" data_ref="input1" />
-    <param name="predictor_cols" label="Predictor columns (X)" type="data_column" data_ref="input1" multiple="true" >
-        <validator type="no_options" message="Please select at least one column."/>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input1" />
-    <data format="pdf" name="out_file2" />
-  </outputs>
-  <requirements>
-    <requirement type="python-module">rpy</requirement>
-  </requirements>
-  <tests>
-    <!-- Testing this tool will not be possible because this tool produces a pdf output file.
-    -->
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Edit Datasets-&gt;Convert characters*
-
------
-
-.. class:: infomark
-
-**What it does**
-
-This tool uses the 'regsubsets' function from the R package 'leaps' for regression subset selection. It outputs two files: one containing a table with the best subsets and the corresponding summary statistics, and the other containing a graphical representation of the results.
-
------
-
-.. class:: warningmark
-
-**Note**
-
-- This tool currently treats all predictor and response variables as continuous variables. 
-
-- Rows containing non-numeric (or missing) data in any of the chosen columns will be omitted from the analysis.
-
-- The 6 columns in the output are described below:
-
-  - Column 1 (Vars): denotes the number of variables in the model
-  - Column 2 ([c2 c3 c4...]): a list of the user-selected predictor variables (full model). An asterisk denotes the presence of the corresponding predictor variable in the selected model.
-  - Column 3 (R-sq): the fraction of variance explained by the model
-  - Column 4 (Adj. R-sq): the R-squared statistic adjusted to penalize a higher number of predictors (p)
-  - Column 5 (Cp): Mallows' Cp statistic
-  - Column 6 (bic): Bayesian Information Criterion. 
-
-
-  </help>
-</tool>
--- a/tools/regVariation/categorize_elements_satisfying_criteria.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,172 +0,0 @@
-#!/usr/bin/perl -w
-
-# The program takes as input a set of categories, such that each category contains many elements.
-# It also takes a table relating elements with criteria, such that each element is assigned a number
-# representing the number of times the element satisfies a certain criterion. 
-# The first input is a TABULAR format file, such that the left column represents the names of categories and
-# all other columns represent the names of elements.
-# The second input is a TABULAR format file relating elements with criteria, such that the first line
-# represents the names of criteria and the left column represents the names of elements.
-# The output is a TABULAR format file relating categories with criteria, such that each category is
-# assigned a number representing the total number of times its elements satisfy a certain criterion.
-# Each category is assigned as many numbers as criteria.
-
-use strict;
-use warnings;
-
-#variables to handle information of the categories input file
-my @categoryElementsArray = ();
-my @categoriesArray = ();
-my $categoryMemberNames;
-my $categoryName;
-my %categoryMembersHash = ();
-my $memberNumber = 0;
-my $totalMembersNumber = 0;
-my $totalCategoriesNumber = 0;
-my @categoryCountersTwoDimArray = ();
-my $lineCounter1 = 0;
-
-#variables to handle information of the criteria and elements data input file
-my $elementLine;
-my @elementDataArray = ();
-my $elementName;
-my @criteriaArray = ();
-my $criteriaNumber = 0;
-my $totalCriteriaNumber = 0;
-my $lineCounter2 = 0;
-
-#variable representing the row and column indices used to store results into a two-dimensional array
-my $row = 0;
-my $column = 0;
-
-# check to make sure having correct files
-my $usage = "usage: categorize_motifs_significance.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] \n";
-die $usage unless @ARGV == 3;
-
-#get the categories input file
-my $categories_inputFile = $ARGV[0];
-
-#get the criteria and data input file
-my $elements_data_inputFile = $ARGV[1];
-
-#get the output file
-my $categorized_data_outputFile = $ARGV[2];
-
-#open the input and output files
-open (INPUT1, "<", $categories_inputFile) || die("Could not open file $categories_inputFile \n");
-open (INPUT2, "<", $elements_data_inputFile ) || die("Could not open file $elements_data_inputFile  \n");
-open (OUTPUT, ">", $categorized_data_outputFile) || die("Could not open file $categorized_data_outputFile \n"); 
-
-#store the first input file into an array
-my @categoriesData = <INPUT1>;
-
-#reset the value of $lineCounter1 to 0 
-$lineCounter1 = 0;
-
-#iterate through the first input file to get the names of categories and their corresponding elements	
-foreach $categoryMemberNames (@categoriesData){
-	chomp ($categoryMemberNames);
-		
-	@categoryElementsArray = split(/\t/, $categoryMemberNames);
-	
-	#store the name of the current category into an array
-	$categoriesArray [$lineCounter1] = $categoryElementsArray[0];
-	
-	#store the name of the current category into a two-dimensional array
-	$categoryCountersTwoDimArray [$lineCounter1] [0] = $categoryElementsArray[0];
-		
-	#get the total number of elements in the current category
-	$totalMembersNumber = @categoryElementsArray;
-	
-	#store the names of categories and their corresponding elements into a hash
-	for ($memberNumber = 1; $memberNumber < $totalMembersNumber; $memberNumber++) {
-			
-		$categoryMembersHash{$categoryElementsArray[$memberNumber]} = $categoriesArray[$lineCounter1];
-	}
-	
-	$lineCounter1++;
-}
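-
-#e.g. a line "Insertion_Hotspots<tab>insertionHotspot1<tab>insertionHotspot2" maps both
-#elements to their category: $categoryMembersHash{insertionHotspot1} = "Insertion_Hotspots"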
-
-#store the second input file into an array
-my @elementsData = <INPUT2>;
-
-#reset the value of $lineCounter2 to 0 
-$lineCounter2 = 0;
-
-#iterate through the second input file in order to count the number of elements
-#in each category that satisfy each criterion	
-foreach $elementLine (@elementsData){
-	chomp ($elementLine);
-		
-	$lineCounter2++;
-	
-	@elementDataArray = split(/\t/, $elementLine);
-	
-	#if at the first line, get the total number of criteria and the total  
-	#number of categories and initialize the two-dimensional array
-	if ($lineCounter2 == 1){
-		@criteriaArray = @elementDataArray;	
-		$totalCriteriaNumber = @elementDataArray;
-		
-		$totalCategoriesNumber = @categoriesArray;
-		
-		#initialize the two-dimensional array
-		for ($row = 0; $row < $totalCategoriesNumber; $row++) {
-	
-			for ($column = 1; $column <= $totalCriteriaNumber; $column++) {
-				
-				$categoryCountersTwoDimArray [$row][$column] = 0;
-			}
-		}
-	}
-	else{
-		#get the element data
-		$elementName = $elementDataArray[0];
-		
-		#do the counting and store the result in the two-dimensional array
-		for ($criteriaNumber = 0; $criteriaNumber < $totalCriteriaNumber; $criteriaNumber++) {
-			
-			if ($elementDataArray[$criteriaNumber + 1] > 0){
-				
-				$categoryName = $categoryMembersHash{$elementName};
-				
-				my ($categoryIndex) = grep $categoriesArray[$_] eq $categoryName, 0 .. $#categoriesArray;
-				
-				$categoryCountersTwoDimArray [$categoryIndex] [$criteriaNumber + 1] = $categoryCountersTwoDimArray [$categoryIndex] [$criteriaNumber + 1] + $elementDataArray[$criteriaNumber + 1];
-			}
-		}
-	}
-}
-
-print OUTPUT "\t";
-
-#store the criteria names into the output file	
-for ($column = 1; $column <= $totalCriteriaNumber; $column++) {
-		
-	if ($column < $totalCriteriaNumber){
-		print OUTPUT $criteriaArray[$column - 1] . "\t";
-	}
-	else{
-		print OUTPUT $criteriaArray[$column - 1] . "\n";
-	}
-}
-	
-#store the category names and their corresponding number of elements satisfying criteria into the output file
-for ($row = 0; $row < $totalCategoriesNumber; $row++) {
-	
-	for ($column = 0; $column <= $totalCriteriaNumber; $column++) {
-		
-		if ($column < $totalCriteriaNumber){
-			print OUTPUT $categoryCountersTwoDimArray [$row][$column] . "\t";
-		}
-		else{
-			print OUTPUT $categoryCountersTwoDimArray [$row][$column] . "\n";
-		}
-	}
-}
-
-#close the input and output file
-close(OUTPUT);
-close(INPUT2);
-close(INPUT1);
-
--- a/tools/regVariation/categorize_elements_satisfying_criteria.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-<tool id="categorize_elements_satisfying_criteria" name="Categorize Elements" version="1.0.0">
-  <description>satisfying criteria</description>
-  
-  <command interpreter="perl">
-  	categorize_elements_satisfying_criteria.pl $inputFile1 $inputFile2 $outputFile1
-  </command>
-
-  <inputs>
-  	<param format="tabular" name="inputFile1" type="data" label="Select file containing categories and their elements"/>
-  	<param format="tabular" name="inputFile2" type="data" label="Select file containing criteria and elements data"/>
-  </inputs>
-  
-  <outputs>
-    <data format="tabular" name="outputFile1"/>
-  </outputs>
-
-  <tests>
-  	<test>
-  		<param name="inputFile1" value="categories.tabular" ftype="tabular" />
-  		<param name="inputFile2" value="criteria_elements_data.tabular" ftype="tabular" />
-    	<output name="outputFile1" file="categorized_elements.tabular" />
-  	</test>
-  </tests>
-  
-  	
-  <help> 
-
-.. class:: infomark
-
-**What it does**
-
-The program takes as input a set of categories, such that each category contains many elements. It also takes a table relating elements with criteria, such that each element is assigned a number representing the number of times the element satisfies a certain criterion. 
-
-- The first input is a TABULAR format file, such that the left column represents the names of categories and all other columns represent the names of elements in each category.
-- The second input is a TABULAR format file relating elements with criteria, such that the first line represents the names of criteria and the left column represents the names of elements.
-- The output is a TABULAR format file relating categories with criteria, such that each category is assigned a number representing the total number of times its elements satisfy a certain criterion. Each category is assigned as many numbers as there are criteria.
-
-
-**Example**
-
-Let the first input file be a group of motif categories as follows::
-
-	Deletion_Hotspots		deletionHotspot1		deletionHotspot2		deletionHotspot3
-	Dna_Pol_Pause_Frameshift	dnaPolPauseFrameshift1		dnaPolPauseFrameshift2		dnaPolPauseFrameshift3		dnaPolPauseFrameshift4
-	Indel_Hotspots			indelHotspot1
-	Insertion_Hotspots		insertionHotspot1		insertionHotspot2
-	Topoisomerase_Cleavage_Sites	topoisomeraseCleavageSite1	topoisomeraseCleavageSite2	topoisomeraseCleavageSite3
-
-
-And let the second input file represent the number of times each motif occurs in a certain window size of indel flanking regions, as follows::
-
-					10bp	20bp	40bp	
-	deletionHotspot1		1	1	2
-	deletionHotspot2		1	1	1
-	deletionHotspot3		0	0	0
-	dnaPolPauseFrameshift1		1	1	1
-	dnaPolPauseFrameshift2		0	2	1
-	dnaPolPauseFrameshift3		0	0	0
-	dnaPolPauseFrameshift4		0	1	2
-	indelHotspot1			0	0	0
-	insertionHotspot1		0	0	1
-	insertionHotspot2		1	1	1
-	topoisomeraseCleavageSite1	1	1	1
-	topoisomeraseCleavageSite2	1	2	1
-	topoisomeraseCleavageSite3	0	0	2
-
-Running the program will give the total number of times the motifs of each category occur in every window size of indel flanking regions::
-
-					10bp	20bp	40bp
-	Deletion_Hotspots		2	2	3
-	Dna_Pol_Pause_Frameshift	1	4	4
-	Indel_Hotspots			0	0	0
-	Insertion_Hotspots		1	1	2
-	Topoisomerase_Cleavage_Sites	2	3	4
-
-    </help> 
-    
-</tool>
--- a/tools/regVariation/compute_motif_frequencies_for_all_motifs.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,153 +0,0 @@
-#!/usr/bin/perl -w
-
-# a program to compute the frequencies of each motif within a user-determined number of windows, in both
-# upstream and downstream sequences flanking indels in all chromosomes.
-# the first input is a TABULAR format file containing the motif names and sequences, such that the file 
-# consists of two columns: the left column represents the motif names and the right column represents 
-# the motif sequence, one line per motif.
-# the second input is a TABULAR format file containing the windows into which both upstream and downstream 
-# sequences flanking indels have been divided.
-# the third input is an integer number representing the number of windows to be considered in both
-# upstream and downstream flanking sequences.
-# the output is a TABULAR format file consisting of three columns: the left column represents the motif 
-# name, the middle column represents the motif frequency in the window of the upstream sequence flanking 
-# an indel, and the right column represents the motif frequency in the window of the downstream
-# sequence flanking an indel, one line per indel.
-# The total number of lines in the output file = number of motifs x number of indels.
-
-use strict;
-use warnings;
-
-#variable to handle the window information
-my $window = "";
-my $windowNumber = 0;
-my $totalWindowsNumber = 0;
-my $upstreamAndDownstreamFlankingSequencesWindows = "";
-
-#variable to handle the motif information
-my $motif = "";
-my $motifName = "";
-my $motifSequence = "";
-my $motifNumber = 0;
-my $totalMotifsNumber = 0;
-my $upstreamMotifFrequencyCounter = 0;
-my $downstreamMotifFrequencyCounter = 0;
-
-#arrays to store window and motif data
-my @windowsArray = ();
-my @motifNamesArray = ();
-my @motifSequencesArray = ();
-
-#variable to handle the indel information
-my $indelIndex = 0;
-
-#variable to store line counter value
-my $lineCounter = 0;
-
-# check to make sure having correct files
-my $usage = "usage: compute_motif_frequencies_for_all_motifs.pl [TABULAR.in] [TABULAR.in] [windowSize] [TABULAR.out] \n";
-die $usage unless @ARGV == 4;
-
-#get the input arguments
-my $motifsInputFile = $ARGV[0];
-my $indelFlankingSequencesWindowsInputFile = $ARGV[1];
-my $numberOfConsideredWindows = $ARGV[2];
-my $motifFrequenciesOutputFile = $ARGV[3];
-
-#open the input files
-open (INPUT1, "<", $motifsInputFile) || die("Could not open file $motifsInputFile \n"); 
-open (INPUT2, "<", $indelFlankingSequencesWindowsInputFile) || die("Could not open file $indelFlankingSequencesWindowsInputFile \n");
-open (OUTPUT, ">", $motifFrequenciesOutputFile) || die("Could not open file $motifFrequenciesOutputFile \n");   
-
-#store the motifs input file in the array @motifsData
-my @motifsData = <INPUT1>;
-
-#iterate through the motifs (lines) of the motifs input file
-foreach $motif (@motifsData){
-	chomp ($motif);
-	#print ($motif . "\n");
-	
-	#split the motif data into its name and its sequence
-	my @motifNameAndSequenceArray = split(/\t/, $motif);
-	
-	#store the name of the motif into the array @motifNamesArray
-	push @motifNamesArray, $motifNameAndSequenceArray[0];
-	
-	#store the sequence of the motif into the array @motifSequencesArray
-	push @motifSequencesArray, $motifNameAndSequenceArray[1];
-}
-
-#compute the size of the motif names array 
-$totalMotifsNumber = @motifNamesArray;
-
-
-#store the windows input file (the windows of both upstream and downstream flanking sequences) in the array @windowsData
-my @windowsData = <INPUT2>;
-
-#check if the number of considered windows entered by the user is 0 or negative; if so, make it equal to 1
-if ($numberOfConsideredWindows <= 0){
-	$numberOfConsideredWindows = 1;
-}
-
-#iterate through the motif sequences to check their occurrences in the considered windows
-#and store the count of their occurrences in the corresponding output file
-for ($motifNumber = 0; $motifNumber < $totalMotifsNumber; $motifNumber++){
-	
-	#get the motif name
-	$motifName = $motifNamesArray[$motifNumber];
-	
-	#get the motif sequence
-    $motifSequence = $motifSequencesArray[$motifNumber];
-		        	
-	#iterate through the lines of the second input file. Each line represents
-	#the windows of the upstream and downstream flanking sequences of an indel
-	foreach $upstreamAndDownstreamFlankingSequencesWindows (@windowsData){
-		
-		chomp ($upstreamAndDownstreamFlankingSequencesWindows);
-		$lineCounter++;
-		
-		#split both upstream and downstream flanking sequences into their windows
-		my @windowsArray = split(/\t/, $upstreamAndDownstreamFlankingSequencesWindows);
-		
-		if ($lineCounter == 1){
-			$totalWindowsNumber = @windowsArray;
-			$indelIndex = ($totalWindowsNumber - 1)/2;		
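-			#e.g. a line with 13 columns (6 upstream windows, "indel", 6 downstream
-			#windows) gives $indelIndex = (13 - 1)/2 = 6, the 0-based index of "indel"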
-		}
-		
-		#reset the motif frequency counters
-		$upstreamMotifFrequencyCounter = 0;
-		$downstreamMotifFrequencyCounter = 0;
-		
-		#iterate through the considered windows of the upstream flanking sequence and increment the motif frequency counter
-		for ($windowNumber = $indelIndex - 1; $windowNumber > $indelIndex - $numberOfConsideredWindows - 1; $windowNumber--){
-			
-			#get the window
-			$window = $windowsArray[$windowNumber];
-			
-			#if the motif is found in the window, then increment its corresponding counter
-			if ($window =~ m/$motifSequence/i){
-	        	$upstreamMotifFrequencyCounter++;
-	        }  
-		}
-		
-		#iterate through the considered windows of the downstream flanking sequence and increment the motif frequency counter
-		for ($windowNumber = $indelIndex + 1; $windowNumber < $indelIndex + $numberOfConsideredWindows + 1; $windowNumber++){
-			
-			#get the window
-		    $window = $windowsArray[$windowNumber];
-		 
-		    #if the motif is found in the window, then increment its corresponding counter
-			if ($window =~ m/$motifSequence/i){
-	        	$downstreamMotifFrequencyCounter++;
-	        }  
-		}
-		
-		#store the result into the output file of the motif
-		print OUTPUT $motifName . "\t" . $upstreamMotifFrequencyCounter . "\t" . $downstreamMotifFrequencyCounter . "\n";
-	}
-}
-	
-#close the input and output files
-close(OUTPUT);
-close(INPUT2);
-close(INPUT1);
\ No newline at end of file
--- a/tools/regVariation/compute_motif_frequencies_for_all_motifs.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-<tool id="compute_motif_frequencies_for_all_motifs" name="Compute Motif Frequencies For All Motifs" version="1.0.0">
-  <description>motif by motif</description>
-  
-  <command interpreter="perl">
-  	compute_motif_frequencies_for_all_motifs.pl $inputFile1 $inputFile2 $inputWindowSize3 $outputFile1
-  </command>
-
-  <inputs>
-  	<param format="tabular" name="inputFile1" type="data" label="Select the motifs file"/>
-  	<param format="tabular" name="inputFile2" type="data" label="Select the indel flanking sequences windows file"/>
-    <param type="integer" name="inputWindowSize3" size="6" value="0" label="What is the number of 10bp windows in which the motif frequencies will be computed?" help="'0' = one window only"/>
-  </inputs>
-  
-  <outputs>
-    <data format="tabular" name="outputFile1"/>
-  </outputs>
-
-  <tests>
-  	<test>
-  		<param name="inputFile1" value="motifs2.tabular" />
-  		<param name="inputFile2" value="flankingSequencesWindows10_2.tabular" />
-    	<param name="inputWindowSize3" value="0" />
-    	<output name="outputFile1" file="motifFrequencies_every_indels0.tabular" />
-  	</test>
-  	
-  	<test>
-  		<param name="inputFile1" value="motifs2.tabular" />
-  		<param name="inputFile2" value="flankingSequencesWindows10_2.tabular" />
-    	<param name="inputWindowSize3" value="4" />
-    	<output name="outputFile1" file="motifFrequencies_every_indels4.tabular" /> 
-  	</test>
-  </tests>
-
-  <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This program computes the frequencies of each motif within a user-determined number of windows, in both upstream and downstream sequences flanking indels in all chromosomes.
-
-- The first input is a TABULAR format file containing the motif names and sequences, one line per motif, such that the file consists of two columns: 
-
- - The left column represents the motif names
- - The right column represents the motif sequence, as follows::
- 
- 	dnaPolPauseFrameshift1	GAG
-	dnaPolPauseFrameshift2	ACG
-	xSites1			CCG
-
-- The second input is a TABULAR format file representing the windows of both upstream  and downstream flanking sequences. It consists of multiple left columns representing the windows of the upstream flanking sequences, followed by one column representing the indels, then followed by multiple right columns representing the windows of the downstream flanking sequences, as follows::
-
-	cgaggtcagg	agatcgagac	catcctggct	aacatggtga	aatcccgtct	ctactaaaaa	indel	aaatttatat	ttataaacaa	ttttaataca	cctatgttta	ttatacattt
-	GCCAGTTTAT	GGTCTAACAA	GGAGAGAAAC	AGGGGGCTGA	AGGGGTTTCT	TAACCTCCAG	indel	TTCCGGGCTC	TGTCCCTAAC	CCCCAGCTAG	GTAAGTGGCA	AAGCACTTCT
-	CAGTGGGACC	AAGCACTGAA	CCACTTTGGG	GAGAATCTCA	CACTGGGGCC	CTCTGACACC	indel	tatatatttt	tttttttttt	tttttttttt	tttttttttg	agatggtgtc
-	AGAGCAGCAG	CACCCACTTT	TGCAGTGTGT	GACGTTGGTG	GAGCCATCGA	AGTCTGTGCT	indel	GAGCCCTCCC	CAGTGCTCCG	AGGAGCTGCT	GTTCCCCCTG	GAGCTCAGAA
-
-- The third input is an integer representing the number of windows to be considered, counting leftward from the indel for the upstream flanking sequence and rightward from the indel for the downstream flanking sequence.
-
-- The output is a TABULAR format file consisting of three columns: 
-
- - The left column represents the motif name
- - The middle column represents the motif frequency in the specified windows of the upstream sequence flanking an indel
- - The right column represents the motif frequency in the specified windows of the downstream sequence flanking an indel
- 
- There is one line per indel in the output file, such that the total number of lines in the output file = number of motifs x number of indels.
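-
- For example, with the 3 motifs and 4 indels shown above, the output file would contain 3 x 4 = 12 lines.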
-
-Note: The number of windows entered by the user must be a positive integer >= 1. If a negative integer or 0 is entered, the program will treat it as 1.
-	
-  </help>  
-  
-</tool>
--- a/tools/regVariation/compute_motifs_frequency.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,252 +0,0 @@
-#!/usr/bin/perl -w
-
-# a program to compute the frequency of each motif at each window in both upstream and downstream sequences flanking indels
-# in a chromosome/genome.
-# the first input is a TABULAR format file containing the motif names and sequences, such that the file consists of two
-# columns: the left column represents the motif names and the right column represents the motif sequence, one line per motif.
-# the second input is a TABULAR format file containing the upstream and downstream sequences flanking indels, one line per indel.
-# the third input is an integer number representing the window size according to which the upstream and downstream sequences
-# flanking each indel will be divided.
-# the first output is a TABULAR format file containing the windows into which both upstream and downstream sequences flanking 
-# indels are divided.
-# the second output is a TABULAR format file containing the motifs and their corresponding frequencies at each window in both 
-# upstream and downstream sequences flanking indels, one line per motif.
- 
-use strict;
-use warnings;
-
-#variable to handle the flanking sequences information
-my $sequence = "";
-my $upstreamFlankingSequence = "";
-my $downstreamFlankingSequence = "";
-my $discardedSequenceLength = 0;
-my $lengthOfDownstreamFlankingSequenceAfterTrimming = 0;
-
-#variable to handle the window information
-my $window = "";
-my $windowStartIndex = 0;
-my $windowNumber = 0;
-my $totalWindowsNumber = 0;
-my $totalNumberOfWindowsInUpstreamSequence = 0;
-my $totalNumberOfWindowsInDownstreamSequence = 0;
-my $totalWindowsNumberInBothFlankingSequences = 0;
-my $totalWindowsNumberInMotifCountersTwoDimArray = 0;
-my $upstreamAndDownstreamFlankingSequencesWindows = "";
-
-#variable to handle the motif information
-my $motif = "";
-my $motifSequence = "";
-my $motifNumber = 0;
-my $totalMotifsNumber = 0;
-
-#arrays to store window and motif data
-my @windowsArray = ();
-my @motifNamesArray = ();
-my @motifSequencesArray = ();
-my @motifCountersTwoDimArray = ();
-
-#variables to store line counter values
-my $lineCounter1 = 0;
-my $lineCounter2 = 0;
-
-# check to make sure having correct files
-my $usage = "usage: compute_motifs_frequency.pl [TABULAR.in] [TABULAR.in] [windowSize] [TABULAR.out] [TABULAR.out]\n";
-die $usage unless @ARGV == 5;
-
-#get the input and output arguments
-my $motifsInputFile = $ARGV[0];
-my $indelFlankingSequencesInputFile = $ARGV[1];
-my $windowSize = $ARGV[2];
-my $indelFlankingSequencesWindowsOutputFile = $ARGV[3];
-my $motifFrequenciesOutputFile = $ARGV[4];
-
-#open the input and output files
-open (INPUT1, "<", $motifsInputFile) || die("Could not open file $motifsInputFile \n"); 
-open (INPUT2, "<", $indelFlankingSequencesInputFile) || die("Could not open file $indelFlankingSequencesInputFile \n"); 
-open (OUTPUT1, ">", $indelFlankingSequencesWindowsOutputFile) || die("Could not open file $indelFlankingSequencesWindowsOutputFile \n");   
-open (OUTPUT2, ">", $motifFrequenciesOutputFile) || die("Could not open file $motifFrequenciesOutputFile \n"); 
-
-#store the motifs input file in the array @motifsData
-my @motifsData = <INPUT1>;
-
-#iterate through the motifs (lines) of the motifs input file
-foreach $motif (@motifsData){
-	chomp ($motif);
-	#print ($motif . "\n");
-	
-	#split the motif data into its name and its sequence
-	my @motifNameAndSequenceArray = split(/\t/, $motif);
-	
-	#store the name of the motif into the array @motifNamesArray
-	push @motifNamesArray, $motifNameAndSequenceArray[0];
-	
-	#store the sequence of the motif into the array @motifSequencesArray
-	push @motifSequencesArray, $motifNameAndSequenceArray[1];
-}
-
-#compute the size of the motif names array 
-$totalMotifsNumber = @motifNamesArray;
-
-#store the input file in the array @sequencesData
-my @sequencesData = <INPUT2>;
-
-#iterate through the sequences of the second input file in order to create the windows file
-foreach $sequence (@sequencesData){
-	chomp ($sequence);
-	$lineCounter1++;
-	
-	my @indelAndSequenceArray = split(/\t/, $sequence);
-	
-	#get the upstream flanking sequence
-	$upstreamFlankingSequence = $indelAndSequenceArray[3];
-	
-	#if the window size is 0, then the whole upstream will be one window only
-	if ($windowSize == 0){
-		$totalNumberOfWindowsInUpstreamSequence = 1;
-		$windowSize = length ($upstreamFlankingSequence);
-	}
-	else{
-		#compute the total number of windows into which the upstream flanking sequence will be divided
-		$totalNumberOfWindowsInUpstreamSequence = length ($upstreamFlankingSequence) / $windowSize;
-		
-		#compute the length of the subsequence to be discarded from the upstream flanking sequence if any
-		$discardedSequenceLength = length ($upstreamFlankingSequence) % $windowSize;
-		
-		#check if the sequence could be split into windows of equal sizes
-	    if ($discardedSequenceLength != 0) {
-	    	#trim the upstream flanking sequence
-			$upstreamFlankingSequence = substr($upstreamFlankingSequence, $discardedSequenceLength); 
-		}
-	}
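-
-	#e.g. (hypothetical): a 25 bp upstream sequence with a 10 bp window keeps the
-	#rightmost 20 bp (2 windows); the leftmost 5 bp are discarded so the windows
-	#stay anchored at the indel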
-		
-	#split the upstream flanking sequence into windows
-	for ($windowNumber = 0; $windowNumber < $totalNumberOfWindowsInUpstreamSequence; $windowNumber++){
-		$windowStartIndex = $windowNumber * $windowSize;
-		print OUTPUT1 (substr($upstreamFlankingSequence, $windowStartIndex, $windowSize) . "\t");
-	}
-	
-	#add a column representing the indel
-	print OUTPUT1 ("indel" . "\t");
-	
-	#get the downstream flanking sequence
-	$downstreamFlankingSequence = $indelAndSequenceArray[4];
-	
-	#if the window size is 0, then the whole downstream will be one window only
-	if ($windowSize == 0){
-		$totalNumberOfWindowsInDownstreamSequence = 1;
-		$windowSize = length ($downstreamFlankingSequence);
-	}
-	else{
-		#compute the total number of windows into which the downstream flanking sequence will be divided
-		$totalNumberOfWindowsInDownstreamSequence = length ($downstreamFlankingSequence) / $windowSize;
-		
-		#compute the length of the subsequence to be discarded from the downstream flanking sequence if any
-		$discardedSequenceLength = length ($downstreamFlankingSequence) % $windowSize;
-		
-		#check if the sequence could be split into windows of equal sizes
-	    if ($discardedSequenceLength != 0) {
-	    	#compute the length of the downstream flanking sequence after trimming
-	    	$lengthOfDownstreamFlankingSequenceAfterTrimming = length ($downstreamFlankingSequence) - $discardedSequenceLength;
-	    	
-	    	#trim the downstream flanking sequence
-			$downstreamFlankingSequence = substr($downstreamFlankingSequence, 0, $lengthOfDownstreamFlankingSequenceAfterTrimming); 
-		}
-	}
-	
-	#split the downstream flanking sequence into windows
-	for ($windowNumber = 0; $windowNumber < $totalNumberOfWindowsInDownstreamSequence; $windowNumber++){
-		$windowStartIndex = $windowNumber * $windowSize;
-		print OUTPUT1 (substr($downstreamFlankingSequence, $windowStartIndex, $windowSize) . "\t");
-	}
-	
-	print OUTPUT1 ("\n");
-}
-
-#compute the total number of windows on both upstream and downstream sequences flanking the indel
-$totalWindowsNumberInBothFlankingSequences = $totalNumberOfWindowsInUpstreamSequence + $totalNumberOfWindowsInDownstreamSequence;
-
-#add an additional cell to store the name of the motif and another one for the indel itself
-$totalWindowsNumberInMotifCountersTwoDimArray = $totalWindowsNumberInBothFlankingSequences + 1 + 1;
-
-#initialize the two dimensional array $motifCountersTwoDimArray. the first column will be initialized with motif names
-for ($motifNumber = 0; $motifNumber < $totalMotifsNumber; $motifNumber++){
-	
-	for ($windowNumber = 0; $windowNumber < $totalWindowsNumberInMotifCountersTwoDimArray; $windowNumber++){
-		
-		if ($windowNumber == 0){
-			$motifCountersTwoDimArray [$motifNumber] [0] = $motifNamesArray[$motifNumber];
-		}
-		elsif ($windowNumber == $totalNumberOfWindowsInUpstreamSequence + 1){
-			$motifCountersTwoDimArray [$motifNumber] [$windowNumber] = "indel";
-		}
-		else{
-			$motifCountersTwoDimArray [$motifNumber] [$windowNumber] = 0;
-		}
-	}
-}
-
-close(OUTPUT1);
-
-#open the file that contains the windows of the upstream and downstream flanking sequences, which is actually the first output file
-open (INPUT3, "<", $indelFlankingSequencesWindowsOutputFile) || die("Could not open file $indelFlankingSequencesWindowsOutputFile \n");   
-
-#store the first output file containing the windows of both upstream and downstream flanking sequences in the array @windowsData
-my @windowsData = <INPUT3>;
-
-#iterate through the lines of the first output file. Each line represents   
-#the windows of the upstream and downstream flanking sequences of an indel
-foreach $upstreamAndDownstreamFlankingSequencesWindows (@windowsData){
-	
-	chomp ($upstreamAndDownstreamFlankingSequencesWindows);
-	$lineCounter2++;
-	
-	#split both upstream and downstream flanking sequences into their windows
-	my @windowsArray = split(/\t/, $upstreamAndDownstreamFlankingSequencesWindows);
-	
-	$totalWindowsNumber = @windowsArray;
-	
-	#iterate through the windows to search for matched motifs and increment their corresponding counters accordingly
-	WINDOWS:
-	for ($windowNumber = 0; $windowNumber < $totalWindowsNumber; $windowNumber++){
-		
-		#get the window
-		$window = $windowsArray[$windowNumber];
-		
-        #if the window is the one that contains the indel, then skip the indel window
-        if ($window eq "indel") {  
-        	next WINDOWS;	
-        }
-        else{  #iterate through the motif sequences to check their occurrences in the window 
-               #and increment their corresponding counters accordingly
-	        
-	        for ($motifNumber = 0; $motifNumber < $totalMotifsNumber; $motifNumber++){
-	        	#get the motif sequence
-	        	$motifSequence = $motifSequencesArray[$motifNumber];
-	        	
-	        	#if the motif is found in the window, then increment its corresponding counter
-	        	if ($window =~ m/$motifSequence/i){
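-	        		#the counter is incremented at most once per window per indel line,
-	        		#and the index is shifted by 1 because column 0 holds the motif name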
-	        		$motifCountersTwoDimArray [$motifNumber] [$windowNumber + 1]++;
-	        	}  
-	        }
-        }
-	}
-}
-
-#store the motif counters values in the second output file
-for ($motifNumber = 0; $motifNumber < $totalMotifsNumber; $motifNumber++){
-	
-	for ($windowNumber = 0; $windowNumber <= $totalWindowsNumber; $windowNumber++){
-		
-		print OUTPUT2 $motifCountersTwoDimArray [$motifNumber] [$windowNumber] . "\t";
-		#print ($motifCountersTwoDimArray [$motifNumber] [$windowNumber] . " ");
-	}
-	print OUTPUT2 "\n";
-	#print ("\n");
-}
-		
-#close the input and output files
-close(OUTPUT2);
-close(OUTPUT1);
-close(INPUT3);
-close(INPUT2);
-close(INPUT1);
\ No newline at end of file
--- a/tools/regVariation/compute_motifs_frequency.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,109 +0,0 @@
-<tool id="compute_motifs_frequency" name="Compute Motif Frequencies" version="1.0.0">
-  <description>in indel flanking regions</description>
-  
-  
-  <command interpreter="perl">
-    compute_motifs_frequency.pl $inputFile1 $inputFile2 $inputNumber3 $outputFile1 $outputFile2
-  </command>
-  
-  
-  <inputs>
- 
-    <param format="tabular" name="inputFile1" type="data" label="Select motifs file"/>
-
-    <param format="tabular" name="inputFile2" type="data" label="Select indel flanking regions file from your history"/>
-      
-    <param type="integer" name="inputNumber3" size="5" value="0" label="What is the size of each window?" help="'0' = all the upstream flanking sequence will be one window only, and the same for the downstream flanking sequence."/>
-        
-  </inputs>
-  
-  
-  <outputs>
-    <data format="tabular" name="outputFile1"/>
-    <data format="tabular" name="outputFile2"/>
-  </outputs>
-  
-  <tests>
-  	<test>
-  		<param name="inputFile1" value="motifs1.tabular" />
-  		<param name="inputFile2" value="indelsFlankingSequences1.tabular" />
-    	<param name="inputNumber3" value="0" />
-    	<output name="outputFile1" file="flankingSequencesWindows0.tabular" />
-    	<output name="outputFile2" file="motifFrequencies0.tabular" />    
-  	</test>
-  	
-  	<test>
-  		<param name="inputFile1" value="motifs1.tabular" />
-  		<param name="inputFile2" value="indelsFlankingSequences1.tabular" />
-    	<param name="inputNumber3" value="10" />
-    	<output name="outputFile1" file="flankingSequencesWindows10.tabular" /> 
-    	<output name="outputFile2" file="motifFrequencies10.tabular" />    
-  	</test>
-  </tests>
-
-   
-   <help>
-
-.. class:: infomark
-
-**What it does**
-
-This program computes the frequency of motifs in the flanking regions of indels found in a chromosome or a genome.
-Each indel has an upstream flanking sequence and a downstream one. Each of the upstream and downstream flanking 
-sequences is divided into a number of windows according to the window size chosen by the user. 
-The frequency of a motif in a certain window of one of the two flanking sequences is the total sum of occurrences of 
-that motif in that window of that flanking sequence over all indels. The indel flanking regions file can be taken
-from your history or uploaded, whereas the motifs file must be uploaded.
-
-- The first input file is the motifs file and it is a tabular file consisting of two columns:
-
- - the first column represents the motif name
- - the second column represents the motif sequence, as follows::
-  
-	dnaPolPauseFrameshift1	GAG
-	dnaPolPauseFrameshift2	ACG
-	xSites1			CCG
-
-- The second input file is the indels flanking regions file and it is a tabular file consisting of five columns:
-
- - the first column represents the indel start coordinate
- - the second column represents the indel end coordinate
- - the third column represents the indel length
- - the fourth column represents the upstream flanking sequence
- - the fifth column represents the downstream flanking sequence, as follows::
-  
-  	16694766   16694768   3   GTGGGTCCTGCCCAGCCTCTGCCTCAGAGGGAAGAGTAGAGAACTGGG   AGAGCAGGTCCTTAGGGAGCCCGAGGAAGTCCCTGACGCCAGCTGTTCTCGCGGACGAA
-	25169542   25169545   4   caagcccacaagccttcagaccatagcaCGGGCTCCAGAGGTGTGAGG   CAGGTCAGGTGCTTTAGAAGTCAAAAACTCTCAGTAAGGCAAATCACCCCCTATCTCCT
-	41929580   41929585   6   ggctgtcgtatggaatctggggctcaggactctgtcccatttctctaa   accattctgcTTCAACCCAGACACTGACTGTTTTCCAAATTTACTTGTTTGTTTGTTTT
-
-
------
-
-.. class:: warningmark
-
-**Notes**
-
-- The lengths of the upstream flanking sequences must be equal for all indels.
-- The lengths of the downstream flanking sequences must be equal for all indels.
-- If the length L of the upstream flanking sequence is not an integer multiple of the window size S, that is, L/S = m with remainder r > 0, then the upstream flanking sequence will be divided into m windows only, counted from the indel, and the remaining r bases at the far end will be discarded. The same rule applies to the downstream flanking sequence (see the sketch below). 
-
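-The trimming-and-windowing rule can be sketched in Python as follows (a standalone illustration with a hypothetical helper name, not code shipped with the tool)::
-
-	def split_into_windows(seq, size, indel_side):
-	    # size 0 means the whole sequence is a single window
-	    if size == 0:
-	        return [seq]
-	    r = len(seq) % size
-	    if r:
-	        # keep the end nearest the indel: upstream sequences ("tail") are
-	        # trimmed at the front, downstream sequences ("head") at the back
-	        seq = seq[r:] if indel_side == "tail" else seq[:len(seq) - r]
-	    return [seq[i:i + size] for i in range(0, len(seq), size)]
-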
------
-
-The **output** of this program is two files:
-
-- The first output file is a tabular file and represents the windows of both upstream  and downstream flanking sequences. It consists of multiple left columns representing the windows of the upstream flanking sequence, followed by one column representing the indels, then followed by multiple right columns representing the windows of the downstream flanking sequence, as follows::
-
-	cgaggtcagg	agatcgagac	catcctggct	aacatggtga	aatcccgtct	ctactaaaaa	indel	aaatttatat	ttataaacaa	ttttaataca	cctatgttta	ttatacattt
-	GCCAGTTTAT	GGTCTAACAA	GGAGAGAAAC	AGGGGGCTGA	AGGGGTTTCT	TAACCTCCAG	indel	TTCCGGGCTC	TGTCCCTAAC	CCCCAGCTAG	GTAAGTGGCA	AAGCACTTCT
-	CAGTGGGACC	AAGCACTGAA	CCACTTTGGG	GAGAATCTCA	CACTGGGGCC	CTCTGACACC	indel	tatatatttt	tttttttttt	tttttttttt	tttttttttg	agatggtgtc
-	AGAGCAGCAG	CACCCACTTT	TGCAGTGTGT	GACGTTGGTG	GAGCCATCGA	AGTCTGTGCT	indel	GAGCCCTCCC	CAGTGCTCCG	AGGAGCTGCT	GTTCCCCCTG	GAGCTCAGAA
-
-- The second output file is a tabular file and represents the motif frequencies in every window of every flanking sequence. The first column on the left represents the names of motifs. The other columns represent the frequencies of motifs in the windows that correspond to the ones in the first output file, as follows::
-
-	dnaPolPauseFrameshift1	2	3	1	0	1	2	indel	0	2	2	1	3
-	dnaPolPauseFrameshift2	2	3	1	0	1	2	indel	0	2	2	1	3
-	xSites1			3	2	0	1	1	2	indel	1	1	3	2	3
-	
-  </help>
-   
-</tool>
--- a/tools/regVariation/compute_q_values.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,95 +0,0 @@
-# A program to compute the q-values based on the p-values of multiple simultaneous tests. 
-# The q-values are computed using the R package "qvalue", created by John Storey.
-# The input is a TABULAR format file consisting of one column only that represents the p-values 
-# of multiple simultaneous tests, one line for every p-value. 
-# The first output is a TABULAR format file reporting the p-values, their corresponding q-values, 
-# and their significance calls, one line for every test. 
-# The second output is a PDF file consisting of three pages: the first page shows 
-# the p-values histogram, the second page shows the q-values histogram, and the third page shows 
-# the four Q-plots as introduced in the "qvalue" package manual.
-
-use strict;
-use warnings;
-use IO::Handle;
-use File::Temp qw/ tempfile tempdir /;
-my $tdir = tempdir( CLEANUP => 0 );
-
-# check that the correct input and output arguments were supplied
-my $usage = "usage: compute_q_values.pl [TABULAR.in] [lambda] [pi0_method] [fdr_level] [robust] [TABULAR.out] [PDF.out] \n";
-die $usage unless @ARGV == 7;
-
-#get the input arguments
-my $p_valuesInputFile = $ARGV[0];
-my $lambdaValue =  $ARGV[1];
-my $pi0_method =  $ARGV[2];
-my $fdr_level =  $ARGV[3];
-my $robustValue =  $ARGV[4];
-my $q_valuesOutputFile = $ARGV[5];
-my $p_q_values_histograms_QPlotsFile = $ARGV[6];
-
-if($lambdaValue =~ /sequence/){
-	$lambdaValue = "seq(0, 0.95, 0.05)";
-}
-
-#open the input files
-open (INPUT, "<", $p_valuesInputFile) || die("Could not open file $p_valuesInputFile \n");
-open (OUTPUT1, ">", $q_valuesOutputFile) || die("Could not open file $q_valuesOutputFile \n");
-open (OUTPUT2, ">", $p_q_values_histograms_QPlotsFile) || die("Could not open file $p_q_values_histograms_QPlotsFile \n");
-#open (ERROR,  ">", "error.txt")  or die ("Could not open file error.txt \n");
-
-#save all error messages into the error file $errorFile using the error file handle ERROR
-#STDERR -> fdopen( \*ERROR,  "w" ) or die ("Could not direct errors to the error file error.txt \n");
-
-#warn "Hello Error File \n";
-
-#variable to store the name of the R script file
-my $r_script;
-
-# R script to implement the calculation of q-values based on the p-values of multiple simultaneous tests 	
-# construct an R script file and save it in a temp directory
-chdir $tdir;
-$r_script = "q_values_computation.r";
-
-open(Rcmd,">", $r_script) or die "Cannot open $r_script \n\n"; 
-print Rcmd "
-	#options(show.error.messages = FALSE);
-	
-	#load necessary packages
-	suppressPackageStartupMessages(library(tcltk));
-	library(qvalue);
-	
-	#read the p-values of the multiple simultaneous tests from the input file $p_valuesInputFile
-	p <- scan(\"$p_valuesInputFile\", quiet = TRUE);
-	
-	#compute the q-values that correspond to the p-values of the multiple simultaneous tests
-	qobj <- qvalue(p, pi0.meth = \"$pi0_method\", lambda = $lambdaValue, fdr.level = $fdr_level, robust = $robustValue);
-	#qobj <- qvalue(p, pi0.meth = \"smoother\", lambda = seq(0, 0.95, 0.05), fdr.level = 0.05);
-	#qobj <- qvalue(p, pi0.meth = \"bootstrap\", fdr.level = 0.05);
-	
-	#draw the p-values histogram, the q-values histogram, and the four Q-plots 
-	# and save them on multiple pages of the output file $p_q_values_histograms_QPlotsFile
-	pdf(file = \"$p_q_values_histograms_QPlotsFile\", width = 6.25, height = 6, family = \"Times\", pointsize = 12, onefile = TRUE)
-	hist(qobj\$pvalues);
-	#dev.off();
-	
-	hist(qobj\$qvalues);
-	#dev.off(); 
-	
-	qplot(qobj);  
-	dev.off();
-	
-	#save the q-values in the output file $q_valuesOutputFile
-	qobj\$pi0 <- signif(qobj\$pi0,digits=6)
-	qwrite(qobj, filename=\"$q_valuesOutputFile\"); 
-
-	#options(show.error.messages = TRUE);
-	#eof\n";
-close Rcmd;	
-
-system("R --no-restore --no-save --no-readline < $r_script > $r_script.out");
-
-#close the input and output and error files
-#close(ERROR);
-close(OUTPUT2);
-close(OUTPUT1);
-close(INPUT);
--- a/tools/regVariation/compute_q_values.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,155 +0,0 @@
-<tool id="compute_q_values" name="Compute q-values" version="1.0.1">
-  <description>based on multiple simultaneous tests p-values</description>
-  
-  <command interpreter="perl">
-  	compute_q_values.pl $inputFile1 $inputLambda2 $inputPI0_method3 $inputFDR_level4 $inputRobust5 $outputFile1 $outputFile2
-  </command>
-
-  <inputs>
-  	<param format="tabular" name="inputFile1" type="data" label="Select the p-values file"/>
-  	
-  	<param type="text" name="inputLambda2" size="100" value="sequence_from_0_to_0.95_increment_0.05" label="What is the lambda value?" help="Either choose the default sequence or one deciaml value between 0 and 1"/>
-  	
-  	<param name="inputPI0_method3" type="select" label="Choose the PI method:">
-    	<option value="smoother">smoother</option>
-      	<option value="bootstrap">bootstrap</option>
-    </param>
-    
-    <param type="float" name="inputFDR_level4" size="5" value="" label="What is the FDR level?" help="The FDR level must be between 0 and 1"/>
-    
-    <param name="inputRobust5" type="select" label="Do you want to make the estimate more robust:" help="Choose TRUE for small p-values">
-  		<option value="FALSE">FALSE</option>
-    	<option value="TRUE">TRUE</option>
-    </param>
-  </inputs>
-  
-  <outputs>
-    <data format="tabular" name="outputFile1"/>
-    <data format="pdf" name="outputFile2"/>
-  </outputs>
-
-  <tests>
-  	<test>
-  		<param name="inputFile1" value="p_values.tabular" ftype="tabular" />
-    	<param name="inputLambda2" value="sequence_from_0_to_0.95_increment_0.05" />
-    	<param name="inputPI0_method3" value="smoother" />
-    	<param name="inputFDR_level4" value="0.05" />
-    	<param name="inputRobust5" value="FALSE" />
-    	<output name="outputFile1" file="q_values.tabular" />
-    	<output name="outputFile1" file="p_q_hists_Q_plots.pdf" />
-  		</test>
-  </tests>
-  	
-  <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This program computes the q-values based on the p-values of multiple simultaneous tests. The q-values are computed using the R package "qvalue", created by John Storey and Alan Dabney (a minimal sketch of the computation appears below). The program takes five inputs:
- 
-- The first input is a TABULAR format file consisting of one column only that represents the p-values of multiple simultaneous tests, one line for every p-value. 
-- The second input is the lambda parameter. The user can choose either the default: seq(0, 0.95, 0.05) or a decimal number between 0.0 and 1.0.
-- The third input is the pi0 estimation method, which is either "smoother" or "bootstrap".
-- The fourth input is the FDR (false discovery rate) level which is a decimal number between 0.0 and 1.0.
-- The fifth input is either TRUE or FALSE for the estimate robustness. 
-
-The program gives two outputs:
-
-- The first output is a TABULAR format file consisting of three columns: 
-
- - the left column represents the p-values of multiple simultaneous tests, one line for every p-value
- - the middle column represents the q-values corresponding to the p-values
- - the third column represents the significance values, either 1 for significant or 0 for non-significant 
-
-- The second output is a PDF format file consisting of three pages: 
-
- - the first page represents the p-values histogram
- - the second page represents the q-values histogram
- - the third page represents the four Q-plots as introduced in the "qvalue" package manual.
-  
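-For intuition, the core of a Storey-style q-value computation with a single fixed lambda can be sketched in a few lines of NumPy (function name and details are illustrative; the "qvalue" package adds the smoother and bootstrap pi0 estimators used by this tool)::
-
-	import numpy as np
-
-	def qvalues(p, lam=0.5):
-	    p = np.asarray(p, dtype=float)
-	    m = p.size
-	    # estimated proportion of true nulls from the p-values above lambda
-	    # (a sketch: no guard against pi0 == 0, unlike the real package)
-	    pi0 = min(1.0, np.mean(p > lam) / (1.0 - lam))
-	    order = np.argsort(p)
-	    q = pi0 * m * p[order] / np.arange(1, m + 1)
-	    q = np.minimum.accumulate(q[::-1])[::-1]   # enforce monotonicity
-	    out = np.empty(m)
-	    out[order] = np.minimum(q, 1.0)
-	    return out
-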
-
-**Example**
-
-Let us have the first input file of p-values as follows::
-
-	0.140627492
-	0.432249886
-	0.122120877
-	0.142010182
-	0.012909858
-	0.000142807
-	0.039841941
-	0.035173303
-	0.011340057
-	1.01E-05
-	0.212738282
-	0.091256284
-	0.547375415
-	0.189589833
-	6.18E-12
-	0.001235875
-	1.10E-05
-	9.75E-07
-	2.13E-18
-	2.54E-16
-	1.20E-19
-	9.76E-14
-	0.359181534
-	0.03661672
-	0.400459987
-	0.387436466
-	0.342075061
-	0.904129283
-	0.031152635
-
-Running the program will give the following output::
-
-	pi0: 0.140311054	
-
-	FDR level: 0.05
-
-	p-value		q-value		significant
-	0.1406275	0.02889212	1
-	0.4322499	0.06514199	0
-	0.1221209	0.02760624	1
-	0.1420102	0.02889212	1
-	0.01290986	0.00437754	1
-	0.000142807	6.46E-05	1
-	0.03984194	0.01013235	1
-	0.0351733	0.009932946	1
-	0.01134006	0.004194811	1
-	1.01E-05	5.59E-06	1
-	0.2127383	0.03934711	1
-	0.09125628	0.02184257	1
-	0.5473754	0.07954578	0
-	0.1895898	0.03673547	1
-	6.18E-12	5.03E-12	1
-	0.001235875	0.00050288	1
-	1.10E-05	5.59E-06	1
-	9.75E-07	6.61E-07	1
-	2.13E-18	4.33E-18	1
-	2.54E-16	3.45E-16	1
-	1.20E-19	4.88E-19	1
-	9.76E-14	9.93E-14	1
-	0.3591815	0.06089654	0
-	0.03661672	0.009932946	1
-	0.40046	0.0626723	0
-	0.3874365	0.0626723	0
-	0.3420751	0.06051785	0
-	0.9041293	0.1268593	0
-	0.03115264	0.009750824	1
-	
-	
-.. image:: ./static/operation_icons/p_hist.png
-
-
-.. image:: ./static/operation_icons/q_hist.png
-
-
-.. image:: ./static/operation_icons/Q_plots.png
-
-
-  </help>  
-  
-</tool>
--- a/tools/regVariation/delete_overlapping_indels.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-#!/usr/bin/perl -w
-
-# This program detects overlapping indels in a chromosome and keeps all non-overlapping indels. As for overlapping indels, 
-# the first encountered one is kept and all others are removed. It requires three inputs: 
-# The first input is a TABULAR format file containing coordinates of indels in blocks extracted from multi-alignment.
-# The second input is an integer number representing the number of the column where indel start coordinates are stored in the input file.
-# The third input is an integer number representing the number of the column where indel end coordinates are stored in the input file.
-# The output is a TABULAR format file containing all non-overlapping indels in the input file, and the first encountered indel of overlapping ones.
-# Note: The number of the first column is 1.
- 
-use strict;
-use warnings;
-
-#variables to handle information related to indels
-my $indel1 = "";
-my $indel2 = "";
-my @indelArray1 = ();
-my @indelArray2 = ();
-my $lineCounter1 = 0;
-my $lineCounter2 = 0;
-my $totalNumberofNonOverlappingIndels = 0;
-
-# check that the correct number of arguments was supplied
-my $usage = "usage: delete_overlapping_indels.pl [TABULAR.in] [indelStartColumn] [indelEndColumn] [TABULAR.out]\n";
-die $usage unless @ARGV == 4;
-
-my $inputFile = $ARGV[0];
-my $indelStartColumn = $ARGV[1] - 1;
-my $indelEndColumn = $ARGV[2] - 1;
-my $outputFile = $ARGV[3];
-
-#verify column numbers
-if ($indelStartColumn < 0 ){
-	die ("The indel start column number is invalid \n"); 
-}
-if ($indelEndColumn < 0 ){
-	die ("The indel end column number is invalid \n"); 
-}
-
-#open the input and output files
-open (INPUT, "<", $inputFile) || die ("Could not open file $inputFile \n"); 
-open (OUTPUT, ">", $outputFile) || die ("Could not open file $outputFile \n"); 
-
-#store the input file in the array @indelsRawData
-my @indelsRawData = <INPUT>;
-
-#iterate through the indels of the input file
-INDEL1:
-foreach $indel1 (@indelsRawData){
-	chomp ($indel1);
-	$lineCounter1++;
-	
-	#get the first indel
-	@indelArray1 = split(/\t/, $indel1);
-	 
-	#our purpose is to detect overlapping indels and to store only the first copy of each in the output file
-	#all non-overlapping indels will be stored in the output file as well
-			 
-	$lineCounter2 = 0;
-		 
-	#iterate through the indels of the input file
-	INDEL2:
-	foreach $indel2 (@indelsRawData){
-		chomp ($indel2);
-		$lineCounter2++;
-				
-		#only compare against indels that appear earlier in the file, so that the
-		#first encountered indel of an overlapping group is the one that is kept
-		if ($lineCounter2 < $lineCounter1){
-			#get the second indel
-			@indelArray2 = split(/\t/, $indel2);
-		 				
- 			#check if the two indels are overlapping
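- 			#closed intervals [s1,e1] and [s2,e2] overlap iff s2 <= e1 and e2 >= s1;
- 			#this form also catches the case where one indel fully contains the other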
- 			if ($indelArray2[$indelStartColumn] <= $indelArray1[$indelEndColumn] && $indelArray2[$indelEndColumn] >= $indelArray1[$indelStartColumn]){
- 				#print ("There is an overlap between" . "\n" . $indel1 . "\n" . $indel2 . "\n");
- 				#print("The two overlapping indels are located at the lines: " . $lineCounter1 . " " . $lineCounter2 . "\n\n");
- 				
- 				#break out of the loop and go back to the outer loop
- 				next INDEL1;
- 			}
- 			else{
- 				#print("The two non-overlapping indels are located at the lines: " . $lineCounter1 . " " . $lineCounter2 . "\n");
- 			}
-		}
-	}
-		 
-	print OUTPUT $indel1 . "\n";
-	$totalNumberofNonOverlappingIndels++;
-}
-
-#print("The total number of indels is: " . $lineCounter1 . "\n");
-#print("The total number of non-overlapping indels is: " . $totalNumberofNonOverlappingIndels . "\n");
-
-#close the input and output files
-close(OUTPUT);
-close(INPUT);
\ No newline at end of file
--- a/tools/regVariation/delete_overlapping_indels.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-<tool id="delete_overlapping_indels" name="Delete Overlapping Indels" version="1.0.0">
-  <description>from a chromosome indels file</description>
-  
-  <command interpreter="perl">
-  	delete_overlapping_indels.pl $inputFile1 $inputIndelStartColumnNumber2 $inputIndelEndColumnNumber3 $outputFile1
-  </command>
-
-  <inputs>
-  	<param format="tabular" name="inputFile1" type="data" label="Select indels file"/>
-  	<param type="data_column" name="inputIndelStartColumnNumber2" data_ref="inputFile1" accept_default="true" label="Choose the indel start coordinate column number" />
-    <param type="data_column" name="inputIndelEndColumnNumber3" data_ref="inputFile1" accept_default="true" label="Choose the the indel end coordinate column number" />
-  </inputs>
-  
-  <outputs>
-    <data format="tabular" name="outputFile1"/>
-  </outputs>
-  
-  <tests>
-  	<test>
-  		<param name="inputFile1" value="indels1.tabular" />
-    	<param name="inputIndelStartColumnNumber2" value="5" />
-    	<param name="inputIndelEndColumnNumber3" value="6" />
-    	<output name="outputFile1" file="non_overlapping_indels1.tabular" />     
-  	</test>
-  </tests>
-  
-  <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This program detects overlapping indels in a chromosome and keeps all non-overlapping indels. As for overlapping indels, the first encountered one is kept and all others are removed. 
-It requires three inputs: 
-
-- The first input is a TABULAR format file containing coordinates of indels in blocks extracted from multi-alignment.
-- The second input is an integer number representing the number of the column where indel start coordinates are stored in the input file.
-- The third input is an integer number representing the number of the column where indel end coordinates are stored in the input file.
-- The output is a TABULAR format file containing all non-overlapping indels in the input file, and the first encountered indel of overlapping ones.
-
-Note: The number of the first column is 1.
-
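-A minimal Python sketch of this rule (illustrative only; the shipped implementation is the Perl script)::
-
-	def drop_overlapping(rows, start_col, end_col):
-	    kept = []
-	    for i, row in enumerate(rows):
-	        s1, e1 = int(row[start_col]), int(row[end_col])
-	        # keep a row only if it overlaps no earlier row (closed intervals
-	        # [s1,e1] and [s2,e2] overlap iff s1 <= e2 and s2 <= e1)
-	        if all(e1 < int(r[start_col]) or int(r[end_col]) < s1 for r in rows[:i]):
-	            kept.append(row)
-	    return kept
-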
-
-**Example**
-
-Let us have the following insertions in the human genome. The start and end coordinates of insertions are on columns 5 and 6 respectively::
-
-	3	hg18.chr22_insert	3	hg18.chr22	14508610	14508612	3924	-	panTro2.chr2b	132518950	132518951	3910	+	rheMac2.chr17	14311798	14311799	3896	+
-	7	hg18.chr22_insert	13	hg18.chr22	14513678	14513690	348	-	panTro2.chr2b	132517876	132517877	321	+	rheMac2.chr17	14274462	14274463	337	+
-	7	hg18.chr22_insert	6	hg18.chr22	14513688	14513699	348	-	panTro2.chr2b	132517879	132517880	321	+	rheMac2.chr17	14274465	14274466	337	+
-	25	hg18.chr22_insert	9	hg18.chr22	14529501	14529509	385	-	panTro2.chr22	14528775	14528776	376	-	rheMac2.chr9	42869449	42869450	375	-
-	36	hg18.chr22_insert	4	hg18.chr22	14566316	14566319	540	-	panTro2.chr2b	132492077	132492078	533	+	rheMac2.chr10	59230438	59230439	533	-
-	40	hg18.chr22_insert	7	hg18.chr22	14508610	14508616	2337	-	panTro2.chr2b	132487750	132487751	2313	+	rheMac2.chr10	59128305	59128306	2332	+
-	41	hg18.chr22_insert	4	hg18.chr22	14571556	14571559	2483	-	panTro2.chr2b	132485878	132485879	2481	+	rheMac2.chr10	59126094	59126095	2508	+
-
-After removing the overlapping indels, we get::
-
-	3	hg18.chr22_insert	3	hg18.chr22	14508610	14508612	3924	-	panTro2.chr2b	132518950	132518951	3910	+	rheMac2.chr17	14311798	14311799	3896	+
-	7	hg18.chr22_insert	13	hg18.chr22	14513678	14513690	348	-	panTro2.chr2b	132517876	132517877	321	+	rheMac2.chr17	14274462	14274463	337	+
-	25	hg18.chr22_insert	9	hg18.chr22	14529501	14529509	385	-	panTro2.chr22	14528775	14528776	376	-	rheMac2.chr9	42869449	42869450	375	-
-	36	hg18.chr22_insert	4	hg18.chr22	14566316	14566319	540	-	panTro2.chr2b	132492077	132492078	533	+	rheMac2.chr10	59230438	59230439	533	-
-	41	hg18.chr22_insert	4	hg18.chr22	14571556	14571559	2483	-	panTro2.chr2b	132485878	132485879	2481	+	rheMac2.chr10	59126094	59126095	2508	+
-
-  </help>  
-  
-</tool>
\ No newline at end of file
--- a/tools/regVariation/draw_stacked_barplots.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-#!/usr/bin/perl -w
-
-# This program draws, in a pdf file, a stacked bars plot for different categories of data and for 
-# different criteria. For each criterion a stacked bar is drawn, such that the height of each stacked 
-# sub-bar represents the number of elements in each category satisfying that criterion.
-# The input consists of a TABULAR format file, where the left column represents the names of categories 
-# and the other columns are headed by the names of criteria, such that each data value in the file 
-# represents the number of elements in a certain category satisfying a certain criterion.
-# The output is a PDF file containing a stacked bars plot representing the number of elements in each 
-# category satisfying each criterion. The drawing is done using R code.  
-
-  
-use strict;
-use warnings;
-
-my $criterion;
-my @criteriaArray = ();
-my $criteriaNumber = 0;
-my $lineCounter = 0;
-
-#variable to store the name of the R script file
-my $r_script;
-
-# check that the correct number of arguments was supplied
-my $usage = "usage: draw_stacked_barplots.pl [TABULAR.in] [PDF.out] \n";
-die $usage unless @ARGV == 2;
-
-my $categoriesInputFile = $ARGV[0];
-
-my $categories_criteria_bars_plot_outputFile = $ARGV[1];
-
-#open the input and output files
-open (INPUT, "<", $categoriesInputFile) || die("Could not open file $categoriesInputFile \n"); 
-open (OUTPUT, ">", $categories_criteria_bars_plot_outputFile) || die("Could not open file $categories_criteria_bars_plot_outputFile \n");
-
-# R script to implement the drawing of a stacked bar plot representing the significant motifs in each category of motifs 	
-#construct an R script file 
-$r_script = "motif_significance_bar_plot.r";
-open(Rcmd,">", $r_script) or die "Cannot open $r_script \n\n";
-print Rcmd "
-			#store the table content of the first file into a matrix
-			categoriesTable <- read.table(\"$categoriesInputFile\", header = TRUE);
-			categoriesMatrix <- as.matrix(categoriesTable); 
-			
-			
-			#compute the sum of elements in the column with the maximum sum
-			columnSumsVector <- colSums(categoriesMatrix);
-			maxColumn <- max (columnSumsVector);
-			
-			if (maxColumn %% 10 != 0){
-				maxColumn <- maxColumn + 10;
-			}
-			
-			plotHeight = maxColumn/8;
-			criteriaVector <- names(categoriesTable);
-			
-			pdf(file = \"$categories_criteria_bars_plot_outputFile\", width = length(criteriaVector), height = plotHeight, family = \"Times\", pointsize = 12, onefile = TRUE);
-			
-			
-			
-			#draw the first barplot
-			barplot(categoriesMatrix, ylab = \"No. of elements in each category\", xlab = \"Criteria\", ylim = range(0, maxColumn), col = \"black\", density = c(10, 20, 30, 40, 50, 60, 70, 80), angle = c(45, 90, 135), names.arg = criteriaVector);
-			
-			#draw the legend
-			legendX = 0.2;
-			legendY = maxColumn;
-			
-			legend (legendX, legendY, legend = rownames(categoriesMatrix), density = c(10, 20, 30, 40, 50, 60, 70, 80), angle = c(45, 90, 135));
-   			
-   			dev.off();
-			
-			#eof\n";
-close Rcmd;	
-system("R --no-restore --no-save --no-readline < $r_script > $r_script.out");
-
-#close the input and output files
-close(OUTPUT);
-close(INPUT);
--- a/tools/regVariation/draw_stacked_barplots.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-<tool id="draw_stacked_barplots" name="Draw Stacked Bar Plots" version="1.0.0">
-  <description>for different categories and different criteria</description>
-  
-  <command interpreter="perl">
-  	draw_stacked_barplots.pl $inputFile1 $outputFile1
-  </command>
-
-  <inputs>
-  	<param format="tabular" name="inputFile1" type="data" label="Select the input file"/>
-  </inputs>
-  
-  <outputs>
-    <data format="pdf" name="outputFile1"/>
-  </outputs>
-
-  <tests>
-  	<test>
-  		<param name="inputFile1" value="categories_criteria.tabular" />
-    	<output name="outputFile1" file="stacked_barplot.pdf" />     
-  	</test>
-  </tests>
-
-  <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This program draws, in a pdf file, a stacked bars plot for different categories of data and for different criteria. For each criterion a stacked bar is
-drawn, such that the height of each stacked sub-bar represents the number of elements in each category satisfying that criterion.
-
-- The input consists of a TABULAR format file, where the left column represents the names of categories and the other columns are headed by the names of criteria, such that each data value in the file represents the number of elements in a certain category satisfying a certain criterion:
- 
-- The output is a PDF file containing a stacked bars plot representing the number of elements in each category satisfying each criterion. The drawing is done using R code.  
-
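-As a rough illustration, the same kind of plot can be drawn with matplotlib; the values below are a subset of the example table, and this is a sketch rather than the shipped R code::
-
-	import numpy as np
-	import matplotlib.pyplot as plt
-
-	criteria = ["10bp", "20bp", "40bp"]
-	categories = {"Deletion_Hotspots": [2, 3, 4],
-	              "Indel_Hotspots": [1, 1, 1],
-	              "X-like_Sites": [4, 4, 4]}
-
-	bottom = np.zeros(len(criteria))
-	for name, counts in categories.items():
-	    plt.bar(criteria, counts, bottom=bottom, label=name)  # stack categories
-	    bottom += np.array(counts)
-	plt.xlabel("Criteria")
-	plt.ylabel("No. of elements in each category")
-	plt.legend()
-	plt.savefig("stacked_barplot.pdf")
-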
-**Example**
-
-Let us suppose that the input file represents the number of significant motifs in each motif category for each window size::
-
- 						10bp	20bp	40bp	80bp	160bp	320bp	640bp	1280bp
-	Deletion_Hotspots			2	3	4	4	5	6	7	7
-	Dna_Pol_Pause/Frameshift_Hotspots	8	10	14	17	18	15	19	20
-	Indel_Hotspots				1	1	1	2	1	0	0	0
-	Insertion_Hotspots			0	0	1	2	2	2	2	5
-	Topoisomerase_Cleavage_Sites		2	3	5	4	3	3	4	4
-	Translin_Targets			0	0	2	2	3	3	3	2
-	VDJ_Recombination_Signals		0	0	1	1	1	2	2	2
-	X-like_Sites				4	4	4	5	6	7	7	10
-
-
-Running the program will give the following output::
-
-	The stacked bars plot representing the data in the input file.
-
-.. image:: ./static/operation_icons/stacked_bars_plot.png
-
-  </help>  
-  
-</tool>
--- a/tools/regVariation/featureCounter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,148 +0,0 @@
-#!/usr/bin/env python
-#Guruprasad Ananda
-"""
-Calculate the count and coverage of one query on another, appending four new columns:
-bases covered, fraction of bases covered, number of completely present features, and number of partially present/overlapping features.
-
-usage: %prog bed_file_1 bed_file_2 out_file
-    -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file
-    -2, --cols2=N,N,N,N: Columns for chr, start, end, strand in second file
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
-from bx.intervals.io import *
-from bx.cookbook import doc_optparse
-from bx.intervals.operations import quicksect
-from galaxy.tools.util.galaxyops import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-def counter(node, start, end):
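-    # Recursively walk the interval tree, classifying each feature against the
-    # window [start, end): 'full' counts features lying entirely inside the
-    # window or spanning all of it, 'partial' counts features crossing exactly
-    # one window boundary; counts accumulate in the module-level globals.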
-    global full, partial
-    if node.start <= start and node.maxend > start:
-        if node.end >= end or (node.start == start and end > node.end > start):
-            full += 1
-        elif end > node.end > start:
-            partial += 1
-        if node.left and node.left.maxend > start:
-            counter(node.left, start, end)
-        if node.right: 
-            counter(node.right, start, end)
-    elif start < node.start < end:
-        if node.end <= end:
-            full += 1
-        else:
-            partial += 1
-        if node.left and node.left.maxend > start:
-            counter(node.left, start, end)
-        if node.right: 
-            counter(node.right, start, end)
-    else:
-        if node.left: 
-            counter(node.left, start, end)
-
-def count_coverage( readers, comments=True ):
-    primary = readers[0]
-    secondary = readers[1]
-    secondary_copy = readers[2]
-    
-    rightTree = quicksect.IntervalTree()
-    for item in secondary:
-        if type( item ) is GenomicInterval:
-            rightTree.insert( item, secondary.linenum, item.fields )
-    
-    bitsets = secondary_copy.binned_bitsets() 
-        
-    global full, partial
-    
-    for interval in primary:
-        if type( interval ) is Header:
-            yield interval
-        if type( interval ) is Comment and comments:
-            yield interval
-        elif type( interval ) == GenomicInterval:
-            chrom = interval.chrom
-            start = int(interval.start)
-            end = int(interval.end)
-            full = 0
-            partial = 0
-            if chrom not in bitsets:
-                bases_covered = 0
-                percent = 0.0
-                full = 0
-                partial = 0
-            else:
-                bases_covered = bitsets[ chrom ].count_range( start, end-start )
-                if (end - start) == 0:
-                    percent = 0
-                else: 
-                    percent = float(bases_covered) / float(end - start)
-                if bases_covered:
-                    root = rightTree.chroms[chrom]    #root node for the chrom tree
-                    counter(root, start, end)
-            interval.fields.append(str(bases_covered))
-            interval.fields.append(str(percent))
-            interval.fields.append(str(full))
-            interval.fields.append(str(partial))
-            yield interval
-    
-def main():
-    options, args = doc_optparse.parse( __doc__ )
-    
-    try:
-        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
-        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )      
-        in1_fname, in2_fname, out_fname = args
-    except:
-        stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
-    
-    g1 = NiceReaderWrapper( fileinput.FileInput( in1_fname ),
-                            chrom_col=chr_col_1,
-                            start_col=start_col_1,
-                            end_col=end_col_1,
-                            strand_col=strand_col_1,
-                            fix_strand=True )
-    g2 = NiceReaderWrapper( fileinput.FileInput( in2_fname ),
-                            chrom_col=chr_col_2,
-                            start_col=start_col_2,
-                            end_col=end_col_2,
-                            strand_col=strand_col_2,
-                            fix_strand=True )
-    g2_copy = NiceReaderWrapper( fileinput.FileInput( in2_fname ),
-                                 chrom_col=chr_col_2,
-                                 start_col=start_col_2,
-                                 end_col=end_col_2,
-                                 strand_col=strand_col_2,
-                                 fix_strand=True )
-    
-
-    out_file = open( out_fname, "w" )
-
-    try:
-        for line in count_coverage([g1,g2,g2_copy]):
-            if type( line ) is GenomicInterval:
-                out_file.write( "%s\n" % "\t".join( line.fields ) )
-            else:
-                out_file.write( "%s\n" % line )
-    except ParseError, exc:
-        out_file.close()
-        fail( str( exc ) )
-
-    out_file.close()
-
-    if g1.skipped > 0:
-        print skipped( g1, filedesc=" of 1st dataset" )
-    if g2.skipped > 0:
-        print skipped( g2, filedesc=" of 2nd dataset" )
-    elif g2_copy.skipped > 0:
-        print skipped( g2_copy, filedesc=" of 2nd dataset" )
-        
-if __name__ == "__main__":
-    main()
--- a/tools/regVariation/featureCounter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-<tool id="featureCoverage1" name="Feature coverage" version="2.0.0">
-  <description></description>
-  <command interpreter="python">featureCounter.py $input1 $input2 $output -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol}</command>
-  <inputs>
-    <param format="interval" name="input1" type="data" help="First dataset">
-      <label>What portion of</label>
-    </param>
-    <param format="interval" name="input2" type="data" help="Second dataset">
-      <label>is covered by</label>
-    </param>
-   </inputs>
-  <outputs>
-    <data format="interval" name="output" metadata_source="input1" />
-  </outputs>
-  
-  <tests>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <output name="output" file="6_feature_coverage.bed" />
-    </test>
-    <test>
-      <param name="input1" value="chrY1.bed" />
-      <param name="input2" value="chrY2.bed" />
-      <output name="output" file="chrY_Coverage.bed" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**What it does**
-
-This tool computes, for each interval in the first dataset, how much of it is covered by the intervals in the second dataset. The coverage and counts are appended as 4 new columns in the resulting dataset.
-
------
-
-**Example**
-
-- If **First dataset** consists of the following windows::
-
-    chrX 1     10001 seg 0 -
-    chrX 10001 20001 seg 0 -
-    chrX 20001 30001 seg 0 -
-    chrX 30001 40001 seg 0 -
-      
-- and **Second dataset** consists of the following exons::
-
-    chrX 5000  6000  seg2 0 -
-    chrX 5500  7000  seg2 0 -
-    chrX 9000  22000 seg2 0 -
-    chrX 24000 34000 seg2 0 -
-    chrX 36000 38000 seg2 0 -
-      
-- the **Result** is the coverage of exons of the second dataset in each of the windows contained in first dataset::
-
-    chrX 1     10001 seg 0 - 3001  0.3001 2 1
-    chrX 10001 20001 seg 0 - 10000 1.0    1 0
-    chrX 20001 30001 seg 0 - 8000  0.8    0 2
-    chrX 30001 40001 seg 0 - 5999  0.5999 1 1
-	  
-- To clarify, the following line of output ( added columns are indexed by a, b, c and d )::
-
-                         a    b      c d
-    chrX 1 10001 seg 0 - 3001 0.3001 2 1
-                                  
-  implies that 2 exons (c) fall fully in this window (chrX:1-10001), 1 exon (d) partially overlaps this window, and these 3 exons cover 30.01% (b) of the window size, spanning 3001 nucleotides (a).
-
-  * a: number of nucleotides in this window covered by the features in (c) and (d) - features overlapping with each other will be merged to calculate (a)
-  * b: fraction of window size covered by features in (c) and (d) - features overlapping with each other will be merged to calculate (b)
-  * c: number of features in the 2nd dataset that fall **completely** within this window
-  * d: number of features in the 2nd dataset that **partially** overlap this window
-  	 
-</help>
-</tool>
--- a/tools/regVariation/getIndelRates_3way.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,249 +0,0 @@
-#!/usr/bin/env python
-#Guruprasad Ananda
-
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-
-import sys, os, tempfile
-import traceback
-import fileinput
-from warnings import warn
-
-from galaxy.tools.util.galaxyops import *
-from bx.intervals.io import *
-
-from bx.intervals.operations import quicksect
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-    
-def counter(node, start, end, sort_col):
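-    # Walk the interval tree recursively: 'full' counts indels lying entirely
-    # within [start, end]; blk_len accumulates the adjusted length of each
-    # distinct alignment block seen (tracked via blk_list), so the caller can
-    # divide the indel count by the total block length to obtain a rate.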
-    global full, blk_len, blk_list
-    if node.start < start:
-        if node.right:
-            counter(node.right, start, end, sort_col)
-    elif start <= node.start <= end and start <= node.end <= end:
-        full += 1
-        if node.other[0] not in blk_list:
-            blk_list.append(node.other[0])
-            blk_len += int(node.other[sort_col+2])
-        if node.left and node.left.maxend > start:
-            counter(node.left, start, end, sort_col)
-        if node.right: 
-            counter(node.right, start, end, sort_col)
-    elif node.start > end:
-        if node.left: 
-            counter(node.left, start, end, sort_col)
-            
-
-infile = sys.argv[1]  
-fout = open(sys.argv[2],'w')
-int_file = sys.argv[3]
-if int_file != "None": #User has specified an interval file
-    try:
-        fint = open(int_file, 'r')
-        dbkey_i = sys.argv[4]
-        chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[5] )
-    except:
-        stop_err("Unable to open input Interval file")
-        
-def main():
-
-    for i, line in enumerate( file ( infile )):
-        line = line.rstrip('\r\n')
-        if len( line )>0 and not line.startswith( '#' ):
-            elems = line.split( '\t' )
-            break
-        if i == 30:
-            break # Hopefully we'll never get here...
-        
-    if len( elems ) != 18:
-        stop_err( "This tool only works on tabular data output by 'Fetch Indels from 3-way alignments' tool. The data in your input dataset is either missing or not formatted properly." )
-    
-    for i, line in enumerate( file ( infile )):
-        line = line.rstrip('\r\n')
-        elems = line.split('\t')
-        try:
-            assert int(elems[0])
-            assert len(elems) == 18
-            if int_file != "None":
-                if dbkey_i not in elems[3] and  dbkey_i not in elems[8] and dbkey_i not in elems[13]:
-                    stop_err("The species build corresponding to your interval file is not present in the Indel file.") 
-                if dbkey_i in elems[3]:
-                    sort_col = 4
-                elif dbkey_i in elems[8]:
-                    sort_col = 9
-                elif dbkey_i in elems[13]:
-                    sort_col = 14
-            else:
-                species = []
-                species.append( elems[3].split('.')[0] )
-                species.append( elems[8].split('.')[0] )
-                species.append( elems[13].split('.')[0] )
-                sort_col = 0    #Based on block numbers
-            break
-        except:
-            continue
-        
-        
-    fin = open(infile, 'r')
-    skipped = 0
-    
-    if int_file == "None":
-        sorted_infile = tempfile.NamedTemporaryFile()
-        cmdline = "sort -n -k"+str(1)+" -o "+sorted_infile.name+" "+infile
-        try:
-            os.system(cmdline)
-        except:
-            stop_err("Encountered error while sorting the input file.")
-        print >>fout, "#Block\t%s_InsRate\t%s_InsRate\t%s_InsRate\t%s_DelRate\t%s_DelRate\t%s_DelRate" %(species[0],species[1],species[2],species[0],species[1],species[2])
-        prev_bnum = -1
-        sorted_infile.seek(0)
-        for line in sorted_infile.readlines():
-            line = line.rstrip('\r\n')
-            elems = line.split('\t')
-            try:
-                assert int(elems[0])
-                assert len(elems) == 18
-                new_bnum = int(elems[0])
-                if new_bnum != prev_bnum:
-                    if prev_bnum != -1:
-                        irate = []
-                        drate = []
-                        for i,elem in enumerate(inserts):
-                            try:
-                                irate.append(str("%.2e" %(inserts[i]/blen[i])))
-                            except:
-                                irate.append('0')
-                            try:
-                                drate.append(str("%.2e" %(deletes[i]/blen[i])))
-                            except:
-                                drate.append('0')
-                        print >>fout, "%s\t%s\t%s" %(prev_bnum, '\t'.join(irate) , '\t'.join(drate))
-                    inserts = [0.0, 0.0, 0.0]
-                    deletes = [0.0, 0.0, 0.0]
-                    blen = []
-                    blen.append( int(elems[6]) )
-                    blen.append( int(elems[11]) )
-                    blen.append( int(elems[16]) )
-                line_sp = elems[1].split('.')[0]
-                sp_ind = species.index(line_sp)
-                if elems[1].endswith('insert'):
-                    inserts[sp_ind] += 1
-                elif elems[1].endswith('delete'):
-                    deletes[sp_ind] += 1
-                prev_bnum = new_bnum 
-            except Exception, ei:
-                #print >>sys.stderr, ei
-                continue
-        irate = []
-        drate = []
-        for i,elem in enumerate(inserts):
-            try:
-                irate.append(str("%.2e" %(inserts[i]/blen[i])))
-            except:
-                irate.append('0')
-            try:
-                drate.append(str("%.2e" %(deletes[i]/blen[i])))
-            except:
-                drate.append('0')
-        print >>fout, "%s\t%s\t%s" %(prev_bnum, '\t'.join(irate) , '\t'.join(drate))
-        sys.exit()
-    
-    
-    inf = open(infile, 'r')
-    start_met = False
-    end_met = False
-    sp_file = tempfile.NamedTemporaryFile()
-    for n, line in enumerate(inf):
-        line = line.rstrip('\r\n')
-        elems = line.split('\t')
-        try:
-            assert int(elems[0])
-            assert len(elems) == 18
-            if dbkey_i not in elems[1]: 
-                if not(start_met):   
-                    continue
-                else:
-                    sp_end = n
-                    break
-            else:
-                print >>sp_file, line
-                if not(start_met):
-                    start_met = True
-                    sp_start = n
-        except:
-            continue
-    
-    try:
-        assert sp_end
-    except:
-        sp_end = n+1
-    
-    sp_file.seek(0)
-    win = NiceReaderWrapper( fileinput.FileInput( int_file ),
-                                chrom_col=chr_col_i,
-                                start_col=start_col_i,
-                                end_col=end_col_i,
-                                strand_col=strand_col_i,
-                                fix_strand=True)
-    
-    indel = NiceReaderWrapper( fileinput.FileInput( sp_file.name ),
-                                chrom_col=1,
-                                start_col=sort_col,
-                                end_col=sort_col+1,
-                                strand_col=-1,
-                                fix_strand=True)
-    
-    indelTree = quicksect.IntervalTree()
-    for item in indel:
-        if type( item ) is GenomicInterval:
-            indelTree.insert( item, indel.linenum, item.fields )
-    result=[]
-    
-    global full, blk_len, blk_list
-    for interval in win:
-        if type( interval ) is Header:
-            pass
-        if type( interval ) is Comment:
-            pass
-        elif type( interval ) == GenomicInterval:
-            chrom = interval.chrom
-            start = int(interval.start)
-            end = int(interval.end)
-            if start > end: 
-                warn( "Interval start after end!" )
-            ins_chr = "%s.%s_insert" %(dbkey_i,chrom)
-            del_chr = "%s.%s_delete" %(dbkey_i,chrom)
-            irate = 0
-            drate = 0
-            if ins_chr not in indelTree.chroms and del_chr not in indelTree.chroms:
-                pass    
-            else:
-                if ins_chr in indelTree.chroms:
-                    full = 0.0
-                    blk_len = 0
-                    blk_list = []
-                    root = indelTree.chroms[ins_chr]    #root node for the chrom insertion tree
-                    counter(root, start, end, sort_col)
-                    if blk_len:
-                        irate = full/blk_len
-                
-                if del_chr in indelTree.chroms:
-                    full = 0.0
-                    blk_len = 0
-                    blk_list = []
-                    root = indelTree.chroms[del_chr]    #root node for the chrom deletion tree
-                    counter(root, start, end, sort_col)
-                    if blk_len:
-                        drate = full/blk_len
-                
-            interval.fields.append(str("%.2e" %irate))
-            interval.fields.append(str("%.2e" %drate))
-            print >>fout, "\t".join(interval.fields)
-            fout.flush()
-
-if __name__ == "__main__":
-    main()    
\ No newline at end of file
--- a/tools/regVariation/getIndelRates_3way.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-<tool id="indelRates_3way" name="Estimate Indel Rates" version="1.0.0">
-  <description> for 3-way alignments</description>
-  <command interpreter="python">
-    getIndelRates_3way.py $input1 $out_file1 
-    #if $region.type == "align"
-        "None"
-    #else
-        $region.input2 $input2.dbkey $input2.metadata.chromCol,$input2.metadata.startCol,$input2.metadata.endCol,$input2.metadata.strandCol
-    #end if
-  </command>
-  <inputs>
-    <page>
-        <param format="tabular" name="input1" type="data" label="Select dataset containing Indels"/>
-        
-        <conditional name="region">
-          <param name="type" type="select" label="Estimate rates corresponding to" multiple="false">
-            <option value="win" selected="True">Intervals in your history</option>
-            <option value="align">Alignment block</option>
-         </param>
-         <when value="win">
-            <param format="interval" name="input2" type="data" label="Choose intervals">
-                <validator type="unspecified_build" />
-            </param>
-          </when>
-          <when value="align" />
-      </conditional>
-     
-    </page>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1" metadata_source="input1"/>
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input1" value="indels_3way.tabular"/>
-      <param name="type" value="align"/>
-      <output name="out_file1" file="indelrates_3way.tabular"/>
-    </test>
-  </tests>
-
- <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This tool estimates the insertion and deletion rates for alignments in a window of specified size. Rates are computed over the total adjusted lengths (adjusted by disregarding masked bases) of all the alignment blocks from the indel file that fall within that window.
-  
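-In per-block mode the arithmetic is simply the indel count divided by the adjusted block length; a minimal Python sketch with illustrative names::
-
-	def block_rates(insert_counts, delete_counts, block_lengths):
-	    # one entry per species; rate = indel count / adjusted block length
-	    ins = [float(n) / L if L else 0.0 for n, L in zip(insert_counts, block_lengths)]
-	    dels = [float(n) / L if L else 0.0 for n, L in zip(delete_counts, block_lengths)]
-	    return ins, dels
-
-	# e.g. block_rates([2, 0, 1], [1, 1, 0], [3924, 3910, 3896])
-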
------
-
-.. class:: warningmark
-
-**Note**
-
-This tool only works on the output of the 'Fetch Indels from 3-way alignments' tool.   
-
-</help>  
-
-
-</tool>
--- a/tools/regVariation/getIndels.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Estimate INDELs for pair-wise alignments.
-
-usage: %prog maf_input out_file1
-"""
-
-from __future__ import division
-from galaxy import eggs
-import pkg_resources 
-pkg_resources.require( "bx-python" )
-try:
-    pkg_resources.require("numpy")
-except:
-    pass
-import psyco_full
-import sys
-from bx.cookbook import doc_optparse
-from galaxy.tools.exception_handling import *
-import bx.align.maf
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():   
-    # Parsing Command Line here
-    options, args = doc_optparse.parse( __doc__ )
-    
-    try:
-        inp_file, out_file1 = args    
-    except:
-        print >> sys.stderr, "Tool initialization error."
-        sys.exit()
-    
-    try:
-        fin = open(inp_file,'r')
-    except:
-        print >> sys.stderr, "Unable to open input file"
-        sys.exit()
-    try:
-        fout1 = open(out_file1,'w')
-        #fout2 = open(out_file2,'w')
-    except:
-        print >> sys.stderr, "Unable to open output file"
-        sys.exit()
-
-    try:
-        maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
-    except:
-        print >>sys.stderr, "Your MAF file appears to be malformed."
-        sys.exit()
-    maf_count = 0
-    
-    print >>fout1, "#Block\tSource\tSeq1_Start\tSeq1_End\tSeq2_Start\tSeq2_End\tIndel_length"
-    for block_ind, block in enumerate(maf_reader):
-        if len(block.components) < 2:
-            continue
-        seq1 = block.components[0].text
-        src1 = block.components[0].src
-        start1 = block.components[0].start
-        if len(block.components) == 2:
-            seq2 = block.components[1].text
-            src2 = block.components[1].src
-            start2 = block.components[1].start
-            #for pos in range(len(seq1)):
-            nt_pos1 = start1-1    #position of the nucleotide (without counting gaps)
-            nt_pos2 = start2-1
-            pos = 0        #character column position
-            gaplen1 = 0
-            gaplen2 = 0
-            prev_pos_gap1 = 0
-            prev_pos_gap2 = 0
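-            # walk the alignment column by column; a run of '-' in one
-            # sequence is reported as an indel when it ends, using the gap
-            # length and the ungapped coordinates tracked in nt_pos1/nt_pos2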
-            while pos < len(seq1):
-                if prev_pos_gap1 == 0:
-                    gaplen1 = 0
-                if prev_pos_gap2 == 0:
-                    gaplen2 = 0
-                    
-                if seq1[pos] == '-':
-                    if seq2[pos] != '-':
-                        nt_pos2 += 1
-                        gaplen1 += 1
-                        prev_pos_gap1 = 1
-                        #write 2
-                        if prev_pos_gap2 == 1:
-                            prev_pos_gap2 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1,nt_pos1+1,nt_pos2-1,nt_pos2-1+gaplen2,gaplen2)
-                        if pos == len(seq1)-1:
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1,nt_pos1+1,nt_pos2+1-gaplen1,nt_pos2+1,gaplen1)
-                    else:
-                        prev_pos_gap1 = 0
-                        prev_pos_gap2 = 0
-                        """
-                        if prev_pos_gap1 == 1:
-                            prev_pos_gap1 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,gaplen1)
-                        elif prev_pos_gap2 == 1:
-                            prev_pos_gap2 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos2-1,nt_pos2,gaplen2)
-                        """
-                else:
-                    nt_pos1 += 1
-                    if seq2[pos] != '-':
-                        nt_pos2 += 1
-                        #write both
-                        if prev_pos_gap1 == 1:
-                            prev_pos_gap1 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,nt_pos2-gaplen1,nt_pos2,gaplen1)
-                        elif prev_pos_gap2 == 1:
-                            prev_pos_gap2 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1-gaplen2,nt_pos1,nt_pos2-1,nt_pos2,gaplen2)
-                    else:
-                        gaplen2 += 1
-                        prev_pos_gap2 = 1
-                        #write 1
-                        if prev_pos_gap1 == 1:
-                            prev_pos_gap1 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,nt_pos2,nt_pos2+gaplen1,gaplen1)
-                        if pos == len(seq1)-1:
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1+1-gaplen2,nt_pos1+1,nt_pos2,nt_pos2+1,gaplen2)
-                pos += 1
-if __name__ == "__main__":
-    main()
--- a/tools/regVariation/getIndels_2way.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-<tool id="getIndels_2way" name="Fetch Indels">
-  <description> from pairwise alignments</description>
-  <command interpreter="python">
-  	getIndels.py $input1 $out_file1
-  </command>
-  <inputs>
-    <page>
-    	<param format="maf" name="input1" type="data" label="Select data"/>
-    </page>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1" metadata_source="input1"/>
-  </outputs>
-  <requirements>
-    <requirement type="python-module">numpy</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="input1" value="6.maf"/>
-      <output name="out_file1" file="6_indels.tabular"/>
-    </test>
-  </tests>
- <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This tool identifies indels in every alignment block of the MAF file and reports the position and length of each.
-
------
-
-.. class:: warningmark
-
-**Note**
-
-Any block not containing exactly 2 species will be omitted.
-
------
-
-**Example**
-
-- For the following alignment block::
-
-   a score=7233.0
-   s hg18.chr1     100 35 + 247249719 AT--GACTGAGGACTTAGTTTAAGATGTTCCTACT
-   s rheMac2.chr11 200 31 + 134511895 ATAAG-CGGACGACTTAGTTTAAGATGTTCC---- 
-
-- running this tool will return::
-
-   #Block  Source         Seq1_Start  Seq1_End  Seq2_Start  Seq2_End  Indel_length
-   1       hg18.chr1      101         102       202         204       2
-   1       rheMac2.chr11  103         104       204         205       1
-   1       rheMac2.chr11  129         133       229         230       4
-   
-</help>  
-
-
-</tool>
--- a/tools/regVariation/getIndels_3way.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-<tool id="indels_3way" name="Fetch Indels"  version="1.0.3">
-  <description> from 3-way alignments</description>
-  <command interpreter="perl">
-  	parseMAF_smallIndels.pl $input1 $out_file1 $outgroup
-  </command>
-  <inputs>
-    <page>
-    	<param format="maf" name="input1" type="data" label="Select data"/>
-    	<param name="outgroup" type="select" label="Select outgroup species">
-          <options>
-            <filter type="data_meta" ref="input1" key="species" />
-          </options>  
-      	</param>
-    </page>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1" metadata_source="input1"/>
-    <!--<data format="tabular" name="out_file2" metadata_source="input1"/>-->
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="3way.maf"/>
-      <param name="outgroup" value="canFam2"/>
-      <output name="out_file1" file="indels_3way.tabular"/>
-    </test>
-  </tests>
- <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This tool is the first module of the computational pipeline for identifying indels described in Kvikstad et al., 2007. Note that the generated output does not include the subsequent filtering steps.
-
-Deletions in a particular species are identified as one or more consecutive gap columns within an alignment block, provided that the orthologous positions in the other two species contain nucleotides of equal length.
-Similarly, insertions in a particular species are identified as one or more consecutive nucleotide columns within an alignment block, provided that the orthologous positions in the other two species contain gaps, as illustrated below.
-
-*Kvikstad E. M. et al. (2007). A Macaque's-Eye View of Human Insertions and Deletions: Differences in Mechanisms. PLoS Computational Biology 3(9):e176*
-
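-For illustration, consider a hypothetical alignment block (coordinates invented) in which
-rheMac2 carries two consecutive gap columns while the other two species hold nucleotides::
-
-   s hg18.chr1    100 10 + 247249719 ATGACTGAGG
-   s rheMac2.chr1 200  8 + 134511895 ATGA--GAGG
-   s canFam2.chr1 300 10 + 125616256 ATGACTGAGG
-
-By the definition above, this run of gap columns is reported as a 2 bp deletion in rheMac2.
-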
------
-
-.. class:: warningmark
-
-**Note**
-
-Any block not containing exactly 3 sequences will be omitted.
-
-  </help>  
-
-
-</tool>
\ No newline at end of file
--- a/tools/regVariation/linear_regression.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,147 +0,0 @@
-#!/usr/bin/env python
-
-from galaxy import eggs
-import sys, string
-from rpy import *
-import numpy
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-infile = sys.argv[1]
-y_col = int(sys.argv[2])-1
-x_cols = sys.argv[3].split(',')
-outfile = sys.argv[4]
-outfile2 = sys.argv[5]
-
-print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
-fout = open(outfile,'w')
-elems = []
-for i, line in enumerate( file ( infile )):
-    line = line.rstrip('\r\n')
-    if len( line )>0 and not line.startswith( '#' ):
-        elems = line.split( '\t' )
-        break 
-    if i == 30:
-        break # Hopefully we'll never get here...
-
-if len( elems )<1:
-    stop_err( "The data in your input dataset is either missing or not formatted properly." )
-
-y_vals = []
-x_vals = []
-
-for k,col in enumerate(x_cols):
-    x_cols[k] = int(col)-1
-    x_vals.append([])
-
-NA = 'NA'
-for ind,line in enumerate( file( infile )):
-    if line and not line.startswith( '#' ):
-        try:
-            fields = line.split("\t")
-            try:
-                yval = float(fields[y_col])
-            except:
-                yval = r('NA')
-            y_vals.append(yval)
-            for k,col in enumerate(x_cols):
-                try:
-                    xval = float(fields[col])
-                except:
-                    xval = r('NA')
-                x_vals[k].append(xval)
-        except:
-            pass
-
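-# arrange the collected predictor columns as an observations x predictors matrix for the R fit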
-x_vals1 = numpy.asarray(x_vals).transpose()
-
-dat= r.list(x=array(x_vals1), y=y_vals)
-
-set_default_mode(NO_CONVERSION)
-try:
-    linear_model = r.lm(r("y ~ x"), data = r.na_exclude(dat))
-except RException, rex:
-    stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain only non-numeric or invalid values.")
-set_default_mode(BASIC_CONVERSION)
-
-coeffs=linear_model.as_py()['coefficients']
-yintercept= coeffs['(Intercept)']
-summary = r.summary(linear_model)
-
-co = summary.get('coefficients', 'NA')
-"""
-if len(co) != len(x_vals)+1:
-    stop_err("Stopped performing linear regression on the input data, since one of the predictor columns contains only non-numeric or invalid values.")
-"""
-
-try:
-    yintercept = r.round(float(yintercept), digits=10)
-    pvaly = r.round(float(co[0][3]), digits=10)
-except:
-    pass
-
-print >>fout, "Y-intercept\t%s" %(yintercept)
-print >>fout, "p-value (Y-intercept)\t%s" %(pvaly)
-
-if len(x_vals) == 1:    #Simple linear regression case with 1 predictor variable
-    try:
-        slope = r.round(float(coeffs['x']), digits=10)
-    except:
-        slope = 'NA'
-    try:
-        pval = r.round(float(co[1][3]), digits=10)
-    except:
-        pval = 'NA'
-    print >>fout, "Slope (c%d)\t%s" %(x_cols[0]+1,slope)
-    print >>fout, "p-value (c%d)\t%s" %(x_cols[0]+1,pval)
-else:    #Multiple regression case with >1 predictors
-    ind=1
-    while ind < len(coeffs.keys()):
-        try:
-            slope = r.round(float(coeffs['x'+str(ind)]), digits=10)
-        except:
-            slope = 'NA'
-        print >>fout, "Slope (c%d)\t%s" %(x_cols[ind-1]+1,slope)
-        try:
-            pval = r.round(float(co[ind][3]), digits=10)
-        except:
-            pval = 'NA'
-        print >>fout, "p-value (c%d)\t%s" %(x_cols[ind-1]+1,pval)
-        ind+=1
-
-rsq = summary.get('r.squared','NA')
-adjrsq = summary.get('adj.r.squared','NA')
-fstat = summary.get('fstatistic','NA')
-sigma = summary.get('sigma','NA')
-
-try:
-    rsq = r.round(float(rsq), digits=5)
-    adjrsq = r.round(float(adjrsq), digits=5)
-    fval = r.round(fstat['value'], digits=5)
-    fstat['value'] = str(fval)
-    sigma = r.round(float(sigma), digits=10)
-except:
-    pass
-
-print >>fout, "R-squared\t%s" %(rsq)
-print >>fout, "Adjusted R-squared\t%s" %(adjrsq)
-print >>fout, "F-statistic\t%s" %(fstat)
-print >>fout, "Sigma\t%s" %(sigma)
-
-r.pdf( outfile2, 8, 8 )
-if len(x_vals) == 1:    #Simple linear regression case with 1 predictor variable
-    sub_title =  "Slope = %s; Y-int = %s" %(slope,yintercept)
-    try:
-        r.plot(x=x_vals[0], y=y_vals, xlab="X", ylab="Y", sub=sub_title, main="Scatterplot with regression")
-        r.abline(a=yintercept, b=slope, col="red")
-    except:
-        pass
-else:
-    r.pairs(dat, main="Scatterplot Matrix", col="blue")
-try:
-    r.plot(linear_model)
-except:
-    pass
-r.dev_off()
--- a/tools/regVariation/linear_regression.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-<tool id="LinearRegression1" name="Perform Linear Regression" version="1.0.1">
-  <description> </description>
-  <command interpreter="python">
-    linear_regression.py 
-      $input1
-      $response_col
-      $predictor_cols
-      $out_file1
-      $out_file2
-      1>/dev/null
-  </command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
-    <param name="response_col" label="Response column (Y)" type="data_column" data_ref="input1" numerical="True"/>
-    <param name="predictor_cols" label="Predictor columns (X)" type="data_column" data_ref="input1" numerical="True" multiple="true" >
-        <validator type="no_options" message="Please select at least one column."/>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input1" />
-    <data format="pdf" name="out_file2" />
-  </outputs>
-  <requirements>
-    <requirement type="python-module">rpy</requirement>
-  </requirements>
-  <tests>
-    <test>
-        <param name="input1" value="regr_inp.tabular"/>
-        <param name="response_col" value="3"/>
-        <param name="predictor_cols" value="1,2"/>
-        <output name="out_file1" file="regr_out.tabular"/>
-        <output name="out_file2" file="regr_out.pdf"/>
-    </test>
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Edit Datasets-&gt;Convert characters*
-
------
-
-.. class:: infomark
-
-**What it does**
-
-This tool uses the 'lm' function from the R statistical package to perform linear regression on the input data. It outputs two files: one containing the summary statistics of the regression, and the other containing diagnostic plots for checking whether the model assumptions are satisfied.
-
-*R Development Core Team (2009). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. ISBN 3-900051-07-0, URL http://www.R-project.org.*
-
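-For intuition, the coefficients reported here are the ordinary least-squares solution. A minimal
-numpy sketch of the same computation (illustrative only; the tool itself delegates to R via rpy)::
-
-    import numpy
-
-    def ols_fit(x, y):
-        # x: observations-by-predictors matrix, y: response vector
-        X = numpy.column_stack([numpy.ones(len(y)), numpy.asarray(x, dtype=float)])
-        coef = numpy.linalg.lstsq(X, numpy.asarray(y, dtype=float))[0]
-        return coef    # [intercept, slope_1, ..., slope_p]
-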
------
-
-.. class:: warningmark
-
-**Note**
-
-- This tool currently treats all predictor and response variables as continuous numeric variables. Running the tool on categorical variables may produce incorrect results.
-
-- Rows containing non-numeric (or missing) data in any of the chosen columns are excluded from the analysis.
-
-- The summary statistics in the output are described below:
-
-  - sigma: the square root of the estimated variance of the random error (the standard error of the residuals)
-  - R-squared: the fraction of variance explained by the model
-  - Adjusted R-squared: the R-squared statistic adjusted for the number of predictors (p)
-  - p-value: the p-value for the t-test of the null hypothesis that the corresponding slope is zero, against the two-sided alternative.
-
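-  As computed by R, Adjusted R-squared = 1 - (1 - R-squared)*(n - 1)/(n - p - 1), where n is the number of rows used in the fit.
-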
-
-  </help>
-</tool>
--- a/tools/regVariation/maf_cpg_filter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-#!/usr/bin/env python
-#Guruprasad Ananda
-#Adapted from bx/scripts/maf_mask_cpg.py
-"""
-Mask out potential CpG sites from a maf. Restricted or inclusive definition
-of CpG sites can be used. The total fraction masked is printed to stderr.
-
-usage: %prog input output sitetype definition
-    -m, --mask=N: Index (0-5) of the character to use as mask (default 0, i.e. '#')
-"""
-
-from galaxy import eggs
-import pkg_resources 
-pkg_resources.require( "bx-python" )
-try:
-    pkg_resources.require( "numpy" )
-except:
-    pass
-import bx.align
-import bx.align.maf
-from bx.cookbook import doc_optparse
-import sys
-import bx.align.sitemask.cpg
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def main():
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        inp_file, out_file, sitetype, definition = args
-        if options.mask:
-            mask = int(options.mask)
-        else:
-            mask = 0
-    except:
-        print >> sys.stderr, "Tool initialization error."
-        sys.exit()
-
-    reader = bx.align.maf.Reader( open(inp_file, 'r') )
-    writer = bx.align.maf.Writer( open(out_file,'w') )
-    
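-    # map the integer index supplied via -m (0-5) to the actual mask character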
-    mask_chr_dict = {0:'#', 1:'$', 2:'^', 3:'*', 4:'?', 5:'N'}
-    mask = mask_chr_dict[mask]
-    
-    if sitetype == "CpG":
-        if int(definition) == 1:
-            cpgfilter = bx.align.sitemask.cpg.Restricted( mask=mask )
-            defn = "CpG-Restricted"
-        else:
-            cpgfilter = bx.align.sitemask.cpg.Inclusive( mask=mask )
-            defn = "CpG-Inclusive"
-    else:
-        cpgfilter = bx.align.sitemask.cpg.nonCpG( mask=mask )
-        defn = "non-CpG"
-    cpgfilter.run( reader, writer.write )
-    
-    print "%2.2f percent bases masked; Mask character = %s, Definition = %s" %(float(cpgfilter.masked)/float(cpgfilter.total) * 100, mask, defn)
-
-if __name__ == "__main__":
-    main()
--- a/tools/regVariation/maf_cpg_filter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-<tool id="cpgFilter" name="Mask CpG/non-CpG sites" version="1.0.0">
-  <description> from MAF file</description>
-  <command interpreter="python">
-  	maf_cpg_filter.py 
-    $input 
-    $out_file1 
-    $masksite.type
-    #if $masksite.type == "CpG":
-     $masksite.definition
-    #else:
-     "NA"
-   #end if
-    -m $mask_char
-  </command>
-  <inputs>
-    <page>
-    	<param format="maf" name="input" type="data" label="Select data"/>
-   		<param name="mask_char" size="5" type="select" label="Mask character">
-    		<option value="0" selected="true">#</option>
-          	<option value="1">$</option>
-          	<option value="2">^</option>
-          	<option value="3">*</option>
-          	<option value="4">?</option>
-          	<option value="5">N</option>
-        </param>
-        <conditional name="masksite">
-            <param name="type" size="5" type="select" label="Sites to be masked">
-                <option value="CpG" selected="true">CpG sites</option>
-                <option value="nonCpG">non-CpG sites</option>
-             </param>
-            <when value="CpG">
-                <param name="definition" size="5" type="select" label="Definition">
-                    <option value="0" selected="true">Inclusive</option>
-                    <option value="1">Restricted</option>
-                 </param>
-            </when>
-            <when value="nonCpG" />
-        </conditional>
-    </page>
-  </inputs>
-  <outputs>
-    <data format="maf" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <requirements>
-    <requirement type="python-module">numpy</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="input" value="6.maf"/>
-      <param name="mask_char" value="0"/>
-      <param name="type" value="CpG" />
-      <param name="definition" value="0" />
-      <output name="out_file1" file="6_mask_cpg.maf"/>
-    </test>
-    <test>
-      <param name="input" value="6.maf"/>
-      <param name="mask_char" value="0"/>
-      <param name="type" value="nonCpG" />
-      <output name="out_file1" file="6_mask_noncpg.maf"/>
-    </test>
-  </tests>
- <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This tool takes a MAF file as input and masks CpG (or non-CpG, as selected) sites in every alignment block of the file.
-
------
-
-.. class:: warningmark
-
-**Note**
-
-*Inclusive definition* defines CpG sites as those sites that are CG in at least one of the species.
-
-*Restricted definition* considers sites to be CpG if they are CG in at least one of the species; however, sites that are part of overlapping CpGs are excluded.
-
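-For illustration (a hypothetical case): if one species reads CGT where another reads ACG across the same three alignment columns, the middle column belongs to two overlapping CpGs; it would be masked under the inclusive definition but excluded under the restricted one.
-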
-For more information on CpG site definitions, please refer to this article_.
-
-.. _article: http://mbe.oxfordjournals.org/cgi/content/full/23/3/565
-
-  </help>  
-
-
-</tool>
--- a/tools/regVariation/microsatellite_birthdeath.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3984 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-use warnings;
-use Term::ANSIColor;
-use Pod::Checker; 
-use File::Basename;
-use IO::Handle;
-use Cwd;
-use File::Path qw(make_path remove_tree);
-use File::Temp qw/ tempfile tempdir /;
-my $tdir = tempdir( CLEANUP => 0 );
-chdir $tdir;
-my $dir = getcwd;  
-#print "current dit=$dir\n";
-
-use vars qw (%treesToReject %template $printer $interr_poscord $interrcord $no_of_interruptionscord $stringfile @tags 
-$infocord $typecord $startcord $strandcord $endcord $microsatcord $motifcord $sequencepos $no_of_species 
-$gapcord %thresholdhash $tree_decipherer @sp_ident %revHash %sameHash %treesToIgnore %alternate @exactspecies @exacttags);
-use FileHandle;
-use IO::Handle;                     # 5.004 or higher
-
-#my @ar = ("/Users/ydk/work/rhesus_microsat/results/galay/chr22_5sp.maf.txt", "/Users/ydk/work/rhesus_microsat/results/galay/dataset_11.dat",
-#"/Users/ydk/work/rhesus_microsat/results/galay/chr22_5spec.maf.summ","hg18,panTro2,ponAbe2,rheMac2,calJac1","((((hg18, panTro2), ponAbe2), rheMac2), calJac1)","9,10,12,12",
-#"10","0.8");
-my @ar = @ARGV;
-my ($maf, $orth, $summout, $species_set, $tree_definition, $thresholds, $FLANK_SUPPORT, $SIMILARITY_THRESH) = @ar;
-$SIMILARITY_THRESH = $SIMILARITY_THRESH/100;	# convert the command-line percentage to a fraction (divide only once)
-#########################
-my $EDGE_DISTANCE = 10; 
-my $COMPLEXITY_SUPPORT = 20;
-load_thresholds("9_10_12_12");
-#########################
-
-my $complexity=int($COMPLEXITY_SUPPORT * (1/40));
-
-#print "complexity=$complexity\n";
-#<STDIN>;
-
-#$printer = 1;
-
-my $rando = int(rand(1000));
-my $localdate = `date`;
-$localdate =~ /([0-9]+):([0-9]+):([0-9]+)/;
-my $info = $rando.$1.$2.$3;
-
-#---------------------------------------------------------------------------
-# GETTING INPUT INFORMATION AND OPENING INPUT AND OUTPUT FILES
-
-
-my @thresharr = (0, split(/,/,$thresholds));
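-# the leading 0 lets @thresharr be indexed directly by motif length (thresholds run from mononucleotide upward)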
-my $randno=int(rand(100000));
-my $megamatch = $randno.".megamatch.net.axt"; #"/gpfs/home/ydk104/work/rhesus_microsat/axtNet/hg18.panTro2.ponAbe2.rheMac2.calJac1/chr1.hg18.panTro2.ponAbe2.rheMac2.calJac1.net.axt";
-my $megamatchlck = $megamatch.".lck";
-unlink $megamatchlck;
-
-#my $selected= $orth;
-#my $eventfile = $orth;
-#$selected = $selected."_SELECTED";
-#$selected = $selected."_".$SIMILARITY_THRESH;
-#my $runtime = $selected.".runtime";
-
-my $inputtags = "H:C:O:R:M";
-$inputtags = $ARGV[3] if exists $ARGV[3] && $ARGV[3] =~ /[A-Z]:[A-Z]/;
-
-my @all_tags = split(/:/, $inputtags);
-my $inputsp = "hg18:panTro2:ponAbe2:rheMac2:calJac1";
-$inputsp = $ARGV[4] if exists $ARGV[4] && $ARGV[4] =~ /[0-9]+:/;
-@sp_ident = split(/:/,$inputsp);
-my $junkfile = $orth."_junk";
-
-my $sh = load_sameHash(1);
-my $rh = load_revHash(1);
-
-#print "inputs are : \n"; foreach(@ARGV){print $_,"\n";} 
-#open (SELECT, ">$selected") or die "Cannot open selected file: $selected: $!";
-open (SUMMARY, ">$summout") or die "Cannot open summout file: $summout: $!";
-#open (RUN, ">$runtime") or die "Cannot open orth file: $runtime: $!";
-#my $ctlfile = "baseml\.ctl"; #$ARGV[4];
-#my $treefile = "/gpfs/home/ydk104/work/rhesus_microsat/codes/lib/"; 	#1 THIS IS THE THE TREE UNDER CONSIDERATION, IN NEWICK 
-my %registeredTrees = ();
-my @removalReasons = 
-("microsatellite is compound",
-"complex structure",
-"no. of micros is more than no. of species",
-"more than one micro per species",
-"microsat contains N",
-"different motif than required",
-"more than zero interruptions",
-"microsat could not form key",
-"orthologous microsats of different motif size",
-"orthologous microsats of different motifs",
-"microsats belong to different alignment blocks altogether",
-"microsat near edge",
-"microsat in low complexity region",
-"microsat flanks don't align well",
-"phylogeny not informative");
-my %allowedhash=();
-#---------------------------------------------------------------------------
-# WORKING ON MAKING THE MEGAMATCH FILE
-my $chromt=int(rand(10000));
-my $p_chr=$chromt;
-
-$tree_definition=~s/,/, /g;
-$tree_definition =~ s/, +/, /g;
-my @exactspeciesset_unarranged = split(/,/,$species_set);
-my $largesttree = "$tree_definition;";
-$tree_definition=~s/[\)\(, ]/\t/g;
-		
-my @treespecies=split(/\t+/,$tree_definition);
-
-foreach my $spec (@treespecies){
-	foreach my $espec (@exactspeciesset_unarranged){
-		push @exactspecies, $spec if $spec eq $espec;
-	}
-}
-#print "exactspecies=@exactspecies\n";
-my $focalspec = $exactspecies[0];
-my $arranged_species_set=join(".",@exactspecies);
-@exacttags=@exactspecies;
-foreach my $extag (@exacttags){
-	$extag =~ s/hg18/H/g;
-	$extag =~ s/panTro2/C/g;
-	$extag =~ s/ponAbe2/O/g;
-	$extag =~ s/rheMac2/R/g;
-	$extag =~ s/calJac1/M/g;
-}
-my $chr_name = join(".",("chr".$p_chr),$arranged_species_set, "net", "axt");
-#print "sending to maftoAxt_multispecies: $maf, $tree_definition, $chr_name, $species_set .. focalspec=$focalspec \n"; 
-maftoAxt_multispecies($maf, $tree_definition, $chr_name, $species_set);
-my @filterseqfiles= ($chr_name);
-		$largesttree =~ s/hg18/H/g;
-		$largesttree =~ s/panTro2/C/g;
-		$largesttree =~ s/ponAbe2/O/g;
-		$largesttree =~ s/rheMac2/R/g;
-		$largesttree =~ s/calJac1/M/g;
-#---------------------------------------------------------------------------
-
-my ($lagestnodes, $largestbranches) = get_nodes($largesttree); 
-shift (@$lagestnodes);
-my @extendedtitle=();
-
-my $title = ();
-my $parttitle = ();
-my @titlearr = ();
-my @firsttitle=($focalspec."chrom", $focalspec."start", $focalspec."end", $focalspec."motif", $focalspec."motifsize", $focalspec."threshold");
-
-my @finames= qw(chr	start	end	motif	motifsize	microsat	mutation	mutation.position	mutation.from	mutation.to	insertion.details	deletion.details);
-
-my @fititle=();
-
-foreach my $spec (split(",",$species_set)){
-	push @fititle, $spec;
-	foreach my $name (@finames){
-		push @fititle, $spec.".".$name;
-	}
-}
-
-
-my @othertitle=qw(somechr somestart	somened	event source);
-
-my @fnames = ();
-push @fnames, qw(insertions_num  deletions_num  motinsertions_num  motinsertionsf_num  motdeletions_num  motdeletionsf_num  noninsertions_num  nondeletions_num) ;
-push @fnames, qw(binsertions_num bdeletions_num bmotinsertions_num bmotinsertionsf_num bmotdeletions_num bmotdeletionsf_num bnoninsertions_num bnondeletions_num) ;
-push @fnames, qw(dinsertions_num ddeletions_num dmotinsertions_num dmotinsertionsf_num dmotdeletions_num dmotdeletionsf_num dnoninsertions_num dnondeletions_num) ;
-push @fnames, qw(ninsertions_num ndeletions_num nmotinsertions_num nmotinsertionsf_num nmotdeletions_num nmotdeletionsf_num nnoninsertions_num nnondeletions_num) ;
-push @fnames, qw(substitutions_num bsubstitutions_num dsubstitutions_num nsubstitutions_num indels_num subs_num);
-
-my @fullnames = ();
-
-foreach my $lnode (@$lagestnodes){
-	my @pair = @$lnode;
-	my @nodemutarr = ();
-	for my $p (@pair){
-	#	print "p = $p\n";
-		$p =~ s/[\(\), ]+//g;
-		$p =~ s/H/hg18/g;
-		$p =~ s/C/panTro2/g;
-		$p =~ s/O/ponAbe2/g;
-		$p =~ s/R/rheMac2/g;
-		$p =~ s/M/calJac1/g;
-		foreach my $n (@fnames) {	push @fullnames, $p.".".$n;}
-	}
-}
-print SUMMARY "#",join("\t", @firsttitle, @fititle, @othertitle);
-
-print SUMMARY "\t",join("\t", @fullnames);
-#$title = $title."\t".join("\t", @fullnames);
-
-print SUMMARY "\t",join("\t", @fnames);
-#$title=  $title."\t".join("\t", @fnames);
-
-print SUMMARY "\t","tree","\t", "cleancase", "\n";
-#$title=  $title."\t"."tree"."\t"."cleancase". "\n";
-
-#print $title; #<STDIN>;
-
-#print "all_tags = @all_tags\n";
-
-for my $no (3 ... $#all_tags+1){
-#	print "no=$no\n"; #<STDIN>;
-	@tags = @all_tags[0 ... $no-1];
-	#print "tags = = @tags\n" if $printer == 1;
-	%template=();
-	my @nextcounter = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-	#next if scalar(@tags) < 4;
-	
-	#print "now doing tags = @tags, no = $no\n"; 
-	open (ORTH, "<$orth") or die "Cannot open orth file: $orth: $!";
-	
-#	print SUMMARY join "\t", qw (species chr start end branch motif microsat mutation position from to insertion deletion);
-	
-	
-	##################### T E M P O R A R Y #####################
-	my @finaltitle=();
-	my @singletitle = qw (species chr start end motif motifsize microsat strand microsatsize col10 col11 col12 col13);
-	my $endtitle = ();
-	foreach my $tag (@tags){
-		my @tempsingle = ();
-		
-		foreach my $single (@singletitle){
-			push @tempsingle, $tag.$single;
-		}
-		@finaltitle = (@finaltitle, @tempsingle);	
-	}
-
-#	print SUMMARY join("\t",@finaltitle),"\n";
-	
-	#############################################################
-	
-	#---------------------------------------------------------------------------
-	# GET THE TREE FROM TREE FILE
-	my $tree = ();
-	$tree = "((H, C), O)" if $no == 3;
-	$tree = "(((H, C), O), R)" if $no == 4;
-	$tree = "((((H, C), O), R), M)" if $no == 5;
-#	$tree=~s/;$//g;
-#	print "our tree = $tree\n";
-	#---------------------------------------------------------------------------
-	# LOADING HASH CONTAINING ALL POSSIBLE TREES:
-	$tree_decipherer = "/gpfs/home/ydk104/work/rhesus_microsat/codes/lib/tree_analysis_".join("",@tags).".txt";
-	load_allPossibleTrees($tree_decipherer, \%template, \%alternate);
-	
-	#---------------------------------------------------------------------------
-	# LOADING THE TREES TO REJECT FOR BIRTH ANALYSIS
-	%treesToReject=();
-	%treesToIgnore=();
-	load_treesToReject(@tags); 
-	load_treesToIgnore(@tags);
-	#---------------------------------------------------------------------------
-	# LOADING INPUT DATA INTO HASHES AND ARRAYS
-	
-	
-	#1 THIS IS THE POINT WHERE WE CAN FILTER OUT LARGE MICROSAT CLUSTERS 
-	#2 AS WELL AS MULTIPLE-ALIGNMENT-BLOCKS-SPANNING MICROSATS (KIND OF
-	#3 IMPLICIT IN THE FIRST PART OF THE SENTENCE ITSELF IN MOST CASES).
-	
-	my %orths=();
-	my $counterm = 0;
-	my $loaded = 0;
-	my %seen = ();
-	my @allowedchrs = ();
-#	print "no = $no\n"; #<STDIN>;
-
-	while (my $line = <ORTH>){
-		#print "line=$line\n";
-		$line =~ s/>hg18/>H/g;
-		$line =~ s/>panTro2/>C/g;
-		$line =~ s/>ponAbe2/>O/g;
-		$line =~ s/>rheMac2/>R/g;
-		$line =~ s/>calJac1/>M/g;
-		my @micros = split(/>/,$line); 									# LOADING ALL THE MICROSAT ENTRIES FROM THE CLUSTER INTO @micros
-		#print "micros=",printarr(@micros),"\n"; #<STDIN>;
-		shift @micros; 													# REMOVING THE FIRST, EMPTY ELEMENT OF THE ARRAY
-		$no_of_species = adjustCoordinates($micros[0]);
-		next if $no_of_species != $no;
-		$counterm++;
-		#------------------------------------------------
-		$nextcounter[0]++  if $line =~ /compound/;
-		next if $line =~ /compound/; 									# GETTING RID OF COMPOUND MICROSATS
-		#------------------------------------------------
-		#next if $line =~ /[A-Za-z]>[a-zA-Z]/;
-		#------------------------------------------------
-		chomp $line;
-		my $match_count = ($line =~ s/>/>/g); 							# COUNTING THE NUMBER OF MICROSAT ENTRIES IN THE CLUSTER
-		#print "number of species = $match_count\n";
-		my $stopper = 0;	
-		foreach my $mic (@micros){
-			my @local = split(/\t/,$mic);
-			if ($local[$typecord] =~ /\./ || exists($local[$no_of_interruptionscord+2])) {$stopper = 1; $nextcounter[1]++;
-			last; } 
-																		# REMOVING CLUSTERS WITH CRYPTIC (UNRESOLVABLY COMPLEX) MICROSAT ENTRIES IN THEM
-		}
-		next if $stopper ==1;
-		#------------------------------------------------
-		$nextcounter[2]++ if (scalar(@micros) >$no_of_species);
-	
-		next if (scalar(@micros) >$no_of_species); 						#1 REMOVING MICROSAT CLUSTERS WITH MORE NUMBER OF MICROSAT ENTRIES THAN THE NUMBER OF SPECIES IN THE DATASET.
-																		#2 THIS IS SO BECAUSE SUCH CLUSTERS IMPLY THAT IN AT LEAST ONE SPECIES, THERE IS MORE THAN ONE MICROSAT ENTRY
-																		#3 IN THE CLUSTER. THUS, HERE WE ARE GETTING RID OF MICROSATS CLUSTERS THAT INCLUDE MULTUPLE, NEIGHBORING
-																		#3 IN THE CLUSTER. THUS, HERE WE ARE GETTING RID OF MICROSAT CLUSTERS THAT INCLUDE MULTIPLE, NEIGHBORING
-																		#5 THIS 'NEIGHBORHOOD-RANGE' HAD BEEN DECIDED PREVIOUSLY IN OUR CODE multiSpecies_orthFinder4.pl
-		my $nexter = 0;
-		foreach my $tag (@tags){
-			my $tagcount = ($line =~ s/>$tag\t/>$tag\t/g);
-			if ($tagcount > 1) { $nexter =1; #print colored ['red'],"multiple entries per species : $tagcount of $tag\n" if $printer == 1; 
-				next; 
-			}
-		}
-		
-		if ($nexter == 1){
-			$nextcounter[3]++;
-			next;
-		}
-		#------------------------------------------------
-		foreach my $mic (@micros){										#1	REMOVING MICROSATELLITES WITH ANY 'N's IN THEM
-			my @local = split(/\t/,$mic);								
-			if ($local[$microsatcord] =~ /N/) {$stopper =1; 		$nextcounter[4]++;
-			last;}			
-		}
-		next if $stopper ==1;
-		#print "till here 1\n"; #<STDIN>;
-		#------------------------------------------------
-		my @micros_copy = @micros;
-		
-		my $tempmicro = shift(@micros_copy);							#1 CURRENTLY OBTAINING INFORMATION FOR THE FIRST 
-																		#2 MICROSAT IN THE CLUSTER.
-		my @tempfields = split(/\t/,$tempmicro);
-		my $prevtype = $tempfields[$typecord];
-		my $tempmotif = $tempfields[$motifcord];
-		
-		my $tempfirstmotif = ();
-		if (scalar(@tempfields) > $microsatcord + 2){
-			if ($tempfields[$no_of_interruptionscord] >= 1) {			#1	DISCARDING MICROSATS WITH MORE THAN ZERO INTERRUPTIONS
-																		#2	IN THE FIRST MICROSAT OF THE CLUSTER
-				$nexter =1; #print colored ['blue'],"one or more interruptions \n" if $printer == 1; 
-			}
-		}
-		if ($nexter == 1){
-			$nextcounter[6]++;
-			next;
-		}															#1	DONE OBTAINING INFORMATION REGARDING 
-																		#2	THE FIRST MICROSAT FROM THE CLUSTER
-		
-		if ($tempmotif =~ /^\[/){
-			$tempmotif =~ s/^\[//g;
-			$tempmotif =~ /([a-zA-Z]+)\].*/;
-			$tempfirstmotif = $1;										#1 OBTAINING THE FIRST MOTIF OF THE MICROSAT
-		}
-		else {$tempfirstmotif = $tempmotif;}
-		my $prevmotif = $tempfirstmotif;
-		
-		my $key = ();
-		if ($tempmicro =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
-			$key = join("\t",$1, $2,  $4, $5);
-		}
-		else{
-		#	print "could not form a key \n" if $printer == 1;
-			$nextcounter[7]++;
-			next;
-		}
-		#-----------------												#1	NOW, AFTER OBTAINING INFORMATION ABOUT
-																		#2	THE FIRST MICROSAT IN THE CLUSTER, THE
-																		#3	FOLLOWING LOOP GOES THROUGH THE OTHER MICROSATS
-																		#4	TO SEE IF THEY SHARE THE REQUIRED FEATURES (BELOW)
-	
-		foreach my $micro (@micros_copy){
-			my @fields = split(/\t/,$micro);
-			#-----------------	
-			if (scalar(@fields) > $microsatcord + 2){					#1	DISCARDING MICROSATS WITH MORE THAN ZERO INTERRUPTIONS
-				if ($fields[$no_of_interruptionscord] >= 1) {$nexter =1; #print colored ['blue'],"one or more interruptions \n" if $printer == 1; 
-				$nextcounter[6]++;
-				last; }											
-			}
-			#-----------------	
-			if (($prevtype ne "0") && ($prevtype ne $fields[$typecord])) { 
-				$nexter =1; #print colored ['yellow'],"microsat of different type \n" if $printer == 1; 
-				$nextcounter[8]++;
-				last; }														#1 DISCARDING MICROSAT CLUSTERS WHERE MICROSATS BELONG
-			#-----------------											#2 TO DIFFERENT TYPES (MONOS, DIS, TRIS ETC.)
-			$prevtype = $fields[$typecord];
-			
-			my $motif = $fields[$motifcord];
-			my $firstmotif = ();
-		
-			if ($motif =~ /^\[/){
-				$motif =~ s/^\[//g;
-				$motif =~ /([a-zA-Z]+)\].*/;
-				$firstmotif = $1;
-			}
-			else {$firstmotif = $motif;}
-			
-			my $motifpattern = $firstmotif.$firstmotif;
-			my $prevmotifpattern = $prevmotif.$prevmotif;
-			
-			if (($prevmotif ne "0")&&(($motifpattern !~ /$prevmotif/i)||($prevmotifpattern !~ /$firstmotif/i)) ) {  
-				$nexter =1; #print colored ['green'],"different motifs used \n$line\n" if $printer == 1;
-				$nextcounter[9]++;
-				last; 
-			}														#1	DISCARDING MICROSAT CLUSTERS WHERE MICROSATS BELONG
-																	#2	TO DIFFERENT MOTIFS
-			my $prevmotif = $firstmotif;
-			#-----------------			
-			
-			for my $t (0 ... $#tags){								#1	DISCARDING MICROSAT CLUSTERS WHERE MICROSAT ENTRIES BELONG
-																	#2	DIFFERENT ALIGNMENT BLOCKS
-				if ($micro =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
-					my $key2 = join("\t",$1, $2,  $4, $5);
-					if ($key2 ne $key){
-#						print "microsats belong to different alignment blocks altogether\n" if $printer == 1;
-						$nextcounter[10]++;
-						$nexter = 1; last;
-					}
-				}
-				else{
-				#	print "could not form a key \n" if $printer == 1;
-					$nexter = 1; last;
-				}
-			}
-			
-		}
-		#####################
-		if ($nexter == 1){
-			#	print "nexting\n" if $printer == 1; 
-				next;
-			}
-		else{
-#			print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n$key:\n$line\nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n"  if $printer == 1;
-			push (@{$orths{$key}},$line);	
-			$loaded++;
-			if ($line =~ /($focalspec)\s([a-zA-Z0-9]+)\s([0-9]+)\s([0-9]+)/ ) {
-#				print "$line\n"  if $printer == 1; #if $line =~ /Contig/;
-#				print "################ ################\n" if $printer == 1;
-				push @allowedchrs, $2 if !exists $allowedhash{$2};
-				$allowedhash{$2} = 1; 
-				my $key = join("\t",$1, $2, $3, $4);
-				#print "print the shit: $key\n" if $printer  == 1;
-				$seen{$key} = 1;
-			}
-			else { #print "Key could not be formed in SPUT for ($org) ($title) ([0-9]+) ([0-9]+)\n";
-			}
-		}
-	}
-	close ORTH;
-
-#	print "now studying where we lost microsatellites: @nextcounter\n";
-	for my $reason (0 ... $#nextcounter){
-#		print $removalReasons[$reason]."\t".$nextcounter[$reason],"\n";
-	}
-#	print "\ntotal number of keys formed = ", scalar(keys %orths), " = \n";
-#	print "done filtering .. counterm = $counterm and loaded = $loaded\n"; 
-
-	#----------------------------------------------------------------------------------------------------------------
-	# NOW GENERATING THE ALIGNMENT FILE WITH ONLY THE RELEVANT ALIGNMENTS STORED.
-	
-	while (1){
-		if (-e $megamatchlck){
-#			print "waiting to write into $megamatchlck\n";
-			sleep 10;
-		}
-		else{
-			open (MEGAMLCK, ">$megamatchlck") or die "Cannot open megamatchlck file $megamatchlck: $!";	
-			open (MEGAM, ">$megamatch") or die "Cannot open megamatch file $megamatch: $!";
-			last;
-		}
-	}
-	
-	foreach my $seqfile (@filterseqfiles){
-		my $fullpath = $seqfile;
-		
-#		print "opening file: $fullpath\n"; 
-		open (MATCH, "<$fullpath") or die "Cannot open MATCH file $fullpath: $!";
-		my $matchlines = 0;
-
-		while (my $line = <MATCH>)	{
-			if ($line =~ /($focalspec)\s([a-zA-Z0-9]+)\s([0-9]+)\s([0-9]+)/ ) {
-				my $key = join("\t",$1, $2, $3, $4);
-				if (exists $seen{$key}){
-					while (1){
-						$matchlines++;
-						print MEGAM $line;
-						$line = <MATCH>;
-						print MEGAM "\n" if $line !~/[0-9a-zA-Z]/;
-						last if $line !~/[0-9a-zA-Z]/;
-					}
-				}
-			}
-		}
-#		print "matchlines = $matchlines\n";
-		close MATCH;
-	}
-	close MEGAMLCK;
-	
-	unlink $megamatchlck;
-	close MEGAM;
-	undef %seen;
-	#----------------------------------------------------------------------------------------------------------------
-
-	#---------------------------------------------------------------------------
-	# NOW, AFTER FILTERING MANY MICROSATS, AND LOADING THE FILTERED ONES INTO
-	# THE HASH %orths , WE GO THROUGH THE ALIGNMENT FILE, AND STUDY THE 
-	# FLANKING SEQUENCES OF ALL THESE MICROSATS, TO FILTER THEM FURTHER
-	#$printer = 1;
-	
-	my $microreadcounter=0;
-	my $contigsentered=0;
-	my $contignotrightcounter=0;
-	my $keynotformedcounter=0;
-	my $keynotfoundcounter= 0;
-	my $dotcounter = 0;
-
-	open (BO, "<$megamatch") or die "Cannot open alignment file: $megamatch: $!";
-	
-	while (my $line = <BO>){
-#		print "." if $dotcounter % 100 ==0;
-#		print "\n" if $dotcounter % 5000 ==0;
-#		print "dotcounter = $dotcounter\n " if $printer == 1;
-		next if $line !~ /^[0-9]+/;
-		$dotcounter++;
-#		print colored ['green'], "~" x 60, "\n" if $printer == 1;
-#		print colored ['green'], $line;# if $printer == 1;
-		chomp $line;	
-		my @fields2 = split(/\t/,$line);
-		my $key2 = ();
-		my $alignment_no = ();										#1 TEMPORARY
-		if ($line =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
-			$key2 = join("\t",$1, $2,  $4, $5);
-			$alignment_no=$1;
-		}
-		else {print "seq line $line incompatible\n"; $keynotformedcounter++; next;}
-		
-		$no_of_species = adjustCoordinates($line);
-		$contignotrightcounter++  if $no_of_species != $no;
-	#	print "contignotrightcounter=$contignotrightcounter\n";
-	#	print "no_of_species=$no_of_species\n";
-	#	print "no=$no\n";
-		
-		next if $no_of_species != $no;
-		
-	#	print "key = $key2\n" if $printer == 1;
-		my @clusters = ();											#1 EXTRACTING MICROSATS CORRESPONDING TO THIS 
-																	#2 ALIGNMENT BLOCK
-		if (exists($orths{$key2})){
-			@clusters = @{$orths{$key2}};
-			$contigsentered++;
-			delete $orths{$key2};
-		}
-		else{
-			#print "orth does not exist\n";
-			$keynotfoundcounter++;
-			next;
-		}
-		
-		my %sequences=();											#1 WILL STORE SEQUENCES IN THE CURRENT ALIGNMENT BLOCK
-		my $humseq = ();
-		foreach my $tag (@tags){									#1 READING THE ALIGNMENT FILE AND CAPTURING SEQUENCES
-			my $seq = <BO>;											#2 OF ALL SPECIES.
-			chomp $seq;
-			$sequences{$tag} =  " ".$seq;
-			#print "sequences = $sequences{$tag}\n" if $printer == 1;
-			$humseq = $seq if $tag =~ /H/;
-		}
-		
-		
-		foreach my $cluster (@clusters){							#1 NOW, GOING THROUGH THE CLUSTER OF MICROSATS
-			#print "x" x 60, "\n" if $printer == 1;
-			#print colored ['red'],"cluster = $cluster\n";
-			$largesttree =~ s/hg18/H/g;
-			$largesttree =~ s/panTro2/C/g;
-			$largesttree =~ s/ponAbe2/O/g;
-			$largesttree =~ s/rheMac2/R/g;
-			$largesttree =~ s/calJac1/M/g;
-
-			$microreadcounter++;
-			my @micros = split(/>/,$cluster);
-			shift @micros;
-	
-			my $edge_microsat=0;									#1 THIS WILL HAVE VALUE "1" IF MICROSAT IS FOUND
-																	#2 TO BE TOO CLOSE TO THE EDGES OF ALIGNMENT BLOCK
-			
-			my @starts= ();	my %start_hash=();						#1 STORES THE START AND END COORDINATES OF MICROSATELLITES
-			my @ends = ();	my %end_hash=();						#2 SO THAT LATER, WE WILL BE ABLE TO FIND THE EXTREME 
-																	#3 COORDINATE VALUES OF THE ORTHOLOGOUS MICROSATELLITES.
-			
-			my %microhash=();
-			my %microsathash=();
-			my %nonmicrosathash=();
-			my $motif=();											#1 BASIC MOTIF OF THE MICROSATELLITE.. THERE'S ONLY 1
-			#print "tags=@tags\n";
-			for my $i (0 ... $#tags){								#1	FINDING THE MICROSAT, AND THE ALIGNMENT SEQUENCE
-																	#2	CORRESPONDING TO THE PARTICULAR SPECIES (AS PER 
-																	#3	THE VARIABLE $TAG;
-				my $tag = $tags[$i];
-			#		print $seq;
-				my $locus="NULL";										#1	THIS WILL STORE THE MICROSAT OF THIS SPECIES.
-																		#2	IF THERE IS NO MICROSAT, IT WILL REMAIN "NULL"
-					
-				foreach my $micro (@micros){	
-				#	print "micro=$micro, tag=$tag\n";
-					if ($micro =~ /^$tag/){								#1	MICROSAT OF THIS SPECIES FOUND..
-						$locus  = $micro;
-						my @fields = split(/\t/,$micro);
-						$motif = $fields[$motifcord];
-						$microsathash{$tag}=$fields[$microsatcord];
-				#		print "fields=@fields, and startcord=$startcord = $fields[$startcord]\n";
-						push(@starts, $fields[$startcord]);
-						push(@ends, $fields[$endcord]);
-						$start_hash{$tag}=$fields[$startcord];
-						$end_hash{$tag}=$fields[$endcord];
-						last;
-					}
-					else{$microsathash{$tag}="NULL"}
-				}
-				$microhash{$tag}=$locus;
-		
-			}	
-			
-			
-			
-			my $extreme_start  = smallest_number(@starts);		#1 THESE TWO ARE THE EXTREME COORDINATES OF THE 
-			my $extreme_end  = largest_number(@ends);			#2 MICROSAT CLUSTER ACROSS ALL THE SPECIES IN
-																#3 WHOM IT IS FOUND TO BE ORTHOLOGOUS.
-			
-			#print "starts=@starts... ends=@ends\n";
-				
-			my %up_flanks = ();									#1	CONTAINS UPSTREAM FLANKING REGIONS FOR EACH SPECIES
-			my %down_flanks = ();								#1	CONTAINS DOWNSTREAM FLANKING REGIONS FOR EACH SPECIES
-			
-			my %up_largeflanks = ();
-			my %down_largeflanks = ();
-			
-			my %locusandflanks = ();
-			my %locusandlargeflanks = ();			
-			
-			my %up_internal_flanks=();							#1	CONTAINS SEQUENCE BETWEEN THE $extreme_start and the 
-																#2	ACTUAL START OF MICROSATELLITE IN THE SPECIES
-			my %down_internal_flanks=();						#1	CONTAINS SEQUENCE BETWEEN THE $extreme_end and the 
-																#2	ACTUAL end OF MICROSATELLITE IN THE SPECIES
-			
-			my %alignment=();									#1 CONTAINS ACTUAL ALIGNMENT SEQUENCE BETWEEN THE TWO
-																#2 EXTREME VALUES.
-			
-			my %microsatstarts=();										#1 WITHIN EACH ALIGNMENT, IF THERE EXISTS A MICROSATELLITE
-																#2 THIS HASH CONTAINS THE START SITE OF THE MICROSATELLITE
-																#3 WIHIN THE ALIGNMENT
-			next if !defined $extreme_start;
-			next if !defined $extreme_end;
-			next if $extreme_start > length($sequences{$tags[0]});
-			next if $extreme_start < 0;
-			next if $extreme_end > length($sequences{$tags[0]});
-			
-			for my $i (0 ... $#tags){							#1 NOW THAT WE HAVE GATHERED INFORMATION REGARDING
-																#2 SEQUENCE ALIGNMENT AND MICROSATELLITE COORDINATES
-																#3 AS WELL AS THE EXTREME COORDINATES OF THE 
-																#4 MICROSAT CLUSTER, WE WILL PROCEED TO EXTRACT THE
-																#5 FLANKING SEQUENCE OF ALL ORGS, AND STUDY IT IN 
-																#6 MORE DETAIL.
-				my $tag = $tags[$i];		
-			#	print "tag=$tag.. seqlength = ",length($sequences{$tag})," extreme_start=$extreme_start and extreme_end=$extreme_end\n";
-				my $upstream_gaps = (substr($sequences{$tag}, 0, $extreme_start) =~ s/\-/-/g);		#1	NOW MEASURING THE NUMBER OF GAPS IN THE UPSTREAM 
-																										#2	AND DOWNSTREAM SEQUENCES OF THE MICROSATs IN THIS
-																										#3	CLUSTER.
-	
-				my $downstream_gaps = (substr($sequences{$tag}, $extreme_end) =~ s/\-/-/g);
-				if (($extreme_start - $upstream_gaps )< $EDGE_DISTANCE || (length($sequences{$tag}) - $extreme_end - $downstream_gaps) <  $EDGE_DISTANCE){
-					$edge_microsat=1;
-					
-					last;
-				}
-				else{
-					$up_flanks{$tag} = substr($sequences{$tag}, $extreme_start - $FLANK_SUPPORT, $FLANK_SUPPORT);
-					$down_flanks{$tag} = substr($sequences{$tag}, $extreme_end+1, $FLANK_SUPPORT);
-
-					$up_largeflanks{$tag} = substr($sequences{$tag}, $extreme_start - $COMPLEXITY_SUPPORT, $COMPLEXITY_SUPPORT);
-					$down_largeflanks{$tag} = substr($sequences{$tag}, $extreme_end+1, $COMPLEXITY_SUPPORT);
-
-
-					$alignment{$tag} = substr($sequences{$tag}, $extreme_start, $extreme_end-$extreme_start+1);
-					$locusandflanks{$tag} = $up_flanks{$tag}."[".$alignment{$tag}."]".$down_flanks{$tag};
-					$locusandlargeflanks{$tag} = $up_largeflanks{$tag}."[".$alignment{$tag}."]".$down_largeflanks{$tag};
-					
-					if ($microhash{$tag} ne "NULL"){
-						$up_internal_flanks{$tag} = substr($sequences{$tag}, $extreme_start , $start_hash{$tag}-$extreme_start);
-						$down_internal_flanks{$tag} = substr($sequences{$tag}, $end_hash{$tag} , $extreme_end-$end_hash{$tag});
-						$microsatstarts{$tag}=$start_hash{$tag}-$extreme_start;
-#						print "tag = $tag, internal flanks = $up_internal_flanks{$tag} and $down_internal_flanks{$tag} and start = $microsatstarts{$tag}\n" if $printer == 1;
-					}
-					else{
-						$nonmicrosathash{$tag}=substr($sequences{$tag}, $extreme_start, $extreme_end-$extreme_start+1);
-					
-					}
-			#		print "up flank for species $tag = $up_flanks{$tag} \ndown flank for species $tag = $down_flanks{$tag} \n" if $printer == 1;
-										
-				}
-	
-			}
-			$nextcounter[11]++  if $edge_microsat==1;
-			next if $edge_microsat==1;
-			
-			
-			my $low_complexity = 0; 								#1 VALUE WILL BE 1 IF ANY OF THE FLANKING REGIONS
-																	#2 IS FOUND TO BE OF LOW COMPLEXITY, BY USING THE
-																	#3 FUNCTION sub test_complexity
-			
-			
-			for my $i (0 ... $#tags){
-#				print "i = $tags[$i]\n" if $printer == 1;
-				if (test_complexity($up_largeflanks{$tags[$i]}, $COMPLEXITY_SUPPORT) eq "LOW" || test_complexity($down_largeflanks{$tags[$i]}, $COMPLEXITY_SUPPORT) eq "LOW"){
-#					print "i = $i, low complexity regions: $up_largeflanks{$tags[$i]}: ",test_complexity($up_largeflanks{$tags[$i]}, $COMPLEXITY_SUPPORT), "  and $down_largeflanks{$tags[$i]} = ",test_complexity($down_largeflanks{$tags[$i]}, $COMPLEXITY_SUPPORT),"\n" if $printer == 1;
-					$low_complexity =1; last;
-				}
-			}
-			
-			$nextcounter[12]++  if $low_complexity==1;
-			next if $low_complexity == 1;
-			
-			
-			my $sequence_dissimilarity = 0;										#1 THIS VALUE WILL BE 1 IF THE SEQUENCE SIMILARITY
-																	#2 BETWEEN ANY OF THE SPECIES AGAINST THE HUMAN
-																	#3 FLANKING SEQUENCES IS BELOW A CERTAIN THRESHOLD
-																	#4 AS DESCRIBED IN FUNCTION sub sequence_similarity
-			my %donepair = ();
-			for my $i (0 ... $#tags){
-			#	print "i = $tags[$i]\n" if $printer == 1;
-#				next if $i == 0;
-			#	print colored ['magenta'],"THIS IS UP\n" if $printer == 1;
-
-				for my $b (0 ... $#tags){
-					next if $b == $i;
-					my $pair = ();
-					$pair = $i."_".$b if $i < $b;
-					$pair = $b."_".$i if $b < $i;
-					next if exists $donepair{$pair};
-					my ($up_similarity,$upnucdiffs, $upindeldiffs) = sequence_similarity($up_flanks{$tags[$i]}, $up_flanks{$tags[$b]}, $SIMILARITY_THRESH, $info);
-					my ($down_similarity,$downnucdiffs, $downindeldiffs) = sequence_similarity($down_flanks{$tags[$i]}, $down_flanks{$tags[$b]}, $SIMILARITY_THRESH, $info);				
-					$donepair{$pair} = $up_similarity."_".$down_similarity;
-					
-#					print RUN "$up_similarity	$upnucdiffs	$upindeldiffs	$down_similarity	$downnucdiffs	$downindeldiffs\n";
-					
-					if ( $up_similarity < $SIMILARITY_THRESH || $down_similarity < $SIMILARITY_THRESH){
-						$sequence_dissimilarity =1; 
-						last;
-					}
-				}
-			}
-			$nextcounter[13]++  if $sequence_dissimilarity==1;
-
-			next if $sequence_dissimilarity == 1;
-			my ($simplified_microsat, $Hchrom, $Hstart, $Hend, $locusmotif, $locusmotifsize) = summarize_microsat($cluster, $humseq);
-		#	print "simplified_microsat=$simplified_microsat\n"; <STDIN>;
-			my ($tree_analysis, $alternative_trees, $conformation) = treeStudy($simplified_microsat);
-			
-			if (exists $treesToReject{$tree_analysis}){
-				$nextcounter[14]++;
-				next;
-			}
-
-# 			my $adjuster=();
-# 			if ($no_of_species == 4){
-# 				my @sields = split(/\t/,$simplified_microsat);
-# 				my $somend = pop(@sields);
-# 				my $somestart = pop(@sields);
-# 				my $somechr = pop(@sields);
-# 				$adjuster = "NA\t" x 13 ;
-# 				$simplified_microsat = join ("\t", @sields, $adjuster).$somechr."\t".$somestart."\t".$somend;
-# 			}
-# 			if ($no_of_species == 3){
-# 				my @sields = split(/\t/,$simplified_microsat);
-# 				my $somend = pop(@sields);
-# 				my $somestart = pop(@sields);
-# 				my $somechr = pop(@sields);
-# 				$adjuster = "NA\t" x 26 ;
-# 				$simplified_microsat = join ("\t", @sields, $adjuster).$somechr."\t".$somestart."\t".$somend;
-# 			}
-# 			
-			$registeredTrees{$tree_analysis} = 1 if !exists $registeredTrees{$tree_analysis};
-			$registeredTrees{$tree_analysis}++ if exists $registeredTrees{$tree_analysis};
-				
-			if (exists $treesToIgnore{$tree_analysis}){
-				my @appendarr = ();
-
-				print SUMMARY $Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t", $thresharr[$locusmotifsize], "\t", $simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t";
-				#print "SUMMARY ",$Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t", $thresharr[$locusmotifsize], "\t", $simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t";
-#				print SELECT $Hchrom,"\t",$Hstart,"\t",$Hend,"\t","NOEVENT", "\t\t", $cluster,"\n";
-				
-				foreach my $lnode (@$lagestnodes){
-					my @pair = @$lnode;
-					my @nodemutarr = ();
-					for my $p (@pair){
-						my @mutinfoarray1 = ();
-						for (1 ... 38){
-							push (@mutinfoarray1, "NA")
-						}
-						print SUMMARY join ("\t", @mutinfoarray1[0...($#mutinfoarray1)] ),"\t"; 			
-					}
-	
-				}
-				for (1 ... 38){
-					push (@appendarr, "NA")
-				}
-				print SUMMARY join ("\t", @appendarr,"NULL", "NULL"),"\n"; 
-		#		print "SUMMARY ",join ("\t", @appendarr,"NULL", "NULL"),"\n"; #<STDIN>;
-				next;
-			}
-			
-			my ($mutations_array, $nodes, $branches_hash, $alivehash, $primaryalignment) = peel_onion($tree, \%sequences, \%alignment, \@tags, \%microsathash, \%nonmicrosathash, $motif, $tree_analysis, $thresholdhash{length($motif)}, \%microsatstarts);
-			
-			if ($mutations_array eq "NULL"){
-				my @appendarr = ();
-
-				print SUMMARY $Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t", $thresharr[$locusmotifsize],"\t",$simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t";
-		#		print "SUMMARY ", $Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t", $thresharr[$locusmotifsize],"\t",$simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t"; 
-#				print SELECT $Hchrom,"\t",$Hstart,"\t",$Hend,"\t","EVENT", "\t\t", $cluster,"\n";
-
-				foreach my $lnode (@$lagestnodes){
-					my @pair = @$lnode;
-					my @nodemutarr = ();
-					for my $p (@pair){
-						my @mutinfoarray1 = ();
-						for (1 ... 38){
-							push (@mutinfoarray1, "NA")
-						}
-						print SUMMARY join ("\t", @mutinfoarray1[0...($#mutinfoarray1)] ),"\t"; 			
-		#				print  join ("\t", "SUMMARY", @mutinfoarray1[0...($#mutinfoarray1)] ),"\t"; 			
-					}
-	
-				}
-				for (1 ... 38){
-					push (@appendarr, "NA")
-				}
-				print SUMMARY join ("\t", @appendarr,"NULL", "NULL"),"\n"; 
-			#	print  join ("\t","SUMMARY", @appendarr,"NULL", "NULL"),"\n"; #<STDIN>;
-				next;
-			}
-			
-			
-#			print "sent: \n" if $printer == 1;
-#			print "nodes = @$nodes,  branches array:\n" if $mutations_array ne "NULL" &&  $printer == 1;
-
-			my ($newstatus, $newmutations_array, $newnodes, $newbranches_hash, $newalivehash, $finalalignment) = fillAlignmentGaps($tree, \%sequences, \%alignment, \@tags, \%microsathash, \%nonmicrosathash, $motif, $tree_analysis, $thresholdhash{length($motif)}, \%microsatstarts);
-#			print "newmutations_array returned = \n",join("\n",@$newmutations_array),"\n" if $newmutations_array ne "NULL" &&  $printer == 1;
-			my @finalmutations_array= ();
-			@finalmutations_array = selectMutationArray($mutations_array, $newmutations_array, \@tags, $alivehash, \%alignment, $motif) if $newmutations_array ne "NULL";
-			@finalmutations_array = selectMutationArray($mutations_array, $mutations_array, \@tags, $alivehash, \%alignment, $motif) if $newmutations_array eq "NULL";
-
-			my ($besttree, $treescore) = selectBetterTree($tree_analysis, $alternate{$tree_analysis}, \@finalmutations_array);
-			my $cleancase = "UNCLEAN";
-			
-			$cleancase = checkCleanCase($besttree, $finalalignment) if $treescore > 0 && $finalalignment ne "NULL" && $finalalignment =~ /\!/;
-			$cleancase = checkCleanCase($besttree, $primaryalignment) if $treescore > 0 && $finalalignment eq "NULL" && $primaryalignment =~ /\!/ && $primaryalignment ne "NULL";
-			$cleancase = "CLEAN" if $finalalignment eq "NULL" && $primaryalignment !~ /\!/ && $primaryalignment ne "NULL";
-			$cleancase = "CLEAN" if $finalalignment ne "NULL" && $finalalignment !~ /\!/ ;
-			$besttree = "NULL" if $treescore <= 0;
-			print SUMMARY $Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t", $thresharr[$locusmotifsize],"\t",$simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t";
-		#	print "SUMMARY ", $Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t", $thresharr[$locusmotifsize],"\t",$simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t";
-		
-#			print SELECT $Hchrom,"\t",$Hstart,"\t",$Hend,"\t","EVENT", "\t\t", $cluster,"\n";
-			
-			my @mutinfoarray =();
-			
-			foreach my $lnode (@$lagestnodes){
-				my @pair = @$lnode;
-				my $joint = "(".join(", ",@pair).")";
-				my @nodemutarr = ();
-
-				for my $p (@pair){
-						foreach my $mut (@finalmutations_array){
-						$mut =~ /node=([A-Z, \(\)]+)/;
-						push @nodemutarr, $mut if $p eq $1;
-					}
-#					print "from pair @pair, p=$p\n";
-					@mutinfoarray = summarizeMutations(\@nodemutarr, $besttree);
-					print SUMMARY join ("\t", @mutinfoarray[0...($#mutinfoarray-1)] ),"\t"; 			
-		#			print "SUMMARY ",join ("\t", @mutinfoarray[0...($#mutinfoarray-1)] ),"\t"; 			
-				}
-
-			}
-			
-			@mutinfoarray = summarizeMutations(\@finalmutations_array, $besttree);
-			print SUMMARY join ("\t", @mutinfoarray ),"\t"; 			
-			print SUMMARY  $cleancase, "\n";
-		#	print "SUMMARY ",join ("\t", @mutinfoarray,$cleancase ),"\n"; #<STDIN>; 			
-		#	print "summarized\n"; <STDIN>;
-	
-			my %indelcatch = ();
-			my %substcatch = ();
-			my %typecatch = ();
-			my %nodescatch = ();
-			my $mutconcat = join("\t", @finalmutations_array)."\n";
-			my %indelposcatch = ();
-			my %subsposcatch = ();
-			
-				foreach my $fmut ( @finalmutations_array){
-#					next if $fmut !~ /indeltype=[a-zA-Z]+/;
-					#print RUN $fmut, "\n";
-					$fmut =~ /node=([a-zA-Z, \(\)]+)/;
-					my $lnode = $1;
-					$nodescatch{$1}=1;
-					
-					if ($fmut =~ /type=substitution/){
-		#				print "fmut=$fmut\n";
-						$fmut =~ /from=([a-zA-Z\-]+)\tto=([a-zA-Z\-]+)/;
-						my $from=$1;
-		#				print "from=$from\n";
-						my $to=$2;
-		#				print "to=$to\n";	
-						push @{$substcatch{$lnode}} , ("from:".$from." to:".$to);
-						$fmut =~ /position=([0-9]+)/;
-						push @{$subsposcatch{$lnode}}, $1;
-					}
-					
-					if ($fmut =~ /insertion=[a-zA-Z\-]+/){
-						$fmut =~ /insertion=([a-zA-Z\-]+)/;
-						push @{$indelcatch{$lnode}} , $1;
-						$fmut =~ /indeltype=([a-zA-Z]+)/;
-						push @{$typecatch{$lnode}}, $1;
-						$fmut =~ /position=([0-9]+)/;
-						push @{$indelposcatch{$lnode}}, $1;
-					}
-					if ($fmut =~ /deletion=[a-zA-Z\-]+/){
-						$fmut =~ /deletion=([a-zA-Z\-]+)/;
-						push @{$indelcatch{$lnode}} , $1;
-						$fmut =~ /indeltype=([a-zA-Z]+)/;
-						push @{$typecatch{$lnode}}, $1;
-						$fmut =~ /position=([0-9]+)/;
-						push @{$indelposcatch{$lnode}}, $1;
-					}
-				}
-							
-		#	print  $simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t" if $printer == 1;
-		#	print join ("<\t>", @mutinfoarray),"\n" if $printer == 1; 
-		#	print "where mutinfoarray = @mutinfoarray\n"  if $printer == 1;
-		#	#print RUN ".";
-			
-		#	print colored ['red'], "-------------------------------------------------------------\n" if $printer == 1;
-		#	print colored ['red'], "-------------------------------------------------------------\n" if $printer == 1;
-
-		#	print colored ['red'],"finalmutations_array=\n" if $printer == 1;
-			foreach (@finalmutations_array) {
-#				print colored ['red'], "$_\n" if $_ =~ /type=substitution/ && $printer == 1  ;
-#				print colored ['yellow'], "$_\n" if $_ !~ /type=substitution/ && $printer == 1  ;
-				
-			}# if $line =~ /cal/;# && $line =~ /chr4/;
-			
-#			print colored ['red'], "-------------------------------------------------------------\n" if $printer == 1;
-#			print colored ['red'], "-------------------------------------------------------------\n" if $printer == 1;
-#			print "tree analysis = $tree_analysis\n" if $printer == 1;
-			
-		#	my $mutations = "@$mutations_array";
-
-
-			next;
-			for my $keys (@$nodes) {foreach my $key (@$keys){
-										#print "key = $key, => $branches_hash->{$key}\n";
-									} 
-								#	print "x" x 50, "\n";
-								}
-			my ($birth_steps, $death_steps) = decipher_history($mutations_array,join("",@tags),$nodes,$branches_hash,$tree_analysis,$conformation, $alivehash, $simplified_microsat);
-		}
-	}
-	close BO;
-#	print "now studying where we lost microsatellites:";
-#	print "x" x 60,"\n";
-	for my $reason (0 ... $#nextcounter){
-#		print $removalReasons[$reason]."\t".$nextcounter[$reason],"\n";
-	}
-#	print "x" x 60,"\n";
-#	print "In total we read $microreadcounter microsatellites after reading through $contigsentered contigs\n";
-#	print " we lost $keynotformedcounter contigs as they did not form the key, \n";
-#	print "$contignotrightcounter contigs as they were not of the right species configuration\n";  
-#	print "$keynotfoundcounter contigs as they did not contain the microsats\n";  
-#	print "... In total we went through a file that had $dotcounter contigs...\n";  
-#	print join ("\n","remaining orth keys = ", (keys %orths),"");
-
-
-#	print "now printing counted trees: \n";
-	if (scalar(keys %registeredTrees) > 0){
-		foreach my $keyb ( sort (keys %registeredTrees) ){
-#			print "$keyb : $registeredTrees{$keyb}\n";
-		}
-	}
-	
-	
-}	
-
-my @summarizarr = ("+C=+C +R.+C -HCOR,+C",
-"+H=+H +R.+H -HCOR,+H",
-"-C=-C -R.-C +HCOR,-C",
-"-H=-H -R.-H +HCOR,-H",
-"+HC=+HC",
-"-HC=-HC",
-"+O=+O -HCOR,+O",
-"-O=-O +HCOR,-O",
-"+HCO=+HCO",
-"-HCO=-HCO",
-"+R=+R +R.+C +R.+H",
-"-R=-R -R.-C -R.-H");
-
-foreach my $line (@summarizarr){
-	next if $line !~ /[A-Za-z0-9]/;
-#	print $line;
-	chomp $line;
-	my @fields = split(/=/,$line);
-#	print "title = $fields[0]\n";
-	my @parts=split(/ +/, $fields[1]);
-	my %partshash = ();
-	 foreach my $part (@parts){$partshash{$part}=1;}
-	my $count=0;	
-	foreach my $key ( sort keys %registeredTrees ){
-		next if !exists $partshash{$key};
-#		print "now adding $registeredTrees{$key} from $key\n";
-		$count+=$registeredTrees{$key};
-	}
-#	print "$fields[0] : $count\n";
-}
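-#	A worked example of the bookkeeping above (values hypothetical): for the line
-#	"+C=+C +R.+C -HCOR,+C", $fields[0] is the summary category "+C" and its printed
-#	count is $registeredTrees{"+C"} + $registeredTrees{"+R.+C"} + $registeredTrees{"-HCOR,+C"},
-#	i.e. presumably every tree call that implies a microsatellite birth on the chimp lineage.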
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-sub largest_number{
-	my($max) = shift(@_);
-    foreach my $temp (@_) {
-    	#print "finding the largest value so far: $max\n";
-    	if($temp > $max){
-        	$max = $temp;
-        }
-    }
-    return($max);
-}
-
-sub smallest_number{
-	my($min) = shift(@_);
-    foreach my $temp (@_) {
-    	#print "finding the smallest value so far: $min\n";
-    	if($temp < $min){
-        	$min = $temp;
-        }
-    }
-    return($min);
-}
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-sub baseml_parser{
-	my $outputfile = $_[0];
-	open(BOUT,"<$outputfile") or die "Cannot open output of upstream baseml $outputfile: $!";
-	my @info = ();
-	my @branchfields = ();
-	my @distancefields = ();
-	my @bout = <BOUT>;
-	#print colored ['red'], @bout ,"\n";
-	for my $b (0 ... $#bout){
-		my $line=$bout[$b];
-		#print  colored ['yellow'], "sentence = ",$line;
-		if ($line =~ /TREE/){
-			#NOTE: $bout[$b++] post-increments, so the first read below re-reads the TREE
-			#line itself; the branch labels are thus taken from the 2nd line after TREE
-			#and the branch lengths from the 3rd.
-			$line=$bout[$b++];
-			$line=$bout[$b++];
-			$line=$bout[$b++];
-			#print "FOUND",$line;
-			chomp $line;
-			$line =~ s/^\s+//g;
-			@branchfields = split(/\s+/,$line);
-			$line=$bout[$b++];
-			chomp $line;
-			$line =~ s/^\s+//g;
-			@distancefields = split(/\s+/,$line);
-			#print "LASTING..............\n";
-			last;
-		}
-	}
-	
-	close BOUT;
-#			print "branchfields = @branchfields and distancefields = @distancefields\n"  if $printer == 1; 
-	my %distance_hash=();
-	for my $d (0 ... $#branchfields){
-		$distance_hash{$branchfields[$d]} = $distancefields[$d];
-	}
-	
-	$info[0] = $distance_hash{"9..1"} + $distance_hash{"9..2"};
-	$info[1] = $distance_hash{"9..1"} + $distance_hash{"8..9"}+ $distance_hash{"8..3"};
-	$info[2] = $distance_hash{"9..1"} + $distance_hash{"8..9"}+$distance_hash{"7..8"}+$distance_hash{"7..4"};
-	$info[3] = $distance_hash{"9..1"} + $distance_hash{"8..9"}+$distance_hash{"7..8"}+$distance_hash{"6..7"}+$distance_hash{"6..5"};
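-	#The sums above read like path lengths off baseml's branch table: assuming baseml's
-	#usual node numbering for this 5-taxon tree, "9..1" is the branch from internal node 9
-	#to leaf 1, so $info[0] is the tree distance between leaves 1 and 2, $info[1] between
-	#1 and 3, $info[2] between 1 and 4, and $info[3] between 1 and 5.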
-	
-#	print "\nsending back: @info\n" if $printer == 1;
-	
-	return join("\t",@info);
-	
-}
-
-
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-sub test_complexity{
-	my $printer = 0;
-	my $sequence = $_[0];
-	my $COMPLEXITY_SUPPORT = $_[1];
-	my $complexity=int($COMPLEXITY_SUPPORT * (1/40));				#1	THIS IS AN ARBITRARY THRESHOLD SET FOR LOW COMPLEXITY.
-																	#2	THE INSPIRATION WAS WEB MILLER'S MAIL SENT ON
-																	#3	19 Apr 2008, WHERE HE CLASSED A REGION AS HIGH
-																	#4	COMPLEXITY IF 40 BP OF SEQUENCE HAS AT LEAST 3 OF
-																	#5	EACH NUCLEOTIDE. HENCE, I NORMALIZE THIS PARAMETER
-																	#6	FOR THE ACTUAL LENGTH OF $FLANK_SUPPORT SET BY
-																	#7	THE USER.
-																	#8	WEB MILLER SENT THE MAIL TO YDK104@PSU.EDU
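-																	#9	WORKED EXAMPLE (ILLUSTRATION ONLY): WITH
-																	#10	$COMPLEXITY_SUPPORT = 40, $complexity = int(40/40) = 1;
-																	#11	WITH $COMPLEXITY_SUPPORT = 120, int(120/40) = 3, I.E.
-																	#12	EACH OF A,T,G,C MUST APPEAR AT LEAST 3 TIMES FOR THE
-																	#13	SEQUENCE TO COUNT AS "HIGH" COMPLEXITY BELOW.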
-	
-	
-	
-	my $As = ($sequence=~ s/A/A/gi);
-	my $Ts = ($sequence=~ s/T/T/gi);
-	my $Gs = ($sequence=~ s/G/G/gi);
-	my $Cs = ($sequence=~ s/C/C/gi);
-	#print "seq = $sequence, As=$As, Ts=$Ts, Gs=$Gs, Cs=$Cs\n" if $printer == 1;
-	
-	my $ans = ();
-
-	return "HIGH" if $As >= $complexity && $Ts >= $complexity && $Cs >= $complexity && $Gs >= $complexity;
-
-	my @nts = ("A","T","G","C","-");
-	
-	my $lowcomplex = 0;
-	
-	foreach my $nt (@nts){
-		#mononucleotide run: >=10 copies of $nt in a row (alignment gaps allowed in between)
-		$lowcomplex =1 if $sequence =~ /(($nt\-*){10,})/i;
-#		print "caught with a mono of $nt : $1 in $sequence\n" if $sequence =~ /(($nt\-*){10,})/i;
-		#dinucleotide-type run: $nt at every other position, 10 or more times
-		$lowcomplex =1 if $sequence =~ /(($nt[A-Za-z]){10,})/i;
-		$lowcomplex =1 if $sequence =~ /(([A-Za-z]$nt){10,})/i;
-#		print "caught with a di with $nt : $2 in $sequence\n" if $sequence =~ /(($nt[A-Za-z]){10,})/i || $sequence =~ /(([A-Za-z]$nt){10,})/i;
-	}
-#	print "leaving for now.. $sequence\n" if $printer == 1 && $lowcomplex == 0;
-	#<STDIN>;
-	return "HIGH" if $lowcomplex == 0;
-	return "LOW" ;
-}
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-sub sequence_similarity{
-	my $printer = 0;
-	my @seq1 = split(/\s*/, $_[0]);
-	my @seq2 = split(/\s*/, $_[1]);
-	my $similarity_thresh = $_[2];
-	my $info = $_[3];
-#	print "input = @_\n" if $printer == 1;
-	my $seq1str = $_[0];
-	my $seq2str = $_[1];
-	$seq1str=~s/\-//g; 	$seq2str=~s/\-//g;
-	my $similarity=0;
-	
-	my $nucdiffs=0;
-	my $nucsims=0;
-	my $indeldiffs=0;
-	
-	for my $i (0...$#seq1){
-		$similarity++ if $seq1[$i] =~ /$seq2[$i]/i  ; #|| $seq1[$i] =~ /\-/i || $seq2[$i] =~ /\-/i ;
-		$nucsims++ if $seq1[$i] =~ /$seq2[$i]/i && ($seq1[$i] =~ /[a-zA-Z]/i && $seq2[$i] =~ /[a-zA-Z]/i);
-		$nucdiffs++ if $seq1[$i] !~ /$seq2[$i]/i && ($seq1[$i] =~ /[a-zA-Z]/i && $seq2[$i] =~ /[a-zA-Z]/i);
-		$indeldiffs++ if $seq1[$i] !~ /$seq2[$i]/i && ($seq1[$i] =~ /\-/i || $seq2[$i] =~ /\-/i);
-	}
-	my $sim = $similarity/length($_[0]);
-	return ( $sim, $nucdiffs, $indeldiffs ); #<=  $similarity_thresh;
-}
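-#	Usage note: the tuple returned is (fraction of alignment columns that match, count of
-#	base-vs-base mismatches, count of mismatch columns involving a gap). For example,
-#	sequence_similarity("ACG-T", "ACGAT", ...) gives (0.8, 0, 1): four of five columns
-#	match and the single difference is an indel column.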
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-
-sub load_treesToReject{
-	my @rejectlist = ();
-	my $alltags = join("",@_);
-	@rejectlist = qw (-HCOR +HCOR) if $alltags eq "HCORM";
-	@rejectlist = qw ( -HCO|+R +HCO|-R) if $alltags eq "HCOR";
-	@rejectlist = qw ( -HC|+O +HC|-O) if $alltags eq "HCO";
-
-	%treesToReject=();
-	$treesToReject{$_} = $_ foreach (@rejectlist);
-	#print "loaded to reject for $alltags; ", $treesToReject{$_},"\n" foreach (@rejectlist); #<STDIN>;
-}
-#--------------------------------------------------------------------------------------------------------
-sub load_treesToIgnore{
-	my @rejectlist = ();
-	my $alltags = join("",@_);
-	@rejectlist = qw (-HCOR +HCOR +HCORM -HCORM) if $alltags eq "HCORM";
-	@rejectlist = qw ( -HCO|+R +HCO|-R +HCOR -HCOR) if $alltags eq "HCOR";
-	@rejectlist = qw ( -HC|+O +HC|-O +HCO -HCO) if $alltags eq "HCO";
-
-	%treesToIgnore=();
-	$treesToIgnore{$_} = $_ foreach (@rejectlist);
-	#print "loaded ", $treesToIgnore{$_},"\n" foreach (@rejectlist);
-}
-#--------------------------------------------------------------------------------------------------------
-sub load_thresholds{
-	my @threshold_array=split(/[,_]/,$_[0]);
-	unshift @threshold_array, "0";
-	for my $size (1 ... 4){
-		$thresholdhash{$size}=$threshold_array[$size];
-	}
-}
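-#	Illustration (hypothetical argument): load_thresholds("9,12,15,16") yields
-#	$thresholdhash{1}=9, {2}=12, {3}=15, {4}=16 -- one threshold per motif size 1 to 4.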
-#--------------------------------------------------------------------------------------------------------
-sub load_allPossibleTrees{
-	#1 THIS FILE STORES ALL POSSIBLE SCENARIOS OF MICROSATELLITE
-	#2 BIRTH AND DEATH EVENTS ON A 5-PRIMATE TREE OF H,C,O,R,M
-	#3 IN THE FORM OF A TEXT FILE. THIS WILL BE USED AS A TEMPLATE
-	#4 TO COMPARE EACH MICROSATELLITE CLUSTER AGAINST, TO UNDERSTAND
-	#5 THE EVOLUTION OF EACH LOCUS. WE WILL THEN DISCARD SOME
-	#6 MICROSATS ACCORDING TO THEIR EVOLUTIONARY BEHAVIOUR ON
-	#7 THE TREE. MOST PROBABLY WE WILL REMOVE THOSE MICROSATS
-	#8 THAT ARE NOT SUFFICIENTLY INFORMATIVE, AS IN THE CASE OF
-	#9 AN OUTGROUP MICROSATELLITE BEING DIFFERENT FROM ALL OTHER
-	#10 SPECIES IN THE TREE.
-	my $tree_list = $_[0];	
-#	print "file to be loaded: $tree_list\n";
-
-	my @trarr = ();
-	@trarr = ("#H C O	CONCLUSION	ALTERNATE",
-"+ + +	+HCO	NA",
-"+ _ _	+H	NA",
-"_ + _	+C	NA",
-"_ _ +	-HC|+O	NA",
-"+ _ +	-C	+H",
-"_ + +	-H	+C",
-"+ + _	+HC|-O	NA",
-"_ _ _	-HCO	NA") if $tree_list =~ /_HCO\.txt/;
-	@trarr = ("#H C O R	CONCLUSION	ALTERNATE",
-"_ _ _ _	-HCOR	NA",
-"+ + + +	+HCOR	NA",
-"+ + + _	+HCO|-R	+H.+C.+O",
-"+ + _ _	+HC	+H.+C;-O",
-"+ _ _ _	+H	+HC,-C",
-"_ + _ _	+C	+HC,-H",
-"_ _ + _	+O	-HC|-H.-C",
-"_ _ + +	-HC	-H.-C",
-"+ _ _ +	+H|-C.-O	+HC,-C",
-"_ + _ +	+C	-H.-O",
-"_ + + _	-H	+C.+O",
-"_ _ _ +	-HCO|+R	NA",
-"+ _ + _	+H.+O|-C	NA",
-"_ + + +	-H	-HC,+C",
-"+ _ + +	-C	-HC,+H",
-"+ + _ +	-O	+HC") if $tree_list =~ /_HCOR\.txt/;
-
-		@trarr = ("#H C O R M	CONCLUSION	ALTERNATE",
-"+ + + + _	+HCOR	NA",
-"+ + + _ +	-R	+HCO;+HC.+O;+H.+C.+O",
-"+ + _ + +	-O	-HCO,+HC|-HCO,+HC;-HCO,(+H.+C)",
-"+ _ + + +	-C	-HC,+H;+HCO,(+H.+O)",
-"_ + + + +	-H	-HC,+C;-HCO,(+C.+O)",
-"_ _ _ _ +	-HCOR	NA",
-"_ _ _ + _	+R	-HC.-O;-H.-C.-O",
-"_ _ + _ _	+O	+HCO,-HC;+HCO,(-H.-C)",
-"_ + _ _ _	+C	+HC,-H;+HCO,(-H.-O)",
-"+ _ _ _ _	+H	+HC,-C;+HCO,(-C.-O)",
-"+ + + _ _	+HCO	+H.+C.+O",
-"+ + _ + _	-O	+R.+HC|-HCO,+HC;+H.+C.+R|-HCO,(+H.+C)",
-"+ _ + + _	-C	-HC,+H;+H.+O.+R|-HCO,(+H.+O)",
-"_ + + + _	-H	-HC,+C;+C.+O.+R|-HCO,(+C.+O)",
-"_ _ _ + +	-HCO	-HC.-O;-H.-C.-O",
-"_ _ + _ +	+O	+HCO,-HC;+HCO,(-H.-C)",
-"_ + _ _ +	+C	+HC,-H;+HCO,(-H.-O)",
-"+ _ _ _ +	+H	-HC,+H;+HCO,(-C.-O)",
-"+ + _ _ +	+HC	-R.-O|+HCO,-O|+H.+C;-HCO,+HC;-HCO,(+H.+C)",
-"+ _ + _ +	-R.-C|+HCO,-C|+H.+O	NA",
-"_ + + _ +	-R.-H|+HCO,-H|+C.+O	NA",
-"_ _ + + _	-HC	+R.+O|-HCO,+O|+HCO,-HC",
-"_ + _ + _	+R.+C|-HCO,+C|-HC,+C	+HCO,(-H.-O)",
-"+ _ _ + _	+R.+H|-C.-O	+HCO,(-C.-O)",
-"+ _ _ + +	-O.-C|-HCO,+H	+R.+H;-HCO,(+R.+H)",
-"_ + _ + +	-O.-H|-HCO,+C	+R.+C;-HCO,(+R.+C)",
-"_ + + _ _	+HCO,-H|+O.+C	NA",
-"+ _ + _ _	+HCO,-C|+O.+H	NA",
-"_ _ + + +	-HC	-H.-C|-HCO,+O",
-"+ + _ _ _	+HC	+H.+C|+HCO,-O|-HCO,+HC;-HCO,(+H.+C)",
-"+ + + + +	+HCORM	NA") if $tree_list =~ /_HCORM\.txt/;
-
-	
-	my $template_p = $_[1];
-	my $alternate_p = $_[2];
-																	#1 THIS IS THE HASH IN WHICH INFORMATION FROM THE ABOVE FILE
-																	#2 GETS STORED, USING THE LOOP BELOW. HERE, THE KEY
-																	#3 OF EACH ROW IS THE EVOLUTIONARY CONFIGURATION OF A LOCUS
-																	#4 ON THE PRIMATE TREE, BASED ON PRESENCE/ABSENCE OF A MICROSAT
-																	#5 AT THAT LOCUS, LIKE SAY "+ + + _ _" .. EACH COLUMN BELONGS
-																	#6 TO ONE SPECIES; HERE THE COLUMN NAMES ARE "H C O R M".
-																	#7 THE VALUE FOR EACH ENTRY IS THE MEANING OF THE ABOVE
-																	#8 CONFIGURATION (I.E., THE CONFIGURATION OF THE KEY). HERE, THE
-																	#9 VALUE WILL BE +HCO, SIGNIFYING A BIRTH IN THE HUMAN-CHIMP-ORANG
-																	#10 COMMON ANCESTOR. THIS HASH IS LOADED HERE TO BE USED
-																	#11 LATER BY THE SUBROUTINE sub treeStudy{}, WHICH STUDIES THE
-																	#12 EVOLUTIONARY CONFIGURATION OF EACH MICROSAT LOCUS, AS
-																	#13 MENTIONED ABOVE.
-	my @keys_array=();
-	foreach my $line (@trarr){
-		next if $line =~ /^#/;
-		chomp $line;
-		my @fields = split("\t", $line);
-		push @keys_array, $fields[0];
-#		print "loading: $fields[0]\n";
-		$template_p->{$fields[0]}[0] = $fields[1];
-		$template_p->{$fields[0]}[1] = 0;
-		$alternate_p->{$fields[1]} = $fields[2];
-		
-	}
-#	print "loaded the trees with keys: @keys_array\n";
-	return $template_p, \@keys_array, $alternate_p;
-}
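-#	Example lookup, taken directly from the HCORM table above: the presence/absence
-#	pattern "+ + + _ _" maps to the conclusion "+HCO" (a birth in the human-chimp-orang
-#	common ancestor), with "+H.+C.+O" stored as its alternate explanation.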
-
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-sub checkCleanCase{
-	my $printer = 0;
-	my $tree = $_[0];
-	my $finalalignment = $_[1];
-	
-	#print "IN checkCleanCase: @_\n";
-	#<STDIN>;
-	my @indivspecies = $tree =~ /[A-Z]/g;
-	$finalalignment =~ s/\./_/g;
-	my @captured = $finalalignment =~ /[A-Za-z, \(\):]+\![:A-Za-z, \(\)]/g;
-	
-	my $unclean = 0;
-	
-	foreach my $sp (@indivspecies){
-		foreach my $cap (@captured){
-			$cap =~ s/:[A-Za-z\-]+//g;
-			my @sps = $cap =~ /[A-Z]+/g;		
-			my $spsc = join("", @sps);
-#			print "checking whether imp species $sp is present in $cap i.e, in $spsc\n " if $printer == 1;
-			if ($spsc =~ /$sp/){
-#				print "found : $sp\n";
-				$unclean = 1; last;
-			}
-		}
-		last if $unclean == 1;
-	}
-	#<STDIN>;
-	return "CLEAN" if $unclean == 0;
-	return "UNCLEAN";
-}
-
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-#--------------------------------------------------------------------------------------------------------
-
-
-sub adjustCoordinates{
-	my $line = $_[0];
-	my $no_of_species = $line =~ s/(chr[0-9a-zA-Z]+)|(Contig[0-9a-zA-Z\._\-]+)/x/g;
-	$infocord = 2 + (4*$no_of_species) - 1;
-	$typecord = 2 + (4*$no_of_species) + 1 - 1;
-	$motifcord = 2 + (4*$no_of_species) + 2 - 1;
-	$gapcord = $motifcord+1;
-	$startcord = $gapcord+1;
-	$strandcord = $startcord+1;
-	$endcord = $strandcord + 1;
-	$microsatcord = $endcord + 1;
-	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
-	$interr_poscord = $microsatcord + 3;
-	$no_of_interruptionscord = $microsatcord + 4;
-	$interrcord = $microsatcord + 2;
-	#print "$line\n startcord = $startcord, and endcord = $endcord and no_of_species = $no_of_species\n" if $printer == 1;
-	return $no_of_species;
-}
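-#	Worked example of the offsets above: with no_of_species = 4, the 0-based columns of
-#	the tab-split line come out as infocord=17, typecord=18, motifcord=19, gapcord=20,
-#	startcord=21, strandcord=22, endcord=23, microsatcord=24, interrcord=26,
-#	interr_poscord=27, no_of_interruptionscord=28, and sequencepos = 2 + 5*4 = 22.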
-
-
-sub printhash{
-	my $alivehash = $_[0];
-	my @tags = @{$_[1]};
-#	print "print hash\n";
-	foreach my $tag (@tags){
-#		print "$tag=",$alivehash->{$tag},"\n" if exists $alivehash->{$tag};
-	}
-		
-	return "\n";
-}
-sub peel_onion{
-	my $printer = 0;
-#	print "received: @_\n" ; #<STDIN>;
-	my ($tree, $sequences, $alignment, $tagarray, $microsathash, $nonmicrosathash, $motif, $tree_analysis, $threshold, $microsatstarts) = @_;
-#	print "in peel onion.. tree = $tree \n" if $printer == 1;
-	my %sequence_hash=();
-
-
-#	for my $i (0 ... $#sequences){ $sequence_hash{$species[$i]}=$sequences->[$i]; }
-
-
-	my %node_sequences=();
-	
-	my %node_alignments = ();			#NEW, Nov 28 2008
-	my @tags=();
-	my @locus_sequences=();
-	my %alivehash=();
-	foreach my $tag (@$tagarray) { 
-		#print "adding: $tag\n";
-		push(@tags, $tag);
-		$node_sequences{$tag}=join ".",split(/\s*/,$microsathash->{$tag}) if $microsathash->{$tag} ne "NULL"; 
-		$alivehash{$tag}= $tag if $microsathash->{$tag} ne "NULL";
-		$node_sequences{$tag}=join ".",split(/\s*/,$nonmicrosathash->{$tag}) if $microsathash->{$tag} eq "NULL";
-		$node_alignments{$tag}=join ".",split(/\s*/,$alignment->{$tag}) ;
-		push @locus_sequences, $node_sequences{$tag};
-		#print "adding to node_seq: $tag = ",$node_alignments{$tag},"\n";
-	}
-
-	
-
-	my ($nodes_arr, $branches_hash) = get_nodes($tree);
-	my @nodes=@$nodes_arr;
-#	print "received nodes = " if $printer == 1; 
-#	foreach my $key (@nodes) {print "@$key "  if $printer == 1;}
-	
-#	print "\n" if $printer == 1;
-	
-	#POPULATE branches_hash WITH INFORMATION ABOUT LIVESTATUS
-	foreach my $keys (@nodes){
-		my @pair = @$keys;
-		my $joint = "(".join(", ",@pair).")";
-		my $copykey = join "", @pair;
-		$copykey =~ s/[\W ]+//g;
-#		print "for node: $keys, copykey = $copykey and joint = $joint\n" if $printer == 1;
-		my $livestatus = 1;
-		foreach my $copy (split(/\s*/,$copykey)){
-			$livestatus = 0 if !exists $alivehash{$copy};
-		}
-		$alivehash{$joint} = $joint if !exists $alivehash{$joint} && $livestatus == 1;
-#		print "alivehash = $alivehash{$joint}\n" if exists $alivehash{$joint} && $printer == 1;
-	}
-		
-	@nodes = reverse(@nodes); #1 THIS IS IN ORDER TO GO THROUGH THE TREE FROM LEAVES TO ROOT.
-
-	my @mutations_array=();
-
-	my $joint = ();
-	foreach my $node (@nodes){
-		my @pair = @$node; 
-#		print "now in the nodes for loop, pair = @pair\n and sequences=\n" if $printer == 1;
-		$joint = "(".join(", ",@pair).")"; 	
-		my @pair_sequences=();
-
-		foreach my $tag (@pair){
-#			print "$tag:  $node_alignments{$tag}\n" if $printer == 1;
-			print $node_alignments{$tag},"\n" if $printer == 1;
-			push @pair_sequences, $node_alignments{$tag};
-		}
-#		print "peel onion joint = $joint , pair_sequences=>@pair_sequences< , pair=>@pair<\n" if $printer == 1; 
-		
-		my ($compared, $substitutions_list) = base_by_base_simple($motif,\@pair_sequences, scalar(@pair_sequences), @pair, $joint);
-		$node_alignments{$joint}=$compared;
-		push(  @mutations_array,split(/:/,$substitutions_list));
-#		print "newly added to node_sequences: $node_alignments{$joint} and list of mutations =\n", join("\n",@mutations_array),"\n" if $printer == 1;
-	}
-	
-#	print "now sending for analyze_mutations: mutation_array=@mutations_array, nodes=@nodes, branches_hash=$branches_hash, alignment=$alignment, tags=@tags, alivehash=\%alivehash, node_sequences=\%node_sequences, microsatstarts=$microsatstarts, motif=$motif\n" if $printer == 1;
-	## <STDIN> if $printer == 1;
-	
-	my $analyzed_mutations = analyze_mutations(\@mutations_array, \@nodes, $branches_hash, $alignment, \@tags, \%alivehash, \%node_sequences, $microsatstarts, $motif);
-
-#	print "returning: ", $analyzed_mutations, \@nodes, $branches_hash,"\n" if scalar @mutations_array > 0 && $printer == 1;
-#	print "returning: NULL, NULL, NULL " if scalar @mutations_array == 0 && $printer == 1;
-#	print "final node alignment = $node_alignments{$joint}\n" if $printer == 1;
-	
-	# <STDIN> if $printer == 1;
-
-
-	return ($analyzed_mutations, \@nodes, $branches_hash, \%alivehash, $node_alignments{$joint}) if scalar @mutations_array > 0;
-	return ("NULL",\@nodes,$branches_hash, \%alivehash, "NULL") if scalar @mutations_array == 0;
-}
-
-#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
-#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
-
-#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
-#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
-
-sub get_nodes{
-	my $printer = 0;
-
-	my $tree=$_[0];
-	#$tree =~ s/ +//g;
-	$tree =~ s/\t+//g;
-	$tree=~s/;//g;
-	print "tree=$tree\n" if $printer == 1; 
-	my @nodes = ();
-	my @onions=($tree);
-	my %branches=();
-	foreach my $bite (@onions){
-		$bite=~ s/^\(|\)$//g;
-		chomp $bite;
-#		print "tree = $bite \n"; 
-#		<STDIN>;
-		$bite=~ /([ ,\(\)A-Z]+)\,\s*([ ,\(\)A-Z]+)/;
-		#$tree =~ /(\(\(\(H, C\), O\), R\))\, (M)/;
-		my @raw_nodes = ($1, $2);
-		print "raw nodes =  $1 and $2\n" if $printer == 1;
-		push(@nodes, [@raw_nodes]);
-		foreach my $node (@raw_nodes) {push (@onions, $node) if $node =~ /,/;}
-		foreach my $node (@raw_nodes) {$branches{$node}="(".$bite.")"; print "adding to branches: $node = ($bite)\n" if $printer == 1;}
-		print "onions = @onions\n" if $printer == 1;<STDIN> if $printer == 1;
-	}
-	$printer = 0;
-	return \@nodes, \%branches;
-}
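-#	Worked example: get_nodes("((H, C), O)") peels the tree worklist-style. The first
-#	pass splits "((H, C), O)" into the pair ("(H, C)", "O"); "(H, C)" still contains a
-#	comma, so it is pushed back onto @onions and split in turn into ("H", "C"). The sub
-#	returns nodes = ( ["(H, C)", "O"], ["H", "C"] ) and %branches mapping each child to
-#	its parent subtree, e.g. $branches{"H"} = "(H, C)" and $branches{"O"} = "((H, C), O)".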
-
-
-#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
-#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
-#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
-#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
-sub analyze_mutations{
-	my ($mutations_array, $nodes, $branches_hash, $alignment, $tags, $alivehash, $node_sequences, $microsatstarts, $motif) = @_;
-	my $locuslength = length($alignment->{$tags->[0]});
-	my $printer = 0;
-	
-	
-#	print " IN analyzed_mutations....\n" if $printer == 1; #  \n mutations array = @$mutations_array, \nAND locuslength = $locuslength\n" if $printer == 1;
-	my %mutation_hash=();
-	my %froms_megahash=();
-	my %tos_megahash=();
-	my %position_hash=();
-	my @solutions_array=();
-	foreach  my $mutation (@$mutations_array){
-#		print "loadin mutation: $mutation\n" if $printer == 1;
-		my %localhash= $mutation =~ /([\S ]+)=([\S ]+)/g;
-		$mutation_hash{$localhash{"position"}} = {%localhash};
-		push @{$position_hash{$localhash{"position"}}},$localhash{"node"};
-#		print "feeding position hash with $localhash{position}: $position_hash{$localhash{position}}[0]\n" if $printer == 1;
-		$froms_megahash{$localhash{"position"}}{$localhash{"node"}}=$localhash{"from"};
-		$tos_megahash{$localhash{"position"}}{$localhash{"node"}}=$localhash{"to"};
-#		print "just a trial: $mutation_hash{$localhash{position}}{position}\n" if $printer == 1;
-#		print "loadin in tos_megahash: $localhash{position} {$localhash{node} = $localhash{to}\n" if $printer == 1;
-#		print "loadin in from: $localhash{position} {$localhash{node} = $localhash{from}\n" if $printer == 1;
-	}
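-	#For reference: each element of @$mutations_array is a tab-joined key=value record of
-	#the same shape as the entries pushed onto @solutions_array below, e.g. (values made
-	#up for illustration) "node=H\ttype=substitution\tposition=12\tfrom=A\tto=G\tinsertion=\tdeletion=";
-	#%localhash above is simply that record parsed back into key/value pairs.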
-	
-#	print "now going through each position in loculength:\n" if $printer == 1;
-	## <STDIN> if $printer == 1;
-	
-	for my $pos (0 ... $locuslength-1){
-#		print "at position: $pos\n" if $printer == 1;
-
-		if (exists($mutation_hash{$pos})){
-			my @local_nodes=@{$position_hash{$pos}};
-#			print "found mutation: @{$position_hash{$pos}} :  @local_nodes\n" if $printer == 1;
-			
-			foreach my $local_node (@local_nodes){
-#				print "at local node: $local_node ... from state = $froms_megahash{$pos}{$local_node}\n" if $printer == 1;
-				my $open_insertion=();
-				my $open_deletion=();
-				my $open_to_substitution=();
-				my $open_from_substitution=();
-				if ($froms_megahash{$pos}{$local_node} eq "-"){
-				#	print "here exists a microsatellite from $local_node to $branches_hash->{$local_node}\n" if $printer == 1 &&  exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};;
-				#	print "for localnode $local_node, amd the realated branches_hash:$branches_hash->{$local_node},  nexting as exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}}\n" if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}} && $printer == 1;
-					#next if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
-					$open_insertion=$tos_megahash{$pos}{$local_node};
-					for my $posnext ($pos+1 ... $locuslength-1){
-#						print "in first if  .... studying posnext: $posnext\n" if $printer == 1;
-						last if !exists ($froms_megahash{$posnext}{$local_node});
-#						print "for posnext: $posnext, there exists $froms_megahash{$posnext}{$local_node}.. already, open_insertion = $open_insertion.. checking is $froms_megahash{$posnext}{$local_node} matters\n" if $printer == 1;
-						$open_insertion = $open_insertion.$tos_megahash{$posnext}{$local_node} if $froms_megahash{$posnext}{$local_node} eq "-";
-#						print "now open_insertion=$open_insertion\n" if $printer == 1;
-						delete $mutation_hash{$posnext} if $froms_megahash{$posnext}{$local_node} eq "-";
-					}					
-					print "1 Feeding in: ", join("\t", "node=$local_node","type=insertion" ,"position=$pos", "from=", "to=", "insertion=$open_insertion", "deletion="),"\n" if $printer == 1;
-					push (@solutions_array, join("\t", "node=$local_node","type=insertion" ,"position=$pos", "from=", "to=", "insertion=$open_insertion", "deletion="));
-				}
-				elsif ($tos_megahash{$pos}{$local_node} eq "-"){
-				#	print "here exists a microsatellite to $local_node from $branches_hash->{$local_node}\n" if $printer == 1 && exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};;
-				#	print "for localnode $local_node, amd the realated branches_hash:$branches_hash->{$local_node},  nexting as exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}}\n" if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
-					#next if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
-					$open_deletion=$froms_megahash{$pos}{$local_node};
-					for my $posnext ($pos+1 ... $locuslength-1){
-						print "in 1st elsif studying posnext: $posnext\n" if $printer == 1;
-						print "nexting as nextpos does not exist\n" if !exists ($tos_megahash{$posnext}{$local_node}) && $printer == 1;
-						last if !exists ($tos_megahash{$posnext}{$local_node});
-						print "for posnext: $posnext, there exists $tos_megahash{$posnext}{$local_node}\n" if $printer == 1;
-						$open_deletion = $open_deletion.$froms_megahash{$posnext}{$local_node} if $tos_megahash{$posnext}{$local_node} eq "-";
-						delete $mutation_hash{$posnext} if $tos_megahash{$posnext}{$local_node} eq "-";
-					}					
-					print "2 Feeding in:",  join("\t", "node=$local_node","type=deletion" ,"position=$pos", "from=", "to=", "insertion=", "deletion=$open_deletion"), "\n" if $printer == 1;
-					push (@solutions_array, join("\t", "node=$local_node","type=deletion" ,"position=$pos", "from=", "to=", "insertion=", "deletion=$open_deletion"));
-				}
-				elsif ($tos_megahash{$pos}{$local_node} ne "-"){
-				#	print "here exists a microsatellite from $local_node to $branches_hash->{$local_node}\n" if $printer == 1 && exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};;
-				#	print "for localnode $local_node, amd the realated branches_hash:$branches_hash->{$local_node},  nexting as exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}}\n" if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
-					#next if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
-				#	print "microsatstart = $microsatstarts->{$local_node}  \n" if exists $microsatstarts->{$local_node} && $pos < $microsatstarts->{$local_node} && $printer == 1;
-					next if exists $microsatstarts->{$local_node} && $pos < $microsatstarts->{$local_node};
-					$open_to_substitution=$tos_megahash{$pos}{$local_node};
-					$open_from_substitution=$froms_megahash{$pos}{$local_node};
-					print "open from substitution: $open_from_substitution \n" if $printer == 1;
-					for my $posnext ($pos+1 ... $locuslength-1){
-						#print "in last elsif studying posnext: $posnext\n";
-						last if !exists ($tos_megahash{$posnext}{$local_node});
-						print "for posnext: $posnext, there exists $tos_megahash{$posnext}{$local_node}\n" if $printer == 1;
-						$open_to_substitution = $open_to_substitution.$tos_megahash{$posnext}{$local_node} if $tos_megahash{$posnext}{$local_node} ne "-";
-						$open_from_substitution = $open_from_substitution.$froms_megahash{$posnext}{$local_node} if $tos_megahash{$posnext}{$local_node} ne "-";
-						delete $mutation_hash{$posnext} if $tos_megahash{$posnext}{$local_node} ne "-" && $froms_megahash{$posnext}{$local_node} ;
-					}	
-					print "open from substitution: $open_from_substitution \n" if $printer == 1;
-					
-					#IS THE STRETCH OF SUBSTITUTION MICROSATELLITE-LIKE?
-					my @motif_parts=split(/\s*/,$motif);
-					#GENERATING THE FLEXIBLE LEFT END
-					my $left_query=();
-					for my $k (1 ... $#motif_parts) {
-						$left_query= $motif_parts[$k]."|)";
-						$left_query="(".$left_query;
-					}
-					$left_query=$left_query."?";
-				
-					print "left_query = $left_query\n" if $printer == 1;
-					#GENERATING THE FLEXIBLE RIGHT END	
-					my $right_query=();
-					for my $k (0 ... ($#motif_parts-1)) {
-						$right_query= "(|".$motif_parts[$k];
-						$right_query=$right_query.")";
-					}
-					$right_query=$right_query."?";
-					print "right_query = $right_query\n" if $printer == 1;
-					print "Hence, searching for: ^$left_query($motif)+$right_query\$\n" if $printer == 1;
-					
-					my $motifcomb=$motif x 50;
-					print "motifcomb = $motifcomb\n" if $printer == 1;
-					if ( ($motifcomb =~/$open_to_substitution/i) && (length ($open_to_substitution) >= length($motif)) ){
-						print "sequence microsat-like\n" if $printer == 1;
-						my $all_microsat_like = 0;
-						print "3 feeding in: ", join("\t", "node=$local_node","type=deletion" ,"position=$pos", "from=", "to=", "insertion=", "deletion=$open_from_substitution"), "\n" if $printer == 1;
-						push (@solutions_array, join("\t", "node=$local_node","type=deletion" ,"position=$pos", "from=", "to=", "insertion=", "deletion=$open_from_substitution"));
-						print "4 feeding in: ", join("\t", "node=$local_node","type=insertion" ,"position=$pos", "from=", "to=", "insertion=$open_to_substitution", "deletion="), "\n" if $printer == 1;
-						push (@solutions_array, join("\t", "node=$local_node","type=insertion" ,"position=$pos", "from=", "to=", "insertion=$open_to_substitution", "deletion="));
-						
-					}
-					else{
-						print "5 feeding in: ", join("\t", "node=$local_node","type=substitution" ,"position=$pos", "from=$open_from_substitution", "to=$open_to_substitution", "insertion=", "deletion="), "\n" if $printer == 1;
-						push (@solutions_array, join("\t", "node=$local_node","type=substitution" ,"position=$pos", "from=$open_from_substitution", "to=$open_to_substitution", "insertion=", "deletion="));
-					}
-					#IS THE FROM-SEQUENCE MICROSATELLITE-LIKE?
-
-				}
-				#<STDIN> if $printer ==1;
-			}
-			#<STDIN> if $printer ==1;
-		}
-	}
-
-	print "\n", "#" x 50, "\n"  if $printer == 1;
-	foreach my $tag (@$tags){
-		print "$tag: $alignment->{$tag}\n" if $printer == 1;	
-	}
-	print "\n", "#" x 50, "\n" if $printer == 1;
-
-	print "returning SOLUTIONS ARRAY : \n",join("\n", @solutions_array),"\n" if $printer == 1; 
-	#print "end\n";
-	#<STDIN> if 
-	return \@solutions_array;
-}
-#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
-#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
-#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
-#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
-
-sub base_by_base_simple{
-	my $printer = 0;
-	my ($motif, $locus, $no, $pair0, $pair1, $joint) = @_;
-	my @seq_array=();
-	print "IN SUBROUTINE base_by_base_simple.. information received = @_\n" if $printer == 1;
-	print "pair0 = $pair0 and pair1  = $pair1\n" if $printer == 1;
-
-	my @example=split(/\./,$locus->[0]);
-	print "example, for length = @example\n" if $printer == 1;
-	for my $i (0...$no-1){push(@seq_array, [split(/\./,$locus->[$i])]); print "for $i, from $locus->[$i],  seq_array = >@{$seq_array[$i]}<\n" if $printer == 1;}
-
-	my @compared_sequence=();
-	my @substitutions_list;
-	for my $i (0...scalar(@example)-1){
-
-		#print "i = $i\n" if $printer == 1;
-		#print "comparing $seq_array[0][$i] and  $seq_array[1][$i] \n" ;#if $printer == 1;
-		if ($seq_array[0][$i] =~ /!/ && $seq_array[1][$i] !~ /!/){
-
-			my $resolution= resolve_base($seq_array[0][$i],$seq_array[1][$i], $pair1 ,"keep" );
-		#	print "ancestral = $resolution\n" if $printer == 1;
-
-			if ($resolution =~ /$seq_array[1][$i]/i && $resolution !~ /!/){
-				push @substitutions_list, add_mutation($i, $pair0,  $seq_array[0][$i], $resolution );
-			}
-			elsif ( $resolution !~ /!/){
-				push @substitutions_list, add_mutation($i, $pair1,  $seq_array[1][$i], $resolution);			
-			}
-			push @compared_sequence,$resolution;
-		}
-		elsif ($seq_array[0][$i] !~ /!/ && $seq_array[1][$i] =~ /!/){
-
-			my $resolution=  resolve_base($seq_array[1][$i],$seq_array[0][$i], $pair0, "invert" );		
-		#	print "ancestral = $resolution\n" if $printer == 1;
-
-			if ($resolution =~ /$seq_array[0][$i]/i && $resolution !~ /!/){
-				push @substitutions_list, add_mutation($i, $pair1, $seq_array[1][$i], $resolution);
-			}
-			elsif ( $resolution !~ /!/){
-				push @substitutions_list, add_mutation($i, $pair0,  $seq_array[0][$i], $resolution);			
-			}
-			push @compared_sequence,$resolution;
-		}
-		elsif($seq_array[0][$i] =~ /!/ && $seq_array[1][$i] =~ /!/){
-			push @compared_sequence, add_bases($seq_array[0][$i],$seq_array[1][$i], $pair0, $pair1, $joint );
-		}
-		else{
-			if($seq_array[0][$i] !~ /^$seq_array[1][$i]$/i){
-				push @compared_sequence, $pair0.":".$seq_array[0][$i]."!".$pair1.":".$seq_array[1][$i];
-			}
-			else{
-		#		print "perfect match\n" if $printer == 1;
-				push @compared_sequence, $seq_array[0][$i];
-			}
-		}		
-	}
-	print "returning: compared = @compared_sequence \nand substitutions list =\n", join("\n",@substitutions_list),"\n" if $printer == 1; 
-	return join(".",@compared_sequence), join(":", @substitutions_list) if scalar (@substitutions_list) > 0;
-	return join(".",@compared_sequence), "" if scalar (@substitutions_list) == 0;
-}
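-#	Notation used above: an unresolved alignment column is stored as a "!"-separated list
-#	of species:base claims, e.g. "H:A!C:G" means human shows A where chimp shows G.
-#	resolve_base (below) tries to collapse such a column against a sibling sequence, and
-#	add_mutation (defined elsewhere) records the substitution that the resolution implies.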
-
-
-sub resolve_base{
-	my $printer = 0;
-
-	print "IN SUBROUTINE resolve_base.. information received = @_\n" if $printer == 1;
-	my ($optional, $single, $singlesp, $arg) = @_;
-	my @options=split(/!/,$optional);
-	foreach my $option(@options) { 
-		$option=~s/[A-Z\(\) ,]+://g;
-		if ($option =~ /$single/i){
-			print "option = $option , returning single: $single\n" if $printer == 1;
-			return $single;
-		}
-	}
-	print "returning ",$optional."!".$singlesp.":".$single. "\n" if $arg eq "keep" && $printer == 1; 
-	print "returning ",$singlesp.":".$single."!".$optional. "\n" if $arg eq "invert" && $printer == 1; 
-	return $optional."!".$singlesp.":".$single if $arg eq "keep";
-	return $singlesp.":".$single."!".$optional if $arg eq "invert";
-
-}
-
-sub same_length{
-	my $printer = 0;
-	my @locus = @_;
-	my $temp = shift @locus;
-	$temp=~s/-|,//g;
-	foreach my $l (@locus){
-		$l=~s/-|,//g;
-		return 0 if length($l) != length($temp);
-		$temp = $l;
-	}
-	return 1;
-}
-sub treeStudy{
-	my $printer = 0;
-#	print "template DEFINED.. received: @_\n" if defined %template; 
-#	print "only received = @_" if !defined %template; 
-	my $stopper = 0;
-	if (!%template){	#'defined %hash' is deprecated; a plain truth test does the intended check
-		$stopper = 1;
-		%template=();
-		print "tree decipherer = $tree_decipherer\n" if $printer == 1;
-		my ( $template_ref, $keys_array)=load_allPossibleTrees($tree_decipherer, \%template);
-		print "return = $template_ref and @{$keys_array}\n" if $printer == 1;
-		foreach my $key (@$keys_array){
-			print "addding : $template_ref->{$key} for $key\n" if $printer == 1;
-			$template{$key} = $template_ref->{$key};
-		}
-	}
-	
-	for my $templet ( keys %template ) {
-	#	print "$templet => @{$template{$templet}}\n";
-	}
-	<STDIN> if !%template;
-
-	my $strict = 0;
-
-    my $H = 0;
-    my $Hchr = 1;
-    my $Hstart = 2;
-    my $Hend = 3;    
-	my $Hmotif = 4;
-	my $Hmotiflen = 5;
-	my $Hmicro = 6;
-	my $Hstrand = 7;
-	my $Hmicrolen = 8;
-	my $Hinterpos = 9;
-	my $Hrelativepos = 10;
-	my $Hinter = 11;
-	my $Hinterlen = 12;
-	
-	my $C = 13;
-    my $Cchr = 14;
-    my $Cstart = 15;
-    my $Cend = 16;    
-	my $Cmotif = 17;
-	my $Cmotiflen = 18;
-	my $Cmicro = 19;
-	my $Cstrand = 20;
-	my $Cmicrolen = 21;
-	my $Cinterpos = 22;
-	my $Crelativepos = 23;
-	my $Cinter = 24;
-	my $Cinterlen = 25;
-	
-	my $O = 26;
-    my $Ochr = 27;
-    my $Ostart = 28;
-    my $Oend = 29;    
-	my $Omotif = 30;
-	my $Omotiflen = 31;
-	my $Omicro = 32;
-	my $Ostrand = 33;
-	my $Omicrolen = 34;
-	my $Ointerpos = 35;
-	my $Orelativepos = 36;
-	my $Ointer = 37;
-	my $Ointerlen = 38;
-	
-	my $R = 39;
-    my $Rchr = 40;
-    my $Rstart = 41;
-    my $Rend = 42;    
-	my $Rmotif = 43;
-	my $Rmotiflen = 44;
-	my $Rmicro = 45;
-	my $Rstrand = 46;
-	my $Rmicrolen = 47;
-	my $Rinterpos = 48;
-	my $Rrelativepos = 49;
-	my $Rinter = 50;
-	my $Rinterlen = 51;
-	
-    my $Mchr = 52;
-    my $Mstart = 53;
-    my $Mend = 54;    
-	my $M = 55;
-	my $Mmotif = 56;
-	my $Mmotiflen = 57;
-	my $Mmicro = 58;
-	my $Mstrand = 59;
-	my $Mmicrolen = 60;
-	my $Minterpos = 61;
-	my $Mrelativepos = 62;
-	my $Minter = 63;
-	my $Minterlen = 64;
-	
-	#-------------------------------------------------------------------------------#
-	my @analysis=();
-	
-	
-	my %speciesOrder = ();
-	$speciesOrder{"H"} = 0;
-	$speciesOrder{"C"} = 1;
-	$speciesOrder{"O"} = 2;
-	$speciesOrder{"R"} = 3;
-	$speciesOrder{"M"} = 4;
-	#-------------------------------------------------------------------------------#
-
-	my $line = $_[0];
-	chomp $line;
-	
-	my @f = split(/\t/,$line);
-	print "received array : @f.. received tags = @tags\n" if $printer == 1;
-	
-	# collect all motifs
-	my @motifs=();
-	 @motifs = ($f[$Hmotif], $f[$Cmotif], $f[$Omotif], $f[$Rmotif], $f[$Mmotif]) if $tags[$#tags] =~ /M/;
-	 @motifs = ($f[$Hmotif], $f[$Cmotif], $f[$Omotif], $f[$Rmotif]) if $tags[$#tags] =~ /R/;
-	 @motifs = ($f[$Hmotif], $f[$Cmotif], $f[$Omotif]) if $tags[$#tags] =~ /O/;
-#	print "motifs in the array = $f[$Hmotif], $f[$Cmotif], $f[$Omotif], $f[$Rmotif]\n" if $tags[$#tags] =~ /R/;;
-	print "motifs = @motifs\n" if $printer == 1;
-	my @translation = ();
-	foreach my $motif (@motifs){
-		push(@translation, "_") if $motif eq "NA";
-		push(@translation, "+") if $motif ne "NA";
-	}
-	my $translate = join(" ", @translation);
-#	print "translate = >$translate< and analysis = $template{$translate}[0].. on the other hand, ",$template{"- - +"}[0],"\n"; 
-	my @analyses = split(/\|/,$template{$translate}[0]);
-	
-	print "motifs = @motifs, analyses = @analyses\n" if $printer == 1; 
-
-	if (scalar(@analyses) == 1) {
-		#print "analysis = $analyses[0]\n"; 
-		if ($analyses[0] !~ /,|\./ ){
-			if ($analyses[0] =~ /\+/){
-				my $analysis = $analyses[0];
-				$analysis =~ s/\+|\-//g;
-				my @species = split(/\s*/,$analysis);
-				my @currentMotifs = ();
-				foreach my $specie (@species){	push(@currentMotifs, $motifs[$speciesOrder{$specie}]); print "pushing into currentMotifs: $speciesOrder{$specie}: $motifs[$speciesOrder{$specie}]\n" if $printer == 1;}
-				print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
-				$template{$translate}[1]++ if $strict == 1 && consistency(@currentMotifs) ne "NULL";
-				$template{$translate}[1]++ if $strict == 0;
-				print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
-			}
-			else{
-				my $analysis = $analyses[0];
-				$analysis =~ s/\+|\-//g;
-				my @species = split(/\s*/,$analysis);
-				my @currentMotifs = ();
-				my @complementarySpecies = ();
-				my $allSpecies = join("",@tags);
-				foreach my $specie (@species){	$allSpecies =~	s/$specie//g; }
-				foreach my $specie (split(/\s*/,$allSpecies)){	push(@currentMotifs, $motifs[$speciesOrder{$specie}]); print "pushing into currentMotifs: $speciesOrder{$specie}: $motifs[$speciesOrder{$specie}]\n" if $printer == 1;;}
-				print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
-				$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
-				$template{$translate}[1]=$template{$translate}[1]+1  if $strict == 0;
-				print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
-			}
-		}
-
-		elsif ($analyses[0] =~ /,/) {
-			my @events = split(/,/,$analyses[0]);	
-			print "events = @events \n " if $printer == 1;
-			if ($events[0] =~ /\+/){
-				my $analysis1 = $events[0];
-				$analysis1 =~ s/\+|\-//g;
-				my $analysis2 = $events[1];
-				$analysis2 =~ s/\+|\-//g;
-				my @nSpecies = split(/\s*/,$analysis2);
-				print "original analysis = $analysis1 " if $printer == 1;
-				foreach my $specie (@nSpecies){ $analysis1=~ s/$specie//g;}
-				print "processed analysis = $analysis1 \n" if $printer == 1; 
-				my @currentMotifs = ();
-				foreach my $specie (split(/\s*/,$analysis1)){push(@currentMotifs, $motifs[$speciesOrder{$specie}]); }
-				print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
-				$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
-				$template{$translate}[1]=$template{$translate}[1]+1  if $strict == 0;
-				print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
-			}
-			else{
-				my $analysis1 = $events[0];
-				$analysis1 =~ s/\+|\-//g;
-				my $analysis2 = $events[1];
-				$analysis2 =~ s/\+|\-//g;
-				my @pSpecies = split(/\s*/,$analysis2);
-				my @currentMotifs = ();
-				foreach my $specie (@pSpecies){	push(@currentMotifs, $motifs[$speciesOrder{$specie}]); }
-				print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
-				$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
-				$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 0;
-				print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
-			
-			}
-			
-		}
-		elsif ($analyses[0] =~ /\./) {
-			my @events = split(/\./,$analyses[0]);	
-			foreach my $event (@events){
-				print "event = $event \n" if $printer == 1;
-				if ($event =~ /\+/){
-					my $analysis = $event;
-					$analysis =~ s/\+|\-//g;
-					my @species = split(/\s*/,$analysis);
-					my @currentMotifs = ();
-					foreach my $specie (@species){	push(@currentMotifs, $motifs[$speciesOrder{$specie}]); }
-					#print consistency(@currentMotifs),"<- \n"; 
-					print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
-					$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
-					$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 0;
-					print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
-				}
-				else{
-					my $analysis = $event;
-					$analysis =~ s/\+|\-//g;
-					my @species = split(/\s*/,$analysis);
-					my @currentMotifs = ();
-					my @complementarySpecies = ();
-					my $allSpecies = join("",@tags);
-					foreach my $specie (@species){	$allSpecies =~	s/$specie//g; }
-					foreach my $specie (split(/\s*/,$allSpecies)){	push(@currentMotifs, $motifs[$speciesOrder{$specie}]); }
-					#print consistency(@currentMotifs),"<- \n"; 
-					print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
-					$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
-					$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 0;
-					print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
-				}
-			}	
-		
-		}
-	}
-	else{
-		$template{$translate}[1]++;
-	}
-	# test whether the motifs, in species where microsats are present, are indeed of the same motif composition
-	
-	
-	
-	for my $templet ( keys %template ) {
-	#	print "now returning: $templet\n";
-		if (@{ $template{$templet} }[1] > 0){
-			print "returning in the end: $templet and $translate\n" if $printer == 1;
-			$template{$templet}[1] = 0;
-			return 	(@{$template{$templet}}[0], $translate);
-		}
-	}
-	undef %template;
-	print "sending NULL\n" if $printer == 1;
-	return ("NULL", $translate);
-	
-}
-
-
-sub consistency{
-	my @motifs = @_;
-	print "in consistency \n" if $printer == 1;
-	print "motifs sent = >",join("|",@motifs),"< \n" if $printer == 1; 
-	return $motifs[0] if scalar(@motifs) == 1;
-	my $prevmotif = shift(@motifs);
-	my $stopper = 0;
-	for my $i (0 ... $#motifs){
-		next if $motifs[$i] eq "NA";
-		my $templet = $motifs[$i].$motifs[$i];
-		if ($templet !~ /$prevmotif/i){
-			$stopper = 1; last;
-		}
-	}
-	return $prevmotif if $stopper == 0;
-	return "NULL" if $stopper == 1;
-}
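-#	The motif-doubling above is a cyclic-rotation test: "ACG" vs "CGA" passes because
-#	"CGACGA" =~ /ACG/, so rotations of the same motif count as consistent, while
-#	"ACG" vs "AGT" fails ("AGTAGT" !~ /ACG/) and the sub returns "NULL".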
-sub summarize_microsat{
-	my $printer = 0;
-	my $line = $_[0];	
-	my $humseq = $_[1];
-
-	my @gaps = $line =~ /[0-9]+\t[0-9]+\t[\+\-]/g;
-	my @starts = $line =~ /[0-9]+\t[\+\-]/g;
-	my @ends = $line =~ /[\+\-]\t[0-9]+/g;
-	print "starts = @starts\tends = @ends\n" if $printer == 1;
-	for my $i (0 ... $#gaps) {$gaps[$i] =~ s/\t[0-9]+\t[\+\-]//g;}
-	for my $i (0 ... $#starts) {$starts[$i] =~ s/\t[\+\-]//g;}
-	for my $i (0 ... $#ends) {$ends[$i] =~ s/[\+\-]\t//g;}
-
-	my $minstart = array_smallest_number(@starts);
-	my $maxend = array_largest_number(@ends);
-
-	my $humupstream_st = substr($humseq, 0, $minstart);
-	my $humupstream_en = substr($humseq, 0, $maxend);
-	my $no_of_gaps_to_start = 0;
-	my $no_of_gaps_to_end = 0;
-	$no_of_gaps_to_start = ($humupstream_st =~ s/\-/x/g) if $humupstream_st=~/\-/;
-	$no_of_gaps_to_end = ($humupstream_en =~ s/\-/x/g) if $humupstream_en=~/\-/;
-
-	my $locusmotif = ();
-	
-	
-	print "IN SUB SUMMARIZE_MICROSAT $line\n" if $printer == 1;
-	#return "NULL" if $line =~ /compound/;
-	my $Hstart = "NA";
-	my $Hend = "NA";
-	chomp $line;
-	my $match_count = ($line =~ s/>/>/g);
-	#print "number of species = $match_count\n";
-	my @micros = split(/>/,$line);
-	shift @micros;
-	my $stopper = 0;
-	
-	
-	foreach my $mic (@micros){
-		my @local = split(/\t/,$mic);
-		if ($local[$microsatcord] =~ /N/) {$stopper =1; last;}
-	}
-	return "NULL" if $stopper ==1;
-	
-	#------------------------------------------------------
-
-	my @arranged = ();
-	for my $arr (0 ... $#exacttags) {$arranged[$arr] = '0';}
-		
-	foreach my $micro (@micros){
-		for my $i (0 ... $#exacttags){
-			if ($micro =~ /^$exacttags[$i]/){
-				$arranged[$i] = $micro;
-				last;
-			}
-		}
-	}
-#	print "arranged = @arranged \n" ; <STDIN>;;
-	
-	my @endstatement = ();
-	my $turn = 0;
-	my $species_counter = 0;
-	#	print scalar(@arranged),"\n";
-	
-	my $species_no=0;
-	
-	my $orthHchr = 0;
-
-	foreach my $micro (@arranged) {
-		$micro =~ s/\t\t/\t \t/g;
-		$micro =~ s/\t,/\t ,/g;
-		$micro =~ s/,\t/, \t/g;
-		print "------------------------------------------------------------------------------------------\n" if $printer == 1;
-		chomp $micro;
-		if ($micro eq '0'){
-			push(@endstatement, join("\t",$exacttags[$species_counter],"NA","NA","NA","NA",0 ,"NA", "NA", 0,"NA","NA","NA", "NA" ));
-			$species_counter++;
-			print join("|","ENDSTATEMENT:",@endstatement),"\n" if $printer == 1;
-			next;
-		}
-	#		print $micro,"\n";
-		print "micro  = $micro \n" if $printer == 1;
-		my @fields  = split(/\t/,$micro);
-		my $microcopy = $fields[$microsatcord];
-		$microcopy =~ s/\[|\]|-//g;
-		my $microsatlength = length($microcopy);
-		print "microsat = $fields[$microsatcord] and microsatlength = $microsatlength\n" if $printer == 1;
-#		print "sp_ident = @sp_ident.. species_no=$species_no\n";
-		$micro =~ /$sp_ident[$species_no]\s(\S+)\s([0-9]+)\s([0-9]+)/;
-		
-	
-		my $sp_chr=$1;
-		my $sp_start=$2 + $fields[$startcord] - $fields[$gapcord];
-		my $sp_end= $sp_start + $microsatlength - 1;
-		
-		$species_no++;
-	
-		$micro =~ /$focalspec\s(\S+)\s([0-9]+)\s([0-9]+)/;
-		$orthHchr=$1;
-		$Hstart=$2+$minstart-$no_of_gaps_to_start;
-		$Hend=$2+$maxend-$no_of_gaps_to_end;
-
-		print "Hstart = $Hstart (= match start + $minstart - $no_of_gaps_to_start)\n" if $printer == 1;
-	
-		my $motif = $fields[$motifcord];
-		my $firstmotif = ();
-		my $strand = $fields[$strandcord];
-	#		print "strand = $strand\n";
-		
-	
-		if ($motif =~ /^\[/){
-			$motif =~ s/^\[//g;
-			$motif =~ /([a-zA-Z]+)\].*/;
-			$firstmotif = $1;
-		}
-		
-		else {$firstmotif = $motif;}
-		print "firstmotif =$firstmotif : \n" if $printer == 1;
-		$firstmotif = allCaps($firstmotif);
-
-		if (exists $revHash{$firstmotif} && $turn == 0) {
-			$turn=1 if $species_counter==0;
-			$firstmotif = $revHash{$firstmotif};
-		}
-		
-		elsif (exists $revHash{$firstmotif} && $turn == 1) {$firstmotif = $revHash{$firstmotif}; $turn = 1;}
-		print "changed firstmotif =$firstmotif\n" if $printer == 1;
-	#		<STDIN>;
-		$locusmotif = $firstmotif;
-		
-		if (scalar(@fields) > $microsatcord + 2){
-			print "fields = @fields ... interr_poscord=$interr_poscord=$fields[$interr_poscord] .. interrcord=$interrcord=$fields[$interrcord]\n" if $printer == 1; 
-
-			my @interposes = ();
-			@interposes = split(",",$fields[$interr_poscord]) if $fields[$interr_poscord] =~ /,/; 
-			$interposes[0] = $fields[$interr_poscord] if $fields[$interr_poscord] !~ /,/ ;
-			print "interposes=@interposes\n" if $printer == 1;
-			my @relativeposes = (); 
-			my @interruptions = ();
-			@interruptions = split(",",$fields[$interrcord]) if $fields[$interrcord] =~ /,/; 
-			$interruptions[0] = $fields[$interrcord]  if $fields[$interrcord] !~ /,/;
-			my @interlens = ();
-
-			
-			for my $i (0 ... $#interposes){
-			
-				my $interpos = $interposes[$i];
-				my $nexter = 0;
-				my $interruption = $interruptions[$i];
-				my $interlen = length($interruption);
-				push (@interlens, $interlen);
-			
-				
-				my $relativepos = (100 * $interpos) / $microsatlength;
-				print "relativepos  = $relativepos ,interpos=$interpos, interruption=$interruption, interlen=$interlen \n" if $printer == 1;
-				$relativepos = (100 * ($interpos-$interlen)) / $microsatlength if $relativepos > 50;
-				print "-->  = $relativepos\n" if $printer == 1;
-				$interruption = "IND" if length($interruption) < 1;
-		
-				if ($turn == 1){
-					$fields[$microsatcord] = switch_micro($fields[$microsatcord]);
-					$interruption = switch_nucl($interruption) unless $interruption eq "IND";
-					$interpos = ($microsatlength - $interpos) - $interlen + 2;
-					print "turn interpos = $interpos for $fields[$microsatcord]\n" if $printer == 1;
-					$relativepos = (100 * $interpos) / $microsatlength;
-					$relativepos = (100 * ($interpos-$interlen)) / $microsatlength if $relativepos > 50;
-		
-		
-					$strand = '+' if $strand eq '-';
-					$strand = '-' if $strand eq '+';
-				}
-				print "final relativepos = $relativepos\n" if $printer == 1;
-				push(@relativeposes, $relativepos);
-			}
-			push(@endstatement,join("\t",($exacttags[$species_counter],$sp_chr, $sp_start, $sp_end, $firstmotif,length($firstmotif),$fields[$microsatcord],$strand,$microsatlength,join(",",@interposes),join(",",@relativeposes),join(",",@interruptions), join(",",@interlens))));
-		}
-		
-		else{
-			push(@endstatement, join("\t",$exacttags[$species_counter],$sp_chr, $sp_start, $sp_end, $firstmotif,length($firstmotif),$fields[$microsatcord],$strand,$microsatlength,"NA","NA","NA", "NA"));
-		}
-		
-		$species_counter++;
-	}
-	
-	$locusmotif = $sameHash{$locusmotif} if exists $sameHash{$locusmotif};
-	$locusmotif = $revHash{$locusmotif} if exists $revHash{$locusmotif};
-
-	my $endst = join("\t", @endstatement, $orthHchr, $Hstart, $Hend);
-	print $endst, "\n" if $printer == 1;
-
-	return ($endst, $orthHchr, $Hstart, $Hend, $locusmotif, length($locusmotif));
-	
-}
-
-sub switch_nucl{
-	my @strand = split(/\s*/,$_[0]);
-	for my $i (0 ... $#strand){
-		if ($strand[$i] =~ /c/i) {$strand[$i] = "G";next;}
-		if ($strand[$i] =~ /a/i) {$strand[$i] = "T";next;}
-		if ($strand[$i] =~ /t/i) { $strand[$i] = "A";next;}
-		if ($strand[$i] =~ /g/i) {$strand[$i] = "C";next;}
-	}
-	return join("",@strand);
-}
-
-
-sub switch_micro{
-	my $micro = reverse($_[0]);
-	my @strand = split(/\s*/,$micro);
-	for my $i (0 ... $#strand){
-		if ($strand[$i] =~ /c/i) {$strand[$i] = "G";next;}
-		if ($strand[$i] =~ /a/i) {$strand[$i] = "T";next;}
-		if ($strand[$i] =~ /t/i) { $strand[$i] = "A";next;}
-		if ($strand[$i] =~ /g/i) {$strand[$i] = "C";next;}
-		if ($strand[$i] =~ /\[/i) {$strand[$i] = "]";next;}
-		if ($strand[$i] =~ /\]/i) {$strand[$i] = "[";next;}
-	}
-	return join("",@strand);
-}
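-#	Quick check of the two helpers: switch_nucl complements in place ("ACT" -> "TGA"),
-#	while switch_micro reverse-complements and flips the interruption brackets,
-#	e.g. switch_micro("[CA]TT") -> "AA[TG]".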
-sub decipher_history{
-	my $printer = 0;
-	my ($mutations_array, $tags_string, $nodes, $branches_hash, $tree_analysis, $confirmation_string, $alivehash) = @_;
-	my %mutations_hash=();
-	foreach my $mutation (@$mutations_array){
-		print "mutation = $mutation\n" if $printer == 1;
-		my %local = $mutation =~ /([\S ]+)=([\S ]+)/g;
-		push @{$mutations_hash{$local{"node"}}},$mutation; 
-		print "just for confirmation: $local{node} pushed as: $mutation\n" if $printer == 1;
-	}
-	my @nodes;
-	my @birth_steps=();
-	my @death_steps=();
-	
-	my @tags=split(/\s*/,$tags_string);
-	my @confirmation=split(/\s+/,$confirmation_string);
-	my %info=();
-	
-	for my $i (0 ... $#tags){
-		$info{$tags[$i]}=$confirmation[$i];
-		print "feeding info: $tags[$i] = $info{$tags[$i]}\n" if $printer == 1;
-	}
-	
-	for my $keys (@$nodes) {
-		foreach my $key (@$keys){
-#			print "current key  = $key\n";
-			my $copykey = $key;
-			$copykey =~ s/[\W ]+//g;
-			my @copykeys=split(/\s*/,$copykey);
-			my $states=();
-			foreach my $copy (@copykeys){
-				$states=$states.$info{$copy};
-			}
-			
-			print "reduced key = $copykey and state = $states\n" if $printer == 1;
-			
-			if (exists $mutations_hash{$key}) {
-				
-				if ($states=~/\+/){
-					push @birth_steps, @{$mutations_hash{$key}};
-					$birth_steps[$#birth_steps] =~ s/\S+=//g;
-					delete $mutations_hash{$key};
-				}
-				else{
-					push @death_steps, @{$mutations_hash{$key}};	
-					$death_steps[$#death_steps] =~ s/\S+=//g;
-					delete $mutations_hash{$key};
-				}
-			}
-		}
-	}
-	print "confirmation = $confirmation_string\n" if $printer == 1;
-	push (@birth_steps, "NULL") if scalar(@birth_steps) == 0;
-	push (@death_steps, "NULL") if scalar(@death_steps) == 0;
-	print "birth steps = ",join("\n",@birth_steps)," and death steps = ",join("\n",@death_steps),"\n" if $printer == 1;
-	return \@birth_steps, \@death_steps;
-}
-
-sub fillAlignmentGaps{
-	my $printer = 0;
-	print "received: @_\n" if $printer == 1;
-	my ($tree, $sequences, $alignment, $tagarray, $microsathash, $nonmicrosathash, $motif, $tree_analysis, $threshold, $microsatstarts) = @_;
-	print "in fillAlignmentGaps.. tree = $tree \n" if $printer == 1;
-	my %sequence_hash=();
-
-	my @phases = ();
-	my $concat = $motif.$motif;
-	my $motifsize = length($motif);
-	
-	for my $i (1 ... $motifsize){
-		push @phases, substr($concat, $i, $motifsize);
-	}
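-	#@phases now holds every cyclic rotation of the motif, e.g. for $motif = "ACT" it is
-	#("CTA", "TAC", "ACT"). Presumably fillgaps (defined elsewhere) uses these phases to
-	#decide which bases can fill an alignment gap without breaking the repeat's phase.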
-
-	my $concatalignment = "";
-	# iterate over the tag list passed in: the local @tags array is not populated until later
-	foreach my $tag (@$tagarray){
-		$concatalignment = $concatalignment.$alignment->{$tag};
-	}
-#	print "returningg NULL","NULL","NULL", "NULL\n" if $concatalignment !~ /-/;
-	return 0, "NULL","NULL","NULL", "NULL","NULL" if $concatalignment !~ /-/;
-	
-	
-
-	my %node_sequences_temp=();
-	my %node_alignments_temp =();			#NEW, Nov 28 2008
-	
-	my @tags=();
-	my @locus_sequences=();
-	my %alivehash=();
-
-#	print "IN fillAlignmentGaps\n";# <STDIN>;
-	my %fillrecord = ();
-	
-	my $change = 0;
-	foreach my $tag (@$tagarray) { 
-		#print "adding: $tag\n";
-		push(@tags, $tag);
-		if (exists $microsathash->{$tag}){
-			my $micro = $microsathash->{$tag};
-			my $orig_micro = $micro;
-			($micro, $fillrecord{$tag}) = fillgaps($micro, \@phases);
-			$change = 1 if uc($micro) ne uc($orig_micro);
-			$node_sequences_temp{$tag}=$micro if $microsathash->{$tag} ne "NULL"; 
-		}
-		if (exists $nonmicrosathash->{$tag}){
-			my $micro = $nonmicrosathash->{$tag};
-			my $orig_micro = $micro;
-			($micro, $fillrecord{$tag}) = fillgaps($micro, \@phases);
-			$change = 1 if uc($micro) ne uc($orig_micro);
-			$node_sequences_temp{$tag}=$micro if $nonmicrosathash->{$tag} ne "NULL"; 
-		}
-		
-		if (exists $alignment->{$tag}){
-			my $micro = $alignment->{$tag};
-			my $orig_micro = $micro;
-			($micro, $fillrecord{$tag}) = fillgaps($micro, \@phases);
-			$change = 1 if uc($micro) ne uc($orig_micro);
-			$node_alignments_temp{$tag}=$micro if $alignment->{$tag} ne "NULL"; 			
-		}
-		
-		#print "adding to node_sequences: $tag = ",$node_sequences_temp{$tag},"\n" if $printer == 1;
-		#print "adding to node_alignments: $tag = ",$node_alignments_temp{$tag},"\n" if $printer == 1;
-	}
-
-
-	my %node_sequences=();
-	my %node_alignments =();			#NEW, Nov 28 2008
-	foreach my $tag (@$tagarray) { 
-		$node_sequences{$tag} = join ".",split(/\s*/,$node_sequences_temp{$tag});
-		$node_alignments{$tag} = join ".",split(/\s*/,$node_alignments_temp{$tag});		
-	}	
-
-	print "\n", "#" x 50, "\n" if $printer == 1;
-	foreach my $tag (@tags){
-		print "$tag: $alignment->{$tag} = $node_alignments{$tag}\n" if $printer == 1;	
-	}
-	print "\n", "#" x 50, "\n" if $printer == 1;
-#	print "change = $change\n";
-	#<STDIN> if $concatalignment=~/\-/;
-	
-#	<STDIN> if $printer == 1 && $concatalignment =~ /\-/;
-
-	return 0, "NULL","NULL","NULL", "NULL", "NULL" if $change == 0;
-	
-	my ($nodes_arr, $branches_hash) = get_nodes($tree);
-	my @nodes=@$nodes_arr;
-	print "recieved nodes = @nodes\n" if $printer == 1; 
-	
-	
-	#POPULATE branches_hash WITH INFORMATION ABOUT LIVESTATUS
-	foreach my $keys (@nodes){
-		my @pair = @$keys;
-		my $joint = "(".join(", ",@pair).")";
-		my $copykey = join "", @pair;
-		$copykey =~ s/[\W ]+//g;
-		print "for node: $keys, copykey = $copykey and joint = $joint\n" if $printer == 1;
-		my $livestatus = 1;
-		foreach my $copy (split(/\s*/,$copykey)){
-			$livestatus = 0 if !exists $alivehash{$copy};
-		}
-		$alivehash{$joint} = $joint if !exists $alivehash{$joint} && $livestatus == 1;
-		print "alivehash = $alivehash{$joint}\n" if exists $alivehash{$joint} && $printer == 1;
-	}
-	
-
-	
-	@nodes = reverse(@nodes); #1 THIS IS IN ORDER TO GO THROUGH THE TREE FROM LEAVES TO ROOT.
-
-	my @mutations_array=();
-
-	my $joint = "";
-	foreach my $node (@nodes){
-		my @pair = @$node; 
-		print "now in the nodes for loop, pair = @pair\n and sequences=\n" if $printer == 1;
-		$joint = "(".join(", ",@pair).")"; 	
-		print "joint = $joint \n" if $printer == 1; 
-		my @pair_sequences=();
-
-		foreach my $tag (@pair){
-			print "tag = $tag: " if $printer == 1;
-			print $node_alignments{$tag},"\n" if $printer == 1;
-			push @pair_sequences, $node_alignments{$tag};
-		}
-#		print "fillgap\n";
-		my ($compared, $substitutions_list) = base_by_base_simple($motif,\@pair_sequences, scalar(@pair_sequences), @pair, $joint);
-		$node_alignments{$joint}=$compared;
-		push(  @mutations_array,split(/:/,$substitutions_list));
-		print "newly added to node_sequences: $node_alignments{$joint} and list of mutations = @mutations_array\n" if $printer == 1;
-	}
-	print "now sending for analyze_mutations: mutation_array=@mutations_array, nodes=@nodes, branches_hash=$branches_hash, alignment=$alignment, tags=@tags, alivehash=%alivehash, node_sequences=\%node_sequences, microsatstarts=$microsatstarts, motif=$motif\n" if $printer == 1;
-#	<STDIN> if $printer == 1;
-	
-	my $analyzed_mutations = analyze_mutations(\@mutations_array, \@nodes, $branches_hash, $alignment, \@tags, \%alivehash, \%node_sequences, $microsatstarts, $motif);
-
-#	print "returning: ", $analyzed_mutations, \@nodes,"\n" if scalar @mutations_array > 0;
-#	print "returning: NULL, NULL, NULL " if scalar @mutations_array == 0 && $printer == 1;
-	print "final node alignment after filling for $joint = " if $printer == 1;
-	print "$node_alignments{$joint}\n" if $printer == 1;
-	
-
-	return 1, $analyzed_mutations, \@nodes, $branches_hash, \%alivehash, $node_alignments{$joint} if scalar @mutations_array > 0;
-	return 1, "NULL","NULL","NULL", "NULL", "NULL" if scalar @mutations_array == 0;
-}
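
For reference, the @phases array built near the top of fillAlignmentGaps enumerates every rotation of the repeat motif, so that gaps can be filled in whatever register the repeat happens to be in. The construction in isolation:

my $motif  = "AGC";
my $concat = $motif.$motif;                 # "AGCAGC"
my @phases = ();
for my $i (1 ... length($motif)){
	push @phases, substr($concat, $i, length($motif));
}
# @phases is ("GCA", "CAG", "AGC"): all rotations, ending with the motif itself
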
-
-
-
-sub add_mutation{
-	my $printer = 0;
-	print "IN SUBROUTUNE add_mutation.. information received = @_\n" if $printer == 1;
-	my ($i , $bite, $to, $from) = @_;
-	print "bite = $bite.. all received info = ",join("^", @_),"\n" if $printer == 1;
-	print "to=$to\n" if $printer == 1;
-	print "tis split = ",join(" and ",split(/!/,$to)),"\n" if $printer == 1;
-	my @toields = split "!",$to;
-	print "toilds  = @toields\n" if $printer == 1;
-	my @mutations=();
-	
-	foreach my $toield (@toields){
-		my @toinfo=split(":",$toield);
-		print " at toinfo=@toinfo \n" if $printer == 1;
-		next if  $toinfo[1] =~ /$from/i;
-		my @mutation = @toinfo if $toinfo[1] !~ /$from/i;
-		print "adding to mutaton list: ", join(",", "node=$mutation[0]","type=substitution" ,"position=$i", "from=$from", "to=$mutation[1]", "insertion=", "deletion="),"\n" if $printer == 1;
-		push (@mutations, join("\t", "node=$mutation[0]","type=substitution" ,"position=$i", "from=$from", "to=$mutation[1]", "insertion=", "deletion="));
-	}
-	return @mutations;
-}
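
Each record produced here is a flat, tab-separated key=value string; selectMutationArray and summarizeMutations later re-parse these strings rather than passing structured data. A hypothetical call to illustrate the format:

my @muts = add_mutation(7, "bite", "(H, C):A!H:G", "A");
# one record is returned, for the branch whose base differs from "A":
# "node=H\ttype=substitution\tposition=7\tfrom=A\tto=G\tinsertion=\tdeletion="
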
-
-
-sub add_bases{
-
-	my $printer = 0;
-	print "IN SUBROUTUNE add_bases.. information received = @_\n" if $printer == 1;
-	my ($optional0, $optional1, $pair0, $pair1,$joint) = @_;
-	my $total_list=();
-
-	my @total_list0=split(/!/,$optional0);
-	my @total_list1=split(/!/,$optional1);
-	my @all_list=();
-	my %total_hash0=();
-	foreach my $entry (@total_list0) { 		
-		$entry = uc $entry; 
-		$entry =~ /(\S+):(\S+)/; 
-		$total_hash0{$2}=$1;
-		push @all_list, $2; 
-	}
-
-	my %total_hash1=();
-	foreach my $entry (@total_list1) { 		
-		$entry = uc $entry; 
-		$entry =~ /(\S+):(\S+)/; 
-		$total_hash1{$2}=$1;
-		push @all_list, $2; 
-	}
-
-	my %alphabetical_hash=();
-	my @return_options=();	
-
-	for my $i (0 ... $#all_list){
-		my $alph = $all_list[$i];
-		if (exists $total_hash0{$alph} && exists $total_hash1{$alph}){
-			push(@return_options, $joint.":".$alph);
-			delete $total_hash0{$alph}; delete $total_hash1{$alph};
-		}
-		if (exists $total_hash0{$alph} && !exists $total_hash1{$alph}){
-			push(@return_options, $pair0.":".$alph);
-			delete $total_hash0{$alph};
-		}
-		if (!exists $total_hash0{$alph} && exists $total_hash1{$alph}){
-			push(@return_options, $pair1.":".$alph);
-			delete $total_hash1{$alph};
-		}
-
-	}
-
-	print "returning ",join "!",@return_options,"\n" if $printer == 1; 
-	return join "!",@return_options;
-
-}
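
add_bases is effectively one step of Fitch-style parsimony: bases present in both children's candidate lists are attributed to the joint ancestor, and the rest stay with the child they came from. A worked call with hypothetical node labels:

my $merged = add_bases("H:A!H:G", "C:A", "H", "C", "(H, C)");
# "A" is shared, so it moves to the joint node; "G" stays with child H:
# $merged eq "(H, C):A!H:G"
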
-
-
-sub fillgaps{
-#	print "IN fillgaps: @_\n";	
-	my ($micro, $phasesinput) = @_;
-	#print "in fillgaps .. micro = $micro\n";
-	# NOTE: callers assign two values -- ($micro, $fillrecord{$tag}) -- so every
-	# return below yields (sequence, changed-flag) rather than a bare scalar.
-	return ($micro, 0) if $micro !~ /\-/;
-	my $orig_micro = $micro;
-	my @phases = @$phasesinput;
-	
-	my %tested_patterns = ();
-	
-	foreach my $phase (@phases){
-	#	print "considering phase: $phase\n";
-		my @phase_prefixes = ();
-		my @prephase_left_contexts = ();
-		my @prephase_right_contexts = ();
-		my @pregapsize = ();
-		my @prepostfilins = ();
-	
-		my @phase_suffixes;
-		my @suffphase_left_contexts;
-		my @suffphase_right_contexts;
-		my @suffgapsize;
-		my @suffpostfilins;
-	
-		my @postfilins = ();
-		my $motifsize = length($phases[0]);
-		
-		my $change = 0;
-	
-		for my $u (0 ... $motifsize-1){
-			my $concat = $phase.$phase.$phase.$phase;
-			my @concatarr = split(/\s*/, $concat);
-			my $l = 0;
-			while ($l < $u){
-				shift @concatarr;
-				$l++;
-			}
-			$concat = join ("", @concatarr);
-			
-			for my $t (0 ... $motifsize-1){
-				for my $k (1 ... $motifsize-1){
-					push @phase_prefixes, substr($concat, $motifsize+$t, $k);
-					push @prephase_left_contexts, substr ($concat, $t, $motifsize);
-					push @prephase_right_contexts, substr ($concat, $motifsize+$t+$k+($motifsize-$k), 1);
-					push @pregapsize, $k;
-					push @prepostfilins, substr($concat,  $motifsize+$t+$k, ($motifsize-$k));
-				#	print "reading: $concat, t=$t, k=$k prefix: $prephase_left_contexts[$#prephase_left_contexts] $phase_prefixes[$#phase_prefixes] -x$pregapsize[$#pregapsize] $prephase_right_contexts[$#prephase_right_contexts]\n";
-				#	print "phase_prefixes = $phase_prefixes[$#phase_prefixes]\n";
-				#	print "prephase_left_contexts = $prephase_left_contexts[$#prephase_left_contexts]\n";
-				#	print "prephase_right_contexts = $prephase_right_contexts[$#prephase_right_contexts]\n";
-				#	print "pregapsize = $pregapsize[$#pregapsize]\n";
-				#	print "prepostfilins = $prepostfilins[$#prepostfilins]\n";
-				}
-			}
-		}
-	
-	#	print "looking if $micro =~ /($phase\-{$motifsize})/i || $micro =~ /^(\-{$motifsize,}$phase)/i\n";
-		if ($micro =~ /($phase\-{$motifsize,})$/i || $micro =~ /^(\-{$motifsize,}$phase)/i){
-	#			print "micro: $micro needs further gap removal: $1\n";
-			while ($micro =~ /$phase(\-{$motifsize,})$/i || $micro =~ /^(\-{$motifsize,})$phase/i){
-			#	print "micro: $micro needs further gap removal: $1\n";
-			
-			#	print "phase being considered = $phase\n";
-				my $num = 0;
-				$num = $micro =~ s/$phase\-{$motifsize}/$phase$phase/gi if $micro =~ /$phase\-{$motifsize,}/i;
-				$num = $micro =~ s/\-{$motifsize}$phase/$phase$phase/gi if $micro =~ /\-{$motifsize,}$phase/i;
-			#	print "num = $num\n";
-				$change = 1 if $num == 1;
-			}				
-		}
-	
-		elsif ($micro =~ /(($phase)+)\-{$motifsize,}(($phase)+)/i){
-			while ($micro =~ /(($phase)+)\-{$motifsize,}(($phase)+)/i){
-		#		print "checking lengths of $1 and $3 for $micro... \n";
-				my $num = ();
-				if (length($1) >= length($3)){
-		#			print "$micro matches (($phase)+)\-{$motifsize,}(($phase)+) = $1, >= , $3 \n";				
-					$num = $micro =~ s/$phase\-{$motifsize}/$phase$phase/gi ;				
-				}
-				if (length($1) < length($3)){
-		#			print "$micro matches (($phase)+)\-{$motifsize,}(($phase)+) = $1, < , $3 \n";
-					$num = $micro =~ s/\-{$motifsize}$phase/$phase$phase/gi ;
-				}		
-	#			print "micro changed to $micro\n";
-			}
-		}
-		elsif ($micro =~ /([A-Z]+)\-{$motifsize,}(($phase)+)/i){
-			while ($micro =~ /([A-Z]+)\-{$motifsize,}(($phase)+)/i){		
-		#			print "$micro matches ([A-Z]+)\-{$motifsize}(($phase)+) = 1=$1, - , 3=$3 \n";
-					my $num = 0;
-					$num = $micro =~ s/\-{$motifsize}$phase/$phase$phase/gi ;
-			}
-		}
-		elsif ($micro =~ /(($phase)+)\-{$motifsize,}([A-Z]+)/i){
-			while ($micro =~ /(($phase)+)\-{$motifsize,}([A-Z]+)/i){		
-			#		print "$micro matches (($phase)+)\-{$motifsize,}([A-Z]+) = 1=$1, - , 3=$3 \n";
-					my $num = 0;
-					$num = $micro =~ s/$phase\-{$motifsize}/$phase$phase/gi ;				
-			}	
-		}
-	
-	#	print "$orig_micro to $micro\n";
-		
-	#s	<STDIN>;
-		
-		for my $h (0 ... $#phase_prefixes){
-	#		print "searching using prefix : $prephase_left_contexts[$h]$phase_prefixes[$h]\-{$pregapsize[$h]}$prephase_right_contexts[$h]\n";
-			my $pattern = $prephase_left_contexts[$h].$phase_prefixes[$h].$pregapsize[$h].$prephase_right_contexts[$h];
-	#		print "returning orig_micro = $orig_micro, micro = $micro \n" if exists $tested_patterns{$pattern};
-			if ($micro =~ /$prephase_left_contexts[$h]$phase_prefixes[$h]\-{$pregapsize[$h]}$prephase_right_contexts[$h]/i){
-				return ($orig_micro, 0) if exists $tested_patterns{$pattern};
-				while ($micro =~ /($prephase_left_contexts[$h]$phase_prefixes[$h]\-{$pregapsize[$h]}$prephase_right_contexts[$h])/i){
-					$tested_patterns{$pattern} = $pattern;
-	#				print "micro: $micro needs further gap removal: $1\n";
-				
-	#				print "prefix being considered = $phase_prefixes[$h]\n";
-					my $num = ();
-					$num = ($micro =~ s/$prephase_left_contexts[$h]$phase_prefixes[$h]\-{$pregapsize[$h]}$prephase_right_contexts[$h]/$prephase_left_contexts[$h]$phase_prefixes[$h]$prepostfilins[$h]$prephase_right_contexts[$h]/gi) ;
-	#				print "num = $num, micro = $micro\n";
-					$change = 1 if $num == 1;
-					
-					return ($orig_micro, 0) if $num > 1;
-				}				
-			}
-		
-		}
-	}
-	return ($orig_micro, 0) if length($micro) != length($orig_micro);
-	return ($micro, uc($micro) ne uc($orig_micro) ? 1 : 0);
-}
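
The workhorse of fillgaps is a substitution that replaces a motif-length run of gap characters adjacent to the repeat with one more copy of the repeat unit; the prefix/suffix tables extend the same idea to partial-unit gaps. The basic step in isolation:

my $micro     = "AGAGAG--AG";
my $phase     = "AG";
my $motifsize = length($phase);
$micro =~ s/$phase\-{$motifsize}/$phase$phase/i;   # fill one motif-sized gap
# $micro is now "AGAGAGAGAG"
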
-
-sub selectMutationArray{
-	my $printer =0;
-
-	my $oldmutspt  = $_[0];
-	my $newmutspt  = $_[1];
-	my $tagstringpt = $_[2];
-	my $alivehashpt = $_[3];
-	my $alignmentpt = $_[4];
-	my $motif = $_[5];
-
-	my @alivehasharr=();
-	
-	my @tags = @$tagstringpt;
-	my $alignmentln = length($alignmentpt->{$tags[0]});
-	
-	foreach my $key (keys %$alivehashpt) { push @alivehasharr, $key; print "we have alive: $key\n" if $printer == 1;}
-	
-	my %newside = ();
-	my %oldside = ();
-	my %newmuts = ();
-	
-	my %commons = ();
-	my %olds = ();
-	foreach my $old (@$oldmutspt){
-		$olds{$old} = 1;
-	}
-	foreach my $new (@$newmutspt){
-		$commons{$new} = 1 if exists $olds{$new};
-	}
-		
-	
-	foreach my $pos ( 0 ... $alignmentln){
-		#print "pos = $pos\n" if $printer == 1;
-		my $newyes = 0;
-		foreach my $mut (@$newmutspt){
-			$newmuts{$mut} = 1;
-			chomp $mut;
-			$newyes++;
-			 $mut =~ s/=\t/= \t/g;
-			 $mut =~ s/=$/= /g;
-
-			 $mut =~ /node=([A-Z\(\), ]+)\stype=([a-zA-Z ]+)\sposition=([0-9 ]+)\sfrom=([a-zA-Z\- ]+)\sto=([a-zA-Z\- ]+)\sinsertion=([a-zA-Z\- ]+)\sdeletion=([a-zA-Z\- ]+)/;
-			my $node = $1;
-			next if $3 != $pos;
-			print "new mut = $mut\n" if $printer == 1;
-			print "node = $node, pos = $3 ... and alivehasharr = >@alivehasharr<\n" if $printer == 1;
-			my $alivenode = 0;
-			foreach my $key (@alivehasharr){
-				$alivenode = 1 if $key =~ /$node/;
-			}
-		#	next if $alivenode == 0;
-			my $indel_type = " ";
-			if ($2 eq "insertion" || $2 eq "deletion"){
-				my $thisindel = ();
-				$thisindel = $6 if $2 eq "insertion";
-				$thisindel = $7 if $2 eq "deletion";
-				
-				$indel_type = "i".checkIndelType($node, $thisindel, $motif,$alignmentpt,$3, $2) if $2 eq "insertion";
-				$indel_type = "d".checkIndelType($node, $thisindel, $motif,$alignmentpt, $3, $2) if $2 eq "deletion";
-				$indel_type = $indel_type."f" if $indel_type =~ /mot/ && length($thisindel) >= length($motif);
-			}
-			print "indeltype = $indel_type\n" if $printer == 1;
-			my $added = 0;
-			
-			if (exists $newside{$pos} && $indel_type =~ /[a-z]+/){
-				print "we have a preexisting one for $pos\n" if $printer == 1;
-				my @preexisting = @{$newside{$pos}};
-				foreach my $pre (@preexisting){
-					print "looking at $pre\n" if $printer == 1;
-					next if $pre !~ /node=$node/;
-					next if $pre !~ /indeltype=([a-z]+)/;
-					my $currtype = $1;
-					
-					if ($currtype =~ /inon/ && $indel_type =~ /dmot/){
-						delete $newside{$pos};
-						push @{$newside{$pos}}, $pre;
-						$added = 1;
-					}
-					if ($currtype =~ /dnon/ && $indel_type =~ /imot/){
-						delete $newside{$pos};
-						push @{$newside{$pos}}, $pre;
-						$added = 1;
-					}
-					if ($currtype =~ /dmot/ && $indel_type =~ /inon/){
-						delete $newside{$pos};
-						push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
-						$added = 1;
-					}
-					if ($currtype =~ /imot/ && $indel_type =~ /dnon/){
-						delete $newside{$pos};
-						push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
-						$added = 1;
-					}					
-				}
-			}
-			
-			print "added = $added\n" if $printer == 1;
-			push @{$newside{$pos}}, $mut."\tindeltype=$indel_type" if $added == 0;
-
-			print "for new pos,: $pos we have: @{$newside{$pos}}\n " if $printer == 1;
-		}
-	}
-	
-	foreach my $pos ( 0 ... $alignmentln){
-		my $oldyes = 0;
-		foreach my $mut (@$oldmutspt){
-			chomp $mut;
-			$oldyes++;
-			 $mut =~ s/=\t/= \t/g;
-			 $mut =~ s/=$/= /g;
-			$mut =~ /node=([A-Z\(\), ]+)\ttype=([a-zA-Z ]+)\tposition=([0-9 ]+)\tfrom=([a-zA-Z\- ]+)\tto=([a-zA-Z\- ]+)\tinsertion=([a-zA-Z\- ]+)\tdeletion=([a-zA-Z\- ]+)/;
-			my $node = $1;
-			next if $3 != $pos;
-			print "old mut = $mut\n" if $printer == 1;
-			my $alivenode = 0;
-			foreach my $key (@alivehasharr){
-				$alivenode = 1 if $key =~ /$node/;
-			}
-			#next if $alivenode == 0;
-			my $indel_type = " ";
-			if ($2 eq "insertion" || $2 eq "deletion"){
-				$indel_type = "i".checkIndelType($node, $6, $motif,$alignmentpt, $3, $2) if $2 eq "insertion";
-				$indel_type = "d".checkIndelType($node, $7, $motif,$alignmentpt, $3, $2) if $2 eq "deletion";
-				next if $indel_type =~/non/;
-			}
-			else{ next;}
-
-			my $imp=0;
-			$imp = 1 if $indel_type =~ /dmot/ && $alivenode == 0;
-			$imp = 1 if $indel_type =~ /imot/ && $alivenode == 1;
-			
-			
-			if (exists $newside{$pos} && $indel_type =~ /[a-z]+/){
-				my @preexisting = @{$newside{$pos}};
-				print "we have a preexisting one for $pos: @preexisting\n" if $printer == 1;
-				next if $imp == 0;
-				
-				if (scalar(@preexisting) == 1){
-					my $foundmut = $preexisting[0];
-					$foundmut=~ /node=([A-Z, \(\)]+)/;
-					next if $1 eq $node;
-					
-					if (exists $oldside{$pos} || exists $commons{$foundmut}){
-						print "not replacing, but just adding\n" if $printer == 1;
-						push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
-						push @{$oldside{$pos}}, $mut."\tindeltype=$indel_type";
-						next;
-					}
-					
-					delete $newside{$pos};
-					push @{$oldside{$pos}}, $mut."\tindeltype=$indel_type";
-					push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
-					print "now  new one is : @{$newside{$pos}}\n" if $printer == 1;
-				}
-				
-				print "for pos: $pos: @{$newside{$pos}}\n" if $printer == 1;
-				next;
-			}
-
-			
-			my @news = ();
-			@news = @{$newside{$pos}} if exists $newside{$pos};
-			
-			print "mut = $mut and news = @news\n" if $printer == 1; 
-			push @{$oldside{$pos}}, $mut."\tindeltype=$indel_type";
-			push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
-		}
-	}
-
-	print "in the end, our collected mutations = \n" if $printer == 1;
-	my @returnarr = ();
-	foreach my $key (keys %newside) {push @returnarr,@{$newside{$key}};}
-	print join("\n", @returnarr),"\n" if $printer == 1;
-	#<STDIN>;
-	return @returnarr;
-
-}
-
-
-sub checkIndelType{
-	my $printer  = 0;
-	my $node = $_[0];
-	my $indel = $_[1];
-	my $motif = $_[2];
-	my $alignmentpt = $_[3];
-	my $posit = $_[4];
-	my $type = $_[5];
-	my @phases =();
-	my %prephases = ();
-	my %postphases = ();
-	#print "motif = $motif\n";
-	print "IN checkIndelType ... received: @_\n" if $printer == 1;
-	my $concat = $motif.$motif.$motif.$motif;
-	my $motiflength = length($motif);
-	
-	if ($motiflength > length ($indel)){
-		return "non" if $motif !~ /$indel/i;
-		return checkIndelType_ComplexAnalysis($node, $indel, $motif, $alignmentpt, $posit, $type);
-	}
-	
-	my $firstpass = 0;
-	for my $y (0 ... $motiflength-1){
-		my $phase = substr($concat, $motiflength+$y, $motiflength);
-		push @phases, $phase;
-		$firstpass = 1 if $indel =~ /$phase/i;
-		for my $k (0 ... length($motif)-1){
-			print "at: motiflength=$motiflength , y=$y , k=$k.. for pre: $motiflength+$y-$k and post: $motiflength+$y-$k+$motiflength in $concat\n" if $printer == 1;
-			my $pre = substr($concat, $motiflength+$y-$k, $k );
-			my $post = substr($concat, $motiflength+$y+$motiflength, $k);
-			print "adding to phases : $phase - $pre and $post\n" if $printer == 1;
-			push @{$prephases{$phase}} , $pre;
-			push @{$postphases{$phase}} , $post;			
-		}
-		
-	}
-	print "firstpass 1= $firstpass\n" if $printer == 1;
-	return "non" if $firstpass ==0;
-	$firstpass =0;
-	
-	foreach my $phase (@phases){
-		my @pres = @{$prephases{$phase}};
-		my @posts = @{$postphases{$phase}};
-		
-		foreach my $pre (@pres){
-			foreach my $post (@posts){
-				
-				$firstpass = 1 if $indel =~ /($pre)?($phase)+($post)?/i && length($indel) > (3 * length($motif));
-				$firstpass = 1 if $indel =~ /^($pre)?($phase)+($post)?$/i && length($indel) < (3 * length($motif));
-				print "matched here : ($pre)?($phase)+($post)?\n" if $printer == 1;
-				last if $firstpass == 1;
-			}
-			last if $firstpass == 1;
-		}
-		last if $firstpass == 1;
-	}
-	
-	print "firstpass 2= $firstpass\n" if $printer == 1;
-	return "non" if $firstpass ==0;
-	return "mot" if $firstpass ==1;	
-}
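
In other words, checkIndelType returns "mot" when the inserted or deleted bases fit the repeat lattice in some phase (allowing partial units at either end) and "non" otherwise; selectMutationArray prefixes the result with "i" or "d". Hypothetical calls, assuming an alignment hash %aln and a node label "(H, C)":

# checkIndelType("(H, C)", "GAGA", "AG", \%aln, 10, "insertion")  returns "mot"
# checkIndelType("(H, C)", "TTT",  "AG", \%aln, 10, "insertion")  returns "non"
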
-
-
-sub checkIndelType_ComplexAnalysis{
-	my $printer = 0;
-	my $node = $_[0];
-	my $indel = $_[1];
-	my $motif = $_[2];
-	my $alignmentpt = $_[3];
-	my $pos = $_[4];
-	my $type = $_[5];
-	my @speciesinvolved = $node =~ /[A-Z]+/g;
-	
-	my @seqs = ();
-	my $residualseq = length($motif) - length($indel);
-	print "IN COMPLEX ANALYSIS ... received: @_  .... speciesinvolved = @speciesinvolved\n" if $printer == 1;
-	print "we have position = $pos, sseq = $alignmentpt->{$speciesinvolved[0]}\n" if $printer == 1;
-	print "residualseq = $residualseq\n" if $printer == 1;
-	print "pos=$pos... got: @_\n" if $printer == 1;
-	foreach my $sp (@speciesinvolved){
-		my $spseq = $alignmentpt->{$sp};
-		#print "orig spseq = $spseq\n";
-		my $subseq = ();
-		
-		if ($type eq "deletion"){
-			my @indelparts = split(/\s*/,$indel);
-			my @seqparts = split(/\s*/,$spseq);
-			
-			for my $p ($pos ... $pos+length($indel)-1){
-				$seqparts[$p] = shift @indelparts;
-			}
-			$spseq = join("",@seqparts);
-		}
-		#print "mod spseq = $spseq\n";
-	#	$spseq=~ s/\-//g if $type !~ /deletion/;
-		
-		print "substr($spseq, $pos-($residualseq), length($indel)+$residualseq+$residualseq)\n" if $pos > 0 && $pos < (length($spseq) - length($motif))  && $printer == 1;
-		print "substr($spseq, 0, length($indel)+$residualseq)\n" if $pos == 0 && $printer == 1;
-		print "substr($spseq, $pos - $residualseq, length($indel)+$residualseq)\n" if $pos >= (length($spseq) - length($motif))  && $printer == 1;
-		
-		$subseq = substr($spseq, $pos-($residualseq), length($indel)+$residualseq+$residualseq) if $pos > 0 && $pos < (length($spseq) - length($motif))  ;
-		$subseq = substr($spseq, 0, length($indel)+$residualseq) if $pos == 0;
-		$subseq = substr($spseq, $pos - $residualseq, length($indel)+$residualseq) if $pos >= (length($spseq) - length($motif))  ;
-		print "spseq = $spseq . subseq=$subseq . type = $type\n" if $printer == 1;
-		#<STDIN> if $subseq !~ /[a-z\-]/i; 
-		$subseq =~ s/\-/$indel/g if $type =~ /insertion/;
-		push @seqs, $subseq;
-		print "seqs = @seqs\n" if $printer == 1;
-	}
-	return "non" if checkIfSeqsIdentical(@seqs) eq "NO";
-	
-	print "checking for $seqs[0] \n" if $printer == 1;
-	
-	my @phases =();
-	my %prephases = ();
-	my %postphases = ();
-	my $concat = $motif.$motif.$motif.$motif;
-	my $motiflength = length($motif);
-	
-	my $firstpass = 0;
-	
-	for my $y (0 ... $motiflength-1){
-		my $phase = substr($concat, $motiflength+$y, $motiflength);
-		push @phases, $phase;
-		$firstpass = 1 if $seqs[0] =~ /$phase/i;
-		for my $k (0 ... length($motif)-1){
-			my $pre = substr($concat, $motiflength+$y-$k, $k );
-			my $post = substr($concat, $motiflength+$y+$motiflength, $k);
-			print "adding to phases : $phase - $pre and $post\n" if $printer == 1;
-			push @{$prephases{$phase}} , $pre;
-			push @{$postphases{$phase}} , $post;			
-		}
-		
-	}
-	print "firstpass 1= $firstpass.. also, res-d = ",(length($seqs[0]))%(length($motif)),"\n" if $printer == 1;
-	return "non" if $firstpass ==0;
-	$firstpass =0;
-	foreach my $phase (@phases){
-		
-		$firstpass = 1 if $seqs[0] =~ /^($phase)+$/i && ((length($seqs[0]))%(length($motif))) == 0;
-
-		if (((length($seqs[0]))%(length($motif))) != 0){
-			my @pres = @{$prephases{$phase}};
-			my @posts = @{$postphases{$phase}};
-			foreach my $pre (@pres){
-				foreach my $post (@posts){
-					next if $pre !~ /\S/ && $post !~ /\S/;
-					$firstpass = 1 if ($seqs[0] =~ /^($pre)($phase)+($post)$/i || $seqs[0] =~ /^($pre)($phase)+$/i || $seqs[0] =~ /^($phase)+($post)$/i);
-					print "caught with $pre $phase $post\n" if $printer == 1;
-					last if $firstpass == 1;
-				}
-				last if $firstpass == 1;
-			}
-		}
-		
-		last if $firstpass == 1;
-	}
-	
-	#print "indel = $indel.. motif = $motif.. firstpass 2= mot\n" if $firstpass ==1;	
-	#print "indel = $indel.. motif = $motif.. firstpass 2= non\n" if $firstpass ==0;	
-	#<STDIN>;# if $firstpass ==1;
-	return "non" if $firstpass ==0;
-	return "mot" if $firstpass ==1;	
-
-}
-
-sub checkIfSeqsIdentical{
-	my @seqs = @_;
-	my $identical = 1;
-	
-	for my $j (1 ... $#seqs){
-		$identical = 0 if uc($seqs[0]) ne uc($seqs[$j]); 
-	}
-	return "NO" if $identical == 0;
-	return "YES" if $identical == 1;
-
-}
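
A terser equivalent, assuming the same case-insensitive comparison and "YES"/"NO" return convention:

sub seqs_identical {
	my $first = uc shift;
	return (grep { uc($_) ne $first } @_) ? "NO" : "YES";
}
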
-
-sub summarizeMutations{
-	my $mutspt = $_[0];
-	my @muts = @$mutspt;
-	my $tree = $_[1];
-	
-	my @returnarr = ();
-	
-	for (1 ... 38){
-		push @returnarr, "NA";
-	}
-	push @returnarr, "NULL";
-	return @returnarr if $tree eq "NULL" || scalar(@muts) < 1;
-	
-	
-	my @bspecies = ();
-	my @dspecies = ();
-	my $treecopy = $tree;
-	$treecopy =~ s/[\(\)]//g;
-	my @treeparts  = split(/[\.,]+/, $treecopy);
-	
-	for my $part (@treeparts){
-		if ($part =~ /\+/){
-			$part =~ s/\+//g;
-			#my @sp = split(/\s*/, $part);
-			#foreach my $p (@sp) {push @bspecies, $p;}
-			push @bspecies, $part;
-		}
-		if ($part =~ /\-/){
-			$part =~ s/\-//g;
-			#my @sp = split(/\s*/, $part);
-			#foreach my $p (@sp) {push @dspecies,  $p;}		
-			push @dspecies, $part;			
-		}
-		
-	}
-	#print "-------------------------------------------------------\n";
-	
-	my ($insertions, $deletions, $motinsertions, $motinsertionsf, $motdeletions, $motdeletionsf, $noninsertions, $nondeletions) = (0,0,0,0,0,0,0,0);
-	my ($binsertions, $bdeletions, $bmotinsertions,$bmotinsertionsf, $bmotdeletions, $bmotdeletionsf, $bnoninsertions, $bnondeletions) = (0,0,0,0,0,0,0,0);
-	my ($dinsertions, $ddeletions, $dmotinsertions,$dmotinsertionsf, $dmotdeletions, $dmotdeletionsf, $dnoninsertions, $dnondeletions) = (0,0,0,0,0,0,0,0);
-	my ($ninsertions, $ndeletions, $nmotinsertions,$nmotinsertionsf, $nmotdeletions, $nmotdeletionsf, $nnoninsertions, $nnondeletions) = (0,0,0,0,0,0,0,0);
-	my ($substitutions, $bsubstitutions, $dsubstitutions, $nsubstitutions, $indels, $subs) = (0,0,0,0,"NA","NA");
-
-	my @insertionsarr = (" ");
-	my @deletionsarr = (" ");
-	
-	my @substitutionsarr = (" ");
-	
-	
-	foreach my $mut (@muts){
-	#	print "mut = $mut\n";
-		chomp $mut;
-		$mut =~ s/=\t/= /g;
-		$mut =~ s/=$/= /g;
-		my %mhash = ();
-		my @mields = split(/\t/,$mut);
-		
-		foreach my $m (@mields){
-			my @fields = split(/=/,$m);
-			next if $fields[1] eq " ";
-			$mhash{$fields[0]} = $fields[1];
-		}
-		
-		my $myutype = ();
-		my $decided = 0;
-		
-		my $localnode  = $mhash{"node"};
-		$localnode =~ s/[\(\)\. ,]//g;
-
-		
-		foreach my $s (@bspecies){
-			if ($localnode eq $s)		{
-				$decided = 1; $myutype = "b";
-			}
-		}
-		
-		foreach my $s (@dspecies){
-			if ($localnode eq $s)		{
-				$decided = 1; $myutype = "d";
-			}
-		}
-
-		$myutype = "n" if $decided != 1;
-		
-		
-	#	print "tree=$tree, birth species=@bspecies, death species=@dspecies, node=$mhash{node}  .. myutype=$myutype .. \n";		
-	#	<STDIN> if $mhash{"type"} eq "insertion" && $myutype eq "b";
-		
-		
-		if ($mhash{"type"} eq "substitution"){
-			$substitutions++;
-			$bsubstitutions++ if $myutype eq "b";
-			$dsubstitutions++ if $myutype eq "d";
-			$nsubstitutions++ if $myutype eq "n";
-	#		print "substitution: from= $mhash{from}, to = $mhash{to}, and type = myutype\n";
-			push @substitutionsarr, "b:$mhash{position}:".$mhash{"from"}.">".$mhash{"to"} if $myutype eq "b";
-			push @substitutionsarr, "d:$mhash{position}:".$mhash{"from"}.">".$mhash{"to"} if $myutype eq "d";
-			push @substitutionsarr, "n:$mhash{position}:".$mhash{"from"}.">".$mhash{"to"} if $myutype eq "n";
-	#		print "substitutionsarr = @substitutionsarr\n";
-	#		<STDIN>;
-		}
-		else{
-			#print "tree=$tree, birth species=@bspecies, death species=@dspecies, node=$mhash{node}  .. myutype=$myutype .. indeltype=$mhash{indeltype}\n";		
-			if ($mhash{"type"} eq "deletion"){
-				$deletions++;
-				
-				$motdeletions++ if $mhash{"indeltype"} =~ /dmot/;
-				$motdeletionsf++ if $mhash{"indeltype"} =~ /dmotf/;
-				
-				$nondeletions++ if $mhash{"indeltype"} =~ /dnon/;
-				
-				$bdeletions++ if $myutype eq "b";
-				$ddeletions++ if $myutype eq "d";
-				$ndeletions++ if $myutype eq "n";
-
-				$bmotdeletions++ if $mhash{"indeltype"} =~ /dmot/ && $myutype eq "b";
-				$bmotdeletionsf++ if $mhash{"indeltype"} =~ /dmotf/ && $myutype eq "b";
-				$bnondeletions++ if $mhash{"indeltype"} =~ /dnon/ && $myutype eq "b";
-
-				$dmotdeletions++ if $mhash{"indeltype"} =~ /dmot/ && $myutype eq "d";
-				$dmotdeletionsf++ if $mhash{"indeltype"} =~ /dmotf/ && $myutype eq "d";
-				$dnondeletions++ if $mhash{"indeltype"} =~ /dnon/ && $myutype eq "d"; 
-
-				$nmotdeletions++ if $mhash{"indeltype"} =~ /dmot/ && $myutype eq "n";
-				$nmotdeletionsf++ if $mhash{"indeltype"} =~ /dmotf/ && $myutype eq "n";
-				$nnondeletions++ if $mhash{"indeltype"} =~ /dnon/ && $myutype eq "n";
-				
-				push @deletionsarr, "b:$mhash{indeltype}:$mhash{position}:".$mhash{"deletion"} if $myutype eq "b";
-				push @deletionsarr, "d:$mhash{indeltype}:$mhash{position}:".$mhash{"deletion"} if $myutype eq "d";
-				push @deletionsarr, "n:$mhash{indeltype}:$mhash{position}:".$mhash{"deletion"} if $myutype eq "n";
-			}
-
-			if ($mhash{"type"} eq "insertion"){
-				$insertions++;
-				
-				$motinsertions++ if $mhash{"indeltype"} =~ /imot/;
-				$motinsertionsf++ if $mhash{"indeltype"} =~ /imotf/;
-				$noninsertions++ if $mhash{"indeltype"} =~ /inon/;
-				
-				$binsertions++ if $myutype eq "b";
-				$dinsertions++ if $myutype eq "d";
-				$ninsertions++ if $myutype eq "n";
-
-				$bmotinsertions++ if $mhash{"indeltype"} =~ /imot/ && $myutype eq "b";
-				$bmotinsertionsf++ if $mhash{"indeltype"} =~ /imotf/ && $myutype eq "b";
-				$bnoninsertions++ if $mhash{"indeltype"} =~ /inon/ && $myutype eq "b";
-
-				$dmotinsertions++ if $mhash{"indeltype"} =~ /imot/ && $myutype eq "d";
-				$dmotinsertionsf++ if $mhash{"indeltype"} =~ /imotf/ && $myutype eq "d";
-				$dnoninsertions++ if $mhash{"indeltype"} =~ /inon/ && $myutype eq "d"; 
-
-				$nmotinsertions++ if $mhash{"indeltype"} =~ /imot/ && $myutype eq "n";
-				$nmotinsertionsf++ if $mhash{"indeltype"} =~ /imotf/ && $myutype eq "n";
-				$nnoninsertions++ if $mhash{"indeltype"} =~ /inon/ && $myutype eq "n";
-
-				push @insertionsarr, "b:$mhash{indeltype}:$mhash{position}:".$mhash{"insertion"} if $myutype eq "b";
-				push @insertionsarr, "d:$mhash{indeltype}:$mhash{position}:".$mhash{"insertion"} if $myutype eq "d";
-				push @insertionsarr, "n:$mhash{indeltype}:$mhash{position}:".$mhash{"insertion"} if $myutype eq "n";
-				
-			}
-		}
-	}
-	
-	
-	
-	$indels = "ins=".join(",",@insertionsarr).";dels=".join(",",@deletionsarr) if scalar(@insertionsarr) > 1 || scalar(@deletionsarr) > 1 ;
-	$subs = join(",",@substitutionsarr) if scalar(@substitutionsarr) > 1;
-	$indels =~ s/ //g;
-	$subs =~ s/ //g  ;
- 	
- 	#print "indels = $indels, subs=$subs\n";
- 	##<STDIN> if $indels =~ /[a-zA-Z0-9]/ || $subs =~ /[a-zA-Z0-9]/ ;
-	#print "tree = $tree, indels = $indels, subs = $subs, bspecies = @bspecies, dspecies = @dspecies \n";
-	my @returnarray = ();
-	
-	push (@returnarray, $insertions,  $deletions,  $motinsertions, $motinsertionsf,  $motdeletions,  $motdeletionsf,  $noninsertions,  $nondeletions) ;
-	push (@returnarray, $binsertions, $bdeletions, $bmotinsertions,$bmotinsertionsf, $bmotdeletions, $bmotdeletionsf, $bnoninsertions, $bnondeletions) ;
-	push (@returnarray, $dinsertions, $ddeletions, $dmotinsertions,$dmotinsertionsf, $dmotdeletions, $dmotdeletionsf, $dnoninsertions, $dnondeletions) ;
-	push (@returnarray, $ninsertions, $ndeletions, $nmotinsertions,$nmotinsertionsf, $nmotdeletions, $nmotdeletionsf, $nnoninsertions, $nnondeletions) ;
-	push (@returnarray, $substitutions, $bsubstitutions, $dsubstitutions, $nsubstitutions, $indels, $subs) ;
-		
-	push @returnarray, $tree;
-	
-	return (@returnarray);
-	
-}
-
-sub selectBetterTree{
-	my $printer = 0;
-	my $treestudy = $_[0];
-	my $alt = $_[1];
-	my $mutspt = $_[2];
-	my @muts = @$mutspt;
-	my @trees = (); my @alternatetrees=();
-
-	@trees  = split(/\|/,$treestudy) if $treestudy =~ /\|/;
-	@alternatetrees  = split(/[\|;]/,$alt) if $alt =~ /[\|;\(\)]/;
-
-	$trees[0]  = $treestudy if $treestudy !~ /\|/;
-	$alternatetrees[0]  = $alt if $alt !~ /[\|;\(\)]/;
-	
-	my @alltrees = (@trees, @alternatetrees);
-#	push(@alltrees,@alternatetrees);
-	
-	my %mutspecies = ();
-	
-	print "IN selectBetterTree..treestudy=$treestudy. alt=$alt. for: @_. trees=@trees<. alternatetrees=@alternatetrees\n" if $printer == 1;
-	#<STDIN>;
-	foreach my $mut (@muts){
-		print colored ['green'],"mut = $mut\n" if $printer == 1;
-		$mut =~ /node=([A-Z,\(\) ]+)/;
-		my $node  = $1;
-		$node =~s/[,\(\) ]+//g;
-		my @indivspecies = $node =~ /[A-Z]+/g;
-		#print "adding node: $node\n" if $printer == 1;
-		$mutspecies{$node} = $node;
-		
-		#foreach (@indivspecies) { 
-			#$mutspecies{$mut} = $_; #print "for $_ adding $mutspecies{$_}\n";
-		#}
-		
-	}
-	
-	my @treerecords = ();
-	my $treecount = -1;
-	foreach my $tree (@alltrees){
-		print "checking with tree $tree\n" if $printer == 1;
-		$treecount++;
-		$treerecords[$treecount] = 0;
-		my @indivspecies = ($tree =~ /[A-Z]+/g);
-		print "indivspecies=@indivspecies\n" if $printer == 1;
-		foreach my $species (@indivspecies){
-			print "checkin if exists species: $species\n" if $printer == 1;
-			$treerecords[$treecount]+=2 if exists $mutspecies{$species} && $mutspecies{$species} !~ /indeltype=[a-z]mot/;
-			$treerecords[$treecount]+=1.5 if exists $mutspecies{$species} && $mutspecies{$species} =~ /indeltype=[a-z]mot/;
-			$treerecords[$treecount]-- if !exists $mutspecies{$species};
-		}
-		
-		print "for tree $tree, our treecount = $treerecords[$treecount]\n" if $printer == 1;
-	}
-	
-	my @best_tree = array_largest_number_arrayPosition(@treerecords);
-	print "treerecords = @treerecords. hence, best tree = @best_tree\n" if $printer == 1;
-	
-	return ($alltrees[$best_tree[0]], $treerecords[$best_tree[0]]) if scalar(@best_tree) == 1;
-	print "best_tree[0] = $best_tree[0], and treerecords = $treerecords[$best_tree[0]]\n" if $printer == 1;
-	return ("NULL", -1) if $treerecords[$best_tree[0]] < 1;
-	my $rando = int(rand($#trees));
-	return ($alltrees[$rando], $treerecords[$rando]) if scalar(@best_tree) > 1;
-	
-}
-
-
-
-
-sub load_sameHash{
-	#my $g = %$_[0];
-	$sameHash{"CAGT"}="AGTC";
-	$sameHash{"ATGA"}="AATG";
-	$sameHash{"CAAC"}="AACC";
-	$sameHash{"GGAA"}="AAGG";
-	$sameHash{"TAAG"}="AAGT";
-	$sameHash{"CGAG"}="AGCG";
-	$sameHash{"TAGG"}="AGGT";
-	$sameHash{"GCAG"}="AGGC";
-	$sameHash{"TAGA"}="ATAG";
-	$sameHash{"TGA"}="ATG";
-	$sameHash{"CAAG"}="AAGC";
-	$sameHash{"CTAA"}="AACT";
-	$sameHash{"CAAT"}="AATC";
-	$sameHash{"GTAG"}="AGGT";
-	$sameHash{"GAAG"}="AAGG";
-	$sameHash{"CGA"}="ACG";
-	$sameHash{"GTAA"}="AAGT";
-	$sameHash{"ACAA"}="AAAC";
-	$sameHash{"GCGG"}="GGGC";
-	$sameHash{"ATCA"}="AATC";
-	$sameHash{"TAAC"}="AACT";
-	$sameHash{"GGCA"}="AGGC";
-	$sameHash{"TGAG"}="AGTG";
-	$sameHash{"AACA"}="AAAC";
-	$sameHash{"GAGC"}="AGCG";
-	$sameHash{"ACCA"}="AACC";
-	$sameHash{"TGAA"}="AATG";
-	$sameHash{"ACA"}="AAC";
-	$sameHash{"GAAC"}="AACG";
-	$sameHash{"GCA"}="AGC";
-	$sameHash{"CCAC"}="ACCC";
-	$sameHash{"CATA"}="ATAC";
-	$sameHash{"CAC"}="ACC";
-	$sameHash{"TACA"}="ATAC";
-	$sameHash{"GGAC"}="ACGG";
-	$sameHash{"AGA"}="AAG";
-	$sameHash{"ATAA"}="AAAT";
-	$sameHash{"CA"}="AC";
-	$sameHash{"CCCA"}="ACCC";
-	$sameHash{"TCAA"}="AATC";
-	$sameHash{"CAGA"}="AGAC";
-	$sameHash{"AATA"}="AAAT";
-	$sameHash{"CCA"}="ACC";
-	$sameHash{"AGAA"}="AAAG";
-	$sameHash{"AGTA"}="AAGT";
-	$sameHash{"GACG"}="ACGG";
-	$sameHash{"TCAG"}="AGTC";
-	$sameHash{"ACGA"}="AACG";
-	$sameHash{"CGCA"}="ACGC";
-	$sameHash{"GAGT"}="AGTG";
-	$sameHash{"GA"}="AG";
-	$sameHash{"TA"}="AT";
-	$sameHash{"TAA"}="AAT";
-	$sameHash{"CAG"}="AGC";
-	$sameHash{"GATA"}="ATAG";
-	$sameHash{"GTA"}="AGT";
-	$sameHash{"CCAA"}="AACC";
-	$sameHash{"TAG"}="AGT";
-	$sameHash{"CAAA"}="AAAC";
-	$sameHash{"AAGA"}="AAAG";
-	$sameHash{"CACG"}="ACGC";
-	$sameHash{"GTCA"}="AGTC";
-	$sameHash{"GGA"}="AGG";
-	$sameHash{"GGAT"}="ATGG";
-	$sameHash{"CGGG"}="GGGC";
-	$sameHash{"CGGA"}="ACGG";
-	$sameHash{"AGGA"}="AAGG";
-	$sameHash{"TAAA"}="AAAT";
-	$sameHash{"GAGA"}="AGAG";
-	$sameHash{"ACTA"}="AACT";
-	$sameHash{"GCGA"}="AGCG";
-	$sameHash{"CACA"}="ACAC";
-	$sameHash{"AGAT"}="ATAG";
-	$sameHash{"GAGG"}="AGGG";
-	$sameHash{"CGAC"}="ACCG";
-	$sameHash{"GGAG"}="AGGG";
-	$sameHash{"GCCA"}="AGCC";
-	$sameHash{"CCAG"}="AGCC";
-	$sameHash{"GAAA"}="AAAG";
-	$sameHash{"CAGG"}="AGGC";
-	$sameHash{"GAC"}="ACG";
-	$sameHash{"CAA"}="AAC";
-	$sameHash{"GACC"}="ACCG";
-	$sameHash{"GGCG"}="GGGC";
-	$sameHash{"GGTA"}="AGGT";
-	$sameHash{"AGCA"}="AAGC";
-	$sameHash{"GATG"}="ATGG";
-	$sameHash{"GTGA"}="AGTG";
-	$sameHash{"ACAG"}="AGAC";
-	$sameHash{"CGG"}="GGC";
-	$sameHash{"ATA"}="AAT";
-	$sameHash{"GACA"}="AGAC";
-	$sameHash{"GCAA"}="AAGC";
-	$sameHash{"CAGC"}="AGCC";
-	$sameHash{"GGGA"}="AGGG";
-	$sameHash{"GAG"}="AGG";
-	$sameHash{"ACAT"}="ATAC";
-	$sameHash{"GAAT"}="AATG";
-	$sameHash{"CACC"}="ACCC";
-	$sameHash{"GAT"}="ATG";
-	$sameHash{"GCG"}="GGC";
-	$sameHash{"GCAC"}="ACGC";
-	$sameHash{"GAA"}="AAG";
-	$sameHash{"TGGA"}="ATGG";
-	$sameHash{"CCGA"}="ACCG";
-	$sameHash{"CGAA"}="AACG";
-}
-
-
-
-sub load_revHash{
-	$revHash{"CTGA"}="AGTC";
-	$revHash{"TCTT"}="AAAG";
-	$revHash{"CTAG"}="AGCT";
-	$revHash{"GGTG"}="ACCC";
-	$revHash{"GCC"}="GGC";
-	$revHash{"GCTT"}="AAGC";
-	$revHash{"GCGT"}="ACGC";
-	$revHash{"GTTG"}="AACC";
-	$revHash{"CTCC"}="AGGG";
-	$revHash{"ATC"}="ATG";
-	$revHash{"CGAT"}="ATCG";
-	$revHash{"TTAA"}="AATT";
-	$revHash{"GTTC"}="AACG";
-	$revHash{"CTGC"}="AGGC";
-	$revHash{"TCGA"}="ATCG";
-	$revHash{"ATCT"}="ATAG";
-	$revHash{"GGTT"}="AACC";
-	$revHash{"CTTA"}="AAGT";
-	$revHash{"TGGC"}="AGCC";
-	$revHash{"CCG"}="GGC";
-	$revHash{"CGGC"}="GGCC";
-	$revHash{"TTAG"}="AACT";
-	$revHash{"GTG"}="ACC";
-	$revHash{"CTTT"}="AAAG";
-	$revHash{"TGCA"}="ATGC";
-	$revHash{"CGCT"}="AGCG";
-	$revHash{"TTCC"}="AAGG";
-	$revHash{"CT"}="AG";
-	$revHash{"C"}="G";
-	$revHash{"CTCT"}="AGAG";
-	$revHash{"ACTT"}="AAGT";
-	$revHash{"GGTC"}="ACCG";
-	$revHash{"ATTC"}="AATG";
-	$revHash{"GGGT"}="ACCC";
-	$revHash{"CCTA"}="AGGT";
-	$revHash{"CGCG"}="GCGC";
-	$revHash{"GTGT"}="ACAC";
-	$revHash{"GCCC"}="GGGC";
-	$revHash{"GTCG"}="ACCG";
-	$revHash{"TCCC"}="AGGG";
-	$revHash{"TTCA"}="AATG";
-	$revHash{"AGTT"}="AACT";
-	$revHash{"CCCT"}="AGGG";
-	$revHash{"CCGC"}="GGGC";
-	$revHash{"CTT"}="AAG";
-	$revHash{"TTGG"}="AACC";
-	$revHash{"ATT"}="AAT";
-	$revHash{"TAGC"}="AGCT";
-	$revHash{"ACTG"}="AGTC";
-	$revHash{"TCAC"}="AGTG";
-	$revHash{"CTGT"}="AGAC";
-	$revHash{"TGTG"}="ACAC";
-	$revHash{"ATCC"}="ATGG";
-	$revHash{"GTGG"}="ACCC";
-	$revHash{"TGGG"}="ACCC";
-	$revHash{"TCGG"}="ACCG";
-	$revHash{"CGGT"}="ACCG";
-	$revHash{"GCTC"}="AGCG";
-	$revHash{"TACG"}="ACGT";
-	$revHash{"GTTT"}="AAAC";
-	$revHash{"CAT"}="ATG";
-	$revHash{"CATG"}="ATGC";
-	$revHash{"GTTA"}="AACT";
-	$revHash{"CACT"}="AGTG";
-	$revHash{"TCAT"}="AATG";
-	$revHash{"TTA"}="AAT";
-	$revHash{"TGTA"}="ATAC";
-	$revHash{"TTTC"}="AAAG";
-	$revHash{"TACT"}="AAGT";
-	$revHash{"TGTT"}="AAAC";
-	$revHash{"CTA"}="AGT";
-	$revHash{"GACT"}="AGTC";
-	$revHash{"TTGC"}="AAGC";
-	$revHash{"TTC"}="AAG";
-	$revHash{"GCT"}="AGC";
-	$revHash{"GCAT"}="ATGC";
-	$revHash{"TGGT"}="AACC";
-	$revHash{"CCT"}="AGG";
-	$revHash{"CATC"}="ATGG";
-	$revHash{"CCAT"}="ATGG";
-	$revHash{"CCCG"}="GGGC";
-	$revHash{"TGCC"}="AGGC";
-	$revHash{"TG"}="AC";
-	$revHash{"TGCT"}="AAGC";
-	$revHash{"GCCG"}="GGCC";
-	$revHash{"TCTG"}="AGAC";
-	$revHash{"TGT"}="AAC";
-	$revHash{"TTAT"}="AAAT";
-	$revHash{"TAGT"}="AACT";
-	$revHash{"TATG"}="ATAC";
-	$revHash{"TTTA"}="AAAT";
-	$revHash{"CGTA"}="ACGT";
-	$revHash{"TA"}="AT";
-	$revHash{"TGTC"}="AGAC";
-	$revHash{"CTAT"}="ATAG";
-	$revHash{"TATA"}="ATAT";
-	$revHash{"TAC"}="AGT";
-	$revHash{"TC"}="AG";
-	$revHash{"CATT"}="AATG";
-	$revHash{"TCG"}="ACG";
-	$revHash{"ATTT"}="AAAT";
-	$revHash{"CGTG"}="ACGC";
-	$revHash{"CTG"}="AGC";
-	$revHash{"TCGT"}="AACG";
-	$revHash{"TCCG"}="ACGG";
-	$revHash{"GTT"}="AAC";
-	$revHash{"ATGT"}="ATAC";
-	$revHash{"CTTG"}="AAGC";
-	$revHash{"CCTT"}="AAGG";
-	$revHash{"GATC"}="ATCG";
-	$revHash{"CTGG"}="AGCC";
-	$revHash{"TTCT"}="AAAG";
-	$revHash{"CGTC"}="ACGG";
-	$revHash{"CG"}="GC";
-	$revHash{"TATT"}="AAAT";
-	$revHash{"CTCG"}="AGCG";
-	$revHash{"TCTC"}="AGAG";
-	$revHash{"TCCT"}="AAGG";
-	$revHash{"TGG"}="ACC";
-	$revHash{"ACTC"}="AGTG";
-	$revHash{"CTC"}="AGG";
-	$revHash{"CGC"}="GGC";
-	$revHash{"TTG"}="AAC";
-	$revHash{"ACCT"}="AGGT";
-	$revHash{"TCTA"}="ATAG";
-	$revHash{"GTAC"}="ACGT";
-	$revHash{"TTGA"}="AATC";
-	$revHash{"GTCC"}="ACGG";
-	$revHash{"GATT"}="AATC";
-	$revHash{"T"}="A";
-	$revHash{"CGTT"}="AACG";
-	$revHash{"GTC"}="ACG";
-	$revHash{"GCCT"}="AGGC";
-	$revHash{"TGC"}="AGC";
-	$revHash{"TTTG"}="AAAC";
-	$revHash{"GGCT"}="AGCC";
-	$revHash{"TCA"}="ATG";
-	$revHash{"GTGC"}="ACGC";
-	$revHash{"TGAT"}="AATC";
-	$revHash{"TAT"}="AAT";
-	$revHash{"CTAC"}="AGGT";
-	$revHash{"TGCG"}="ACGC";
-	$revHash{"CTCA"}="AGTG";
-	$revHash{"CTTC"}="AAGG";
-	$revHash{"GCTG"}="AGCC";
-	$revHash{"TATC"}="ATAG";
-	$revHash{"TAAT"}="AATT";
-	$revHash{"ACT"}="AGT";
-	$revHash{"TCGC"}="AGCG";
-	$revHash{"GGT"}="ACC";
-	$revHash{"TCC"}="AGG";
-	$revHash{"TTGT"}="AAAC";
-	$revHash{"TGAC"}="AGTC";
-	$revHash{"TTAC"}="AAGT";
-	$revHash{"CGT"}="ACG";
-	$revHash{"ATTA"}="AATT";
-	$revHash{"ATTG"}="AATC";
-	$revHash{"CCTC"}="AGGG";
-	$revHash{"CCGG"}="GGCC";
-	$revHash{"CCGT"}="ACGG";
-	$revHash{"TCCA"}="ATGG";
-	$revHash{"CGCC"}="GGGC";
-	$revHash{"GT"}="AC";
-	$revHash{"TTCG"}="AACG";
-	$revHash{"CCTG"}="AGGC";
-	$revHash{"TCT"}="AAG";
-	$revHash{"GTAT"}="ATAC";
-	$revHash{"GTCT"}="AGAC";
-	$revHash{"GCTA"}="AGCT";
-	$revHash{"TACC"}="AGGT";
-}
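
The two lookup tables appear to normalize motifs to a canonical form: sameHash maps a motif to a fixed rotation of itself, and revHash maps a motif to the same canonical rotation of its reverse complement (e.g. CTGA -> reverse complement TCAG -> AGTC). Empirically, checked against a sample of entries, the rotation chosen is the minimal one under the base ordering A, T, G, C (note GCGG -> GGGC, not CGGG); this ordering is inferred from the tables, not documented in the source. A sketch that computes the same normalization rather than enumerating it:

my %rank = (A => 0, T => 1, G => 2, C => 3);   # empirical ordering used by the tables

sub canonical_rotation {
	my $m = uc shift;
	my @rots = map { substr($m.$m, $_, length $m) } 0 .. length($m) - 1;
	my @sorted = sort {
		join("", map { $rank{$_} } split //, $a)
		cmp
		join("", map { $rank{$_} } split //, $b)
	} @rots;
	return $sorted[0];
}

sub canonical_revcomp_rotation {
	my $m = uc reverse shift;
	$m =~ tr/ACGT/TGCA/;                        # reverse complement
	return canonical_rotation($m);
}
# canonical_rotation("CAGT")          eq "AGTC"  (matches $sameHash{"CAGT"})
# canonical_rotation("GCGG")          eq "GGGC"  (matches $sameHash{"GCGG"})
# canonical_revcomp_rotation("CTGA")  eq "AGTC"  (matches $revHash{"CTGA"})
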
-
-
-sub allCaps{
-	my $motif = $_[0];
-	$motif =~ s/a/A/g;
-	$motif =~ s/c/C/g;
-	$motif =~ s/t/T/g;
-	$motif =~ s/g/G/g;
-	return $motif;
-}
-
-
-sub all_caps{
-	my @strand = split(/\s*/,$_[0]);
-	for my $i (0 ... $#strand){
-		if ($strand[$i] =~ /c/) {$strand[$i] = "C";next;}
-		if ($strand[$i] =~ /a/) {$strand[$i] = "A";next;}
-		if ($strand[$i] =~ /t/) { $strand[$i] = "T";next;}
-		if ($strand[$i] =~ /g/) {$strand[$i] = "G";next;}
-	}
-	return join("",@strand);
-}
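
For strings restricted to the A/C/G/T alphabet (in either case), both allCaps and all_caps reduce to Perl's built-in uc:

sub all_caps_uc { return uc $_[0]; }
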
-sub array_mean{
-	return "NA" if scalar(@_) == 0;
-	my $sum = 0;
-	foreach my $val (@_){
-		$sum = $sum + $val;
-	}
-	return ($sum/scalar(@_));
-}
-sub array_sum{
-	return "NA" if scalar(@_) == 0;
-	my $sum = 0;
-	foreach my $val (@_){
-		$sum = $sum + $val;
-	}
-	return ($sum);
-}
-
-sub variance{
-	return "NA" if scalar(@_) == 0;
-	return 0 if scalar(@_) == 1;
-	my $mean = 	array_mean(@_);
-	my $num = 0;
-	return 0 if scalar(@_) == 1;
-#	print "mean = $mean .. array = >@_<\n";
-	foreach my $ele (@_){
-	#	print "$num = $num + ($ele-$mean)*($ele-$mean)\n";
-		$num = $num + ($ele-$mean)*($ele-$mean);
-	}
-	my $var = $num / scalar(@_);
-	return $var;
-}
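
Note this is the population variance (dividing by N rather than N-1). A quick sanity check:

my $v = variance(2, 4, 4, 4, 5, 5, 7, 9);   # mean 5; squared deviations sum to 32
# $v == 32/8 == 4
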
-
-sub array_95confIntervals{
-	return "NA" if scalar(@_) <= 0;
-	my @sorted = sort { $a <=> $b } @_;
-#	print  "@sorted=",scalar(@sorted), "\n";
-	my $lower_index = int((scalar(@sorted) * 2.5) / 100);
-	my $upper_index = int((scalar(@sorted) * 97.5) / 100);
-	
-	return ($sorted[$lower_index], $sorted[$upper_index]);
-}
-
-sub array_median{
-	return "NA" if scalar(@_) == 0;
-	return $_[0] if scalar(@_) == 1;
-	my @sorted = sort { $a <=> $b } @_;
-	my $totalno = scalar(@sorted);
-	
-	#print "sorted = @sorted\n";
-	
-	my $pick = ();
-	if ($totalno % 2 == 1){
-		#print "odd set .. totalno = $totalno\n";
-		my $mid = $totalno / 2;
-		my $onehalfno = $mid - $mid % 1;
-		my $secondhalfno = $onehalfno + 1;
-		my $onehalf = $sorted[$onehalfno-1];
-		my $secondhalf = $sorted[$secondhalfno-1];
-		#print "onehalfno = $onehalfno and secondhalfno = $secondhalfno \n onehalf = $onehalf and secondhalf = $secondhalf\n";
-		
-		$pick =  $secondhalf;
-	}
-	else{
-		#print "even set .. totalno = $totalno\n";
-		my $mid = $totalno / 2;
-		my $onehalfno = $mid;
-		my $secondhalfno = $onehalfno + 1;
-		my $onehalf = $sorted[$onehalfno-1];
-		my $secondhalf = $sorted[$secondhalfno-1];
-		#print "onehalfno = $onehalfno and secondhalfno = $secondhalfno \n onehalf = $onehalf and secondhalf = $secondhalf\n";
-		$pick = ($onehalf + $secondhalf )/2;
-		
-	}
-	#print "pick = $pick..\n";
-	return $pick;
-
-}
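
The half-index arithmetic above works, but leans on Perl truncating fractional array indices; a more direct formulation of the same median, kept separate here as a sketch:

sub array_median_sketch {
	return "NA" unless @_;
	my @s = sort { $a <=> $b } @_;
	my $mid = int(@s / 2);
	return @s % 2 ? $s[$mid] : ($s[$mid - 1] + $s[$mid]) / 2;
}
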
-
-
-sub array_numerical_sort{
-		return "NA" if scalar(@_) == 0;
-        my @sorted = sort { $a <=> $b } @_;
-        return (@sorted);
-}
-
-sub array_smallest_number{
-	return "NA" if scalar(@_) == 0;
-	return $_[0] if scalar(@_) == 1;
-	my @sorted = sort { $a <=> $b } @_;
-	return $sorted[0];
-}
-
-
-sub array_largest_number{
-	return "NA" if scalar(@_) == 0;
-	return $_[0] if scalar(@_) == 1;
-	my @sorted = sort { $a <=> $b } @_;
-	return $sorted[$#sorted];
-}
-
-
-sub array_largest_number_arrayPosition{
-	return "NA" if scalar(@_) == 0;
-	return 0 if scalar(@_) == 1;
-	my $maxpos = 0;
-	my @maxposes = ();
-	my @maxvals = ();
-	my $maxval = array_smallest_number(@_);
-	for my $i (0 ... $#_){
-		if ($_[$i] > $maxval){
-			$maxval = $_[$i];
-			$maxpos = $i;
-		}
-		if ($_[$i] == $maxval){
-			$maxval = $_[$i];
-			if (scalar(@maxposes) == 0){
-				push @maxposes, $i;
-				push @maxvals, $_[$i];
-				
-			}
-			elsif ($maxvals[0] == $maxval){
-				push @maxposes, $i;
-				push @maxvals, $_[$i];
-			}
-			else{
-				@maxposes = (); @maxvals = ();
-				push @maxposes, $i;
-				push @maxvals, $_[$i];
-			}
-			
-		}
-		
-	}
-	return $maxpos  if scalar(@maxposes) < 2;
-	return (@maxposes);
-}
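
Callers such as selectBetterTree depend on the dual return convention here: a single index when the maximum is unique, a list of tied indices otherwise.

my @tied = array_largest_number_arrayPosition(3, 1, 3);   # (0, 2)
my $only = array_largest_number_arrayPosition(1, 5, 2);   # 1
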
-
-sub array_smallest_number_arrayPosition{
-	return "NA" if scalar(@_) == 0;
-	return 0 if scalar(@_) == 1;
-	my $minpos = 0;
-	my @minposes = ();
-	my @minvals = ();
-	my $minval = array_largest_number(@_);
-	my $maxval = array_smallest_number(@_);
-	#print "starting with $maxval, ending with $minval\n";
-	for my $i (0 ... $#_){
-		if ($_[$i] < $minval){
-			$minval = $_[$i];
-			$minpos = $i;
-		}
-		if ($_[$i] == $minval){
-			$minval = $_[$i];
-			if (scalar(@minposes) == 0){
-				push @minposes, $i;
-				push @minvals, $_[$i];
-				
-			}
-			elsif ($minvals[0] == $minval){
-				push @minposes, $i;
-				push @minvals, $_[$i];
-			}
-			else{
-				@minposes = (); @minvals = ();
-				push @minposes, $i;
-				push @minvals, $_[$i];
-			}
-			
-		}
-		
-	}
-	#print "minposes=@minposes\n";
-
-	return $minpos  if scalar(@minposes) < 2;
-	return (@minposes);
-}
-
-sub basic_stats{
-	my @arr = @_;
-#	print " array_smallest_number= ", array_smallest_number(@arr)," array_largest_number= ", array_largest_number(@arr), " array_mean= ",array_mean(@arr),"\n";
-	return ":";
-}
-#xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx 
-
-sub maftoAxt_multispecies {
-	my $printer = 0;
-#	print "in maftoAxt_multispecies : got @_\n";
-	my $fname = $_[0];
-	open(IN,"<$fname") or die "Cannot open $fname: $! \n";
-	my $treedefinition = $_[1];
-	open(OUT,">$_[2]") or die "Cannot open $_[2]: $! \n";
-	my $counter = 0;
-	my $exactspeciesset = $_[3];
-	my @exactspeciesset_unarranged = split(/,/,$exactspeciesset);
-	
-	$treedefinition=~s/[\)\(, ]/\t/g;
-	my @species=split(/\t+/,$treedefinition);
-	my @exactspecies=();
-	
-	foreach my $spec (@species){
-		foreach my $espec (@exactspeciesset_unarranged){
-			push @exactspecies, $spec if $spec eq $espec;
-		}
-	}
-#	print "exactspecies=@exactspecies\n";
-	
-	###########
-	my $select = 2;  
-	#select = 1 if all species need sequences to be present for each block; otherwise, it is 0
-	#select = 2 if only the allowed set makes up the alignment. use the removeset
-	# information to detect alignments that have other important genomes aligned.
-	###########
-	my @allowedset = ();
-	@allowedset = split(/;/,allowedSetOfSpecies(join("_",@species))) if $select == 0;
-	@allowedset = join("_",0,@species) if $select == 1;
-	#print "species = @species , allowedset =",join("\n", @allowedset) ," \n"; 
-	@allowedset = join("_",0,@exactspecies) if $select == 2;
-	#print "allowedset = @allowedset and exactspecies = @exactspecies\n";
-	
-	my $start = 0;
-	my @sequences = ();
-	my @titles = ();
-	my $species_counter = "0";
-	my $countermatch = 0;
-	my $outsideSpecies=0;
-	
-	while(my $line = <IN>){
-		next if $line =~ /^#/;
-		next if $line =~ /^i/;
-		chomp $line;
-		#print "$line";
-		my @fields = split(/\s+/,$line);
-		chomp $line;
-		if ($line =~ /^a /){
-			$start = 1;
-		}
-		
-		if ($line =~ /^s /){
-		#	print "fields1 = $fields[1] , start = $start\n";
-		
-			foreach my $sp (@species){
-				if ($fields[1] =~ /$sp/){
-					$species_counter = $species_counter."_".$sp;
-					push(@sequences, $fields[6]);
-					my @sp_info = split(/\./,$fields[1]);
-					my $title = join(" ",@sp_info, $fields[2], ($fields[2]+$fields[3]), $fields[4]);
-					push(@titles, $title);				
-					
-				}
-			}
-		}
-		
-		if (($line !~ /^a/) && ($line !~ /^s/) && ($line !~ /^#/) && ($line !~ /^i/) && ($start == 1)){
-		
-			my $arranged = reorderSpecies($species_counter, @species);
-			my $stopper = 1;
-			my $arrno = 0;
-			foreach my $set (@allowedset){
-				if ($arranged eq $set){
-	#				print "$arranged == $set\n";
-					$stopper = 0; last;
-				}
-				$arrno++;
-			}
-	
-			if ($stopper == 0) {
-			#	print "    accepted\n";
-				@titles = split ";", orderInfo(join(";", @titles), $species_counter, $arranged) if $species_counter ne $arranged;
-				
-				@sequences = split ";", orderInfo(join(";", @sequences), $species_counter, $arranged) if $species_counter ne $arranged;
-				my $filteredseq = filter_gaps(@sequences);
-				
-				if ($filteredseq ne "SHORT"){
-					$counter++;
-					print OUT join (" ",$counter, @titles), "\n";
-					print OUT $filteredseq, "\n";
-					print OUT "\n"; 
-					$countermatch++;
-				}
-			#	my @filtered_seq = split(/\t/,filter_gaps(@sequences) );
-			}
-			else{#print "\n";
-			}
-	
-			@sequences = (); @titles = (); $start = 0;$species_counter = "0";
-			next;		
-			
-		}
-	}
-#	print "countermatch = $countermatch\n";
-}
-
-sub reorderSpecies{
-	my @inarr=@_;
-	my $currSpecies = shift (@inarr);
-	my $ordered_species = 0;
-	my @species=@inarr;
-	foreach my $order (@species){
-		$ordered_species = $ordered_species."_".$order	if	$currSpecies=~ /$order/;
-	}
-	return $ordered_species;
-
-}
-
-sub filter_gaps{
-	my @sequences = @_;
-#	print "sequences sent are @sequences\n";
-	my $seq_length = length($sequences[0]);
-	my $seq_no = scalar(@sequences);
-	my $allgaps = "";
-	for (1 ... $seq_no){
-		$allgaps = $allgaps."-";
-	}
-	
-	my @seq_array = ();
-	my $seq_counter = 0;
-	foreach my $seq (@sequences){
-#		my @sequence = split(/\s*/,$seq);
-		$seq_array[$seq_counter] = [split(/\s*/,$seq)];
-#		push @seq_array, [@sequence];
-		$seq_counter++;
-	}
-	my $g = 0;
-	while ( $g < $seq_length){
-		last if (!exists $seq_array[0][$g]);
-		my $bases = "";
-		for my $u (0 ... $#seq_array){
-			$bases = $bases.$seq_array[$u][$g];
-		}	
-#		print $bases, "\n";
-		if ($bases eq $allgaps){
-#			print "bases are $bases, position is $g \n";
-			for my $seq (@seq_array){
-				splice(@$seq , $g, 1);
-			}
-		}
-		else {
-			$g++;
-		}
-	}
-	
-	my @outs = ();
-	
-	foreach my $seq (@seq_array){
-		push(@outs, join("",@$seq));
-	}
-	return "SHORT" if length($outs[0]) <=100;
-	return (join("\n", @outs));	
-}
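
filter_gaps splices out alignment columns that are gaps in every sequence and rejects blocks that end up 100 bp or shorter. A small illustration (the x 30 repetition keeps the result above the length cutoff):

my $out = filter_gaps("ACG-T" x 30, "AC--T" x 30);
# in each 5-column unit, only the column that is '-' in both rows is removed:
# the rows come back as repetitions of "ACGT" and "AC-T", newline-separated
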
-
-
-sub allowedSetOfSpecies{
-	my @allowed_species = split(/_/,$_[0]);
-	unshift @allowed_species, 0;
-#	print "allowed set = @allowed_species \n";
-	my @output = ();
-	for (0 ... scalar(@allowed_species) - 4){
-		push(@output, join("_",@allowed_species));
-		pop @allowed_species;
-	}
-	return join(";",reverse(@output));
-
-}
-
-
-sub orderInfo{
-	my @info = split(/;/,$_[0]);
-#	print "info = @info";
-	my @old = split(/_/,$_[1]);
-	my @new = split(/_/,$_[2]);
-	shift @old; shift @new;
-	my @outinfo = ();
-	foreach my $spe (@new){
-		for my $no (0 ... $#old){
-			if ($spe eq $old[$no]){
-				push(@outinfo, $info[$no]);
-			}
-		}
-	}
-#	print "outinfo = @outinfo \n"; 
-	return join(";", @outinfo);
-}
-
-#xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx 
-
-sub printarr {
-	print ">::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::\n";
-	foreach my $line (@_) {print "$line\n";}
-	print "::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::<\n";
-}
-
--- a/tools/regVariation/microsatellite_birthdeath.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-<tool id="microsatellite_birthdeath" name="Identify microsatellite births and deaths" version="1.0.0">
-  <description> and causal mutational mechanisms from previously identified orthologous microsatellite sets</description>
-  <command interpreter="perl">
-      microsatellite_birthdeath.pl 
-      $alignment 
-      $orthfile 
-      $outfile 
-      ${alignment.metadata.species} 
-      "$tree_definition" 
-      $thresholds
-      $separation 
-      $simthresh
-	
-  </command>
-  <inputs>
-    <page>
-        <param format="maf" name="alignment" type="data" label="Select MAF alignments"/>
-        
-        <param format="txt" name="orthfile" type="data" label="Select raw microsatellite data"/>
-
-    	<param name="tree_definition" size="200" type="text" value= "((((hg18,panTro2),ponAbe2),rheMac2),calJac1)" label="Tree definition of all species above whether or not selected for microsatellite extraction" 
-    	help="For example: ((((hg18,panTro2),ponAbe2),rheMac2),calJac1)"/>
-      	
-      	<param name="separation" size="10" type="integer" value="40" label="Total length of flanking DNA used for sequence-similarity comparisons among species"
-    	help="A value of 40 means: 20 bp upstream and 20 bp downstream DNA will be used for similarity comparisons."/>
- 
-     	<param name="thresholds" size="15" type="text" value="9,10,12,12" label="Minimum Threshold for the number of repeats for microsatellites"
-    	help="A value of 9,10,12,12 means: All monos having fewer than 9 repeats, dis having fewer than 5 repeats, tris having fewer than 4 repeats, tetras having fewer than 3 repeats will be excluded from the output."/>
-
-     	<param name="simthresh" size="10" type="integer" value="80" label="Percent sequence similarity of flanking regions (of length same as  the above separation distance"
-    	help="Enter a value from 0 to 100"/>
-
-
-     </page>
-  </inputs>
-  <outputs>
-    <data format="txt" name="outfile" metadata_source="orthfile"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="alignment" value="chr22_5sp.maf"/>
-      <param name="orthfile" value="chr22_5sp.microraw.tabular"/>
-      <param name="thresholds" value="9,10,12,12"/>
-      <param name="tree_definition" value="((((hg18, panTro2), ponAbe2), rheMac2), calJac1)"/>
-      <param name="separation" value="40"/>
-      <param name="simthresh" value="80"/>
-      <output name="outfile" file="chr22_5sp.microtab.tabular"/>
-    </test>
-  </tests>
-
-
- <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This tool uses raw orthologous microsatellite clusters (identified by the tool "Extract orthologous microsatellites") to identify microsatellite births and deaths along individual lineages of a phylogenetic tree.
-
-</help>  
-
-
-</tool>
--- a/tools/regVariation/microsats_alignment_level.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,323 +0,0 @@
-#!/usr/bin/env python
-#Guruprasad Ananda
-"""
-Uses SPUTNIK to fetch microsatellites and extracts orthologous repeats from the sputnik output.
-"""
-from galaxy import eggs
-import sys, os, tempfile, string, math, re
-
-def reverse_complement(text):
-    DNA_COMP = string.maketrans( "ACGTacgt", "TGCAtgca" )
-    comp = [ch for ch in text.translate(DNA_COMP)]
-    comp.reverse()
-    return "".join(comp)
-
-def main():
-    if len(sys.argv) != 8:
-        print >>sys.stderr, "Insufficient number of arguments."
-        sys.exit()
-    
-    infile = open(sys.argv[1],'r')
-    separation = int(sys.argv[2])
-    outfile = sys.argv[3]
-    align_type = sys.argv[4]
-    if align_type == "2way":
-        align_type_len = 2
-    elif align_type == "3way":
-        align_type_len = 3
-    mono_threshold = int(sys.argv[5])
-    non_mono_threshold = int(sys.argv[6])
-    allow_different_units = int(sys.argv[7])
-    
-    print "Min distance = %d bp; Min threshold for mono repeats = %d; Min threshold for non-mono repeats = %d; Allow different motifs = %s" %(separation, mono_threshold, non_mono_threshold, allow_different_units==1)
-    try:
-        fout = open(outfile, "w")
-        print >>fout, "#Block\tSeq1_Name\tSeq1_Start\tSeq1_End\tSeq1_Type\tSeq1_Length\tSeq1_RepeatNumber\tSeq1_Unit\tSeq2_Name\tSeq2_Start\tSeq2_End\tSeq2_Type\tSeq2_Length\tSeq2_RepeatNumber\tSeq2_Unit"
-        #sputnik_cmd = os.path.join(os.path.split(sys.argv[0])[0], "sputnik")
-        sputnik_cmd = "sputnik"
-        input = infile.read()
-        skipped = 0
-        block_num = 0
-        input = input.replace('\r','\n')
-        for block in input.split('\n\n'):
-            block_num += 1
-            tmpin = tempfile.NamedTemporaryFile()
-            tmpout = tempfile.NamedTemporaryFile()
-            tmpin.write(block.strip())
-            tmpin.flush()    #flush so that sputnik sees the complete block on disk
-            cmdline = sputnik_cmd + " " + tmpin.name + "  > /dev/null 2>&1 >> " + tmpout.name
-            try:
-                os.system(cmdline)
-            except Exception, es:
-                continue
-            sputnik_out = tmpout.read()
-            tmpin.close()
-            tmpout.close()
-            if sputnik_out != "":
-                if len(block.split('>')[1:]) != 2:        #skip alignment blocks that do not contain exactly 2 sequences
-                    skipped += 1
-                    continue
-                align_block = block.strip().split('>')
-                
-                lendict = {'mononucleotide':1, 'dinucleotide':2, 'trinucleotide':3, 'tetranucleotide':4, 'pentanucleotide':5, 'hexanucleotide':6}
-                blockdict={}
-                r=0
-                namelist=[]
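-                # blockdict maps each sequence index r (1 or 2) to parallel lists
-                # ('types', 'starts', 'ends', 'lengths', 'counts', 'units', ...),
-                # one entry per microsatellite found by sputnik in that sequence.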
-                for k,sput_block in enumerate(sputnik_out.split('>')[1:]):
-                    whole_seq = ''.join(align_block[k+1].split('\n')[1:]).replace('\n','').strip()
-                    p = re.compile('\n(\S*nucleotide)')
-                    repeats = p.split(sput_block.strip())
-                    repeats_count = len(repeats)
-                    j = 1
-                    name = repeats[0].strip()
-                    try:
-                        coords = re.search('\d+[-_:]\d+',name).group()
-                        coords = coords.replace('_','-').replace(':','-')
-                    except Exception, e:
-                        coords = '0-0'
-                    r += 1
-                    blockdict[r]={}
-                    try:
-                        sp_name = name[:name.index('.')]
-                        chr_name = name[name.index('.'):name.index('(')]
-                        namelist.append(sp_name + chr_name)
-                    except:
-                        namelist.append(name[:20])
-                    while j < repeats_count:
-                        try:
-                            if repeats[j].strip() not in lendict:
-                                j += 2
-                                continue
-                            
-                            if blockdict[r].has_key('types'):
-                                blockdict[r]['types'].append(repeats[j].strip())        #type of microsat     
-                            else:
-                                blockdict[r]['types'] = [repeats[j].strip()]               #type of microsat  
-                            
-                            sequence = ''.join(align_block[r].split('\n')[1:]).replace('\n','').strip()
-                            start = int(repeats[j+1].split('--')[0].split(':')[0].strip())
-                            #check to see if there are gaps before the start of the repeat, and change the start accordingly
-                            sgaps = 0
-                            ch_pos = start - 1
-                            while ch_pos >= 0:
-                                if whole_seq[ch_pos] == '-':
-                                    sgaps += 1
-                                else:
-                                    break    #break at the 1st non-gap character
-                                ch_pos -= 1
-                            if blockdict[r].has_key('starts'):
-                                blockdict[r]['starts'].append(start+sgaps)        #start co-ords adjusted with alignment co-ords to include GAPS    
-                            else:
-                                blockdict[r]['starts'] = [start+sgaps]
-                            
-                            end = int(repeats[j+1].split('--')[0].split(':')[1].strip())
-                            #check to see if there are gaps after the end of the repeat, and change the end accordingly
-                            egaps = 0
-                            for ch in whole_seq[end:]:
-                                if ch == '-':
-                                    egaps += 1
-                                else:
-                                    break    #break at the 1st non-gap character
-                            if blockdict[r].has_key('ends'):
-                                blockdict[r]['ends'].append(end+egaps)        #end co-ords adjusted with alignment co-ords to include GAPS    
-                            else:
-                                blockdict[r]['ends'] = [end+egaps]
-                                
-                            repeat_seq = ''.join(repeats[j+1].replace('\r','\n').split('\n')[1:]).strip()       #Repeat Sequence
-                            repeat_len = repeats[j+1].split('--')[1].split()[1].strip()
-                            gap_count = repeat_seq.count('-')
-                            #print repeats[j+1].split('--')[1], len(repeat_seq), repeat_len, gap_count
-                            repeat_len = str(int(repeat_len) - gap_count)
-                            
-                            rel_start = blockdict[r]['starts'][-1]
-                            gaps_before_start = whole_seq[:rel_start].count('-')
-                            
-                            if blockdict[r].has_key('gaps_before_start'):
-                                blockdict[r]['gaps_before_start'].append(gaps_before_start)  #gap count preceding the repeat start
-                            else:
-                                blockdict[r]['gaps_before_start'] = [gaps_before_start]       #gap count preceding the repeat start
-                            
-                            whole_seq_start= int(coords.split('-')[0])
-                            if blockdict[r].has_key('whole_seq_start'):
-                                blockdict[r]['whole_seq_start'].append(whole_seq_start)  #genomic start of the aligned sequence
-                            else:
-                                blockdict[r]['whole_seq_start'] = [whole_seq_start]       #genomic start of the aligned sequence
-                                
-                            if blockdict[r].has_key('lengths'):
-                                blockdict[r]['lengths'].append(repeat_len)  #lengths  
-                            else:
-                                blockdict[r]['lengths'] = [repeat_len]       #lengths 
-                            
-                            if blockdict[r].has_key('counts'):
-                                blockdict[r]['counts'].append(str(int(repeat_len)/lendict[repeats[j].strip()]))  #repeat number = length / unit size
-                            else:
-                                blockdict[r]['counts'] = [str(int(repeat_len)/lendict[repeats[j].strip()])]         #repeat number = length / unit size
-                            
-                            if blockdict[r].has_key('units'):
-                                blockdict[r]['units'].append(repeat_seq[:lendict[repeats[j].strip()]])  #Repeat Unit
-                            else:
-                                blockdict[r]['units'] = [repeat_seq[:lendict[repeats[j].strip()]]]         #Repeat Unit
-                            
-                        except Exception, eh:
-                            pass
-                        j+=2
-                    #check the co-ords of all repeats corresponding to a sequence and remove adjacent repeats separated by less than the user-specified 'separation'. 
-                    delete_index_list = []
-                    for ind, item in enumerate(blockdict[r]['ends']):
-                        try:
-                            if blockdict[r]['starts'][ind+1]-item < separation:
-                                if ind not in delete_index_list:
-                                    delete_index_list.append(ind)
-                                if ind+1 not in delete_index_list:
-                                    delete_index_list.append(ind+1)
-                        except Exception, ek:
-                            pass
-                    for index in delete_index_list:    #mark them for deletion
-                        try:
-                            blockdict[r]['starts'][index] = 'marked'
-                            blockdict[r]['ends'][index] = 'marked'
-                            blockdict[r]['types'][index] = 'marked'
-                            blockdict[r]['gaps_before_start'][index] = 'marked'
-                            blockdict[r]['whole_seq_start'][index] = 'marked'
-                            blockdict[r]['lengths'][index] = 'marked'
-                            blockdict[r]['counts'][index] = 'marked'
-                            blockdict[r]['units'][index] = 'marked'
-                        except Exception, ej:
-                            pass
-                    #remove 'marked' elements from all the lists
-                    """
-                    for key in blockdict[r].keys():
-                        for elem in blockdict[r][key]:
-                            if elem == 'marked':
-                                blockdict[r][key].remove(elem)
-                    """
-                    #print blockdict    
-                
-                #make sure that the blockdict has keys for both the species   
-                if (1 not in blockdict) or (2 not in blockdict):
-                    continue
-                
-                visited_2 = [0 for x in range(len(blockdict[2]['starts']))]
-                for ind1,coord_s1 in enumerate(blockdict[1]['starts']):
-                    if coord_s1 == 'marked':
-                        continue
-                    coord_e1 = blockdict[1]['ends'][ind1]
-                    out = []
-                    for ind2,coord_s2 in enumerate(blockdict[2]['starts']):
-                        if coord_s2 == 'marked':
-                            visited_2[ind2] = 1
-                            continue
-                        coord_e2 = blockdict[2]['ends'][ind2]
-                        #skip if the 2 repeats are not of the same type or don't have the same repeating unit.
-                        if allow_different_units == 0:
-                            if (blockdict[1]['types'][ind1] != blockdict[2]['types'][ind2]):
-                                continue
-                            else:
-                                if (blockdict[1]['units'][ind1] not in blockdict[2]['units'][ind2]*2) and (reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2):
-                                    continue
-                        #print >>sys.stderr, (reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2)
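-                        # Doubling the unit string (units[ind2]*2) makes every cyclic
-                        # rotation of the motif a substring, so e.g. 'GA' matches an
-                        # 'AG' repeat; the reverse complement is checked the same way.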
-                        #skip if the repeat number thresholds are not met
-                        if blockdict[1]['types'][ind1] == 'mononucleotide':
-                            if (int(blockdict[1]['counts'][ind1]) < mono_threshold):
-                                continue
-                        else:
-                            if (int(blockdict[1]['counts'][ind1]) < non_mono_threshold):
-                                continue
-                        
-                        if blockdict[2]['types'][ind2] == 'mononucleotide':
-                            if (int(blockdict[2]['counts'][ind2]) < mono_threshold):
-                                continue
-                        else:
-                            if (int(blockdict[2]['counts'][ind2]) < non_mono_threshold):
-                                continue
-                        #print "s1,e1=%s,%s; s2,e2=%s,%s" %(coord_s1,coord_e1,coord_s2,coord_e2)
-                        if (coord_s1 in range(coord_s2,coord_e2)) or (coord_e1 in range(coord_s2,coord_e2)):
-                            out.append(str(block_num))
-                            out.append(namelist[0])
-                            rel_start = blockdict[1]['whole_seq_start'][ind1] + coord_s1 - blockdict[1]['gaps_before_start'][ind1]
-                            rel_end = rel_start + int(blockdict[1]['lengths'][ind1]) 
-                            out.append(str(rel_start))
-                            out.append(str(rel_end))
-                            out.append(blockdict[1]['types'][ind1])
-                            out.append(blockdict[1]['lengths'][ind1])
-                            out.append(blockdict[1]['counts'][ind1])
-                            out.append(blockdict[1]['units'][ind1])
-                            out.append(namelist[1])
-                            rel_start = blockdict[2]['whole_seq_start'][ind2] + coord_s2 - blockdict[2]['gaps_before_start'][ind2]
-                            rel_end = rel_start + int(blockdict[2]['lengths'][ind2]) 
-                            out.append(str(rel_start))
-                            out.append(str(rel_end))
-                            out.append(blockdict[2]['types'][ind2])
-                            out.append(blockdict[2]['lengths'][ind2])
-                            out.append(blockdict[2]['counts'][ind2])
-                            out.append(blockdict[2]['units'][ind2])
-                            print >>fout, '\t'.join(out)
-                            visited_2[ind2] = 1
-                            out=[]
-                
-                if 0 in visited_2:    #there are still some elements in 2nd set which haven't found orthologs yet.
-                    for ind2, coord_s2 in enumerate(blockdict[2]['starts']):
-                        if coord_s2 == 'marked':
-                            continue
-                        if visited_2[ind2] != 0:
-                            continue
-                        coord_e2 = blockdict[2]['ends'][ind2]
-                        out = []
-                        for ind1,coord_s1 in enumerate(blockdict[1]['starts']):
-                            if coord_s1 == 'marked':
-                                continue
-                            coord_e1 = blockdict[1]['ends'][ind1]
-                            #skip if the 2 repeats are not of the same type or don't have the same repeating unit.
-                            if allow_different_units == 0:
-                                if (blockdict[1]['types'][ind1] != blockdict[2]['types'][ind2]):
-                                    continue
-                                else:
-                                    if (blockdict[1]['units'][ind1] not in blockdict[2]['units'][ind2]*2):# and reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2:
-                                        continue
-                            #skip if the repeat number thresholds are not met
-                            if blockdict[1]['types'][ind1] == 'mononucleotide':
-                                if (int(blockdict[1]['counts'][ind1]) < mono_threshold):
-                                    continue
-                            else:
-                                if (int(blockdict[1]['counts'][ind1]) < non_mono_threshold):
-                                    continue
-                            
-                            if blockdict[2]['types'][ind2] == 'mononucleotide':
-                                if (int(blockdict[2]['counts'][ind2]) < mono_threshold):
-                                    continue
-                            else:
-                                if (int(blockdict[2]['counts'][ind2]) < non_mono_threshold):
-                                    continue
-                            
-                            if (coord_s2 in range(coord_s1,coord_e1)) or (coord_e2 in range(coord_s1,coord_e1)):
-                                out.append(str(block_num)) 
-                                out.append(namelist[0])
-                                rel_start = blockdict[1]['whole_seq_start'][ind1] + coord_s1 - blockdict[1]['gaps_before_start'][ind1]
-                                rel_end = rel_start + int(blockdict[1]['lengths'][ind1]) 
-                                out.append(str(rel_start))
-                                out.append(str(rel_end))
-                                out.append(blockdict[1]['types'][ind1])
-                                out.append(blockdict[1]['lengths'][ind1])
-                                out.append(blockdict[1]['counts'][ind1])
-                                out.append(blockdict[1]['units'][ind1])
-                                out.append(namelist[1])
-                                rel_start = blockdict[2]['whole_seq_start'][ind2] + coord_s2 - blockdict[2]['gaps_before_start'][ind2]
-                                rel_end = rel_start + int(blockdict[2]['lengths'][ind2]) 
-                                out.append(str(rel_start))
-                                out.append(str(rel_end))
-                                out.append(blockdict[2]['types'][ind2])
-                                out.append(blockdict[2]['lengths'][ind2])
-                                out.append(blockdict[2]['counts'][ind2])
-                                out.append(blockdict[2]['units'][ind2])
-                                print >>fout, '\t'.join(out)
-                                visited_2[ind2] = 1
-                                out=[]
-                            
-                    #print >>fout, blockdict
-    except Exception, exc:
-        print >>sys.stderr, "type(exc),args,exc: %s, %s, %s" %(type(exc), exc.args, exc)
-
-if __name__ == "__main__":
-    main()
-    
--- a/tools/regVariation/microsats_alignment_level.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-<tool id="microsats_align1" name="Extract Orthologous Microsatellites">
-  <description> from pair-wise alignments</description>
-  <command interpreter="python">
-  	microsats_alignment_level.py $input1 $separation $out_file1 "2way" $mono_threshold $non_mono_threshold $allow_different_units
-  </command>
-  <inputs>
-    <page>
-    	<param format="fasta" name="input1" type="data" label="Select data"/>
-    	<param name="separation" size="10" type="integer" value="10" label="Minimum base pair distance between adjacent microsatellites"
-    	help="A value of 10 means: Adjacent microsatellites separated by less than 10 base pairs will be excluded from the output."/>
-    	<param name="mono_threshold" size="10" type="integer" value="9" label="Minimum Threshold for the number of repeats for mononucleotide microsatellites"
-    	help="A value of 9 means: All mononucleotide microsatellites having fewer than 9 repeats will be excluded from the output."/>
-    	<param name="non_mono_threshold" size="10" type="integer" value="4" label="Minimum Threshold for the number of repeats for non-mononucleotide microsatellites"
-    	help="A value of 4 means: All non-mononucleotide microsatellites having fewer than 4 repeats will be excluded from the output."/>
-    	<param name="allow_different_units" size="5" type="select" label="Allow orthologous positions to have different microsatellite repeat units/motifs?">
-    		<option value="0" selected="true">No</option>
-          	<option value="1">Yes</option>
-         </param>
-    </page>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1" metadata_source="input1"/>
-  </outputs>
-  <requirements>
-     <requirement type="package">sputnik</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="input1" value="2way.maf"/>
-      <param name="separation" value="10"/>
-      <param name="mono_threshold" value="9"/>
-      <param name="non_mono_threshold" value="4"/>
-      <param name="allow_different_units" value="0"/>
-      <output name="out_file1" file="ortho_ms.tab"/>
-    </test>
-  </tests>
-
- <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This tool uses a modified version of SPUTNIK to fetch microsatellite repeats from the input fasta sequences and extracts orthologous repeats from the sputnik output. The modified version allows detection of mononucleotide microsatellites. More information on SPUTNIK can be found on this website_. The modified version is available here_.
-
------
-
-.. class:: warningmark
-
-**Note**
-
-- Any block(s) not containing exactly 2 species will be omitted.
-
-- This tool filters out microsatellites based on the user-specified values for minimum distance and repeat-number thresholds. It also removes microsatellites that lack an orthologous microsatellite in the other species.
-
-.. _website: http://espressosoftware.com/pages/sputnik.jsp   
-.. _here: http://www.bx.psu.edu/svn/universe/dependencies/sputnik/
-</help>  
-
-
-</tool>
--- a/tools/regVariation/microsats_mutability.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,489 +0,0 @@
-#!/usr/bin/env python
-#Guruprasad Ananda
-"""
-This tool computes microsatellite mutability for the orthologous microsatellites fetched by the 'Extract Orthologous Microsatellites from pair-wise alignments' tool.
-"""
-from galaxy import eggs
-import sys, string, re, commands, tempfile, os, fileinput
-from galaxy.tools.util.galaxyops import *
-from bx.intervals.io import *
-from bx.intervals.operations import quicksect
-
-fout = open(sys.argv[2],'w')
-p_group = int(sys.argv[3])        #primary "group-by" feature
-p_bin_size = int(sys.argv[4])
-s_group = int(sys.argv[5])        #sub-group by feature
-s_bin_size = int(sys.argv[6])
-mono_threshold = 9
-non_mono_threshold = 4
-p_group_cols = [p_group, p_group+7]
-s_group_cols = [s_group, s_group+7]
-num_generations = int(sys.argv[7])
-region = sys.argv[8] 
-int_file = sys.argv[9]
-if int_file != "None": #User has specified an interval file
-    try:
-        fint = open(int_file, 'r')
-        dbkey_i = sys.argv[10]
-        chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[11] )
-    except:
-        stop_err("Unable to open input Interval file")
-    
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-def reverse_complement(text):
-    DNA_COMP = string.maketrans( "ACGTacgt", "TGCAtgca" )
-    comp = [ch for ch in text.translate(DNA_COMP)]
-    comp.reverse()
-    return "".join(comp)
-
-def get_unique_elems(elems):
-    seen=set()
-    return[x for x in elems if x not in seen and not seen.add(x)]
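-# Order-preserving de-duplication: set.add() returns None, so 'not seen.add(x)'
-# is always True and is only evaluated for elements not yet in 'seen'.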
-
-def get_binned_lists(uniqlist, binsize):
-    binnedlist=[]
-    uniqlist.sort()
-    start = int(uniqlist[0])
-    bin_ind=0
-    l_ind=0
-    binnedlist.append([])
-    while l_ind < len(uniqlist):
-        elem = int(uniqlist[l_ind])
-        if elem in range(start,start+binsize):
-            binnedlist[bin_ind].append(elem)
-        else:
-            start += binsize
-            bin_ind += 1
-            binnedlist.append([])
-            binnedlist[bin_ind].append(elem)
-        l_ind += 1
-    return binnedlist
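-# Illustrative (editor's note): get_binned_lists([3,4,9,11], 5) -> [[3,4], [9,11]].
-# Note that 'start' only advances one binsize per unmatched element, so widely
-# spaced values can land in a bin whose nominal range does not contain them.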
-
-def fetch_weight(H,C,t):
-    if (H-(C-H)) < t:
-        return 2.0
-    else:
-        return 1.0
-
-def mutabilityEstimator(repeats1,repeats2,thresholds):
-    mut_num = 0.0    #Mutability Numerator
-    mut_den = 0.0    #Mutability denominator
-    for ind,H in enumerate(repeats1):
-        C = repeats2[ind]
-        t = thresholds[ind]
-        w = fetch_weight(H,C,t)
-        mut_num += ((H-C)*(H-C)*w)
-        mut_den += w
-    return [mut_num, mut_den]
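-# The two functions above compute the weighted estimator used below
-# (cf. Webster et al. 2002, cited in the tool help): per group,
-# mutability = sum_i w_i*(H_i - C_i)^2 / sum_i w_i, where H and C are the
-# repeat counts in the two species and w_i = 2 when H - (C - H) falls below
-# the repeat-number threshold, else 1; the result is later divided by the
-# number of generations.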
-
-def output_writer(blk, blk_lines):
-    global winspecies, speciesind
-    all_elems_1=[]
-    all_elems_2=[]
-    all_s_elems_1=[]
-    all_s_elems_2=[]
-    for bline in blk_lines:
-        if not(bline):
-            continue
-        items = bline.split('\t')
-        seq1 = items[1]
-        start1 = items[2]
-        end1 = items[3]
-        seq2 = items[8]
-        start2 = items[9]
-        end2 = items[10] 
-        if p_group_cols[0] == 6:
-            items[p_group_cols[0]] = int(items[p_group_cols[0]])
-            items[p_group_cols[1]] = int(items[p_group_cols[1]])
-        if s_group_cols[0] == 6:
-            items[s_group_cols[0]] = int(items[s_group_cols[0]])
-            items[s_group_cols[1]] = int(items[s_group_cols[1]])
-        all_elems_1.append(items[p_group_cols[0]])    #primary col elements for species 1
-        all_elems_2.append(items[p_group_cols[1]])    #primary col elements for species 2
-        if s_group_cols[0] != -1:    #sub-group is not None
-            all_s_elems_1.append(items[s_group_cols[0]])    #secondary col elements for species 1
-            all_s_elems_2.append(items[s_group_cols[1]])    #secondary col elements for species 2
-    uniq_elems_1 = get_unique_elems(all_elems_1)
-    uniq_elems_2 = get_unique_elems(all_elems_2)
-    if s_group_cols[0] != -1:
-        uniq_s_elems_1 = get_unique_elems(all_s_elems_1)
-        uniq_s_elems_2 = get_unique_elems(all_s_elems_2)
-    mut1={}
-    mut2={}
-    count1 = {}
-    count2 = {}
-    """
-    if p_group_cols[0] == 7:    #i.e. the option chosen is group-by unit(AG, GTC, etc)
-        uniq_elems_1 = get_unique_units(j.sort(lambda x, y: len(x)-len(y)))
-    """
-    if p_group_cols[0] == 6:    #i.e. the option chosen is group-by repeat number.
-        uniq_elems_1 = get_binned_lists(uniq_elems_1,p_bin_size)
-        uniq_elems_2 = get_binned_lists(uniq_elems_2,p_bin_size)
-        
-    if s_group_cols[0] == 6:    #i.e. the option chosen is subgroup-by repeat number.
-        uniq_s_elems_1 = get_binned_lists(uniq_s_elems_1,s_bin_size)
-        uniq_s_elems_2 = get_binned_lists(uniq_s_elems_2,s_bin_size)
-
-    for pitem1 in uniq_elems_1:
-        #repeats1 = []
-        #repeats2 = []
-        thresholds = []
-        if s_group_cols[0] != -1:    #Sub-group by feature is not None
-            for sitem1 in uniq_s_elems_1:
-                repeats1 = []
-                repeats2 = []
-                if type(sitem1) == type(''):
-                    sitem1 = sitem1.strip()
-                for bline in blk_lines:
-                    belems = bline.split('\t')
-                    if type(pitem1) == list:
-                        if p_group_cols[0] == 6:
-                            belems[p_group_cols[0]] = int(belems[p_group_cols[0]])
-                        if belems[p_group_cols[0]] in pitem1:
-                            if belems[s_group_cols[0]]==sitem1:
-                                repeats1.append(int(belems[6]))
-                                repeats2.append(int(belems[13]))
-                                if belems[4] == 'mononucleotide':
-                                    thresholds.append(mono_threshold)
-                                else:
-                                    thresholds.append(non_mono_threshold)
-                                mut1[str(pitem1)+'\t'+str(sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds)
-                                if region == 'align':
-                                    count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2))
-                                else:    
-                                    if winspecies == 1:
-                                        count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats1)
-                                    elif winspecies == 2:
-                                        count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats2)
-                    else:
-                        if type(sitem1) == list:
-                            if s_group_cols[0] == 6:
-                                belems[s_group_cols[0]] = int(belems[s_group_cols[0]])
-                            if belems[p_group_cols[0]]==pitem1 and belems[s_group_cols[0]] in sitem1:
-                                repeats1.append(int(belems[6]))
-                                repeats2.append(int(belems[13]))
-                                if belems[4] == 'mononucleotide':
-                                    thresholds.append(mono_threshold)
-                                else:
-                                    thresholds.append(non_mono_threshold)
-                                mut1["%s\t%s" %(pitem1,sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds)
-                                if region == 'align':
-                                    count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2))
-                                else:    
-                                    if winspecies == 1:
-                                        count1[str(pitem1)+'\t'+str(sitem1)]=sum(repeats1)
-                                    elif winspecies == 2:
-                                        count1[str(pitem1)+'\t'+str(sitem1)]=sum(repeats2)
-                        else:
-                            if belems[p_group_cols[0]]==pitem1 and belems[s_group_cols[0]]==sitem1:
-                                repeats1.append(int(belems[6]))
-                                repeats2.append(int(belems[13]))
-                                if belems[4] == 'mononucleotide':
-                                    thresholds.append(mono_threshold)
-                                else:
-                                    thresholds.append(non_mono_threshold)
-                                mut1["%s\t%s" %(pitem1,sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds)
-                                if region == 'align':
-                                    count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2))
-                                else:    
-                                    if winspecies == 1:
-                                        count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats1)
-                                    elif winspecies == 2:
-                                        count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats2)
-        else:   #Sub-group by feature is None
-            for bline in blk_lines:
-                belems = bline.split('\t')
-                if type(pitem1) == list:
-                    #print >>sys.stderr, "item: " + str(item1)
-                    if p_group_cols[0] == 6:
-                        belems[p_group_cols[0]] = int(belems[p_group_cols[0]])
-                    if belems[p_group_cols[0]] in pitem1:
-                        repeats1.append(int(belems[6]))
-                        repeats2.append(int(belems[13]))
-                        if belems[4] == 'mononucleotide':
-                            thresholds.append(mono_threshold)
-                        else:
-                            thresholds.append(non_mono_threshold)
-                else:
-                    if belems[p_group_cols[0]]==pitem1:
-                        repeats1.append(int(belems[6]))
-                        repeats2.append(int(belems[13]))
-                        if belems[4] == 'mononucleotide':
-                            thresholds.append(mono_threshold)
-                        else:
-                            thresholds.append(non_mono_threshold)
-            mut1["%s" %(pitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds)
-            if region == 'align':
-                count1["%s" %(pitem1)]=min(sum(repeats1),sum(repeats2))
-            else:            
-                if winspecies == 1:
-                    count1[str(pitem1)]=sum(repeats1)
-                elif winspecies == 2:
-                    count1[str(pitem1)]=sum(repeats2)
-                
-    for pitem2 in uniq_elems_2:
-        #repeats1 = []
-        #repeats2 = []
-        thresholds = []
-        if s_group_cols[0] != -1:    #Sub-group by feature is not None
-            for sitem2 in uniq_s_elems_2:
-                repeats1 = []
-                repeats2 = []
-                if type(sitem2)==type(''):
-                    sitem2 = sitem2.strip()
-                for bline in blk_lines:
-                    belems = bline.split('\t')
-                    if type(pitem2) == list:
-                        if p_group_cols[0] == 6:
-                            belems[p_group_cols[1]] = int(belems[p_group_cols[1]])
-                        if belems[p_group_cols[1]] in pitem2 and belems[s_group_cols[1]]==sitem2:
-                            repeats2.append(int(belems[13]))
-                            repeats1.append(int(belems[6]))
-                            if belems[4] == 'mononucleotide':
-                                thresholds.append(mono_threshold)
-                            else:
-                                thresholds.append(non_mono_threshold)
-                            mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds)
-                            #count2[str(pitem2)+'\t'+str(sitem2)]=len(repeats2)
-                            if region == 'align':
-                                count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2))
-                            else: 
-                                if winspecies == 1:
-                                    count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2)
-                                elif winspecies == 2:
-                                    count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1)
-                    else:
-                        if type(sitem2) == list:
-                            if s_group_cols[0] == 6:
-                                belems[s_group_cols[1]] = int(belems[s_group_cols[1]])
-                            if belems[p_group_cols[1]]==pitem2 and belems[s_group_cols[1]] in sitem2:
-                                repeats2.append(int(belems[13]))
-                                repeats1.append(int(belems[6]))
-                                if belems[4] == 'mononucleotide':
-                                    thresholds.append(mono_threshold)
-                                else:
-                                    thresholds.append(non_mono_threshold)
-                                mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds)
-                                if region == 'align':
-                                    count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2))
-                                else: 
-                                    if winspecies == 1:
-                                        count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2)
-                                    elif winspecies == 2:
-                                        count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1)
-                        else:
-                            if belems[p_group_cols[1]]==pitem2 and belems[s_group_cols[1]]==sitem2:
-                                repeats2.append(int(belems[13]))
-                                repeats1.append(int(belems[6]))
-                                if belems[4] == 'mononucleotide':
-                                    thresholds.append(mono_threshold)
-                                else:
-                                    thresholds.append(non_mono_threshold)
-                                mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds)
-                                if region == 'align':
-                                    count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2))
-                                else: 
-                                    if winspecies == 1:
-                                        count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2)
-                                    elif winspecies == 2:
-                                        count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1)
-        else:   #Sub-group by feature is None
-            for bline in blk_lines:
-                belems = bline.split('\t')
-                if type(pitem2) == list:
-                    if p_group_cols[0] == 6:
-                        belems[p_group_cols[1]] = int(belems[p_group_cols[1]])
-                    if belems[p_group_cols[1]] in pitem2:
-                        repeats2.append(int(belems[13]))
-                        repeats1.append(int(belems[6]))
-                        if belems[4] == 'mononucleotide':
-                            thresholds.append(mono_threshold)
-                        else:
-                            thresholds.append(non_mono_threshold)
-                else:
-                    if belems[p_group_cols[1]]==pitem2:
-                        repeats2.append(int(belems[13]))
-                        repeats1.append(int(belems[6]))
-                        if belems[4] == 'mononucleotide':
-                            thresholds.append(mono_threshold)
-                        else:
-                            thresholds.append(non_mono_threshold)
-            mut2["%s" %(pitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds)
-            if region == 'align':
-                count2["%s" %(pitem2)]=min(sum(repeats1),sum(repeats2))
-            else:
-                if winspecies == 1:
-                    count2["%s" %(pitem2)]=sum(repeats2)
-                elif winspecies == 2:
-                    count2["%s" %(pitem2)]=sum(repeats1)
-    for key in mut1.keys():
-        if key in mut2.keys():
-            mut = (mut1[key][0]+mut2[key][0])/(mut1[key][1]+mut2[key][1])
-            count = count1[key]
-            del mut2[key]
-        else:
-            unit_found = False
-            if p_group_cols[0] == 7 or s_group_cols[0] == 7: #if it is Repeat Unit (AG, GCT etc.) check for reverse-complements too
-                if p_group_cols[0] == 7:
-                    this,other = 0,1
-                else:
-                    this,other = 1,0
-                groups1 = key.split('\t')
-                mutn = mut1[key][0]
-                mutd = mut1[key][1]
-                count = 0
-                for key2 in mut2.keys():
-                    groups2 = key2.split('\t')
-                    if groups1[other] == groups2[other]:
-                        if groups1[this] in groups2[this]*2 or reverse_complement(groups1[this]) in groups2[this]*2:
-                            #mut = (mut1[key][0]+mut2[key2][0])/(mut1[key][1]+mut2[key2][1])
-                            mutn += mut2[key2][0]
-                            mutd += mut2[key2][1]
-                            count += int(count2[key2])
-                            unit_found = True
-                            del mut2[key2]
-                            #break
-            if unit_found:
-                mut = mutn/mutd
-            else:
-                mut = mut1[key][0]/mut1[key][1]
-                count = count1[key]
-        mut = "%.2e" %(mut/num_generations)
-        if region == 'align':
-            print >>fout, str(blk) + '\t'+seq1 + '\t' + seq2 + '\t' +key.strip()+ '\t'+str(mut) + '\t'+ str(count)
-        elif region == 'win':
-            fout.write("%s\t%s\t%s\t%s\n" %(blk,key.strip(),mut,count))
-            fout.flush()
-            
-    #catch any remaining repeats, for instance if the orthologous position contained different repeat units
-    for remaining_key in mut2.keys():
-        mut = mut2[remaining_key][0]/mut2[remaining_key][1]
-        mut = "%.2e" %(mut/num_generations)
-        count = count2[remaining_key]
-        if region == 'align':
-            print >>fout, str(blk) + '\t'+seq1 + '\t'+seq2 + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count)
-        elif region == 'win':
-            fout.write("%s\t%s\t%s\t%s\n" %(blk,remaining_key.strip(),mut,count))
-            fout.flush()
-            #print >>fout, blk + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count)
-
-def counter(node, start, end, report_func):
-    if start <= node.start < end and start < node.end <= end:
-        report_func(node) 
-        if node.right:
-            counter(node.right, start, end, report_func)
-        if node.left:
-            counter(node.left, start, end, report_func)
-    elif node.start < start and node.right:
-        counter(node.right, start, end, report_func)
-    elif node.start >= end and node.left and node.left.maxend > start:
-        counter(node.left, start, end, report_func)
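-# Walks the quicksect interval tree and reports the nodes whose intervals lie
-# entirely within [start, end), pruning subtrees via node.start and the
-# subtree-wide 'maxend' bound maintained by bx-python's quicksect.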
-            
-        
-def main():
-    infile = sys.argv[1]
-    elems = []
-    for i, line in enumerate( file ( infile )):
-        line = line.rstrip('\r\n')
-        if len( line )>0 and not line.startswith( '#' ):
-            elems = line.split( '\t' )
-            break
-        if i == 30:
-            break # give up after 30 lines; the format check below will then report the error
-    
-    if len( elems ) != 15:
-        stop_err( "This tool only works on tabular data output by 'Extract Orthologous Microsatellites from pair-wise alignments' tool. The data in your input dataset is either missing or not formatted properly." )
-    global winspecies, speciesind
-    if region == 'win':
-        if dbkey_i in elems[1]:
-            winspecies = 1
-            speciesind = 1 
-        elif dbkey_i in elems[8]:
-            winspecies = 2
-            speciesind = 8
-        else:
-            stop_err("The species build corresponding to your interval file is not present in the Microsatellite file.") 
-        
-    fin = open(infile, 'r')
-    skipped = 0
-    blk=0
-    win=0
-    linestr=""
-    
-    if region == 'win':
-        
-        msats = NiceReaderWrapper( fileinput.FileInput( infile ),
-                                chrom_col = speciesind,
-                                start_col = speciesind+1,
-                                end_col = speciesind+2,
-                                strand_col = -1,
-                                fix_strand = True)
-        msatTree = quicksect.IntervalTree()
-        for item in msats:
-            if type( item ) is GenomicInterval:
-                msatTree.insert( item, msats.linenum, item.fields )
-        
-        for iline in fint:
-            try:
-                iline = iline.rstrip('\r\n')
-                if not(iline) or iline == "":
-                    continue
-                ielems = iline.strip("\r\n").split('\t')
-                ichr = ielems[chr_col_i]
-                istart = int(ielems[start_col_i])
-                iend = int(ielems[end_col_i])
-                isrc = "%s.%s" %(dbkey_i,ichr)
-                if isrc not in msatTree.chroms:
-                    continue
-                result = []
-                root = msatTree.chroms[isrc]    #root node for the chrom
-                counter(root, istart, iend, lambda node: result.append( node ))
-                if not(result):
-                    continue
-                tmpfile1 = tempfile.NamedTemporaryFile('wb+')
-                for node in result:
-                    tmpfile1.write("%s\n" % "\t".join( node.other ))
-                
-                tmpfile1.seek(0)
-                output_writer(iline, tmpfile1.readlines())
-            except:
-                skipped+=1
-        if skipped:
-            print "Skipped %d intervals as invalid." %(skipped)
-    elif region == 'align':
-        if s_group_cols[0] != -1:
-            print >>fout, "#Window\tSpecies_1\tSpecies_2\tGroupby_Feature\tSubGroupby_Feature\tMutability\tCount"
-        else:
-            print >>fout, "#Window\tSpecies_1\tWindow_Start\tWindow_End\tSpecies_2\tGroupby_Feature\tMutability\tCount"
-        prev_bnum = -1
-        try:
-            for line in fin:
-                line = line.strip("\r\n")
-                if not(line) or line == "":
-                    continue
-                elems = line.split('\t')
-                try:
-                    assert int(elems[0])
-                    assert len(elems) == 15
-                except:
-                    continue
-                new_bnum = int(elems[0])
-                if new_bnum != prev_bnum:
-                    if prev_bnum != -1:
-                        output_writer(prev_bnum, linestr.strip().replace('\r','\n').split('\n'))
-                    linestr = line + "\n"
-                else:
-                    linestr += line
-                    linestr += "\n"
-                prev_bnum = new_bnum
-            output_writer(prev_bnum, linestr.strip().replace('\r','\n').split('\n'))
-        except Exception, ea:
-            print >>sys.stderr, ea
-            skipped += 1
-        if skipped:
-            print "Skipped %d lines as invalid." %(skipped)
-if __name__ == "__main__":
-    main()
\ No newline at end of file
--- a/tools/regVariation/microsats_mutability.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,121 +0,0 @@
-<tool id="microsats_mutability1" name="Estimate microsatellite mutability" version="1.1.0">
-  <description>by specified attributes</description>
-  <command interpreter="python">
-  	microsats_mutability.py 
-  	$input1 
-  	$out_file1 
-  	${pri_condition.primary_group} 
-  	#if $pri_condition.primary_group == "6":
-      ${pri_condition.binsize} ${pri_condition.subgroup} -1 
-    #else:
-      0 ${pri_condition.sub_condition.subgroup} 
-      #if $pri_condition.sub_condition.subgroup == "6":
-       ${pri_condition.sub_condition.s_binsize}
-      #else:
-       -1
-      #end if
-    #end if
-  	$gens
-    ${region.type}
-    #if $region.type == "win":
-      ${region.input2} ${region.input2.dbkey} ${region.input2.metadata.chromCol},${region.input2.metadata.startCol},${region.input2.metadata.endCol},${region.input2.metadata.strandCol}
-    #else:
-      "None"
-    #end if
-  </command>
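-  <!-- Illustrative command line produced by the template above, e.g. for
-       Group by = Motif type (4), Sub-group by = Repeat Unit (7), per alignment block:
-       microsats_mutability.py in.tab out.tab 4 0 7 -1 1 align "None" -->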
-  <inputs>
-    <page>
-      <param name="input1" type="data" format="tabular" label="Select dataset containing Orthologous microsatellites"/>
-      <conditional name="region">
-	      <param name="type" type="select" label="Estimate rates corresponding to" multiple="false">
-	         <option value="align">Alignment block</option>
-	         <option value="win">Intervals in your history</option>
-	     </param>
-	     <when value="win">
-	      	<param format="interval" name="input2" type="data" label="Choose intervals">
-	      		<validator type="unspecified_build" />
-	    	</param>
-	      </when>
-	      <when value="align" />
-      </conditional>
-      <param name="gens" size="10" type="integer" value="1" label="Number of generations between the two species in input file"/>
-      <conditional name="pri_condition">
-	      <param name="primary_group" type="select" label="Group by" multiple="false">
-	         <option value="4">Motif type (mono/di/tri etc.)</option>
-	         <option value="7">Repeat Unit (AG, GCT etc.)</option>
-	         <option value="6">Repeat Number </option>
-	      </param>
-	      <when value="6">
-	      	<param name="binsize" size="10" type="integer" value="1" label="Bin-size" help="Bin-size denotes the number of repeat numbers to be considered as a group. Bin-size of 5 will group every 5 consecutive repeat numbers into a group."/>
-	      	<param name="subgroup" type="select" label="Sub-group by" multiple="false">
-		      <option value="-1">None</option>
-			  <option value="4">Motif type (mono/di/tri etc.)</option>
-			  <option value="7">Repeat Unit (AG, GCT etc.)</option>
-			</param>
-	      </when>
-	      <when value="7">
-	        <conditional name="sub_condition">
-	    	   <param name="subgroup" type="select" label="Sub-group by" multiple="false">
-		    	 <option value="-1">None</option>
-				 <option value="4">Motif type (mono/di/tri etc.)</option>
-				 <option value="6">Repeat Number </option>
-			   </param>
-			   <when value="-1"></when>
-		       <when value="4"></when>
-		       <when value="6">
-		      	  <param name="s_binsize" size="10" type="integer" value="1" label="Bin size" help="Bin-size denotes the number of repeat numbers to be considered as a group. Bin-size of 5 will group every 5 consecutive repeat numbers into a group."/>
-		       </when>
-			</conditional>
-	      </when>
-	      <when value="4">
-			<conditional name="sub_condition">
-	    	   <param name="subgroup" type="select" label="Sub-group by" multiple="false">
-		    	 <option value="-1">None</option>
-				 <option value="7">Repeat Unit (AG, GCT etc.)</option>
-				 <option value="6">Repeat Number </option>
-			   </param>
-			   <when value="-1"></when>
-		       <when value="7"></when>
-			   <when value="6">
-		      	  <param name="s_binsize" size="10" type="integer" value="1" label="Bin size" help="Bin-size denotes the number of repeat numbers to be considered as a group. Bin-size of 5 will group every 5 consecutive repeat numbers into a group."/>
-		       </when>
-			</conditional>
-	      </when>
-      </conditional>
-    </page>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1" />
-  </outputs>
-  <!-- 
-  <tests>
-    <test>
-      <param name="input1" value="ortho_ms.tab"/>
-      <param name="type" value="align"/>
-      <param name="gens" value="1"/>
-      <param name="primary_group" value="4"/>
-      <param name="sub_condition|subgroup" value="7"/>
-      <output name="out_file1" file="ortho_ms_mut.tab"/>
-    </test>
-  </tests>
-   -->
-<help>
-.. class:: infomark
-
-**What it does**
-
-This tool computes microsatellite mutability for the orthologous microsatellites fetched by the 'Extract Orthologous Microsatellites from pair-wise alignments' tool.
-
-Mutability is computed according to the method described in the following paper:
-
-*Webster et al., Microsatellite evolution inferred from human-chimpanzee genomic sequence alignments, Proc Natl Acad Sci 2002 June 25; 99(13): 8748-8753*
-
------
-
-.. class:: warningmark
-
-**Note**
-
-The user-selected group-by and sub-group-by features, the computed mutability, and the count of repeats used to compute that mutability are added as columns to the output.
-</help>
-</tool>
--- a/tools/regVariation/multispecies_MicrosatDataGenerator_interrupted_GALAXY.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5392 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-use Term::ANSIColor;
-use File::Basename;
-use IO::Handle;
-use Cwd;
-use File::Path;
-use File::Temp qw/ tempfile tempdir /;
-use vars qw($distance @thresholds @tags $printer $mergestarts $mergeends $mergemicros $interrtypecord $microscanned $interrcord $interr_poscord $no_of_interruptionscord $infocord $typecord $startcord $strandcord $endcord $microsatcord $motifcord $sequencepos $no_of_species $gapcord $prinkter);
-
-$ENV{'PATH'} .= ':' . dirname($0);
-my $date = `date`;
-
-my ($mafile, $orthfile, $threshold_array,  $species_set, $tree_definition, $separation) = @ARGV;
-if (!$mafile or !$orthfile or !$threshold_array or !$separation or !$tree_definition or !$species_set) { die "missing arguments\n"; }
-
-#-------------------------------------------------------------------------------
-# WHICH SPUTNIK USED?
-my $sputnikpath = "sputnik";
-#print "sputnik_Mac-PowerPC non-existant\n" if !-e $sputnikpath;
-#exit if !-e $sputnikpath;
-#$sputnikpath = "bx-sputnik" ;
-#print "ARGV input = @ARGV\n";
-#print "ARGV input :\n mafile=$mafile\n orthfile=$orthfile\n threshold_array=$threshold_array\n  species_set=$species_set\n tree_definition=$tree_definition\n separation=$separation\n";
-#-------------------------------------------------------------------------------
-# RUNFILE
-#-------------------------------------------------------------------------------
-$distance = 1; #bp
-$distance++;
-#-------------------------------------------------------------------------------
-# MICROSATELLITE THRESHOLD SETTINGS (LENGTH, BP)
-$threshold_array=~ s/,/_/g;
-my @thresharr = split("_",$threshold_array);
-my @thresholds=@thresharr;
-my $mono_threshold = $thresharr[0];
-my $di_threshold = $thresharr[1];
-my $tri_threshold = $thresharr[2];
-my $tetra_threshold = $thresharr[3];
-#my $threshold_array = join("_",($mono_threshold, $di_threshold, $tri_threshold, $tetra_threshold));
-my $tdir = tempdir( CLEANUP => 0 );
-chdir $tdir;
-my $dir = getcwd;  
-#print "current dit=$dir\n";
-#-------------------------------------------------------------------------------
-# CREATE AXT FILES IN FORWARD AND REVERSE ORDERS IF NECESSARY
-my @chrfiles=();
-
-#my $mafile =  "/Users/ydk/work/rhesus_microsat/results/galay/align.txt"; #$ARGV[0];
-my $chromt=int(rand(10000));
-my $p_chr=$chromt;
-
-
-my @exactspeciesset_unarranged = split(/,/,$species_set);
-$tree_definition=~s/[\)\(, ]/\t/g;
-my @treespecies=split(/\t+/,$tree_definition);
-my @exactspecies=();
-
-foreach my $spec (@treespecies){
-	foreach my $espec (@exactspeciesset_unarranged){
-		push @exactspecies, $spec if $spec eq $espec;
-	}
-}
-#print "exactspecies=@exactspecies\n";
-my $focalspec = $exactspecies[0];
-my $arranged_species_set=join(".",@exactspecies);
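-# The selected species are re-ordered to follow their order of appearance in the
-# tree definition, so the first selected species in the tree becomes the focal
-# (reference) species for the rest of the pipeline.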
-my $chr_name = join(".",("chr".$p_chr),$arranged_species_set, "net", "axt");
-#print "sending to maftoAxt_multispecies: $mafile, $tree_definition, $chr_name, $species_set .. focalspec=$focalspec \n"; 
-maftoAxt_multispecies($mafile, $tree_definition, $chr_name, $species_set);
-#print "done maf to axt conversion\n";
-my $reverse_chr_name = join(".",("chr".$p_chr."r"),$arranged_species_set, "net", "axt");
-artificial_axdata_inverter ($chr_name, $reverse_chr_name);
-#print "reverse_chr_name=$reverse_chr_name\n"; 
-#-------------------------------------------------------------------------------
-# PROCESS THE ALIGNMENTS IN BOTH ORIENTATIONS (FORWARD AND REVERSE)
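-# High-level flow of the loop below: for each orientation the axt alignment is
-# split into per-species sequence files, sputnik is run on each species, its
-# output is corrected and filtered, and the detected microsatellites are then
-# classified as simple, compound or interrupted.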
-foreach my $direct ("reverse_direction","forward_direction"){
-	$p_chr=$chromt;
-	#print "direction = $direct\n";
-	$p_chr = $p_chr."r" if $direct eq "reverse_direction";
-	$p_chr = $p_chr if $direct eq "forward_direction";
-	my $config = $species_set;
-	$config=~s/,/./g;
-	my @orgs = split(/\./,$arranged_species_set);
-	#print "ORGS= @orgs\n";
-	my @tag=@orgs;
-		
-	
-	my $tags = join(",", @tag);
-	my @tags=@tag;
-	chomp $p_chr;
-	$tags = join("_", split(/,/, $tags));
-	my $pchr = "chr".$p_chr;
-	
-	my $ptag = $orgs[0]."-".$pchr.".".join(".",@orgs[1 ... scalar(@orgs)-1])."-".$threshold_array;
-	my @sp_tags = ();
-	
-	#print "orgs=@orgs, pchr=$pchr, hence, ptag = $ptag\n";
-	foreach my $sp (@tag){
-		push(@sp_tags, ($sp.".".$ptag));
-	}
-
-	my $preptag = $orgs[0]."-".$pchr.".".join(".",@orgs[1 ... scalar(@orgs)-1]);
-	my @presp_tags = ();
-	
-	foreach my $sp (@tag){
-		push(@presp_tags, ($sp.".".$preptag));
-	}
-
-	my $resultdir = "";
-	my $orthdir = "";
-	my $filtereddir = "";
-	my $pipedir = "";
-	
-	my @title_queries = ();
-	push(@title_queries, "^[0-9]+");
-	my $sep="\\s";
-	for my $or (0 ... $#orgs){
-		my $title =  join($sep, ($orgs[$or],  "[A-Za-z]+[0-9a-zA-Z]+", "[0-9]+", "[0-9]+", "[\\-\\+]"));
-		#$title =~ s/chr\\+\\s+\+/chr/g;
-		push(@title_queries, $title);
-	}
-	my $title_query = join($sep, @title_queries);
-	#print "title_queries=@title_queries\n";
-	#print "query = >$title_query<\n"; 
-	#print "orgs = @orgs\n"; 
-	#-------------------------------------------------------------------------------
-	# GET AXTNET FILES, EDIT THEM AND SPLIT THEM INTO HUMAN AND CHIMP INPUT FILES
-	my $t1input = $pchr.".".$arranged_species_set.".net.axt";
-	
-	my @t1outputs = ();
-	
-	foreach my $sp (@presp_tags){
-		push(@t1outputs, $sp."_gap_op");
-	}
-	
-	multi_species_t1($t1input,$tags,(join(",", @t1outputs)), $title_query); 
-	#print "t1outputs=@t1outputs\n";
-	#print "done t1\n";
-	#-------------------------------------------------------------------------------
-	#START T2.PL
-	
-	my $stag  = (); my $tag1 = (); my $tag2 = ();  my $schrs = ();
-	
-	for my $t (0 ... scalar(@tags)-1){
-		multi_species_t2($t1outputs[$t], $tag[$t]);		
-	}
-	#-------------------------------------------------------------------------------
-	#START T2.2.PL
-	
-	my @temp_tags = @tag;
-	
-	foreach my $sp (@presp_tags){
-		my $t2input =  $sp."_nogap_op_unrand";
-		multi_species_t2_2($t2input, shift(@temp_tags));
-	}
-	undef (@temp_tags);
-	
-	#-------------------------------------------------------------------------------
-	#START SPUTNIK
-	
-	my @jobIDs = ();
-	@temp_tags = @tag;
-	my @sput_filelist = ();
-	
-	foreach my $sp (@presp_tags){
-		#print "sp = $sp\n";
-		my $sputnikoutput = $pipedir.$sp."_sput_op0";
-		my $sputnikinput = $pipedir.$sp."_nogap_op_unrand";
-		push(@sput_filelist, $sputnikinput);
-		my $sputnikcommand = $sputnikpath." ".$sputnikinput." > ".$sputnikoutput;
-		#print "$sputnikcommand\n";
-		my @sputnikcommand_system = $sputnikcommand;
-		system(@sputnikcommand_system);
-	}
-
-	#-------------------------------------------------------------------------------
-	#START SPUTNIK OUTPUT CORRECTOR
-	
-	foreach my $sp (@presp_tags){
-		my $corroutput = $pipedir.$sp."_sput_op1";
-		my $corrinput = $pipedir.$sp."_sput_op0";
-		sputnikoutput_corrector($corrinput,$corroutput);
-		
-		my $t4output = $pipedir.$sp."_sput_op2";
-		multi_species_t4($corroutput,$t4output);
-	
-		my $t5output = $pipedir.$sp."_sput_op3";
-		multi_species_t5($t4output,$t5output);
-		#print "done t5.pl for $sp\n";
-	
-		my $t6output = $pipedir.$sp."_sput_op4";
-		multi_species_t6($t5output,$t6output,scalar(@orgs));
-	}
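-	# Per-species file chain (sketch): _sput_op0 is raw sputnik output, _sput_op1 rejoins
-	# wrapped long repeats, _sput_op2 collapses each record to one line, _sput_op3 attaches
-	# the defline, and _sput_op4 is the tab-delimited table (type, start, strand, end,
-	# microsat sequence, motif) used downstream.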
-	#-------------------------------------------------------------------------------
-	#START T9.PL FOR T10.PL AND FOR INTERRUPTED HUNTING
-	
-	foreach my $sp (@presp_tags){
-		my $t9output = $pipedir.$sp."_gap_op_unrand_match";
-		my $t9sequence = $pipedir.$sp."_gap_op_unrand2";
-		my $t9micro = $pipedir.$sp."_sput_op4";
-		t9($t9micro,$t9sequence,$t9output);
-		
-		my $t9output2 = $pipedir.$sp."_nogap_op_unrand2_match";
-		my $t9sequence2 = $pipedir.$sp."_nogap_op_unrand2";
-		t9($t9micro,$t9sequence2,$t9output2);
-	}
-	#print "done both t9.pl for all orgs\n";
-
-	#-------------------------------------------------------------------------------
-	# FIND COMPOUND MICROSATELLITES
-	
-	@jobIDs = ();
-	my $species_counter = 0;
-	
-	foreach my $sp (@presp_tags){
-		my $simple_microsats=$pipedir.$sp."_sput_op4_simple";
-		my $compound_microsats=$pipedir.$sp."_sput_op4_compound";
-		my $input_micro = $pipedir.$sp."_sput_op4";
-		my $input_seq = $pipedir.$sp."_nogap_op_unrand2_match";
-		multiSpecies_compound_microsat_hunter3($input_micro,$input_seq,$simple_microsats,$compound_microsats,$orgs[$species_counter], scalar(@sp_tags), $threshold_array );
-		$species_counter++;
-	}
-	
-	#-------------------------------------------------------------------------------
-	# READING AND FILTERING SIMPLE MICROSATELLITES
-	my $spcounter2=0;
-	foreach my $sp (@sp_tags){
-		my $presp = $presp_tags[$spcounter2];
-		$spcounter2++;
-		my $simple_microsats=$pipedir.$presp."_sput_op4_simple";
-		my $simple_filterout = $pipedir.$sp."_sput_op4_simple_filtered";
-		my $simple_residue = $pipedir.$sp."_sput_op4_simple_residue";
-		multiSpecies_filtering_interrupted_microsats($simple_microsats, $simple_filterout, $simple_residue,$threshold_array,$threshold_array,scalar(@sp_tags));
-	}
-	
-	#-------------------------------------------------------------------------------
-	# ANALYZE  COMPOUND MICROSATELLITES FOR BEING INTERRUPTED MICROSATS
-	
-	$species_counter = 0;
-	foreach my $sp (@sp_tags){
-		my $presp = $presp_tags[$species_counter];
-		my $compound_microsats = $pipedir.$presp."_sput_op4_compound";
-		my $analyzed_simple_microsats=$pipedir.$presp."_sput_op4_compound_interrupted";
-		my $analyzed_compound_microsats=$pipedir.$presp."_sput_op4_compound_pure";
-		my $seq_file = $pipedir.$presp."_nogap_op_unrand2_match";
-		multiSpecies_compound_microsat_analyzer($compound_microsats,$seq_file,$analyzed_simple_microsats,$analyzed_compound_microsats,$orgs[$species_counter], scalar(@sp_tags));
-		$species_counter++;
-	}
-	#-------------------------------------------------------------------------------
-	# REANALYZE COMPOUND MICROSATELLITES FOR THE PRESENCE OF SIMPLE ONES WITHIN THEM
-	$species_counter = 0;
-	
-	foreach my $sp (@sp_tags){
-		my $presp = $presp_tags[$species_counter];
-		my $compound_microsats = $pipedir.$presp."_sput_op4_compound_pure";
-		my $compound_interrupted = $pipedir.$presp."_sput_op4_compound_clarifiedInterrupted";
-		my $compound_compound = $pipedir.$presp."_sput_op4_compound_compound";
-		my $seq_file = $pipedir.$presp."_nogap_op_unrand2_match";
-		multiSpecies_compoundClarifyer($compound_microsats,$seq_file,$compound_interrupted,$compound_compound,$orgs[$species_counter], scalar(@sp_tags), "2_4_6_8", "3_4_6_8", "2_4_6_8");
-		$species_counter++;
-	}
-	#-------------------------------------------------------------------------------
-	# READING AND FILTERING SIMPLE AND COMPOUND MICROSATELLITES
-	$species_counter = 0;
-	
-	foreach my $sp (@sp_tags){
-		my $presp = $presp_tags[$species_counter];
-	
-		my $simple_microsats=$pipedir.$presp."_sput_op4_compound_clarifiedInterrupted";
-		my $simple_filterout = $pipedir.$sp."_sput_op4_compound_clarifiedInterrupted_filtered";
-		my $simple_residue = $pipedir.$sp."_sput_op4_compound_clarifiedInterrupted_residue";
-		multiSpecies_filtering_interrupted_microsats($simple_microsats, $simple_filterout, $simple_residue,$threshold_array,$threshold_array,scalar(@sp_tags));
-	
-		my $simple_microsats2 = $pipedir.$presp."_sput_op4_compound_interrupted";
-		my $simple_filterout2 = $pipedir.$sp."_sput_op4_compound_interrupted_filtered";
-		my $simple_residue2 = $pipedir.$sp."_sput_op4_compound_interrupted_residue";
-		multiSpecies_filtering_interrupted_microsats($simple_microsats2, $simple_filterout2, $simple_residue2,$threshold_array,$threshold_array,scalar(@sp_tags));
-	
-		my $compound_microsats=$pipedir.$presp."_sput_op4_compound_compound";
-		my $compound_filterout = $pipedir.$sp."_sput_op4_compound_compound_filtered";
-		my $compound_residue = $pipedir.$sp."_sput_op4_compound_compound_residue";
-		multispecies_filtering_compound_microsats($compound_microsats, $compound_filterout, $compound_residue,$threshold_array,$threshold_array,scalar(@sp_tags));
-		$species_counter++;
-	}
-	#print "done filtering both simple and compound microsatellites \n";
-	
-	#-------------------------------------------------------------------------------
-	
-	my @combinedarray = ();
-	my @combinedarray_indicators = ("mononucleotide", "dinucleotide", "trinucleotide", "tetranucleotide");
-	my @combinedarray_tags = ("mono", "di", "tri", "tetra");
-	$species_counter = 0;
-	
-	foreach my $sp (@sp_tags){
-		my $simple_interrupted = $pipedir.$sp."_simple_analyzed_simple";
-		push @{$combinedarray[$species_counter]}, $pipedir.$sp."_simple_analyzed_simple_mono", $pipedir.$sp."_simple_analyzed_simple_di", $pipedir.$sp."_simple_analyzed_simple_tri", $pipedir.$sp."_simple_analyzed_simple_tetra";
-		$species_counter++;
-	}
-	
-	#-------------------------------------------------------------------------------
-	# PUT TOGETHER THE INTERRUPTED AND SIMPLE MICROSATELLITES BASED ON THEIR MOTIF SIZE FOR FURTHER EXTENSION
-	my $sp_counter = 0;
-	foreach my $sp (@sp_tags){	
-		my $analyzed_simple = $pipedir.$sp."_sput_op4_compound_interrupted_filtered";
-		my $clarifyed_simple = $pipedir.$sp."_sput_op4_compound_clarifiedInterrupted_filtered";
-		my $simple = $pipedir.$sp."_sput_op4_simple_filtered";
-		my $simple_analyzed_simple = $pipedir.$sp."_simple_analyzed_simple";
-		`cat $analyzed_simple $clarifyed_simple $simple > $simple_analyzed_simple`;
-		for my $i (0 ... 3){
-			`grep "$combinedarray_indicators[$i]" $simple_analyzed_simple > $combinedarray[$sp_counter][$i]`;	
-		}
-		$sp_counter++;		
-	}
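-	# Grouping sketch: the cat/grep above buckets records by the motif-size keyword in their
-	# type column, e.g. every line containing "dinucleotide" lands in the "*_di" bucket file.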
-	#print "\ndone grouping interrupted & simple microsats based on their motif size for further extention\n";
-	
-	#-------------------------------------------------------------------------------
-	# BREAK CHROMOSOME INTO PARTS OF CERTAIN NO. CONTIGS EACH, FOR FUTURE SEARCHING OF INTERRUPTED MICROSATELLITES
-	# ESPECIALLY DI, TRI AND TETRANUCLEOTIDE MICROSATELLITES
-	@temp_tags = @sp_tags;
-	my $increment = 1000000;
-	my @splist = ();
-	my $targetdir = $pipedir;
-	$species_counter=0;
-	
-	foreach my $sp (@sp_tags){
-		my $presp = $presp_tags[$species_counter];
-		$species_counter++;
-		my $localtag = shift @temp_tags;
-		my $locallist = $targetdir.$localtag."_".$p_chr."_list";
-		push(@splist, $locallist);
-		my $input = $pipedir.$presp."_nogap_op_unrand2_match";
-		chromosome_unrand_breaker($input,$targetdir,$locallist,$increment, $localtag, $pchr);
-	}
-	
-
-	my @unionarray = ();
-	#print "splist=@splist\n";
-	#-------------------------------------------------------------------------------
-	# FIND INTERRUPTED MICROSATELLITES
-	
-	$species_counter = 0;		
-	
-	for my $i (0 .. $#combinedarray){
-		
-		@jobIDs = ();
-		open (JLIST1, "$splist[$i]") or die "Cannot open file $splist[$i]: $!";
-		
-		while (my $sp1  = <JLIST1>){
-			#print "$splist[$i]: sp1=$sp1\n";
-			chomp $sp1;
-			
-			for my $j (0 ... $#combinedarray_tags){
-				my $interr  = $sp1."_interr_".$combinedarray_tags[$j];
-				my $simple  = $sp1."_simple_".$combinedarray_tags[$j];
-				push @{$unionarray[$i]}, $interr, $simple;	
-				multiSpecies_interruptedMicrosatHunter($combinedarray[$i][$j],$sp1,$interr ,$simple, $orgs[$species_counter], scalar(@sp_tags), "3_4_6_8"); 
-			}		
-		}
-		$species_counter++;
-	}
-	close JLIST1;	
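-	# Interrupted-hunt sketch: every chunk file listed in the per-species *_list is scanned
-	# against each motif-size bucket, writing "<chunk>_interr_<size>" and
-	# "<chunk>_simple_<size>" files; @unionarray remembers them for the reunion step below.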
-	#-------------------------------------------------------------------------------
-	#	REUNION AND ZIPPING BEFORE T10.PL
-	
-	my @allarray = ();
-	
-	for my $i (0 ... $#sp_tags){	
-		my $localfile = $pipedir.$sp_tags[$i]."_allmicrosats";
-		unlink $localfile if -e $localfile;
-		push(@allarray, $localfile);
-	
-		my $unfiltered_localfile= $localfile."_unfiltered";
-		my $residue_localfile= $localfile."_residue";
-		
-		unlink $unfiltered_localfile;
-		for my $j (0 ... $#{$unionarray[$i]}){
-			#print "listing files for species $i  and list number $j= \n$unionarray[$i][$j] \n";
-			`cat $unionarray[$i][$j] >> $unfiltered_localfile`;
-			unlink $unionarray[$i][$j];
-		}
-
-		multiSpecies_filtering_interrupted_microsats($unfiltered_localfile, $localfile, $residue_localfile,$threshold_array,$threshold_array,scalar(@sp_tags) );
-		my $analyzed_compound = $pipedir.$sp_tags[$i]."_sput_op4_compound_compound_filtered";
-		my $simple_residue = $pipedir.$sp_tags[$i]."_sput_op4_simple_residue";
-		my $compound_residue = $pipedir.$sp_tags[$i]."_sput_op4_compound_residue";
-		
-		`cat $analyzed_compound >> $localfile`;
-	}
-	#-------------------------------------------------------------------------------
-	# MERGING MICROSATELLITES THAT ARE VERY CLOSE TO EACH OTHER, INCLUDING THOSE FOUND BY SEARCHING IN 2 OPPOSITE DIRECTIONS
-	
-	my $toescape=0;
-	
-	
-	for my $i (0 ... $#sp_tags){	
-		my $localfile = $pipedir.$sp_tags[$i]."_allmicrosats";
-		$localfile =~ /$focalspec\-(chr[0-9a-zA-Z]+)\./;
-		my $direction = $1;
-		#print "localfile = $localfile , direction = $direction\n"; 
-#		`gzip $reverse_chr_name` if $direction =~ /chr[0-9a-zA-Z]+r/ && $switchboard{"deleting_processFiles"} != 1;
-		$toescape =1 if $direction =~ /chr[0-9a-zA-Z]+r/;
-		last if $direction =~ /chr[0-9a-zA-Z]+r/;
-		my $nogap_sequence = $pipedir.$presp_tags[$i]."_nogap_op_unrand2_match";
-		my $gap_sequence = $pipedir.$presp_tags[$i]."_gap_op_unrand_match";
-		my $reverselocal = $localfile;
-		$reverselocal =~ s/\-chr([0-9a-zA-Z]+)\./-chr$1r./g;
-		merge_interruptedMicrosats($nogap_sequence,$localfile, $reverselocal ,scalar(@sp_tags)); 
-		#-------------------------------------------------------------------------------
-		my $forward_separate = $localfile."_separate";
-		my $reverse_separate = $reverselocal."_separate";
-		my $diff = $forward_separate."_diff";
-		my $miss = $forward_separate."_miss";
-		my $common = $forward_separate."_common";
-		forward_reverse_sputoutput_comparer($nogap_sequence,$forward_separate, $reverse_separate, $diff, $miss, $common ,scalar(@sp_tags)); 
-		#-------------------------------------------------------------------------------
-		my $symmetrical_file = $localfile."_symmetrical";
-		my $merged_file = $localfile."_merged";
-		#print "cating: $merged_file $common  into -> $symmetrical_file \n"; 
-		`cat $merged_file $common > $symmetrical_file`;
-		#-------------------------------------------------------------------------------
-		my $t10output = $symmetrical_file."_fin_hit_all_2";
-		new_multispecies_t10($gap_sequence, $symmetrical_file, $t10output, join(".", @orgs));
-		#-------------------------------------------------------------------------------		
-	}
-	next if $toescape == 1;
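-	# Symmetry sketch: the reverse-direction pass only produces input for the comparison
-	# above; in the forward pass, microsats found in both orientations ("_common") are
-	# combined with the "_merged" set into "_symmetrical" and sent through t10.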
-	#------------------------------------------------------------------------------------------------
-	# BRINGING IT ALL TOGETHER: FINDING ORTHOLOGOUS MICROSATELLITES AMONG THE SPECIES
-	
-	
-	my @micros_array = ();
-	my $sampletag = ();
-	for my $i (0 ... $#sp_tags){
-		my $finhitFile = $pipedir.$sp_tags[$i]."_allmicrosats_symmetrical_fin_hit_all_2";
-		push(@micros_array, $finhitFile);
-		$sampletag = $sp_tags[$i];
-	}
-	#$sampletag =~ s/^([A-Z]+\.)/ORTH_/;
-	#$sampletag = $sampletag."_monoThresh-".$mono_threshold."bp";
-	my $orthanswer = multiSpecies_orthFinder4($t1input, join(":",@micros_array), $orthfile, join(":", @orgs), $separation);
-}
-$date = `date`;
-#print "date = $date\n";
-#remove_tree($tdir);
-#------------------------------------------------------------------------------------------------
-#------------------------------------------------------------------------------------------------
-#------------------------------------------------------------------------------------------------
-#------------------------------------------------------------------------------------------------
-
-#xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx 
-
-sub maftoAxt_multispecies {
-	#print "in maftoAxt_multispecies : got @_\n";
-	my $fname=$_[0];
-	open(IN,"<$_[0]") or die "Cannot open $_[0]: $! \n";
-	my $treedefinition = $_[1];
-	open(OUT,">$_[2]") or die "Cannot open $_[2]: $! \n";
-	my $counter = 0;
-	my $exactspeciesset = $_[3];
-	my @exactspeciesset_unarranged = split(/,/,$exactspeciesset);
-	
-	$treedefinition=~s/[\)\(, ]/\t/g;
-	my @species=split(/\t+/,$treedefinition);
-	my @exactspecies=();
-	
-	foreach my $spec (@species){
-		foreach my $espec (@exactspeciesset_unarranged){
-			push @exactspecies, $spec if $spec eq $espec;
-		}
-	}
-	#print "exactspecies=@exactspecies\n";
-	
-	###########
-	my $select = 2;  
-	# select = 1 : every species must have a sequence present in each alignment block
-	# select = 0 : any allowed subset of the species may make up the alignment; the removeset
-	#              information detects alignments that have other important genomes aligned
-	# select = 2 : only the exact allowed species set makes up the alignment
-	###########
-	my @allowedset = ();
-	@allowedset = split(/;/,allowedSetOfSpecies(join("_",@species))) if $select == 0;
-	@allowedset = join("_",0,@species) if $select == 1;
-	#print "species = @species , allowedset =",join("\n", @allowedset) ," \n"; 
-	@allowedset = join("_",0,@exactspecies) if $select == 2;
-	#print "allowedset = @allowedset and exactspecies = @exactspecies\n";
-	
-	my $start = 0;
-	my @sequences = ();
-	my @titles = ();
-	my $species_counter = "0";
-	my $countermatch = 0;
-	my $outsideSpecies=0;
-	
-	while(my $line = <IN>){
-		next if $line =~ /^#/;
-		next if $line =~ /^i/;
-		chomp $line;
-		#print "$line";
-		my @fields = split(/\s+/,$line);
-		chomp $line;
-		if ($line =~ /^a /){
-			$start = 1;
-		}
-		
-		if ($line =~ /^s /){
-		#	print "fields1 = $fields[1] , start = $start\n";
-		
-			foreach my $sp (@species){
-				if ($fields[1] =~ /$sp/){
-					$species_counter = $species_counter."_".$sp;
-					push(@sequences, $fields[6]);
-					my @sp_info = split(/\./,$fields[1]);
-					my $title = join(" ",@sp_info, $fields[2], ($fields[2]+$fields[3]), $fields[4]);
-					push(@titles, $title);				
-					
-				}
-			}
-		}
-		
-		if (($line !~ /^a/) && ($line !~ /^s/) && ($line !~ /^#/) && ($line !~ /^i/) && ($start == 1)){
-			my $arranged = reorderSpecies($species_counter, @species);
-			my $stopper = 1;
-			my $arrno = 0;
-			foreach my $set (@allowedset){
-				if ($arranged eq $set){
-	#				print "$arranged == $set\n";
-					$stopper = 0; last;
-				}
-				$arrno++;
-			}
-	
-			if ($stopper == 0) {
-			#	print "    accepted\n";
-				@titles = split ";", orderInfo(join(";", @titles), $species_counter, $arranged) if $species_counter ne $arranged;				
-				@sequences = split ";", orderInfo(join(";", @sequences), $species_counter, $arranged) if $species_counter ne $arranged;				
-				my $filteredseq = filter_gaps(@sequences);
-				
-				if ($filteredseq ne "SHORT"){
-					$counter++;
-					print OUT join (" ",$counter, @titles), "\n";
-					print OUT $filteredseq, "\n";
-					print OUT "\n"; 
-					$countermatch++;
-				}
-			}
-			else{#print "\n";
-			}
-	
-			@sequences = (); @titles = (); $start = 0;$species_counter = "0";
-			next;		
-			
-		}
-	}
-#	print "countermatch = $countermatch\n";
-}
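-# maftoAxt sketch: each MAF block (an "a" line followed by one "s" line per species) is kept
-# only when its species set matches the allowed set after reordering; columns that are gaps
-# in every species are removed, and blocks of 100 bp or less are dropped as "SHORT".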
-
-sub reorderSpecies{
-	my @inarr=@_;
-	my $currSpecies = shift (@inarr);
-	my $ordered_species = 0;
-	my @species=@inarr;
-	foreach my $order (@species){
-		$ordered_species = $ordered_species."_".$order	if	$currSpecies=~ /$order/;
-	}
-	return $ordered_species;
-
-}
-
-sub filter_gaps{
-	my @sequences = @_;
-#	print "sequences sent are @sequences\n";
-	my $seq_length = length($sequences[0]);
-	my $seq_no = scalar(@sequences);
-	my $allgaps = ();
-	for (1 ... $seq_no){
-		$allgaps = $allgaps."-";
-	}
-	
-	my @seq_array = ();
-	my $seq_counter = 0;
-	foreach my $seq (@sequences){
-#		my @sequence = split(/\s*/,$seq);
-		$seq_array[$seq_counter] = [split(/\s*/,$seq)];
-#		push @seq_array, [@sequence];
-		$seq_counter++;
-	}
-	my $g = 0;
-	while ( $g < $seq_length){
-		last if (!exists $seq_array[0][$g]);
-		my $bases = ();
-		for my $u (0 ... $#seq_array){
-			$bases = $bases.$seq_array[$u][$g];
-		}	
-#		print $bases, "\n";
-		if ($bases eq $allgaps){
-#			print "bases are $bases, position is $g \n";
-			for my $seq (@seq_array){
-				splice(@$seq , $g, 1);
-			}
-		}
-		else {
-			$g++;
-		}
-	}
-	
-	my @outs = ();
-	
-	foreach my $seq (@seq_array){
-		push(@outs, join("",@$seq));
-	}
-	return "SHORT" if length($outs[0]) <=100;
-	return (join("\n", @outs));	
-}
-
-
-sub allowedSetOfSpecies{
-	my @allowed_species = split(/_/,$_[0]);
-	unshift @allowed_species, 0;
-#	print "allowed set = @allowed_species \n";
-	my @output = ();
-	for (0 ... scalar(@allowed_species) - 4){
-		push(@output, join("_",@allowed_species));
-		pop @allowed_species;
-	}
-	return join(";",reverse(@output));
-
-}
-
-
-sub orderInfo{
-	my @info = split(/;/,$_[0]);
-#	print "info = @info";
-	my @old = split(/_/,$_[1]);
-	my @new = split(/_/,$_[2]);
-	shift @old; shift @new;
-	my @outinfo = ();
-	foreach my $spe (@new){
-		for my $no (0 ... $#old){
-			if ($spe eq $old[$no]){
-				push(@outinfo, $info[$no]);
-			}
-		}
-	}
-#	print "outinfo = @outinfo \n"; 
-	return join(";", @outinfo);
-}
-
-#xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx 
-
-#xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx 
-sub artificial_axdata_inverter{
-	open(IN,"<$_[0]") or die "Cannot open file $_[0]: $!";
-	open(OUT,">$_[1]") or die "Cannot open file $_[1]: $!";
-	my $linecounter=0;
-	while (my $line = <IN>){
-		$linecounter++;
-		#print "$linecounter\n";
-		chomp $line;	
-		my $final_line = $line;	
-		my $trycounter = 0;
-		if ($line =~ /^[a-zA-Z\-]/){
-		#	while ($final_line eq $line){
-				my @fields = split(/\s*/,$line);
-				
-				$final_line = join("",reverse(@fields));
-		#		print colored ['red'], "$line\n$final_line\n" if $final_line eq $line && $line !~ /chr/ && $line =~ /[a-zA-Z]/;
-		#		$trycounter++;
-		#		print "trying again....$trycounter : $final_line\n" if $final_line eq $line;
-		#	}
-		}
-		
-	#	print colored ['yellow'], "$line\n$final_line\n" if $final_line eq $line && $line !~ /chr/ && $line =~ /[a-zA-Z]/;
-		if ($line =~ /^[0-9]/){
-			$line =~ s/chr([A-Z0-9a-b]+)/chr$1r/g;
-			$final_line = $line;
-		}
-		print OUT $final_line,"\n";
-		#print "$line\n$final_line\n" if $final_line eq $line && $line !~ /chr/ && $line =~ /[a-zA-Z]/;
-	}
-	close OUT;
-}
-#xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx 
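-# Inverter sketch: sequence lines are reversed character-by-character (not complemented) and
-# header lines get an "r" appended to the chromosome name (e.g. chr22 -> chr22r), creating
-# the artificial reverse-direction dataset for the second scan.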
-
-
-#xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx 
-
-sub multi_species_t1 {
-
-	my $input1 = $_[0];
-#	print "@_\n"; #<STDIN>;
-	my @tags = split(/_/, $_[1]);
-	my @outputs = split(/,/, $_[2]);
-	my $title_query = $_[3];
-	my @handles = ();
-	
-	open(FILEB,"<$input1")or die "Cannot open file: $input1 $!";
-	my $i = 0;
-	foreach my $path (@outputs){
-		$handles[$i] = IO::Handle->new();
-		open ($handles[$i], ">$path") or die "Can't open $path : $!";
-		$i++;
-	}
-	
-	my $curdef;
-	my $start = 0;
-	
-	while (my $line = <FILEB> ) {
-		if ($line =~ /^\d/){
-			$line =~ s/ +/\t/g;
-			my @fields = split(/\s+/, $line);
-			if (($line =~ /$title_query/)){
-				my $title = $line;
-				my $counter = 0;
-				foreach my $tag (@tags){
-					$line = <FILEB>;
-					print {$handles[$counter]} ">",$tag,"\t",$title, " ",$line;  	
-					$counter++;
-				}
-			}
-			else{
-					foreach my $tag (@tags){
-					my $tine = <FILEB>;
-				}		
-			}
-		
-		}
-	}
-	
-	foreach my $hand (@handles){
-		$hand->close(); 
-	}
-	
-	close FILEB;
-}
-
-#xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx 
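-# t1 sketch: numeric title lines matching $title_query mark alignment blocks; the title plus
-# each species' following sequence line is written to that species' "*_gap_op" file, and
-# blocks with non-matching titles are skipped along with their sequence lines.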
-
-#xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx 
-
-sub multi_species_t2{
-	
-	my $input = $_[0];
-	my $species = $_[1];
-	my $output1 = $input."_unr";
-	
-	#------------------------------------------------------------------------------------------
-	open (FILEF1, "<$input") or die "Cannot open file $input :$!";
-	open (FILEF2, ">$output1") or die "Cannot open file $output1 :$!";
-	
-	while (my $line1 = <FILEF1>){
-		next unless $line1 =~ /^\>$species/;
-		chomp($line1);
-		print FILEF2 $line1;
-		my $seqline = <FILEF1>;
-		chomp($seqline) if defined $seqline;
-		print FILEF2 "\t", $seqline, "\n" if defined $seqline;
-	}
-	
-	close FILEF1;
-	close FILEF2;
-	#------------------------------------------------------------------------------------------
-	
-	my $output2 = $output1."and";
-	my $output3 = $output1."and2";
-	open(IN,"<$output1");
-	open (FILEF3, ">$output2");
-	open (FILEF4, ">$output3");
-	
-	
-	while (<IN>){
-		my $line = $_;
-		chomp($line);
-		my @fields=split (/\t/, $line);
-	#   print $line,"\n";
-		if($fields[5] ne "chrUn_random"){
-			print FILEF3 join ("\t",@fields[0 ... scalar(@fields)-2]), "\n", $fields[scalar(@fields)-1], "\n";
-			print FILEF4 join ("\t",@fields[0 ... scalar(@fields)-2]), "\t", $fields[scalar(@fields)-1], "\n";	
-		}
-	}
-	
-	
-	close IN;
-	close FILEF3;
-	close FILEF4;
-	unlink $output1;
-	
-	#------------------------------------------------------------------------------------------
-	# OLD T3.PL RUDIMENT
-	
-	my $t3output = $output2;
-	$t3output =~ s/gap_op_unrand/nogap_op_unrand/g;
-	
-	open(IN,"<$output2");
-	open(OUTA,">$t3output");
-	
-	
-	while (<IN>){
-		s/-//g unless /^>/;
-		print OUTA;
-	}
-	
-	close IN;
-	close OUTA;
-	#------------------------------------------------------------------------------------------
-}
-#xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx 
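-# t2 naming sketch: an input "*_gap_op" yields "*_gap_op_unrand" (two-line records) and
-# "*_gap_op_unrand2" (one tab-delimited line per record); the old-t3 step then strips the
-# alignment gaps to produce "*_nogap_op_unrand", the input handed to sputnik.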
-
-
-#xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxxmulti_species_t2_2 xxxxxxx 
-sub multi_species_t2_2{
-	#print "IN multi_species_t2_2 : @_\n";
-	my $input = $_[0];
-	my $species = $_[1];
-	my $output1 = $input."2";
-	
-	
-	open (FILEF1, "<$input");
-	open (FILEF2, ">$output1");
-	
-	while (my $line1 = <FILEF1>){
-		next unless $line1 =~ /^\>$species/;
-		chomp($line1);
-		print FILEF2 $line1;
-		my $seqline = <FILEF1>;
-		chomp($seqline) if defined $seqline;
-		print FILEF2 "\t", $seqline, "\n" if defined $seqline;
-	}
-	
-	close FILEF1;
-	close FILEF2;
-}
-
-#xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxx multi_species_t2_2 xxxxxxx 
-
-
-#xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx 
-sub sputnikoutput_corrector{
-	my $input = $_[0];
-	my $output = $_[1];
-	open(IN,"<$input") or die "Cannot open file $input :$!";
-	open(OUT,">$output") or die "Cannot open file $output :$!";
-	my $tine;
-	while (my $line=<IN>){
-		if($line =~/length /){
-			$tine = $line;
-			$tine =~ s/\s+/\t/g;
-			my @fields = split(/\t/,$tine);	
-			if ($fields[6] > 60){
-				print OUT $line;
-				$line = <IN>;
-			
-				while (defined $line && ($line !~ /nucleotide/) && ($line !~ /^>/)){
-					chomp $line;
-					print OUT $line;
-					$line = <IN>;
-				}
-				print OUT "\n";
-				print OUT $line;
-			}
-			else{
-				print OUT $line;
-			}
-		}
-		else{
-			print OUT $line;
-		}
-	}
-	close IN;
-	close OUT;
-}
-#xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx 
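-# Corrector sketch: sputnik wraps the sequence of long repeats across several lines; for
-# hits whose length field (the seventh whitespace-delimited field of the "length" header
-# line) exceeds 60, the wrapped lines are rejoined into a single sequence line.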
-
-
-#xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx 
-sub multi_species_t4{
-#	print "multi_species_t4 : @_\n";
-	my $input = $_[0];
-	my $output = $_[1];
-	open (FILEA, "<$input");
-	open (FILEB, ">$output");
-	
-	my $line = <FILEA>;
-	
-	while ($line) {
-	   # chomp $line;
-		if ($line =~ />/) {
-			chomp $line;
-			print FILEB $line, "\n"; 
-		}
-	
-		
-		if ($line =~ /^m/ || $line =~ /^d/ || $line =~ /^t/ || $line =~ /^p/){
-		chomp $line;
-		print FILEB $line, " " ;
-		$line = <FILEA>;
-		chomp $line;
-		print FILEB $line,"\n";
-		}
-	
-		$line = <FILEA>;
-	}
-	
-	
-	close FILEA;
-	close FILEB;
-
-}
-
-#xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx 
-
-
-#xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx 
-sub multi_species_t5{
-	
-	my $input = $_[0];
-	my $output = $_[1];
-	
-	open(FILEB,"<$input");
-	open(FILEC,">$output");
-	
-	my $curdef;
-	
-	while (my $line = <FILEB> ) {
-	
-		if ($line =~ /^>/){
-		chomp $line;
-		$curdef = $line;
-		next;
-	}
-	
-	if ($line =~ /^m/ || $line =~ /^d/ || $line =~ /^t/ || $line =~ /^p/){
-		print  FILEC $curdef," ",$line;
-	}
-	
-	}
-	
-	
-	close FILEB;
-	close FILEC;
-
-}
-#xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx 
-
-
-#xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx 
-sub multi_species_t6{
-	my $input = $_[0];
-	my $output = $_[1];	
-#	print "input = @_\n"; 
-	open (FILE, "<$input");
-	open (FILE_MICRO, ">$output");
-	my $linecounter=0;	
-	while (my $line = <FILE>){
-		$linecounter++;
-		chomp $line;
-		#print "line = $line\n";
-		#MONO#
-		$line =~ /$focalspec\s[a-zA-Z]+[0-9a-zA-Z]+\s[0-9]+\s[0-9]+\s([+\-])/;
-		my $strand=$1;
-		my $no_of_species = ($line =~ s/\s+[+\-]\s+/ /g);
-		#print "line = $line\n";
-		my $specfieldsend = 2 + ($no_of_species*4) - 1;
-		my @fields = split(/\s+/, $line);
-		my @speciesdata = @fields[0 ... $specfieldsend];
-		$line =~ /([a-z]+nucleotide)\s([0-9]+)\s:\s([0-9]+)/;
-		my ($tide, $start, $end) = ($1, $2, $3);
-		#print "no_of_species=$no_of_species.. speciesdata = @speciesdata and ($tide, $start, $end)\n";
-		if($line  =~ /mononucleotide/){
-			print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  mono($fields[$#fields]),),"\n";
-		}
-		#DI#	
-		elsif($line =~ /dinucleotide/){
-			print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  di($fields[$#fields]),),"\n";
-		}
-		#TRI#	
-		elsif($line =~ /trinucleotide/ ){
-			print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  tri($fields[$#fields]),),"\n";
-		}
-		#TETRA#
-		elsif($line =~ /tetranucleotide/){
-			print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  tetra($fields[$#fields]),),"\n";
-		}
-		#PENTA#
-		elsif($line =~ /pentanucleotide/){
-			#print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  penta($fields[$#fields]),),"\n";
-		}
-		else{
-		#	print "not: @fields\n";
-		}
-	}
-#	print "linecounter=$linecounter\n"; 
-	close FILE;
-	close FILE_MICRO;
-}
-
-sub mono {
-	my $st = $_[0];
-	my $tp = unpack "A1"x(length($st)/1),$st;
-	my $var1 = substr($tp, 0, 1);
-	return join ("\t", $var1);
-}
-sub di {
-	my $st = $_[0];
-	my $tp = unpack "A2"x(length($st)/2),$st;
-	my $var1 = substr($tp, 0, 2);
-	return join ("\t", $var1);
-}
-sub tri {
-	my $st = $_[0];
-	my $tp = unpack "A3"x(length($st)/3),$st;
-	my $var1 = substr($tp, 0, 3);
-	return join ("\t", $var1);
-}
-sub tetra {
-	my $st = $_[0];
-	my $tp = unpack "A4"x(length($st)/4),$st;
-	my $var1 = substr($tp, 0, 4);
-	return join ("\t", $var1);
-}
-sub penta {
-	my $st = $_[0];
-	my $tp = unpack "A5"x(length($st)/5),$st;
-	my $var1 = substr($tp, 0, 5);
-	return join ("\t", $var1);
-}
-
-#xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx 
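-# Motif-unit sketch: unpack "A2"x3 applied to "CACACA" yields ("CA","CA","CA"), and in
-# scalar context only the first chunk is kept, so di("CACACA") returns "CA"; mono/tri/
-# tetra/penta do the same with 1-, 3-, 4- and 5-base chunks.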
-
-
-#xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx 
-sub t9{
-	my $input1 = $_[0];
-	my $input2 = $_[1];
-	my $output = $_[2];
-	
-	
-	open(IN1,"<$input1") if -e $input1; 
-	open(IN2,"<$input2") or die "cannot open file $_[1] : $!";
-	open(OUT,">$output") or die "cannot open file $_[2] : $!"; 
-	
-	
-	my %seen = ();
-	my $prevkey = 0;
-	
-	if (-e $input1){
-		while (my $line = <IN1>){
-			chomp($line);
-			my @fields = split(/\t/,$line);
-			my $key1 = join ("_",@fields[0,1,3,4,5]);
-		#	print "key in t9 = $key1\n";
-			$seen{$key1}++	if ($prevkey ne $key1) ;
-			$prevkey = $key1;
-		}
-#		print "done first hash\n";
-		close IN1;
-	}
-	
-	while (my $line = <IN2>){
-	#	print $line, "**\n";
-		if (-e $input1){
-			chomp($line);
-			my @fields = split(/\t/,$line);
-			my $key2 = join ("_",@fields[0,1,3,4,5]);
-			if (exists $seen{$key2}){
-				print OUT "$line\n"	;
-				delete $seen{$key2};
-			}
-		}
-		else {
-			print OUT "$line\n"	;		
-		}
-	}
-	
-	close IN2;
-	close OUT;
-}
-#xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx 
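-# t9 sketch: lines are keyed on fields 0,1,3,4,5 (alignment number plus coordinates); a
-# sequence-file line is kept only if its key also occurs in the microsat file, keeping the
-# *_match outputs in register with the repeats found by sputnik.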
-
-
-#xxxxxxxxxxxxxx multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx  multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx  multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx 
-
-
-sub multiSpecies_compound_microsat_hunter3{	
-	
-	my $input1 = $_[0];  ###### the *_sput_op4_ii file
-	my $input2 = $_[1];  ###### looks like this: my $t8humanoutput = $pipedir.$ptag."_nogap_op_unrand2"
-	my $output1 = $_[2]; ###### plain microsatellite file
-	my $output2 = $_[3]; ###### compound microsatellite file
-	my $org = $_[4]; ###### 1 or 2
-	$no_of_species = $_[5];
-	#print "IN multiSpecies_compound_microsat_hunter3: @_\n"; 
-	#my @tags = split(/\t/,$info);
-	sub compoundify;
-	open(IN,"<$input1") or die "Cannot open file $input1 $!";
-	open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
-	open(OUT,">$output1") or die "Cannot open file $output1 $!";
-	open(OUT2,">$output2") or die "Cannot open file $output2 $!";
-	$infocord = 2 + (4*$no_of_species) - 1;
-	$startcord = 2 + (4*$no_of_species) + 2 - 1;
-	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
-	$endcord = 2 + (4*$no_of_species) + 4 - 1;
-	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
-	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
-	my $sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
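-	# Column-layout sketch: fields 0 .. 4n+1 carry the alignment header (n = number of
-	# species), then type, start, strand, end, microsat and motif follow; e.g. for a
-	# hypothetical n = 3, startcord = 15, endcord = 17 and motifcord = 19, while the raw
-	# sequence in the *_match file sits at field 2 + 5n = 17.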
-	
-	my @thresholds = ("0");
-	push(@thresholds, split(/_/,$_[6]));
-	sub thresholdCheck;
-	my %micros = ();
-	while (my $line = <IN>){
-	#	print "$org\t(chr[0-9]+)\t([0-9]+)\t([0-9])+\t \n";
-		next if $line =~ /\t\t/;
-		if ($line =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([a-zA-Z0-9]+)\s([a-zA-Z]+[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
-			my $key = join("\t",$1, $2, $3, $4, $5);
-		#	print $key, "#-#-#-#-#-#-#-#\n";
-			push (@{$micros{$key}},$line);		
-		}
-		else{
-		}
-	}
-	close IN;
-	my @deletedlines = ();
-	
-	my $linecount = 0;
-	
-	while(my $sine = <SEQ>){
-		my %microstart=();
-		my %microend=();
-	
-		my @sields = split(/\t/,$sine);
-	
-		my $key = ();
-	
-		if ($sine =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([a-zA-Z0-9]+)\s([a-zA-Z]+[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
-			$key = join("\t",$1, $2, $3, $4, $5);
-		#	print $key, "<-<-<-<-<-<-<-<\n";		
-		}
-		else{
-		}
-	
-		if (exists $micros{$key}){
-			$linecount++;	
-			my @microstring = @{$micros{$key}};
-			my @tempmicrostring = @{$micros{$key}};
-			
-			foreach my $line (@tempmicrostring){
-				my @fields = split(/\t/,$line);
-				my $start = $fields[$startcord];
-				my $end = $fields[$endcord];
-				push (@{$microstart{$start}},$line);
-				push (@{$microend{$end}},$line);	
-			}
-			my $firstflag = 'down';
-			while( my $line =shift(@microstring)){
-	#			print "-----------\nline = $line ";
-				chomp $line;
-				my @fields = split(/\t/,$line);
-				my $start = $fields[$startcord];
-				my $end = $fields[$endcord];
-				my $startmicro = $line;
-				my $endmicro = $line;
-	
-			#	print "fields=@fields, start = $start end=$end, startcord=$startcord, endcord=$endcord\n";
-	
-				delete ($microstart{$start});
-				delete ($microend{$end});
-				my $flag = 'down';	
-				my $startflag = 'down';
-				my $endflag = 'down';
-				my $prestart = $start - $distance;
-				my $postend = $end + $distance;
-				my @compoundlines = ();
-				my %compoundhash = ();
-				push (@compoundlines, $line);
-				push (@{$compoundhash{$line}},$line);
-				my $startrank = 1;
-				my $endrank = 1;
-				
-				while( ($startflag eq "down") || ($endflag eq "down") ){
-				if ((($prestart < 0) && $firstflag eq "up") || (($postend > length($sields[$sequencepos])) && $firstflag eq "up") ) {
-#					print "coming to the end of sequence,prestart = $prestart &  post end = $postend and sequence length =", length($sields[$sequencepos])," so exiting\n";
-					last;
-				}
-					
-				$firstflag = "up";
-				if ($startflag eq "down"){		
-					for my $i ($prestart ... $start){
-					
-						if(exists $microend{$i}){	
-							chomp $microend{$i}[0];
-							if(exists $compoundhash{$microend{$i}[0]}) {next;}
-	#						print "sending from microend $startmicro, $microend{$i}[0] |||\n";
-							if (identityMatch_thresholdCheck($startmicro, $microend{$i}[0], $startrank) eq "proceed"){
-								push(@compoundlines, $microend{$i}[0]);
-	#							print "accepted\n";
-								my @tields = split(/\t/,$microend{$i}[0]);
-								$startmicro = $microend{$i}[0];
-								chomp $startmicro;
-								$start = $tields[$startcord];
-								$flag = 'down';
-								$startrank++;
-	#							print "startcompund = $microend{$i}[0]\n";
-								delete $microend{$i};
-								delete $microstart{$start};
-								$startflag = 'down';
-								$prestart = $start - $distance;
-								last;
-							}	
-							else{
-								$flag = 'up';
-								$startflag = 'up';							
-							}
-						}
-						else{
-							$flag = 'up';
-							$startflag = 'up';
-						}
-					}
-				}
-					
-				$endrank = $startrank;
-				
-				if ($endflag eq "down"){				
-					for my $i ($end ... $postend){
-					
-						if(exists $microstart{$i} ){
-							chomp $microstart{$i}[0];
-							if(exists $compoundhash{$microstart{$i}[0]}) {next;}	
-	#						print "sending from microstart $endmicro, $microstart{$i}[0] |||\n";
-	
-							if(identityMatch_thresholdCheck($endmicro,$microstart{$i}[0], $endrank) eq "proceed"){
-								push(@compoundlines, $microstart{$i}[0]);
-	#								print "accepted\n";
-								my @tields = split(/\t/,$microstart{$i}[0]);
-								$end = $tields[$endcord]-0;
-								$endmicro = $microstart{$i}[0];
-								$endrank++;
-								chomp $endmicro;
-								$flag = 'down';
-	#							print "endcompund = $microstart{$i}[0]\n";
-								delete $microstart{$i};
-								delete $microend{$end};
-								shift @microstring;
-								$postend = $end + $distance;
-								$endflag = 'down';
-								last;								
-							}
-							else{
-								$flag = 'up';
-								$endflag = 'up';							
-							}
-						}
-						else{
-							$flag = 'up';
-							$endflag = 'up';
-						}
-					}
-				}
-	#			print "for next turn, flag status: startflag = $startflag and endflag = $endflag \n";
-			} 														#end while( $flag eq "down")
-	#			print "compoundlines = @compoundlines \n";
-			if (scalar (@compoundlines) == 1){
-				print OUT $line,"\n";			
-			}
-			if (scalar (@compoundlines) > 1){
-				my $compoundline = compoundify(\@compoundlines, $sields[$sequencepos]);
-	#				print $compoundline,"\n";
-				print OUT2 $compoundline,"\n";
-			}
-			} #end foreach my $line (@microstring){
-		}	#if (exists $micros{$key}){
-	
-	
-	}
-	
-	close OUT;
-	close OUT2;
-}
-
-
-#------------------------------------------------------------------------------------------------
-sub compoundify{
-	my ($compoundlines, $sequence)  = @_;
-#	print "\nfound to compound : @$compoundlines and$sequence \n";
-	my $noOfComps = @$compoundlines;
-#	print "Number of elements in hash is $noOfComps\n";
-	my @starts;
-	my @ends;
-	foreach my $line (@$compoundlines){
-#		print "compoundify.. line = $line \n";
-		chomp $line;
-		my @fields = split(/\t/,$line);
-		my $start = $fields[$startcord];
-		my $end = $fields[$endcord];
-	#	print "start = $start, end = $end \n";
-		push(@starts, $start);
-		push(@ends,$end);		
-	}
-	my @temp = @$compoundlines;
-	my $startline=$temp[0];
-	my @mields  = split(/\t/,$startline);
-	my $startcoord = $mields[$startcord];
-	my $startgapsign=$mields[$endcord];
-	my @startsorted = sort { $a <=> $b } @starts;
-	my @endsorted = sort { $a <=> $b } @ends;
-	my @intervals;
-	for my $end (0 ... (scalar(@endsorted)-2)){
-		my $interval = substr($sequence,($endsorted[$end]+1),(($startsorted[$end+1])-($endsorted[$end])-1));
-		push(@intervals,$interval);
-	#	print "interval = $interval =\n";
-	#	print "substr(sequence,($endsorted[$end]+1),(($startsorted[$end+1])-($endsorted[$end])-1))\n";
-	}
-	push(@intervals,"");
-	my $compoundmicrosat=();
-	my $multiunit="";
-	foreach my $line (@$compoundlines){
-		my @fields = split(/\t/,$line);
-		my $component="[".$fields[$microsatcord]."]".shift(@intervals);
-		$compoundmicrosat=$compoundmicrosat.$component;
-		$multiunit=$multiunit."[".$fields[$motifcord]."]";
-#		print "multiunit = $multiunit\n";
-	}
-	my $compoundcopy = $compoundmicrosat;
-	$compoundcopy =~ s/\[|\]//g;
-	my $compoundlength = $mields[$startcord] + length($compoundcopy) - 1;
-	
-	
-	my $compoundline = join("\t",(@mields[0 ... $infocord], "compound",@mields[$startcord ... $startcord+1],$compoundlength,$compoundmicrosat, $multiunit));
-	return $compoundline;
-}	
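-# Compoundify sketch (hypothetical values): repeats CACACA (ending at 15) and GAGAGA
-# (starting at 18) over "...CACACAttGAGAGA..." merge into one record with microsat
-# "[CACACA]tt[GAGAGA]", motif "[CA][GA]" and an end recomputed from the combined length.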
-
-#------------------------------------------------------------------------------------------------
-
-sub identityMatch_thresholdCheck{
-	my $line1 = $_[0];
-	my $line2 = $_[1];
-	my $rank = $_[2];
-	my @lields1 = split(/\t/,$line1);
-	my @lields2 = split(/\t/,$line2);
-#	print "recieved $line1 && $line2\n motif comparison: ", length($lields1[$motifcord])," : ",length($lields2[$motifcord]),"\n";
-	
-	if (length($lields1[$motifcord]) == length($lields2[$motifcord])){
-		my $probe = $lields1[$motifcord].$lields1[$motifcord];
-			#print "$probe :: $lields2[$motifcord]\n";
-		return "proceed" if $probe =~ /$lields2[$motifcord]/;
-			#print "line recieved\n";
-		if ($rank ==1){
-			return "proceed" if thresholdCheck($line1) eq "proceed" && thresholdCheck($line2) eq "proceed";
-		}
-		else {
-			return "proceed" if thresholdCheck($line2) eq "proceed";
-			return "stop";
-		}
-	}
-	else{
-		if ($rank ==1){
-			return "proceed" if thresholdCheck($line1) eq "proceed" && thresholdCheck($line2) eq "proceed";
-		}
-		else {
-			return "proceed" if thresholdCheck($line2) eq "proceed";
-			return "stop";
-		}
-	}
-	return "stop";
-}
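-# The doubled-probe trick above makes motif comparison rotation-insensitive: for motifs of
-# equal length, "CA" doubled to "CACA" matches /AC/, so CA and AC count as the same family.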
-#------------------------------------------------------------------------------------------------
-
-sub thresholdCheck{
-	my @checkthresholds=(0,@thresholds);
-	#print "IN thresholdCheck: @_\n";
-	my $line = $_[0];
-	my @lields = split(/\t/,$line);
-	return "proceed" if length($lields[$microsatcord]) >= $checkthresholds[length($lields[$motifcord])];
-	return "stop";
-}
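-# thresholdCheck reads @thresholds as a lookup keyed on motif length: the microsat sequence
-# must be at least that many bases long for merging/extension to proceed.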
-#xxxxxxxxxxxxxx multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx  multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx  multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx 
-
-
-#xxxxxxxxxxxxxx multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx  multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx  multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx 
-
-sub multiSpecies_filtering_interrupted_microsats{
-#	print "IN multiSpecies_filtering_interrupted_microsats: @_\n";
-	my $unfiltered = $_[0];
-	my $filtered = $_[1];
-	my $residue = $_[2];
-	my $no_of_species = $_[5];
-	open(UNF,"<$unfiltered") or die "Cannot open file $unfiltered: $!";
-	open(FIL,">$filtered") or die "Cannot open file $filtered: $!";
-	open(RES,">$residue") or die "Cannot open file $residue: $!";
-		
-	$infocord = 2 + (4*$no_of_species) - 1;
-	$startcord = 2 + (4*$no_of_species) + 2 - 1;
-	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
-	$endcord = 2 + (4*$no_of_species) + 4 - 1;
-	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
-	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
-	
-	
-	my @sub_thresholds = (0);
-	
-	push(@sub_thresholds, split(/_/,$_[3]));
-	my @thresholds = (0);
-	
-	push(@thresholds, split(/_/,$_[4]));
-	
-	while (my $line = <UNF>) {
-		next if $line !~ /[a-z]/;
-		#print $line;
-		chomp $line;
-		my @fields = split(/\t/,$line);
-		my $motif = $fields[$motifcord];
-		my $realmotif = $motif;
-		#print "motif = $motif\n";
-		if ($motif =~ /^\[/){
-			$motif =~ s/^\[//g;
-			my @motifs = split(/\]/,$motif);
-			$realmotif = $motifs[0];
-		}
-#		print "realmotif = $realmotif";
-		my $motif_size = length($realmotif);
-		
-		my $microsat = $fields[$microsatcord];
-#		print "microsat = $microsat\n";
-		$microsat =~ s/^\[|\]$//sg;
-		my @microsats = split(/\][a-zA-Z|-]*\[/,$microsat);
-		
-		$microsat = join("",@microsats);
-		if (length($microsat) < $thresholds[$motif_size]) {
-		#	print length($microsat)," < ",$thresholds[$motif_size],"\n"; 
-			print RES $line,"\n"; next;
-		}
-		my @lengths = ();
-		foreach my $mic (@microsats){
-			push(@lengths, length($mic));
-		}
-		if (largest_microsat(@lengths) < $sub_thresholds[$motif_size]) {
-	#		print largest_microsat(@lengths)," < ",$sub_thresholds[$motif_size],"\n"; 
-			print RES $line,"\n"; next;}
-			else {print FIL $line,"\n"; next;
-		}
-	}
-	close FIL;
-	close RES;
-
-}
-
-sub largest_microsat{
-	my $max = shift(@_);
-	foreach my $temp (@_) {
-		$max = $temp if $temp > $max;
-	}
-	return($max);
-}
-
-#xxxxxxxxxxxxxx multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx  multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx  multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx 
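-# Filtering sketch: for an interrupted repeat like "[CACA]t[CACACA]", the total repeat
-# length with interruptions spliced out (10) is checked against @thresholds for the motif
-# size, and the longest uninterrupted stretch (6) against @sub_thresholds; lines failing
-# either test go to the *_residue file.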
-
-
-#xxxxxxxxxxxxxx multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx  multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx  multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx 
-sub multiSpecies_compound_microsat_analyzer{
-	####### PARAMETER ########
-	##########################
-	
-	my $input1 = $_[0];  ###### the *_sput_op4_ii file
-	my $input2 = $_[1];  ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
-	my $output1 = $_[2]; ###### interrupted microsatellite file, in new .interrupted format
-	my $output2 = $_[3]; ###### the pure compound microsatellites
-	my $org = $_[4];
-	my $no_of_species = $_[5];
-#	print "IN multiSpecies_compound_microsat_analyzer: $input1\n $input2\n $output1\n $output2\n $org\n $no_of_species\n";
-	$infocord = 2 + (4*$no_of_species) - 1;
-	$typecord = 2 + (4*$no_of_species) + 1 - 1;
-	$startcord = 2 + (4*$no_of_species) + 2 - 1;
-	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
-	$endcord = 2 + (4*$no_of_species) + 4 - 1;
-	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
-	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
-	
-	open(IN,"<$input1") or die "Cannot open file $input1 $!";
-	open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
-	
-	open(OUT,">$output1") or die "Cannot open file $output1 $!";
-	open(OUT2,">$output2") or die "Cannot open file $output2 $!";
-	
-	
-#	print "opened files \n";
-	my %micros = ();
-	my $keycounter=0;
-	my $linecounter=0;
-	while (my $line = <IN>){
-		$linecounter++;
-		if ($line =~ /([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
-			my $key = join("\t",$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
-			push (@{$micros{$key}},$line);		
-			$keycounter++;
-		}
-		else{
-	#		print "no key\n";
-		}
-	}
-	close IN;
-	my @deletedlines = ();
-#	print "done hash . linecounter=$linecounter, keycounter=$keycounter\n";
-	#---------------------------------------------------------------------------------------------------
-	# NOW READING THE SEQUENCE FILE
-	my $keyfound=0;
-	my $keyexists=0;
-	my $inter=0;
-	my $pure=0;
-	
-	while(my $sine = <SEQ>){
-		my %microstart=();
-		my %microend=();
-		my @sields = split(/\t/,$sine);
-		my $key = ();
-		if ($sine =~ /([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
-			$key = join("\t",$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
-			$keyfound++;
-		}
-		else{
-		
-		}
-		if (exists $micros{$key}){
-			$keyexists++;
-			my @microstring = @{$micros{$key}};
-	
-			my @filteredmicrostring;
-			
-			foreach my $line (@microstring){
-				chomp $line;
-				my $copy_line = $line;
-				my @fields = split(/\t/,$line);
-				my $start = $fields[$startcord];
-				my $end = $fields[$endcord];				
-				# FOR COMPOUND MICROSATELLITES	
-				if ($fields[$typecord] eq "compound"){					
-					$line = compound_microsat_analyser($line);	
-					if ($line eq "NULL") {
-						print OUT2 "$copy_line\n";
-						$pure++;
-						next;
-					}
-					else{
-						print OUT "$line\n";
-						$inter++;
-						next;
-					}
-				}
-			}
-			
-		}	#if (exists $micros{$key}){
-	}
-	close OUT;
-	close OUT2;
-#	print "keyfound=$keyfound, keyexists=$keyexists, pure=$pure, inter=$inter\n";
-}
-	
-sub compound_microsat_analyser{
-	my $line = $_[0];
-	my @fields = split(/\t/,$line);
-	my $motifline = $fields[$motifcord];
-	my $microsat = $fields[$microsatcord];
-	$motifline =~ s/^\[|\]$//g;
-	$microsat =~ s/^\[|\]$//g;
-	$microsat =~ s/-//g;
-	my @interruptions = ();
-	my @motields = split(/\]\[/,$motifline);
-	my @microields = split(/\][a-zA-Z|-]*\[/,$microsat);
-	my @inields = split(/[.*]/,$microsat);
-	shift @inields;
-	my @motifcount = scalar(@motields);
-	my $prevmotif = $motields[0];
-	my $prevmicro = $microields[0];
-	my $prevphase = substr($microields[0],-(length($motields[0])),length($motields[0]));
-	my $localflag = 'down';
-	my @infoarray = ();
-	
-	for my $l (1 ... (scalar(@motields)-1)){
-		my $probe = $prevmotif.$prevmotif;
-		if (length $prevmotif != length $motields[$l]) {$localflag = "up"; last;}
-		
-		if ($probe =~ /$motields[$l]/i){ 
-			my $curr_endphase = substr($microields[$l],-length($motields[$l]),length($motields[$l]));
-			my $curr_startphase = substr($microields[$l],0,length($motields[$l]));
-			if ($curr_startphase =~ /$prevphase/i) {
-				$infoarray[$l-1] = "insertion";
-			}
-			else {
-				$infoarray[$l-1] = "indel/substitution";
-			}
-		
-			$prevmotif = $motields[$l]; $prevmicro = $microields[$l]; $prevphase = $curr_endphase;
-			next;
-		}
-		else {$localflag = "up"; last;}
-	}
-	if ($localflag eq 'up') {return "NULL";}
-	
-	if (length($prevmotif) == 1) {$fields[$typecord] = "mononucleotide";}
-	if (length($prevmotif) == 2) {$fields[$typecord] = "dinucleotide";}
-	if (length($prevmotif) == 3) {$fields[$typecord] = "trinucleotide";}
-	if (length($prevmotif) == 4) {$fields[$typecord] = "tetranucleotide";}
-	if (length($prevmotif) == 5) {$fields[$typecord] = "pentanucleotide";}
-
-	@microields = split(/[\[|\]]/,$microsat);
-	my @microsats = ();
-	my @positions = ();
-	my $lengthtracker = 0;
-
-	for my $i (0 ... (scalar(@microields ) - 1)){
-		if ($i%2 == 0){
-			push(@microsats,$microields[$i]);
-			$lengthtracker = $lengthtracker + length($microields[$i]);
-
-		}
-		else{			
-			push(@interruptions,$microields[$i]);
-			push(@positions, $lengthtracker+1);
-			$lengthtracker = $lengthtracker + length($microields[$i]);	
-		}				
-		
-	}
-	my $returnline = join("\t",(join("\t",@fields),join(",",(@infoarray)),join(",",(@interruptions)),join(",",(@positions)),scalar(@interruptions)));
-	return($returnline);
-}
-
-#xxxxxxxxxxxxxx multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx  multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx  multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx 
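-# Interruption-typing sketch: at each junction in e.g. "[CACACA]t[CACACA]", the phase the
-# repeat ended on is compared with the phase it restarts on; a matching phase is classed as
-# an "insertion", a mismatch as "indel/substitution", and the interrupting bases plus their
-# positions are appended to the output record.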
-
-
-#xxxxxxxxxxxxxx multiSpecies_compoundClarifyer xxxxxxxxxxxxxx  multiSpecies_compoundClarifyer xxxxxxxxxxxxxx  multiSpecies_compoundClarifyer xxxxxxxxxxxxxx 
-
-sub multiSpecies_compoundClarifyer{	
-#	print "IN multiSpecies_compoundClarifyer: @_\n";
-	my $input1 = $_[0];  ###### the *_sput_compound
-	my $input2 = $_[1];  ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
-	my $output1 = $_[2]; ###### interrupted microsatellite file, in new .interrupted format
-	my $output2 = $_[3]; ###### compound file
-	my $org = $_[4];
-	my $no_of_species = $_[5];
-	@thresholds = "0";
-	push(@thresholds, split(/_/,$_[6]));
-	
-	
-	$infocord = 2 + (4*$no_of_species) - 1;
-	$typecord = 2 + (4*$no_of_species) + 1 - 1;
-	$startcord = 2 + (4*$no_of_species) + 2 - 1;
-	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
-	$endcord = 2 + (4*$no_of_species) + 4 - 1;
-	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
-	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
-	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
-	
-	$interr_poscord = $motifcord + 3;
-	$no_of_interruptionscord = $motifcord + 4;
-	$interrcord = $motifcord + 2;
-	$interrtypecord = $motifcord + 1;
-	
-	
-	open(IN,"<$input1") or die "Cannot open file $input1 $!";
-	open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
-	
-	open(INT,">$output1") or die "Cannot open file $output2 $!";
-	open(COMP,">$output2") or die "Cannot open file $output2 $!";
-	#open(CH,">changed") or die "Cannot open file changed $!";
-		
-#	print "opened files \n";
-	my $linecounter = 0;
-	my $microcounter = 0;
-	
-	my %micros = ();
-	while (my $line = <IN>){
-	#	print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
-		$linecounter++;
-		if ($line =~ /([a-zA-Z0-9]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([a-zA-Z0-9]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([a-zA-Z0-9]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
-			my $key = join("\t",$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
-	#		print $key, "#-#-#-#-#-#-#-#\n";
-			push (@{$micros{$key}},$line);	
-			$microcounter++;
-		}
-		else {print $line;}
-	}
-#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
-	close IN;
-	my @deletedlines = ();
-#	print "done hash \n";
-	$linecounter = 0;
-	#---------------------------------------------------------------------------------------------------
-	# NOW READING THE SEQUENCE FILE
-	my @microsat_types = qw(_ mononucleotide dinucleotide trinucleotide tetranucleotide);
-	 $printer = 0;
-	
-	while(my $sine = <SEQ>){
-		my %microstart=();
-		my %microend=();
-		my @sields = split(/\t/,$sine);
-		my $key = ();
-		if ($sine =~ /([a-z0-9A-Z]+)\s+(chr[0-9a-zA-Z]+)\s+([0-9]+)\s+([0-9]+)\s+[\+|\-]\s+([a-z0-9A-Z]+)\s+(chr[0-9a-zA-Z]+)\s+([0-9]+)\s+([0-9]+)\s+[\+|\-]\s+([a-z0-9A-Z]+)\s+(chr[0-9a-zA-Z]+)\s+([0-9]+)\s+([0-9]+)\s/ ) {
-			$key = join("\t",$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
-		}
-		else{
-#			print "no key in $sine\nfor pattern ([a-z0-9A-Z]+) (chr[0-9a-zA-Z]+) ([0-9]+) ([0-9]+) [\+|\-] (a-z0-9A-Z) (chr[0-9a-zA-Z]+) ([0-9]+) ([0-9]+) [\+|\-] (a-z0-9A-Z) (chr[0-9a-zA-Z]+) ([0-9]+) ([0-9]+)   / \n"; 
-		}
-	
-		if (exists $micros{$key}){
-			my @microstring = @{$micros{$key}};
-			delete $micros{$key};
-			
-			foreach my $line (@microstring){
-#				print "#---------#---------#---------#---------#---------#---------#---------#---------\n" if $printer == 1; 
-#				print "microsat = $line" if $printer == 1; 
-				$linecounter++;
-				my $copy_line = $line;
-				my @mields = split(/\t/,$line);
-				my @fields = @mields;
-				my $start = $fields[$startcord];
-				my $end = $fields[$endcord];
-				my $microsat = $fields[$microsatcord];
-				my $motifline = $fields[$motifcord];
-				my $microsatcopy = $microsat;
-				my $positioner = $microsat;
-				$positioner =~ s/[a-zA-Z|-]/_/g;
-				$microsatcopy =~ s/^\[|\]$//gs;
-				chomp $microsatcopy;
-				my @microields = split(/\][a-zA-Z|-]*\[/,$microsatcopy);
-				my @inields = split(/\[[a-zA-Z|-]*\]/,$microsat);
-				my $absolutstart = 1; my $absolutend = $absolutstart + ($end-$start);
-#				print "absolut: start = $absolutstart, end = $absolutend\n" if $printer == 1;
-				shift @inields;
-				#print "inields =@inields<\n";
-				$motifline =~ s/^\[|\]$//gs;
-				chomp $motifline;
-				#print "microsat = $microsat, its copy = $microsatcopy motifline = $motifline<\n";
-				my @motields = split(/\]\[/,$motifline);
-				my $seq = $microsatcopy;
-				$seq =~ s/\[|\]//g;
-				my $seqlen = length($seq);
-				$seq = " ".$seq;
-	
-				my $longestmotif_no = longest_array_element(@motields);
-				my $shortestmotif_no = shortest_array_element(@motields);
-				#print "shortest motif = $motields[$shortestmotif_no], longest motif = $motields[$longestmotif_no] \n";
-			
-				my $search = $motields[$longestmotif_no].$motields[$longestmotif_no];
-				if ((length($motields[$longestmotif_no]) == length($motields[$shortestmotif_no])) && ($search !~ /$motields[$shortestmotif_no]/) ){
-					print COMP $line;
-					next;
-				}
-			
-				my @shortestmotif_nos = ();
-				for my $m (0 ... $#motields){
-					push(@shortestmotif_nos, $m) if (length($motields[$m]) == length($motields[$shortestmotif_no]) );
-				}
-				## LOOKING AT LEFT OF THE SHORTEST MOTIF------------------------------------------------
-				my $newleft =();
-				my $leftstopper = 0; my $rightstopper = 0;
-				foreach my $shortmotif_no (@shortestmotif_nos){
-					next if $shortmotif_no == 0;
-					my $last_left =  $shortmotif_no; #$#motields;
-					my $last_hitter = 0;
-					for (my $i =($shortmotif_no-1); $i>=0; $i--){	
-						my $search  = $motields[$shortmotif_no];
-						if (length($motields[$shortmotif_no]) == 1){ $search = $motields[$shortmotif_no].$motields[$shortmotif_no] ;}
-						if( (length($motields[$i]) > length($motields[$shortmotif_no])) && length($microields[$i]) > (2.5 * length($motields[$i])) ){
-							$last_hitter = 1;
-							$last_left = $i+1; last;				
-						}
-						my $probe = $motields[$i];
-						if (length($motields[$shortmotif_no]) == length($motields[$i])) {$probe = $motields[$i].$motields[$i];}
-	
-						if ($probe !~ /$search/){
-							$last_hitter = 1;
-							$last_left = $i+1; 
-	#						print "hit the last match: before $microields[$i]..last left = $last_left.. exiting.\n";
-							last;
-						}
-						$last_left--;$last_hitter = 1;
-	#					print "passed tests, last left = $last_left\n";
-					}
-	#				print "comparing whether $last_left < $shortmotif_no, lasthit = $last_hitter\n";
-					if (($last_left) < $shortmotif_no && $last_hitter == 1) {$leftstopper=0; last;}
-					else {$leftstopper = 1;
-	#					print "leftstopper = 1\n";
-					}
-				}
-				
-				## LOOKING AT LEFT OF THE SHORTEST MOTIF------------------------------------------------
-				my $newright =();
-				foreach my $shortmotif_no (@shortestmotif_nos){
-					next if $shortmotif_no == $#motields;
-					my $last_right =  $shortmotif_no;# -1;
-					for my $i ($shortmotif_no+1 ... $#motields){
-						my $search  = $motields[$shortmotif_no];
-						if (length($motields[$shortmotif_no]) == 1 ){ $search = $motields[$shortmotif_no].$motields[$shortmotif_no] ;}
-						if ( (length($motields[$i]) > length($motields[$shortmotif_no])) && length($microields[$i]) > (2.5 * length($motields[$i])) ){
-							$last_right = $i-1; last;			
-						}
-						my $probe = $motields[$i];
-						if (length($motields[$shortmotif_no]) == length($motields[$i])) {$probe = $motields[$i].$motields[$i];}
-						if (  $probe !~ /$search/){
-							$last_right = $i-1; last;
-						}
-						$last_right++; 
-					}
-					if (($last_right) > $shortmotif_no) {$rightstopper=0; last;# print "rightstopper = 0\n";
-					}
-					else {$rightstopper = 1;
-					}
-				}
-				
-				
-				if ($rightstopper == 1 && $leftstopper == 1){
-					print COMP $line; 
-#					print "rightstopper == 1 && leftstopper == 1\n" if $printer == 1; 
-					next;
-				}
-	
-#				print "pased initial testing phase \n" if $printer == 1; 
-				my @outputs = ();
-				my @orig_starts = ();
-				my @orig_ends = ();
-				for my $mic (0 ... $#microields){
-					my $miclen = length($microields[$mic]);
-					my $microleftlen = 0;
-					#print "\nmic = $mic\n";
-					if($mic > 0){
-						for my $submin (0 ... $mic-1){
-							my $interval = ();
-							if (!exists $inields[$submin]) {$interval = "";}
-							else {$interval = $inields[$submin];}
-							#print "inield =$interval< and microield =$microields[$submin]<\n  ";
-							$microleftlen = $microleftlen + length($microields[$submin]) + length($interval);
-						}
-					}
-					push(@orig_starts,($microleftlen+1));
-					push(@orig_ends, ($microleftlen+1 + $miclen -1));
-				}				
-	
-	#############  F I N A L L Y   S T U D Y I N G   S E Q U E N C E S  #########@@@@#########@@@@#########@@@@#########@@@@#########@@@@
-	
-	
-				for my $mic (0 ... $#microields){
-					my $miclen = length($microields[$mic]);
-					my $microleftlen = 0;
-					if($mic > 0){
-						for my $submin (0 ... $mic-1){
-						#	if(!exists $inields[$submin]) {$inields[$submin] = "";}
-							my $interval = ();
-							if (!exists $inields[$submin]) {$interval = "";}
-							else {$interval = $inields[$submin];}
-							#print "inield =$interval< and microield =$microields[$submin]<\n  ";
-							$microleftlen = $microleftlen + length($microields[$submin]) + length($interval);
-						}
-					}
-					$fields[$startcord] = $microleftlen+1;
-					$fields[$endcord] = $fields[$startcord] + $miclen -1;
-					$fields[$typecord] = $microsat_types[length($motields[$mic])];
-					$fields[$microsatcord] = $microields[$mic];
-					$fields[$motifcord] = $motields[$mic];		
-					my $templine = join("\t", (@fields[0 .. $motifcord]) );
-					my $orig_templine = join("\t", (@fields[0 .. $motifcord]) );
-					my $newline;
-					my $lefter = 1; my $righter = 1;
-					if ( $fields[$startcord] < 2){$lefter = 0;}
-					if ($fields[$endcord] == $seqlen){$righter = 0;}
-	
-					while($lefter == 1){
-						$newline = left_extender($templine, $seq,$org);
-#						print "returned line from left extender= $newline \n" if $printer == 1; 
-						if ($newline eq $templine){$templine = $newline; last;}
-						else {$templine = $newline;}
-						
-						if (left_extention_permission_giver($templine) eq "no") {last;}
-					}
-					while($righter == 1){
-						$newline = right_extender($templine, $seq,$org);
-#						print "returned line from right extender= $newline \n" if $printer == 1; 
-						if ($newline eq $templine){$templine = $newline; last;}
-						else {$templine = $newline;}
-						if (right_extention_permission_giver($templine) eq "no") {last;}
-					}
-					my @tempfields = split(/\t/,$templine);
-					$tempfields[$microsatcord] =~ s/\]|\[//g;
-					$tempfields[$motifcord] =~ s/^\[|\]$//gs;
-					my @tempmotields = split(/\]\[/,$tempfields[$motifcord]);			
-					
-					if (scalar(@tempmotields) == 1 && $templine eq $orig_templine) { 
-#						print "scalar ( tempmotields) = 1\n" if $printer == 1; 
-						next;
-					} 
-					my $prevmotif = shift(@tempmotields);
-					my $stopper = 0;
-					
-					foreach my $tempmot (@tempmotields){
-						if (length($tempmot) != length($prevmotif)) {$stopper = 1; last;}
-						my $search = $prevmotif.$prevmotif;
-						if ($search !~ /$tempmot/) {$stopper = 1; last;}
-						$prevmotif = $tempmot;
-					}
-					if ( $stopper == 1) { 
-#						print "length tempmot  != length prevmotif\n" if $printer == 1; 
-						next; 
-					} 
-					my $lastend  = 0;
-					#----------------------------------------------------------
-					my $left_captured = (); my $right_captured = ();
-					my $left_bp = (); my $right_bp = ();
-	#				print "new startcord = $tempfields[$startcord] , new endcord  = $tempfields[$endcord].. orig strts = @orig_starts and orig ends = @orig_ends\n";
-					for my $o (0 ... $#orig_starts){
-#						print "we are talking about tempstart:$tempfields[$startcord] >= origstart:$lastend && tempstart:$tempfields[$startcord] <= origend: $orig_ends[$o] \n" if $printer == 1; 
-#						print "we are talking about tempend:$tempfields[$endcord] >= origstart:$lastend && tempend:$tempfields[$endcord] >= origend: $orig_ends[$o] \n" if $printer == 1; 
-	
-						if (($tempfields[$startcord] > $lastend)  && ($tempfields[$startcord] <= $orig_ends[$o])){ # && ($tempfields[$startcord] != $fields[$startcord])
-#							print "motif captured on left is $microields[$o] from $microsat\n" if $printer == 1; 
-							$left_captured  = $o;
-							$left_bp =  $orig_ends[$o] - $tempfields[$startcord] + 1;
-						}
-						elsif ($tempfields[$endcord] > $lastend  && $tempfields[$endcord] <= $orig_ends[$o]){ #&& $tempfields[$endcord] != $fields[$endcord])
-#							print "motif captured on right is $microields[$o] from $microsat\n" if $printer == 1; 
-							$right_captured  = $o;
-							$right_bp = $tempfields[$endcord]  - $orig_starts[$o] + 1;
-						}
-						$lastend = $orig_ends[$o]
-					}
-#					print "leftcaptured = $left_captured, right = $right_captured\n" if $printer==1;
-					my $leftmotif = (); my $left_trashed = ();
-					if ($tempfields[$startcord] != $fields[$startcord]) {
-						$leftmotif = $motields[$left_captured]; 
-#						print "$left_captured in @microields: $motields[$left_captured]\n" if $printer == 1; 
-						if ( $left_captured !~ /[0-9]+/) {print $line,"\n", $templine,"\n"; }
-						 $left_trashed = length($microields[$left_captured]) - $left_bp;
-					}
-					my $rightmotif = (); my $right_trashed = ();
-					if ($tempfields[$endcord] != $fields[$endcord]) {
-#						print "$right_captured in @microields: $motields[$right_captured]\n" if $printer == 1; 
-						$rightmotif = $motields[$right_captured];
-						$right_trashed = length($microields[$right_captured]) - $right_bp;
-					} 
-					
-					########## P A R A M S #####################@@@@#########@@@@#########@@@@#########@@@@#########@@@@#########@@@@#########@@@@
-					$stopper = 0;
-					my $deletioner = 0;
-					#if($tempfields[$startcord] != $fields[$startcord]){
-#						print "enter left: tempfields,startcord  : $tempfields[$startcord] != $absolutstart && left_captured: $left_captured != 0 \n" if $printer==1;
-						if ($left_captured != 0){ 
-#							print "at line 370, going: 0 ... $left_captured-1 \n" if $printer == 1;
-							for my $e (0 ... $left_captured-1){
-								if( length($motields[$e]) > 2 && length($microields[$e]) > (3* length($motields[$e]) )){
-#									print "motif on left not included too big to be ignored : $microields[$e] \n" if $printer == 1;  
-									$deletioner++; last;
-								}
-								if( length($motields[$e]) == 2 && length($microields[$e]) > (3* length($motields[$e]) )){
-#									print "motif on left not included too big to be ignored : $microields[$e] \n" if $printer == 1;  
-									$deletioner++; last;
-								}
-								if( length($motields[$e]) == 1 && length($microields[$e]) > (4* length($motields[$e]) )){
-#									print "motif on left not included too big to be ignored : $microields[$e] \n" if $printer == 1; 
-									$deletioner++; last;
-								}
-							}				
-						}
-					#}
-#					print "after left search, deletioner = $deletioner\n" if $printer == 1;
-					if ($deletioner >= 1) {  
-#						print "deletioner = $deletioner\n" if $printer == 1;
-						next; 
-					} 
-					
-					$deletioner = 0;
-	
-					#if($tempfields[$endcord] != $fields[$endcord]){				
-#						print "if tempfields endcord: $tempfields[$endcord] != absolutend: $absolutend\n and $right_captured != $#microields\n" if $printer==1;
-						if ($right_captured != $#microields){ 
-#							print "at line 394, going: $right_captured+1 ... $#microields \n" if $printer == 1;
-							for my $e ($right_captured+1 ... $#microields){
-								if( length($motields[$e]) > 2 &&  length($microields[$e]) > (3* length($motields[$e])) ){
-#									print "motif on right not included too big to be ignored : $microields[$e] \n" if $printer == 1; 
-									$deletioner++; last;
-								}
-								if( length($motields[$e]) == 2 && length($microields[$e]) > (3* length($motields[$e]) )){
-#									print "motif on right not included too big to be ignored : $microields[$e] \n" if $printer == 1;
-									$deletioner++; last;
-								}
-								if( length($motields[$e]) == 1 && length($microields[$e]) > (4* length($motields[$e]) )){
-#									print "motif on right not included too big to be ignored : $microields[$e] \n" if $printer == 1;
-									$deletioner++; last;
-								}
-							}				
-						}
-					#}
-#					print "deletioner = $deletioner\n" if $printer == 1;
-					if ($deletioner >= 1) {  
-						next; 
-					} 
-					my $leftMotifs_notCaptured = ();
-					my $rightMotifs_notCaptured = ();
-									
-					if ($tempfields[$startcord] != $fields[$startcord] ){
-						#print "in left params: (length($leftmotif) == 1 && $tempfields[$startcord] != $fields[$startcord]) ... and .... $left_trashed > (1.5* length($leftmotif]) && ($tempfields[$startcord] != $fields[$startcord])\n";
-						if (length($leftmotif) == 1 && $left_trashed > 3){
-#							print "invaded left motif is long mononucleotide" if $printer == 1;
-							 next;
-	
-						}
-						elsif ((length($leftmotif) != 1 && $left_trashed > ( thrashallow($leftmotif)) && ($tempfields[$startcord] != $fields[$startcord]) ) ){
-#							print "invaded left motif too long" if $printer == 1; 						
-							 next; 
-						}
-					}
-					if ($tempfields[$endcord] != $fields[$endcord] ){
-						#print "in right params: after $tempfields[$endcord] != $fields[$endcord]  .....   (length($rightmotif)==1 && $tempfields[$endcord] != $fields[$endcord]) ... and ... $right_trashed > (1.5* length($rightmotif))\n";
-						if (length($rightmotif)==1 && $right_trashed > 3){	# presumably "> 3", mirroring the left-side mononucleotide check
-#							print "invaded right motif is long mononucleotide" if $printer == 1; 
-							 next; 
-	
-						}
-						elsif (length($rightmotif) !=1 && ($right_trashed > ( thrashallow($rightmotif))  && $tempfields[$endcord] != $fields[$endcord])){
-#							print "invaded right motif too long" if $printer == 1;
-							 next; 
-	
-						}
-					}
-					push @outputs, $templine;
-				}
-				if (scalar(@outputs) == 0){ print COMP $line; next;}
-	#			print "outputs are:", join("\n",@outputs),"\n";
-				if (scalar(@outputs) == 1){ 
-					my @oields = split(/\t/,$outputs[0]);
-					my $start = $oields[$startcord]+$mields[$startcord]-1;
-					my $end = $start+($oields[$endcord]-$oields[$startcord]);
-					$oields[$startcord] = $start; $oields[$endcord] = $end;
-					print INT join("\t",@oields), "\n"; 
-				#	print CH $line,;
-				}
-				if (scalar(@outputs) > 1){ 
-					my $motif_min = 10;
-					my $chosen_one = $outputs[0];
-					foreach my $micro (@outputs){
-						my @oields = split(/\t/,$micro);
-						my $tempmotif = $oields[$motifcord];
-						$tempmotif =~ s/^\[|\]$//gs;
-						my @omots = split(/\]\[/, $tempmotif);
-			#			print "motif_min  = $motif_min, current motif  = $tempmotif\n";
-						my $start = $oields[$startcord]+$mields[$startcord]-1;
-						my $end = $start+($oields[$endcord]-$oields[$startcord]);
-						$oields[$startcord] = $start; $oields[$endcord] = $end;
-						if(length($omots[0]) < $motif_min) {
-							$chosen_one = join("\t",@oields); 
-							$motif_min = length($omots[0]);
-						}
-					}
-					print INT $chosen_one, "\n";
-				#	print "chosen one is ".$chosen_one, "\n";
-				#	print CH $line;
-				
-				
-				}
-				
-			}
-			
-		}	#if (exists $micros{$key}){
-		else{
-		}
-	}
-	close INT;
-	close COMP;
-}
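-# left_extender (summary inferred from the code below): walk leftward from a
-# microsatellite, trapping further copies of the first motif in any rotation
-# (e.g. ACG matches as ACG/CGA/GAC) plus up to one motif-length of interrupting
-# bases, then fold the trapped stretch into the [stretch]interval[stretch]
-# notation and update the start coordinate and interruption bookkeeping.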
-sub left_extender{
-	#print "left extender\n";
-	my ($line, $seq, $org) = @_;	
-#	print "in left extender... line passed = $line and sequence is $seq\n";
-	chomp $line;
-	my @fields = split(/\t/,$line);
-	my $rstart = $fields[$startcord];
-	my $microsat = $fields[$microsatcord];
-	$microsat =~ s/\[|\]//g;
-	my $rend = $rstart + length($microsat)-1;
-	$microsat =~ s/-//g;
-	my $motif = $fields[$motifcord];
-	my $firstmotif = ();
-
-	if ($motif =~ /^\[/){
-		$motif =~ s/^\[//g;
-		$motif =~ /([a-zA-Z]+)\].*/;
-		$firstmotif = $1;
-	}
-	else {$firstmotif = $motif;}
-	
-	#print "hacked microsat = $microsat, motif = $motif, firstmotif = $firstmotif\n";
-	my $leftphase = substr($microsat, 0,length($firstmotif));
-	my $phaser = $leftphase.$leftphase;
-	my @phase = split(/\s*/,$leftphase);
-	my @phases;
-	my @copy_phases = @phases;
-	my $crawler=0;
-	for (0 ... (length($leftphase)-1)){
-		push(@phases, substr($phaser, $crawler, length($leftphase)));
-		$crawler++;
-	}
-
-	my $start = $rstart;
-	my $end = $rend;
-	
-	my $leftseq = substr($seq, 0, $start);
-#	print "left phases are @phases , start = $start left sequence = ",substr($leftseq, -10),"\n";	
-	my @extentions = ();
-	my @trappeds = ();
-	my @intervalposs = ();
-	my @trappedposs = ();
-	my @trappedphases = ();
-	my @intervals = ();
-	my $firstmotif_length = length($firstmotif);
-	foreach my $phase (@phases){
-#		print "left phase\t",substr($leftseq, -10),"\t$phase\n";
-#		print "search pattern = (($phase)+([a-zA-Z|-]{0,$firstmotif_length})) \n";
-		if ($leftseq =~ /(($phase)+([a-zA-Z|-]{0,$firstmotif_length}))$/i){
-#			print "in left pattern\n";
-			my $trapped = $1;
-			my $trappedpos = length($leftseq)-length($trapped);
-			my $interval = $3;
-			my $intervalpos = index($trapped, $interval) + 1;
-#			print "left trapped = $trapped, interval = $interval, intervalpos = $intervalpos\n";
-
-			my $extention = substr($trapped, 0, length($trapped)-length($interval));
-			my $leftpeep = substr($seq, 0, ($start-length($trapped)));
-			my @passed_overhangs;
-			
-			for my $i (1 ... length($phase)-1){
-				my $overhang = substr($phase, -length($phase)+$i);
-#				print "current overhang = $overhang, leftpeep = ",substr($leftpeep,-10)," whole sequence = ",substr($seq, ($end - ($end-$start) - 20), (($end-$start)+20)),"\n";
-				#TEMPORARY... BETTER METHOD NEEDED
-				$leftpeep =~ s/-//g;
-				if ($leftpeep =~ /$overhang$/i){
-					push(@passed_overhangs,$overhang);
-#					print "l overhang\n";
-				}
-			}
-			
-			if(scalar(@passed_overhangs)>0){
-				my $overhang = $passed_overhangs[longest_array_element(@passed_overhangs)];
-				$extention = $overhang.$extention;
-				$trapped = $overhang.$trapped;
-				#print "trapped extended to $trapped \n";
-				$trappedpos = length($leftseq)-length($trapped);
-			}
-			
-			push(@extentions,$extention);
-#			print "extentions = @extentions \n";
-
-			push(@trappeds,$trapped );
-			push(@intervalposs,length($extention)+1);
-			push(@trappedposs, $trappedpos);
-#			print "trappeds = @trappeds\n";
-			push(@trappedphases, substr($extention,0,length($phase)));
-			push(@intervals, $interval);
-		}
-	}
-	if (scalar(@trappeds) == 0) {return $line;}
-	
-	my $nikaal = shortest_array_element(@intervals);
-	
-	if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
-	$fields[$motifcord] = "[".$trappedphases[$nikaal]."]".$fields[$motifcord];
-	##print "new fields 9 = $fields[9]\n";
-	$fields[$startcord] = $fields[$startcord]-length($trappeds[$nikaal]);
-
-	if($fields[$microsatcord] !~ /^\[/i){
-		$fields[$microsatcord] = "[".$fields[$microsatcord]."]";
-	}
-	
-	$fields[$microsatcord] = "[".$extentions[$nikaal]."]".$intervals[$nikaal].$fields[$microsatcord];
-	
-	if (exists ($fields[$motifcord+1])){
-		$fields[$motifcord+1] = "indel/deletion,".$fields[$motifcord+1];
-	}
-	else{$fields[$motifcord+1] = "indel/deletion";}
-	##print "new fields 14 = $fields[14]\n";
-	
-	if (exists ($fields[$motifcord+2])){		
-		$fields[$motifcord+2] = $intervals[$nikaal].",".$fields[$motifcord+2];
-	}
-	else{$fields[$motifcord+2] =  $intervals[$nikaal];}	
-	my @seventeen=();	
-	if (exists ($fields[$motifcord+3])){			
-		@seventeen = split(/,/,$fields[$motifcord+3]);	
-	#	#print "scalarseventeen =@seventeen<-\n";
-		for (0 ... scalar(@seventeen)-1) {$seventeen[$_] = $seventeen[$_]+length($trappeds[$nikaal]);}
-		$fields[$motifcord+3] = ($intervalposs[$nikaal]).",".join(",",@seventeen);
-		$fields[$motifcord+4] = $fields[$motifcord+4]+1;
-	}
-	
-	else {$fields[$motifcord+3] = $intervalposs[$nikaal]; $fields[$motifcord+4]=1}
-	
-	##print "new fields 16 = $fields[16]\n";
-	##print "new fields 17 = $fields[17]\n";
-	
-	
-	my $returnline =  join("\t",@fields);
-	my $pastline  = $returnline;
-	if ($fields[$microsatcord] =~ /\[/){
-		$returnline = multiSpecies_compoundClarifyer_merge($returnline);
-	}
-	return $returnline;
-}
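-# right_extender: mirror image of left_extender; extends the repeat rightward and
-# appends the trapped stretch and interval to the end of the microsatellite fields.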
-sub right_extender{
-	my ($line, $seq, $org) = @_;	
-	chomp $line;
-	my @fields = split(/\t/,$line);
-	my $rstart = $fields[$startcord];
-	my $microsat = $fields[$microsatcord];
-	$microsat =~ s/\[|\]//g;
-	my $rend = $rstart + length($microsat)-1;
-	$microsat =~ s/-//g;
-	my $motif = $fields[$motifcord];
-	my $temp_lastmotif = ();
-
-	if ($motif =~ /\]$/s){
-		$motif =~ s/\]$//sg;
-		$motif =~ /.*\[([a-zA-Z]+)/;
-		$temp_lastmotif = $1;
-	}
-	else {$temp_lastmotif = $motif;}
-	my $lastmotif = substr($microsat,-length($temp_lastmotif));
-	##print "hacked microsat = $microsat, motif = $motif, lastmotif = $lastmotif\n";
-	my $rightphase = substr($microsat, -length($lastmotif));
-	my $phaser = $rightphase.$rightphase;
-	my @phase = split(/\s*/,$rightphase);
-	my @phases;
-	my @copy_phases = @phases;
-	my $crawler=0;
-	for (0 ... (length($rightphase)-1)){
-		push(@phases, substr($phaser, $crawler, length($rightphase)));
-		$crawler++;
-	}
-
-	my $start = $rstart;
-	my $end = $rend;
-	
-	my $rightseq = substr($seq, $end+1);
-	my @extentions = ();
-	my @trappeds = ();
-	my @intervalposs = ();
-	my @trappedposs = ();
-	my @trappedphases = ();
-	my @intervals = ();
-	my $lastmotif_length = length($lastmotif);
-	foreach my $phase (@phases){
-		if ($rightseq =~ /^(([a-zA-Z|-]{0,$lastmotif_length}?)($phase)+)/i){
-			my $trapped = $1;
-			my $trappedpos = $end+1;
-			my $interval = $2;
-			my $intervalpos = index($trapped, $interval) + 1;
-
-			my $extention = substr($trapped, length($interval));
-			my $rightpeep = substr($seq, ($end+length($trapped))+1);
-			my @passed_overhangs = ();	# an initial "" element would make the emptiness check below always pass
-			
-			#TEMPORARY... BETTER METHOD NEEDED
-			$rightpeep =~ s/-//g;
-
-			for my $i (1 ... length($phase)-1){
-				my $overhang = substr($phase,0, $i);
-#				#print "current extention = $extention, overhang = $overhang, rightpeep = ",substr($rightpeep,0,10),"\n";
-				if ($rightpeep =~ /^$overhang/i){
-					push(@passed_overhangs, $overhang);
-#					#print "r overhang\n";
-				}
-			}
-			if (scalar(@passed_overhangs) > 0){			
-				my $overhang = $passed_overhangs[longest_array_element(@passed_overhangs)];
-				$extention = $extention.$overhang;
-				$trapped = $trapped.$overhang;
-#				#print "trapped extended to $trapped \n";
-			}
-		
-			push(@extentions,$extention);
-			##print "extentions = @extentions \n";
-
-			push(@trappeds,$trapped );
-			push(@intervalposs,$intervalpos);
-			push(@trappedposs, $trappedpos);
-#			#print "trappeds = @trappeds\n";
-			push(@trappedphases, substr($extention,0,length($phase)));
-			push(@intervals, $interval);
-		}
-	}
-	if (scalar(@trappeds) == 0) {return $line;}
-	
-#	my $nikaal = longest_array_element(@trappeds);
-	my $nikaal = shortest_array_element(@intervals);
-	
-#	#print "longest element found = $nikaal \n";
-	
-	if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
-	$fields[$motifcord] = $fields[$motifcord]."[".$trappedphases[$nikaal]."]";
-	##print "new fields 9 = $fields[9]";
-	$fields[$endcord] = $fields[$endcord] + length($trappeds[$nikaal]);
-
-	##print "new fields 11 = $fields[11]\n";
-
-	if($fields[$microsatcord] !~ /^\[/i){
-		$fields[$microsatcord] = "[".$fields[$microsatcord]."]";
-	}
-	
-	$fields[$microsatcord] = $fields[$microsatcord].$intervals[$nikaal]."[".$extentions[$nikaal]."]";
-	##print "new fields 12 = $fields[12]\n";
-	
-	##print "scalar of fields = ",scalar(@fields),"\n";
-	if (exists ($fields[$motifcord+1])){
-#		print " print fields = @fields.. scalar=", scalar(@fields),".. motifcord+1 = $motifcord + 1 \n " if !exists $fields[$motifcord+1];
-#		<STDIN> if !exists $fields[$motifcord+1];
-		$fields[$motifcord+1] = $fields[$motifcord+1].",indel/deletion";
-	}
-	else{$fields[$motifcord+1] = "indel/deletion";}
-	##print "new fields 14 = $fields[14]\n";
-	
-	if (exists ($fields[$motifcord+2])){
-		$fields[$motifcord+2] = $fields[$motifcord+2].",".$intervals[$nikaal];
-	}
-	else{$fields[$motifcord+2] =  $intervals[$nikaal];}	
-	##print "new fields 15 = $fields[15]\n";
-
-	my @seventeen=();
-	if (exists ($fields[$motifcord+3])){
-		##print "at 608 we are doing this:length($microsat)+$intervalposs[$nikaal]\n";
-#		print " print fields = @fields\n " if !exists $fields[$motifcord+3];
-#		<STDIN> if !exists $fields[$motifcord+3];	# debugging pause, kept commented so batch runs cannot block
-		my $currpos = length($microsat)+$intervalposs[$nikaal];
-		$fields[$motifcord+3] = $fields[$motifcord+3].",".$currpos;
-		$fields[$motifcord+4] = $fields[$motifcord+4]+1;
-
-	}
-	
-	else {$fields[$motifcord+3] = length($microsat)+$intervalposs[$nikaal]; $fields[$motifcord+4]=1}
-	
-	##print "new fields 16 = $fields[16]\n";
-	
-	##print "new fields 17 = $fields[17]\n";
-	my $returnline = join("\t",@fields);
-	my $pastline  = $returnline;
-	if ($fields[$microsatcord] =~ /\[/){
-		$returnline = multiSpecies_compoundClarifyer_merge($returnline);
-	}
-	#print "finally right-extended line = ",$returnline,"\n";
-	return $returnline;
-}
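-# longest_array_element: index of the longest string in the argument list, e.g.
-# longest_array_element("AG","AGAG","AGA") returns 1 (illustrative values).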
-sub longest_array_element{
-	my $counter = 0;
-	my($max) = shift(@_);
-	my $maxcounter = 0;
-    foreach my $temp (@_) {
-    	$counter++;
-    	#print "finding largest array: $maxcounter \n" if $prinkter == 1;
-    	if(length($temp) > length($max)){
-        	$max = $temp;
-        	$maxcounter = $counter;
-        }
-    }
-    return($maxcounter);
-}
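-# shortest_array_element: counterpart of the above; returns the index of the
-# shortest string, used to pick the candidate extension with the smallest interval.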
-sub shortest_array_element{
-	my $counter = 0;
-	my($min) = shift(@_);
-	my $mincounter = 0;
-    foreach my $temp (@_) {
-    	$counter++;
-    	#print "finding largest array: $mincounter \n" if $prinkter == 1;
-    	if(length($temp) < length($min)){
-        	$min = $temp;
-        	$mincounter = $counter;
-        }
-    }
-    return($mincounter);
-}
-
-
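-# left_extention_permission_giver: allow further leftward extension only while the
-# leftmost stretch is at least as long as the threshold for its motif size.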
-sub left_extention_permission_giver{
-	my @fields = split(/\t/,$_[0]);
-	my $microsat = $fields[$microsatcord];
-	$microsat =~ s/(^\[)|-//g;
-	my $motif = $fields[$motifcord];
-	my $firstmotif = ();
-	my $firststretch = ();
-	my @stretches=();
-	if ($motif =~ /^\[/){
-		$motif =~ s/^\[//g;
-		$motif =~ /([a-zA-Z]+)\].*/;
-		$firstmotif = $1;
-		@stretches = split(/\]/,$microsat);
-		$firststretch = $stretches[0];
-		##print "firststretch = $firststretch\n";
-	}
-	else {$firstmotif = $motif;$firststretch = $microsat;}
-	
-	if (length($firststretch) < $thresholds[length($firstmotif)]){
-		return "no";
-	}
-	else {return "yes";}
-
-}
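-# right_extention_permission_giver: the same threshold test, applied to the last stretch.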
-sub right_extention_permission_giver{
-	my @fields = split(/\t/,$_[0]);
-	my $microsat = $fields[$microsatcord];
-	$microsat =~ s/-|(\]$)//sg;
-	my $motif = $fields[$motifcord];
-	my $temp_lastmotif = ();
-	my $laststretch = ();
-	my @stretches=();
-
-
-	if ($motif =~ /\]/){
-		$motif =~ s/\]$//gs;
-		$motif =~ /.*\[([a-zA-Z]+)$/;
-		$temp_lastmotif = $1;
-		@stretches = split(/\[/,$microsat);
-		$laststretch = pop(@stretches);
-		##print "last stretch = $laststretch\n";
-	}
-	else {$temp_lastmotif = $motif; $laststretch = $microsat;}
-
-	if (length($laststretch) < $thresholds[length($temp_lastmotif)]){
-		return "no";
-	}
-	else { return "yes";}
-
-
-}
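-# multiSpecies_compoundClarifyer_merge (inferred): when two adjacent stretches of a
-# compound microsatellite share the same motif phase with no intervening bases,
-# fuse them into one stretch and drop the corresponding interruption record.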
-sub multiSpecies_compoundClarifyer_merge{
-	my $line = $_[0];
-	#print "sent for merging: $line \n";
-	my @mields = split(/\t/,$line);
-	my @fields = @mields;
-	my $microsat = $fields[$microsatcord];
-	my $motifline = $fields[$motifcord];
-	my $microsatcopy = $microsat;
-	$microsatcopy =~ s/^\[|\]$//sg;
-	my @microields = split(/\][a-zA-Z|-]*\[/,$microsatcopy);
-	my @inields = split(/\[[a-zA-Z|-]*\]/,$microsat);
-	shift @inields;
-	#print "inields =@inields<\n";
-	$motifline =~ s/^\[|\]$//sg;
-	my @motields = split(/\]\[/,$motifline);
-	my @firstmotifs = ();
-	my @lastmotifs = ();
-	for my $i  (0 ... $#microields){
-		$firstmotifs[$i] =  substr($microields[$i],0,length($motields[$i]));
-		$lastmotifs[$i] = substr($microields[$i],-length($motields[$i]));
-	}
-	#print "firstmotif = @firstmotifs... lastmotif = @lastmotifs\n";
-	my @mergelist = ();
-	my @inter_poses = split(/,/,$fields[$interr_poscord]);
-	my $no_of_interruptions = $fields[$no_of_interruptionscord];
-	my @interruptions = split(/,/,$fields[$interrcord]);
-	my @interrtypes = split(/,/,$fields[$interrtypecord]);
-	my $stopper = 0;
-	for my $i (0 ... $#motields-1){
-		#print "studying connection of $motields[$i] and $motields[$i+1], i = $i in $microsat\n";
-		if (($lastmotifs[$i] eq $firstmotifs[$i+1]) && !exists $inields[$i]){
-			$stopper = 1;
-			push(@mergelist, ($i)."_".($i+1));
-		}
-	}
-	
-	return $line if scalar(@mergelist) == 0;
-	
-	foreach my $merging (@mergelist){
-		my @sets = split(/_/, $merging);
-		my @tempmicro = ();
-		my @tempmot = ();
-		for my $i (0 ... $sets[0]-1){
-			push(@tempmicro, "[".$microields[$i]."]");
-			push(@tempmicro, $inields[$i]);
-			push(@tempmot, "[".$motields[$i]."]");
-			#print "adding pre-motifs number $i\n";
-		}
-		my $pusher = "[".$microields[$sets[0]].$microields[$sets[1]]."]";
-		push (@tempmicro, $pusher);
-		push(@tempmot, "[".$motields[$sets[0]]."]");
-		my $outcoming = -2;
-		for my $i ($sets[1]+1 ... $#microields-1){
-			push(@tempmicro, "[".$microields[$i]."]");
-			push(@tempmicro, $inields[$i]);	
-			push(@tempmot, "[".$motields[$i]."]");
-			#print "adding post-motifs number $i\n";
-			$outcoming  = $i;
-		}
-		if ($outcoming != -2){
-			#print "outcoming = $outcoming \n";
-			push(@tempmicro, "[".$microields[$outcoming+1 ]."]");
-			push(@tempmot,"[". $motields[$outcoming+1]."]");
-		}
-		$fields[$microsatcord] = join("",@tempmicro);
-		$fields[$motifcord] = join("",@tempmot);
-		
-		splice(@interrtypes, $sets[0], 1);
-		$fields[$interrtypecord] = join(",",@interrtypes);
-		splice(@interruptions, $sets[0], 1);
-		$fields[$interrcord] = join(",",@interruptions);
-		splice(@inter_poses, $sets[0], 1);
-		$fields[$interr_poscord] = join(",",@inter_poses);
-		$no_of_interruptions = $no_of_interruptions - 1;
-	}	
-
-	if ($no_of_interruptions == 0){
-		$fields[$microsatcord] =~ s/^\[|\]$//sg;
-		$fields[$motifcord] =~ s/^\[|\]$//sg;
-		$line = join("\t", @fields[0 ... $motifcord]);		
-	}
-	else{
-		$line = join("\t", @fields);
-	}
-	return $line;
-}
-
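-# thrashallow: maximum bases of a neighboring repeat that may be swallowed during
-# extension, scaled to motif length (twice the motif size for di- to tetranucleotides).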
-sub thrashallow{
-	my $motif = $_[0];
-	return 4 if length($motif) == 2;
-	return 6 if length($motif) == 3;
-	return 8 if length($motif) == 4;
-	return 0;	# conservative default for motif lengths outside 2-4
-}
-
-#xxxxxxxxxxxxxx multiSpecies_compoundClarifyer xxxxxxxxxxxxxx  multiSpecies_compoundClarifyer xxxxxxxxxxxxxx  multiSpecies_compoundClarifyer xxxxxxxxxxxxxx 
-
-
-#xxxxxxxxxxxxxx multispecies_filtering_compound_microsats xxxxxxxxxxxxxx  multispecies_filtering_compound_microsats xxxxxxxxxxxxxx  multispecies_filtering_compound_microsats xxxxxxxxxxxxxx 
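-# Splits compound-microsatellite lines into a filtered and a residue file; a line
-# survives only if every motif has a same-sized, phase-equivalent stretch meeting
-# the per-motif-size sub-threshold length.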
-sub multispecies_filtering_compound_microsats{
-	my $unfiltered = $_[0];
-	my $filtered = $_[1];
-	my $residue = $_[2];
-	my $no_of_species = $_[5];
-	open(UNF,"<$unfiltered") or die "Cannot open file $unfiltered: $!";
-	open(FIL,">$filtered") or die "Cannot open file $filtered: $!";
-	open(RES,">$residue") or die "Cannot open file $residue: $!";
-	
-	$infocord = 2 + (4*$no_of_species) - 1;
-	$startcord = 2 + (4*$no_of_species) + 2 - 1;
-	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
-	$endcord = 2 + (4*$no_of_species) + 4 - 1;
-	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
-	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
-	
-	my @sub_thresholds = ("0");
-	push(@sub_thresholds, split(/_/,$_[3]));
-	my @thresholds = ("0");
-	push(@thresholds, split(/_/,$_[4]));
-	
-	while (my $line = <UNF>) {
-		if ($line !~ /compound/){
-			print FIL $line,"\n"; next;
-		}
-		chomp $line;
-		my @fields = split(/\t/,$line);
-		my $motifline = $fields[$motifcord];
-		$motifline =~ s/^\[|\]$//g;
-		my @motifs = split(/\]\[/,$motifline);
-		my $microsat = $fields[$microsatcord];
-		$microsat =~ s/^\[|\]$|-//g;
-		my @microsats = split(/\][a-zA-Z|-]*\[/,$microsat);		
-		
-		my $stopper = 0;
-		for my $i (0 ... $#motifs){
-			my @common = ();
-			my $probe = $motifs[$i].$motifs[$i];
-			my $motif_size = length($motifs[$i]);
-		
-			for my $j (0 ... $#motifs){	
-				next if length($motifs[$i]) != length($motifs[$j]);
-				push(@common, length($microsats[$j])) if $probe =~ /$motifs[$j]/i;
-			}
-			
-			if (largest_microsat(@common) < $sub_thresholds[$motif_size]) {$stopper = 1; last;}
-			else {next;}
-		}
-		
-		if ($stopper  == 1){
-			print RES $line,"\n";
-		}
-		else { print FIL $line,"\n"; }
-	}
-	close FIL;
-	close RES;
-}
-
-#xxxxxxxxxxxxxx multispecies_filtering_compound_microsats xxxxxxxxxxxxxx  multispecies_filtering_compound_microsats xxxxxxxxxxxxxx  multispecies_filtering_compound_microsats xxxxxxxxxxxxxx 
-
-
-#xxxxxxxxxxxxxx chromosome_unrand_breaker xxxxxxxxxxxxxx  chromosome_unrand_breaker xxxxxxxxxxxxxx  chromosome_unrand_breaker xxxxxxxxxxxxxx 
-
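-# Splits a per-chromosome file into subsets of $increment lines (consecutive
-# subsets share the boundary line) and writes the subset filenames to a list file.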
-sub chromosome_unrand_breaker{
-#	print "IN chromosome_unrand_breaker: @_\n ";
-	my $input1 = $_[0];  ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
-	my $dir = $_[1]; ###### directory where subsets are put
-	my $output2 = $_[2]; ###### list of subset files
-	my $increment = $_[3];
-	my $info = $_[4];
-	my $chr = $_[5];
-	open(SEQ,"<$input1") or die "Cannot open file $input1 $!";
-	
-	open(OUT,">$output2") or die "Cannot open file $output2 $!";
-	
-	#---------------------------------------------------------------------------------------------------
-	# NOW READING THE SEQUENCE FILE
-	
-	my $seed = 0;
-	my $subset = $dir.$info."_".$chr."_".$seed."_".($seed+$increment);
-	print OUT $subset,"\n";
-	open(SUB,">$subset");
-	
-	while(my $sine = <SEQ>){
-		$seed++;
-		print SUB $sine;
-		
-		if ($seed%$increment == 0 ){
-			close SUB;
-			$subset = $dir.$info."_".$chr."_".$seed."_".($seed+$increment);
-			open(SUB,">$subset");
-			print SUB $sine;
-			print OUT $subset,"\n";
-	#		print $subset,"\n";
-		}
-	}
-	close OUT;
-	close SUB;
-}
-#xxxxxxxxxxxxxx chromosome_unrand_breaker xxxxxxxxxxxxxx  chromosome_unrand_breaker xxxxxxxxxxxxxx  chromosome_unrand_breaker xxxxxxxxxxxxxx 
-
-
-#xxxxxxxxxxxxxx multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx  multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx  multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx 
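-# Inferred flow: hash microsatellite lines by sequence key, then for each matching
-# sequence repeatedly apply the left/right extenders below until the permission
-# givers refuse; bracket-notated (interrupted) lines go to one output file, plain
-# uninterrupted lines to the other.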
-sub multiSpecies_interruptedMicrosatHunter{
-#	print "IN multiSpecies_interruptedMicrosatHunter: @_\n";
-	my $input1 = $_[0];  ###### the *_sput_op4_ii file
-	my $input2 = $_[1];  ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
-	my $output1 = $_[2]; ###### interrupted microsatellite file, in new .interrupted format
-	my $output2 = $_[3]; ###### uninterrupted microsatellite file
-	my $org = $_[4];
-	my $no_of_species = $_[5];
-	
-	my @thresholds = "0";
-	push(@thresholds, split(/_/,$_[6]));
-	
-#	print "thresholds = @thresholds \n";
-	$infocord = 2 + (4*$no_of_species) - 1;
-	$typecord = 2 + (4*$no_of_species) + 1 - 1;
-	$startcord = 2 + (4*$no_of_species) + 2 - 1;
-	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
-	$endcord = 2 + (4*$no_of_species) + 4 - 1;
-	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
-	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
-	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
-	
-	$interr_poscord = $motifcord + 3;
-	$no_of_interruptionscord = $motifcord + 4;
-	$interrcord = $motifcord + 2;
-	$interrtypecord = $motifcord + 1;
-	
-	
-	$prinkter = 0;
-#	print "prinkter = $prinkter\n";
-	
-	open(IN,"<$input1") or die "Cannot open file $input1 $!";
-	open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
-	
-	open(INT,">$output1") or die "Cannot open file $output1 $!";
-	open(UNINT,">$output2") or die "Cannot open file $output2 $!";
-	
-#	print "opened files !!\n";
-	my $linecounter = 0;
-	my $microcounter = 0;
-	
-	my %micros = ();
-	while (my $line = <IN>){
-	#	print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
-		$linecounter++;
-		if ($line =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s+(chr[0-9a-zA-Z]+)\s([0-9]+)\s+([0-9]+)\s/ ) {
-			my $key = join("\t",$1, $2, $3, $4, $5);
-		#	print $key, "#-#-#-#-#-#-#-#\n" if $prinkter == 1;
-			push (@{$micros{$key}},$line);	
-			$microcounter++;
-		}
-		else {#print $line if $prinkter == 1;
-		}
-	}
-#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
-	close IN;
-	my @deletedlines = ();
-#	print "done hash \n";
-	$linecounter = 0;
-	#---------------------------------------------------------------------------------------------------
-	# NOW READING THE SEQUENCE FILE
-	while(my $sine = <SEQ>){
-		#print $linecounter,"\n" if $linecounter % 1000 == 0;
-		my %microstart=();
-		my %microend=();
-		my @sields = split(/\t/,$sine);
-		my $key = ();
-		if ($sine =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
-			$key = join("\t",$1, $2, $3, $4, $5);
-	#		print $key, "<-<-<-<-<-<-<-<\n";		
-		}
-	
-	#	$prinkter = 1 if $sine =~ /^>H\t499\t/;
-	
-		if (exists $micros{$key}){
-			my @microstring = @{$micros{$key}};
-			delete $micros{$key};
-			my @filteredmicrostring;
-#			print "sequence = $sields[$sequencepos]" if $prinkter == 1;
-			foreach my $line (@microstring){
-				$linecounter++;
-				my $copy_line = $line;
-				my @fields = split(/\t/,$line);
-				my $start = $fields[$startcord];
-				my $end = $fields[$endcord];
-				
-#				print $line if $prinkter == 1;
-				#LOOKING FOR LEFTWARD EXTENSION OF MICROSATELLITE 
-				my $newline;
-				while(1){
-				#	print "\n before left sequence = $sields[$sequencepos]\n" if $prinkter == 1;
-					if (multiSpecies_interruptedMicrosatHunter_left_extention_permission_giver($line) eq "no") {last;}
-	
-					$newline = multiSpecies_interruptedMicrosatHunter_left_extender($line, $sields[$sequencepos],$org);
-					if ($newline eq $line){$line = $newline; last;}
-					else {$line = $newline;}
-					
-					if (multiSpecies_interruptedMicrosatHunter_left_extention_permission_giver($line) eq "no") {last;}
-#					print "returned line from left extender= $line \n" if $prinkter == 1;
-				}
-				while(1){
-				#	print "sequence = $sields[$sequencepos]\n" if $prinkter == 1;
-					if (multiSpecies_interruptedMicrosatHunter_right_extention_permission_giver($line) eq "no") {last;}
-					
-					$newline = multiSpecies_interruptedMicrosatHunter_right_extender($line, $sields[$sequencepos],$org);
-					if ($newline eq $line){$line = $newline; last;}
-					else {$line = $newline;}
-					
-					if (multiSpecies_interruptedMicrosatHunter_right_extention_permission_giver($line) eq "no") {last;}
-#					print "returned line from right extender= $line \n" if $prinkter == 1;
-				}
-#				print "\n>>>>>>>>>>>>>>>>\n In the end, the line is: \n$line\n<<<<<<<<<<<<<<<<\n" if $prinkter == 1;
-	
-				my @tempfields = split(/\t/,$line);
-				if ($tempfields[$microsatcord] =~ /\[/){
-					print INT $line,"\n";
-				}
-				else{
-					print UNINT $line,"\n";
-				}
-				
-				if ($line =~ /NULL/){ next; }
-				push(@filteredmicrostring, $line);
-				push (@{$microstart{$start}},$line);
-				push (@{$microend{$end}},$line);
-			}
-			
-			my $firstflag = 'down';
-	
-		}	#if (exists $micros{$key}){
-	}
-	close INT;
-	close UNINT;
-#	print "final number of lines = $linecounter\n";
-}
-
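-# The extender below repeats the logic of left_extender above, specialized for the
-# interrupted-microsatellite hunter (same trapping regex and field bookkeeping,
-# with a merge step at the end).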
-sub multiSpecies_interruptedMicrosatHunter_left_extender{
-	my ($line, $seq, $org) = @_;	
-#	print "left extender, like passed = $line\n" if $prinkter == 1;
-#	print "in left extender... line passed = $line and sequence is $seq\n" if $prinkter == 1;
-	chomp $line;
-	my @fields = split(/\t/,$line);
-	my $rstart = $fields[$startcord];
-	my $microsat = $fields[$microsatcord];
-	$microsat =~ s/\[|\]//g;
-	my $rend = $rstart + length($microsat)-1;
-	$microsat =~ s/-//g;
-	my $motif = $fields[$motifcord];
-	my $firstmotif = ();
-
-	if ($motif =~ /^\[/){
-		$motif =~ s/^\[//g;
-		$motif =~ /([a-zA-Z]+)\].*/;
-		$firstmotif = $1;
-	}
-	else {$firstmotif = $motif;}
-	
-#	print "hacked microsat = $microsat, motif = $motif, firstmotif = $firstmotif\n" if $prinkter == 1;
-	my $leftphase = substr($microsat, 0,length($firstmotif));
-	my $phaser = $leftphase.$leftphase;
-	my @phase = split(/\s*/,$leftphase);
-	my @phases;
-	my @copy_phases = @phases;
-	my $crawler=0;
-	for (0 ... (length($leftphase)-1)){
-		push(@phases, substr($phaser, $crawler, length($leftphase)));
-		$crawler++;
-	}
-
-	my $start = $rstart;
-	my $end = $rend;
-	
-	my $leftseq = substr($seq, 0, $start);
-#	print "left phases are @phases , start = $start left sequence = ",substr($leftseq, -10),"\n" if $prinkter == 1;	
-	my @extentions = ();
-	my @trappeds = ();
-	my @intervalposs = ();
-	my @trappedposs = ();
-	my @trappedphases = ();
-	my @intervals = ();
-	my $firstmotif_length = length($firstmotif);
-	foreach my $phase (@phases){
-#		print "left phase\t",substr($leftseq, -10),"\t$phase\n" if $prinkter == 1;
-#		print "search pattern = (($phase)+([a-zA-Z|-]{0,$firstmotif_length})) \n" if $prinkter == 1;
-		if ($leftseq =~ /(($phase)+([a-zA-Z|-]{0,$firstmotif_length}))$/i){
-#			print "in left pattern\n" if $prinkter == 1;
-			my $trapped = $1;
-			my $trappedpos = length($leftseq)-length($trapped);
-			my $interval = $3;
-			my $intervalpos = index($trapped, $interval) + 1;
-#			print "left trapped = $trapped, interval = $interval, intervalpos = $intervalpos\n" if $prinkter == 1;
-
-			my $extention = substr($trapped, 0, length($trapped)-length($interval));
-			my $leftpeep = substr($seq, 0, ($start-length($trapped)));
-			my @passed_overhangs;
-			
-			for my $i (1 ... length($phase)-1){
-				my $overhang = substr($phase, -length($phase)+$i);
-#				print "current overhang = $overhang, leftpeep = ",substr($leftpeep,-10)," whole sequence = ",substr($seq, ($end - ($end-$start) - 20), (($end-$start)+20)),"\n" if $prinkter == 1;
-				#TEMPORARY... BETTER METHOD NEEDED
-				$leftpeep =~ s/-//g;
-				if ($leftpeep =~ /$overhang$/i){
-					push(@passed_overhangs,$overhang);
-#					print "l overhang\n" if $prinkter == 1;
-				}
-			}
-			
-			if(scalar(@passed_overhangs)>0){
-				my $overhang = $passed_overhangs[longest_array_element(@passed_overhangs)];
-				$extention = $overhang.$extention;
-				$trapped = $overhang.$trapped;
-#				print "trapped extended to $trapped \n" if $prinkter == 1;
-				$trappedpos = length($leftseq)-length($trapped);
-			}
-			
-			push(@extentions,$extention);
-#			print "extentions = @extentions \n" if $prinkter == 1;
-
-			push(@trappeds,$trapped );
-			push(@intervalposs,length($extention)+1);
-			push(@trappedposs, $trappedpos);
-#			print "trappeds = @trappeds\n" if $prinkter == 1;
-			push(@trappedphases, substr($extention,0,length($phase)));
-			push(@intervals, $interval);
-		}
-	}
-	if (scalar(@trappeds) == 0) {return $line;}
-	
-############################	my $nikaal = longest_array_element(@trappeds);
-	my $nikaal = shortest_array_element(@intervals);
-	
-#	print "longest element found = $nikaal \n" if $prinkter == 1;
-	
-	if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
-	$fields[$motifcord] = "[".$trappedphases[$nikaal]."]".$fields[$motifcord];
-	#print "new fields 9 = $fields[9]\n" if $prinkter == 1;
-	$fields[$startcord] = $fields[$startcord]-length($trappeds[$nikaal]);
-
-	#print "new fields 9 = $fields[9]\n" if $prinkter == 1;
-
-	if($fields[$microsatcord] !~ /^\[/i){
-		$fields[$microsatcord] = "[".$fields[$microsatcord]."]";
-	}
-	
-	$fields[$microsatcord] = "[".$extentions[$nikaal]."]".$intervals[$nikaal].$fields[$microsatcord];
-	#print "new fields 14 = $fields[12]\n" if $prinkter == 1;
-	
-	#print "scalar of fields = ",scalar(@fields),"\n" if $prinkter == 1;
-
-	
-	if (scalar(@fields) > $motifcord+1){
-		$fields[$motifcord+1] = "indel/deletion,".$fields[$motifcord+1];
-	}
-	else{$fields[$motifcord+1] = "indel/deletion";}
-	#print "new fields 14 = $fields[14]\n" if $prinkter == 1;
-	
-	if (scalar(@fields)>$motifcord+2){		
-		$fields[$motifcord+2] = $intervals[$nikaal].",".$fields[$motifcord+2];
-	}
-	else{$fields[$motifcord+2] =  $intervals[$nikaal];}	
-	#print "new fields 15 = $fields[15]\n" if $prinkter == 1;
-
-	my @seventeen=();
-	
-	if (scalar(@fields)>$motifcord+3){			
-		@seventeen = split(/,/,$fields[$motifcord+3]);	
-	#	print "scalarseventeen =@seventeen<-\n" if $prinkter == 1;
-		for (0 ... scalar(@seventeen)-1) {$seventeen[$_] = $seventeen[$_]+length($trappeds[$nikaal]);}
-		$fields[$motifcord+3] = ($intervalposs[$nikaal]).",".join(",",@seventeen);
-		$fields[$motifcord+4] = $fields[$motifcord+4]+1;
-	}
-	
-	else {$fields[$motifcord+3] = $intervalposs[$nikaal]; $fields[$motifcord+4]=1}
-	
-	#print "new fields 16 = $fields[16]\n" if $prinkter == 1;
-	#print "new fields 17 = $fields[17]\n" if $prinkter == 1;
-	
-#	return join("\t",@fields);	
-	my $returnline = join("\t",@fields);
-	my $pastline  = $returnline;
-	if ($fields[$microsatcord] =~ /\[/){
-		$returnline = multiSpecies_interruptedMicrosatHunter_merge($returnline);
-	}
-#	print "finally left-extended line = ",$returnline,"\n" if $prinkter == 1;
-	return $returnline;
-}
-
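-# Rightward counterpart of the extender above.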
-sub multiSpecies_interruptedMicrosatHunter_right_extender{
-#	print "right extender\n" if $prinkter == 1;
-	my ($line, $seq, $org) = @_;	
-#	print "in right extender... line passed = $line\n" if $prinkter == 1;
-#	print "line = $line, sequence = ",$seq, "\n" if $prinkter == 1;
-	chomp $line;
-	my @fields = split(/\t/,$line);
-	my $rstart = $fields[$startcord];
-	my $microsat = $fields[$microsatcord];
-	$microsat =~ s/\[|\]//g;
-	my $rend = $rstart + length($microsat)-1;
-	$microsat =~ s/-//g;
-	my $motif = $fields[$motifcord];
-	my $temp_lastmotif = ();
-
-	if ($motif =~ /\]$/){
-		$motif =~ s/\]$//g;
-		$motif =~ /.*\[([a-zA-Z]+)/;
-		$temp_lastmotif = $1;
-	}
-	else {$temp_lastmotif = $motif;}
-	my $lastmotif = substr($microsat,-length($temp_lastmotif));
-#	print "hacked microsat = $microsat, motif = $motif, lastmotif = $lastmotif\n" if $prinkter == 1;
-	my $rightphase = substr($microsat, -length($lastmotif));
-	my $phaser = $rightphase.$rightphase;
-	my @phase = split(/\s*/,$rightphase);
-	my @phases;
-	my @copy_phases = @phases;
-	my $crawler=0;
-	for (0 ... (length($rightphase)-1)){
-		push(@phases, substr($phaser, $crawler, length($rightphase)));
-		$crawler++;
-	}
-
-	my $start = $rstart;
-	my $end = $rend;
-	
-	my $rightseq = substr($seq, $end+1);
-#	print "length of sequence  = " ,length($seq), "the coordinate to start from = ", $end+1, "\n" if $prinkter == 1;
-#	print "right phases are @phases , end = $end right sequence = ",substr($rightseq,0,10),"\n" if $prinkter == 1;	
-	my @extentions = ();
-	my @trappeds = ();
-	my @intervalposs = ();
-	my @trappedposs = ();
-	my @trappedphases = ();
-	my @intervals = ();
-	my $lastmotif_length = length($lastmotif);
-	foreach my $phase (@phases){
-#		print "right phase\t$phase\t",substr($rightseq,0,10),"\n" if $prinkter == 1;
-#		print "search pattern = (([a-zA-Z|-]{0,$lastmotif_length})($phase)+) \n" if $prinkter == 1;
-		if ($rightseq =~ /^(([a-zA-Z|-]{0,$lastmotif_length}?)($phase)+)/i){
-#			print "in right pattern\n" if $prinkter == 1;
-			my $trapped = $1;
-			my $trappedpos = $end+1;
-			my $interval = $2;
-			my $intervalpos = index($trapped, $interval) + 1;
-#			print "trapped = $trapped, interval = $interval\n" if $prinkter == 1;
-
-			my $extention = substr($trapped, length($interval));
-			my $rightpeep = substr($seq, ($end+length($trapped))+1);
-			my @passed_overhangs = ();	# an initial "" element would make the emptiness check below always pass
-			
-			#TEMPORARY... BETTER METHOD NEEDED
-			$rightpeep =~ s/-//g;
-
-			for my $i (1 ... length($phase)-1){
-				my $overhang = substr($phase,0, $i);
-#				print "current extention = $extention, overhang = $overhang, rightpeep = ",substr($rightpeep,0,10),"\n" if $prinkter == 1;
-				if ($rightpeep =~ /^$overhang/i){
-					push(@passed_overhangs, $overhang);
-#					print "r overhang\n" if $prinkter == 1;
-				}
-			}
-			if (scalar(@passed_overhangs) > 0){			
-				my $overhang = $passed_overhangs[longest_array_element(@passed_overhangs)];
-				$extention = $extention.$overhang;
-				$trapped = $trapped.$overhang;
-#				print "trapped extended to $trapped \n" if $prinkter == 1;
-			}
-		
-			push(@extentions,$extention);
-			#print "extentions = @extentions \n" if $prinkter == 1;
-
-			push(@trappeds,$trapped );
-			push(@intervalposs,$intervalpos);
-			push(@trappedposs, $trappedpos);
-#			print "trappeds = @trappeds\n" if $prinkter == 1;
-			push(@trappedphases, substr($extention,0,length($phase)));
-			push(@intervals, $interval);
-		}
-	}
-	if (scalar(@trappeds) == 0) {return $line;}
-	
-###################################	my $nikaal = longest_array_element(@trappeds);
-	my $nikaal = shortest_array_element(@intervals);
-	
-#	print "longest element found = $nikaal \n" if $prinkter == 1;
-	
-	if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
-	$fields[$motifcord] = $fields[$motifcord]."[".$trappedphases[$nikaal]."]";
-	$fields[$endcord] = $fields[$endcord] + length($trappeds[$nikaal]);
-
-
-	if($fields[$microsatcord] !~ /^\[/i){
-		$fields[$microsatcord] = "[".$fields[$microsatcord]."]";
-	}
-	
-	$fields[$microsatcord] = $fields[$microsatcord].$intervals[$nikaal]."[".$extentions[$nikaal]."]";
-	
-	
-	if (scalar(@fields) > $motifcord+1){
-		$fields[$motifcord+1] = $fields[$motifcord+1].",indel/deletion";
-	}
-	else{$fields[$motifcord+1] = "indel/deletion";}
-	
-	if (scalar(@fields)>$motifcord+2){		
-		$fields[$motifcord+2] = $fields[$motifcord+2].",".$intervals[$nikaal];
-	}
-	else{$fields[$motifcord+2] =  $intervals[$nikaal];}	
-
-	my @seventeen=();
-	if (scalar(@fields)>$motifcord+3){
-		#print "at 608 we are doing this:length($microsat)+$intervalposs[$nikaal]\n" if $prinkter == 1;
-		my $currpos = length($microsat)+$intervalposs[$nikaal];
-		$fields[$motifcord+3] = $fields[$motifcord+3].",".$currpos;
-		$fields[$motifcord+4] = $fields[$motifcord+4]+1;
-
-	}
-	
-	else {$fields[$motifcord+3] = length($microsat)+$intervalposs[$nikaal]; $fields[$motifcord+4]=1}
-	
-#	print "finally right-extended line = ",join("\t",@fields),"\n" if $prinkter == 1;
-#	return join("\t",@fields);
-
-	my $returnline = join("\t",@fields);
-	my $pastline  = $returnline;
-	if ($fields[$microsatcord] =~ /\[/){
-		$returnline = multiSpecies_interruptedMicrosatHunter_merge($returnline);
-	}
-#	print "finally right-extended line = ",$returnline,"\n" if $prinkter == 1;
-	return $returnline;
-
-}
-
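-# Permission givers for the hunter: the same first/last-stretch threshold test as
-# the earlier pair.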
-sub multiSpecies_interruptedMicrosatHunter_left_extention_permission_giver{
-	my @fields = split(/\t/,$_[0]);
-	my $microsat = $fields[$microsatcord];
-	$microsat =~ s/(^\[)|-//sg;
-	my $motif = $fields[$motifcord];
-	chomp $motif;
-#	print $motif, "\n" if $motif !~ /^\[/;
-	my $firstmotif = ();
-	my $firststretch = ();
-	my @stretches=();
-	
-#	print "motif = $motif, microsat = $microsat\n" if $prinkter == 1;
-	if ($motif =~ /^\[/){
-		$motif =~ s/^\[//sg;
-		$motif =~ /([a-zA-Z]+)\].*/;
-		$firstmotif = $1;
-		@stretches = split(/\]/,$microsat);
-		$firststretch = $stretches[0];
-		#print "firststretch = $firststretch\n" if $prinkter == 1;
-	}
-	else {$firstmotif = $motif;$firststretch = $microsat;}
-#	print "if length:firststretch - length($firststretch) < threshes length :firstmotif ($firstmotif) - $thresholds[length($firstmotif)]\n" if $prinkter == 1; 
-	if (length($firststretch) < $thresholds[length($firstmotif)]){
-		return "no";
-	}
-	else {return "yes";}
-
-}
-sub multiSpecies_interruptedMicrosatHunter_right_extention_permission_giver{
-	my @fields = split(/\t/,$_[0]);
-	my $microsat = $fields[$microsatcord];
-	$microsat =~ s/-|(\]$)//sg;
-	my $motif = $fields[$motifcord];
-	chomp $motif;
-	my $temp_lastmotif = ();
-	my $laststretch = ();
-	my @stretches=();
-
-
-	if ($motif =~ /\]/){
-		$motif =~ s/\]$//sg;
-		$motif =~ /.*\[([a-zA-Z]+)$/;
-		$temp_lastmotif = $1;
-		@stretches = split(/\[/,$microsat);
-		$laststretch = pop(@stretches);
-		#print "last stretch = $laststretch\n" if $prinkter == 1;
-	}
-	else {$temp_lastmotif = $motif; $laststretch = $microsat;}
-
-	if (length($laststretch) < $thresholds[length($temp_lastmotif)]){
-		return "no";
-	}
-	else { return "yes";}
-
-
-}
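-# checking_substitutions (inferred): scans the flanks for motif-sized windows that
-# match substitution probes (regexes carrying one [a-zA-Z] wildcard), accumulating
-# @startadds/@endadds; note the collected results are never returned to the caller.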
-sub checking_substitutions{
-	
-	my ($line, $seq, $startprobes, $endprobes) = @_;
-	#print "sequence = $seq \n" if $prinkter == 1;
-	#print "COMMAND  = \n $line, \n $seq, \n $startprobes \n, $endprobes\n";
-		#		<STDIN>;
-	my @seqarray = split(/\s*/,$seq);
-	my @startsubst_probes = split(/\|/,$startprobes); 
-	my @endsubst_probes = split(/\|/,$endprobes);
-	chomp $line;
-	my @fields = split(/\t/,$line);
-	my $start = $fields[11] - $fields[10];
-	my $end = $fields[13] - $fields[10];
-	my $motif = $fields[9]; #IN FUTURE, USE THIS AS A PROBE, LIKE MOTIF = $FIELDS[9].$FIELDS[9]
-	$motif =~ s/\[|\]//g;
-	my $microsat = $fields[14];
-	$microsat =~ s/\[|\]//g;
-	#------------------------------------------------------------------------
-	# GETTING START AND END PHASES
-	my $startphase = substr($microsat,0, length($motif));
-	my $endphase = substr($microsat,-length($motif), length($motif));
-	#print "start and end phases are  - $startphase and $endphase\n";
-	my $startflag = 'down';
-	my $endflag = 'down';
-	my $substitution_distance = length($motif);
-	my $prestart = $start - $substitution_distance;
-	my $postend = $end + $substitution_distance;
-	my @endadds = ();
-	my @startadds = ();
-	if (($prestart < 0) || ($postend > scalar(@seqarray))) {
-		return;	# "last" here would be a runtime error: there is no enclosing loop
-	}
-	#------------------------------------------------------------------------#------------------------------------------------------------------------
-	# CHECKING FOR SUBSTITUTION PROBES NOW			
-		
-	if ($fields[8] ne "mononucleotide"){				
-		while ($startflag eq "down"){		
-			my $search = join("",@seqarray[$prestart...($start-1)]);
-			#print "search is from $prestart...($start-1) = $search\n";
-			foreach my $probe (@startsubst_probes){
-				#print "\t\tprobe = $probe\n";				
-				if ($search =~ /^$probe/){
-					#print "\tfound addition to the left - $search \n";
-					my $copyprobe = $probe;
-					my $type;
-					my $subspos = 0;
-					my $interruption = "";
-					if ($search eq $startphase) { $type = "NONE";}
-					else{
-						$copyprobe =~ s/\[a-zA-Z\]/^/g;
-						$subspos = index($copyprobe,"^") + 1;
-						$type = "substitution";
-						$interruption  = substr($search, $subspos,1);
-					}
-					my $addinfo = join("\t",$prestart, $start, $search, $type, $interruption, $subspos);
-					#print "adding information: $addinfo \n";
-					push(@startadds, $addinfo);
-					$prestart = $prestart - $substitution_distance;
-					$start = $start-$substitution_distance;
-					$startflag = 'down';
-					
-					last;
-				}
-				else{
-					$startflag = 'up';
-				}
-			}
-		}
-		#<STDIN>;		
-		while ($endflag eq "down"){									
-			my $search = join("",@seqarray[($end+1)...$postend]);
-			#print "search is from ($end+1)...$postend] = $search\n";
-
-			foreach my $probe (@endsubst_probes){
-				#print "\t\tprobe = $probe\n";
-				if ($search =~ /$probe$/){
-					my $copyprobe = $probe;
-					my $type;
-					my $subspos = 0;
-					my $interruption = "";
-					if ($search eq $endphase) { $type = "NONE";}
-					else{
-						$copyprobe =~ s/\[a-zA-Z\]/^/g;
-						$subspos = index($copyprobe,"^") + 1;
-						$type = "substitution";
-						$interruption  = substr($search, $subspos,1);
-					}
-					my $addinfo = join("\t",$end, $postend, $search, $type, $interruption, $subspos);
-					#print "adding information: $addinfo \n";
-					push(@endadds, $addinfo);
-					$postend = $postend + $substitution_distance;
-					$end = $end+$substitution_distance;
-					push(@endadds, $search);
-					$endflag = 'down';
-					last;
-				}
-				else{
-					$endflag = 'up';
-				}
-			}
-		}
-		#print "startadds = @startadds, endadds  = @endadds \n";
-	
-	}
-}
-sub microsat_packer{
-	# stub: arguments are captured but nothing is packed yet
-	my $microsat = $_[0];
-	my $addition = $_[1];
-}
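-# multiSpecies_interruptedMicrosatHunter_merge: the same stretch-fusing idea as
-# multiSpecies_compoundClarifyer_merge, but case-insensitive (via allCaps) and with
-# extra guards for missing interruption fields.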
-sub multiSpecies_interruptedMicrosatHunter_merge{
-		$prinkter = 0;
-#	print "~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~\n";
-	my $line = $_[0];
-#	print "sent for merging: $line \n" if $prinkter ==1;
-	my @mields = split(/\t/,$line);
-	my @fields = @mields;
-	my $microsat = allCaps($fields[$microsatcord]);
-	my $motifline = allCaps($fields[$motifcord]);
-	my $microsatcopy = $microsat;
-#	print "microsat = $microsat|\n" if $prinkter ==1;
-	$microsatcopy =~ s/^\[|\]$//sg;
-	chomp $microsatcopy;
-	my @microields = split(/\][a-zA-Z|-]*\[/,$microsatcopy);
-	my @inields = split(/\[[a-zA-Z|-]*\]/,$microsat);
-	shift @inields;
-#	print "inields =",join("|",@inields)," microields = ",join("|",@microields)," and count of microields = ", $#microields,"\n" if $prinkter ==1;
-	$motifline =~ s/^\[|\]$//sg;
-	my @motields = split(/\]\[/,$motifline);
-	my @firstmotifs = ();
-	my @lastmotifs = ();
-	for my $i  (0 ... $#microields){
-		$firstmotifs[$i] =  substr($microields[$i],0,length($motields[$i]));
-		$lastmotifs[$i] = substr($microields[$i],-length($motields[$i]));
-	}
-#	print "firstmotif = @firstmotifs... lastmotif = @lastmotifs\n" if $prinkter ==1;
-	my @mergelist = ();
-	my @inter_poses = split(/,/,$fields[$interr_poscord]);
-	my $no_of_interruptions = $fields[$no_of_interruptionscord];
-	my @interruptions = split(/,/,$fields[$interrcord]);
-	my @interrtypes = split(/,/,$fields[$interrtypecord]);
-	my $stopper = 0;
-	for my $i (0 ... $#motields-1){
-#		print "studying connection of $motields[$i] and $motields[$i+1], i = $i in $microsat\n:$lastmotifs[$i] eq $firstmotifs[$i+1]?\n" if $prinkter ==1;
-		if ((allCaps($lastmotifs[$i]) eq allCaps($firstmotifs[$i+1])) && (!exists $inields[$i] || $inields[$i] !~ /[a-zA-Z]/)){
-			$stopper = 1;
-			push(@mergelist, ($i)."_".($i+1)); #<STDIN> if $prinkter ==1;
-		}
-	}
-	
-#	print "mergelist = @mergelist\n" if $prinkter ==1;
-	return $line if scalar(@mergelist) == 0;
-#	print "merging @mergelist\n" if $prinkter ==1;
-#	<STDIN> if $prinkter ==1;
-	
-	foreach my $merging (@mergelist){
-		my @sets = split(/_/, $merging);
-#		print "sets = @sets\n" if $prinkter ==1;
-		my @tempmicro = ();
-		my @tempmot = ();
-#		print "for loop going from 0 ... ", $sets[0]-1, "\n" if $prinkter ==1;
-		for my $i (0 ... $sets[0]-1){
-#			print " adding pre- i = $i adding: microields= $microields[$i]. motields = $motields[$i], inields = |$inields[$i]|\n" if $prinkter ==1;
-			push(@tempmicro, "[".$microields[$i]."]");
-			push(@tempmicro, $inields[$i]);
-			push(@tempmot, "[".$motields[$i]."]");
-#			print "adding pre-motifs number $i\n" if $prinkter ==1;
-#			print "tempmot = @tempmot, tempmicro = @tempmicro \n" if $prinkter ==1;
-		}
-#		print "tempmot = @tempmot, tempmicro = @tempmicro \n" if $prinkter ==1;
-#		print "now pushing ", "[",$microields[$sets[0]]," and ",$microields[$sets[1]],"]\n" if $prinkter ==1;
-		my $pusher = "[".$microields[$sets[0]].$microields[$sets[1]]."]";
-#		print "middle stretch motif = $motields[$sets[0]]\n";
-		push (@tempmicro, $pusher);
-		push(@tempmot, "[".$motields[$sets[0]]."]");
-		push (@tempmicro, $inields[$sets[1]]) if $sets[1] != $#microields && exists $sets[1] && exists $inields[$sets[1]];
-		my $outcoming = -2;
-#		print "tempmot = @tempmot, tempmicro = @tempmicro \n" if $prinkter ==1;
-#		print "for loop going from ",$sets[1]+1, " ... ", $#microields, "\n" if $prinkter ==1;
-		for my $i ($sets[1]+1 ... $#microields){
-#			print " adding post- i = $i adding: microields= $microields[$i]. motields = $motields[$i]\n" if $prinkter ==1;
-			push(@tempmicro, "[".$microields[$i]."]") if exists $microields[$i];
-			push(@tempmicro, $inields[$i]) unless $i == $#microields || !exists $inields[$i];
-			push(@tempmot, "[".$motields[$i]."]");
-#			print "adding post-motifs number $i\n" if $prinkter ==1;
-			$outcoming  = $i;
-		}
-#		print "____________________________________________________________________________\n";
-		$prinkter = 0;
-		$fields[$microsatcord] = join("",@tempmicro);
-		$fields[$motifcord] = join("",@tempmot);
-#		print "tempmot = @tempmot, tempmicro = @tempmicro . microsat = $fields[$microsatcord] and motif = $fields[$motifcord] \n" if $prinkter ==1;
-		
-		splice(@interrtypes, $sets[0], 1);
-		$fields[$interrtypecord] = join(",",@interrtypes);
-		splice(@interruptions, $sets[0], 1);
-		$fields[$interrcord] = join(",",@interruptions);
-		splice(@inter_poses, $sets[0], 1);
-		$fields[$interr_poscord] = join(",",@inter_poses);
-		$no_of_interruptions = $no_of_interruptions - 1;
-	}	
-
-	if ($no_of_interruptions == 0 && $line !~ /compound/){
-		$fields[$microsatcord] =~ s/^\[|\]$//sg;
-		$fields[$motifcord] =~ s/^\[|\]$//sg;
-		$line = join("\t", @fields[0 ... $motifcord]);		
-	}
-	else{
-		$line = join("\t", @fields);
-	}
-#	print "post merging, the line is $line\n" if $prinkter ==1;
-	#<STDIN> if $stopper ==1;
-	return $line;
-}
-sub interval_asseser{
-	my $pre_phase = $_[0]; my $post_phase = $_[1]; my $inter = $_[2];	# presumably $_[2]; $_[3] would skip an argument (this stub is unused)
-}
-#---------------------------------------------------------------------------------------------------
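-# allCaps: uppercases the four nucleotide letters (equivalent to uc() for pure
-# ACGT strings).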
-sub allCaps{
-	my $motif = $_[0];
-	$motif =~ s/a/A/g;
-	$motif =~ s/c/C/g;
-	$motif =~ s/t/T/g;
-	$motif =~ s/g/G/g;
-	return $motif;
-}
-
-
-#xxxxxxxxxxxxxx multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx  multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx  multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx 
-
-
-#xxxxxxxxxxxxxx merge_interruptedMicrosats xxxxxxxxxxxxxx  merge_interruptedMicrosats xxxxxxxxxxxxxx  merge_interruptedMicrosats xxxxxxxxxxxxxx 
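-# merge_interruptedMicrosats (inferred): load forward- and reverse-strand
-# microsatellite calls into per-sequence hashes, then walk the sequence file to
-# separate mergeable overlapping calls from standalone ones.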
-sub merge_interruptedMicrosats{
-#	print "IN merge_interruptedMicrosats: @_\n";
-	my $input0 = $_[0];  ######looks like this: my $t8humanoutput = $pipedir.$ptag."_nogap_op_unrand2"
-	my $input1 = $_[1];  ###### the forward *_sput_op4_ii file
-	my $input2 = $_[2];  ###### the reverse *_sput_op4_ii file
-	$no_of_species = $_[3];
-	
-	my $output1 = $_[1]."_separate";    #$_[3]; ###### plain microsatellite file forward
-	my $output2 = $_[2]."_separate";    ##$_[4]; ###### plain microsatellite file reverse
-	my $output3 = $_[1]."_merged";    ##$_[5]; ###### merged microsatellite file
-	#my $output4 = $_[2]."_merged";    ##$_[6]; ###### plain microsatellite file reverse
-	#my $info = $_[4];
-	#my @tags = split(/\t/,$info);
-	
-	open(SEQ,"<$input0") or die "Cannot open file $input0 $!";
-	open(INF,"<$input1") or die "Cannot open file $input1 $!";
-	open(INR,"<$input2") or die "Cannot open file $input2 $!";
-	open(OUTF,">$output1") or die "Cannot open file $output1 $!";
-	open(OUTR,">$output2") or die "Cannot open file $output2 $!";
-	open(MER,">$output3") or die "Cannot open file $output3 $!";
-	#open(MERR,">$output4") or die "Cannot open file $output4 $!";
-		
-	
-		
-	 $printer = 0;
-	
-#	print "files opened \n";
-	$infocord = 2 + (4*$no_of_species) - 1;
-	$startcord = 2 + (4*$no_of_species) + 2 - 1;
-	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
-	$endcord = 2 + (4*$no_of_species) + 4 - 1;
-	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
-	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
-	$typecord = $infocord + 1;
-	my $sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
-	
-	$interrtypecord = $motifcord + 1;
-	$interrcord = $motifcord + 2;
-	$interr_poscord = $motifcord + 3;
-	$no_of_interruptionscord = $motifcord + 4;
-	$mergestarts  = $no_of_interruptionscord+ 1;
-	$mergeends = $no_of_interruptionscord+ 2;
-	$mergemicros = $no_of_interruptionscord+ 3;
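-	# Column layout of a tab-separated microsat record (0-based): columns
-	# 0 .. 4*N+1 hold per-species alignment info for N species, followed by
-	# the type, start, strand, end, microsat and motif columns; interruption
-	# and merge bookkeeping columns sit after the motif. The SEQ file carries
-	# five columns per species, hence the separate $sequencepos formula.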
-	
-	# NOW ADDING FORWARD MICROSATELLITES TO HASH
-	my %fmicros = ();
-	my $microcounter=0;
-	my $linecounter = 0;
-	while (my $line = <INF>){
-	#	print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
-		$linecounter++;
-		if ($line =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
-			my $key = join("\t",$1, $2, $4, $5);
-		#	print $key, "#-#-#-#-#-#-#-#\n";
-			push (@{$fmicros{$key}},$line);	
-			$microcounter++;
-		}
-		else {print $line;}
-	}
-#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
-	close INF;
-	my @deletedlines = ();
-#	print "done forward hash \n";
-	$linecounter = 0;
-	#---------------------------------------------------------------------------------------------------
-	# NOW ADDING REVERSE MICROSATELLITES TO HASH
-	my %rmicros = ();
-	$microcounter=0;
-	while (my $line = <INR>){
-	#	print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
-		$linecounter++;
-		if ($line =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
-			my $key = join("\t",$1, $2, $4, $5);
-	#		print $key, "#-#-#-#-#-#-#-#\n";
-			push (@{$rmicros{$key}},$line);	
-			$microcounter++;
-		}
-		else {print "cant make key\n";}
-	}
-#	print "number of reverse microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
-	close INR;
-#	print "done reverse hash \n";
-	$linecounter = 0;
-	
-	#------------------------------------------------------------------------------------------------
-	
-	while(my $sine = <SEQ>){
-		#<STDIN> if $sine =~ /16349128/;
-		next if $sine !~ /[a-zA-Z0-9]/;
-#		print "-" x 150, "\n"  if $printer == 1;
-		my @sields = split(/\t/,$sine);
-		my @merged = ();
-	
-		my $key = ();
-	
-		if ($sine =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
-			$key = join("\t",$1, $2, $4, $5);
-		#	print $key, "<-<-<-<-<-<-<-<\n";		
-		}
-	#	print "key = $key\n";
-		
-		my @sets1;
-		my @sets2;
-		chomp $sields[$sequencepos];
-		my $rev_sequence = reverse($sields[$sequencepos]); 
-		$rev_sequence =~ s/ //g;
-		$rev_sequence = " ".$rev_sequence;
-		next if (!exists $fmicros{$key} && !exists $rmicros{$key});
-		
-		if (exists $fmicros{$key}){
-		#	print "line no : $linecount\n";
-			my @raw_microstring = @{$fmicros{$key}};
-			my %starts = (); my %ends = ();
-#			print colored ['yellow'],"unsorted, unfiltered microats = \n" if $printer == 1; foreach (@raw_microstring) {print colored ['blue'],$_,"\n" if $printer == 1;}
-			my @microstring=();
-			for my $u (0 ... $#raw_microstring){
-				my @tields = split(/\t/,$raw_microstring[$u]);
-				next if exists $starts{$tields[$startcord]} && exists $ends{$tields[$endcord]};
-				push(@microstring, $raw_microstring[$u]);
-				$starts{$tields[$startcord]} = $tields[$startcord];
-				$ends{$tields[$endcord]} = $tields[$endcord];
-			}
-			
-	#		print "founf microstring in forward\n: @microstring\n"; 
-			chomp @microstring;
-			my $clusterresult = (find_clusters(@microstring, $sields[$sequencepos])); 	
-			@sets1 = split("\=", $clusterresult);
-			my @temp = split(/_/,$sets1[0]) ; $microscanned+= scalar(@temp);
-		#	print "sets = ", join("<all\nmerged>", @sets1), "\n<<-sets1\n"; <STDIN>;
-		}	#if (exists $micros{$key}){
-	
-		if (exists $rmicros{$key}){
-		#	print "line no : $linecount\n";
-			my @raw_microstring = @{$rmicros{$key}};
-			my %starts = (); my %ends = ();
-#			print colored ['yellow'],"unsorted, unfiltered microats = \n" if $printer == 1; foreach (@raw_microstring) {print colored ['blue'],$_,"\n" if $printer == 1;}
-			my @microstring=();
-			for my $u (0 ... $#raw_microstring){
-				my @tields = split(/\t/,$raw_microstring[$u]);
-				next if exists $starts{$tields[$startcord]} && exists $ends{$tields[$endcord]};
-				push(@microstring, $raw_microstring[$u]);
-				$starts{$tields[$startcord]} = $tields[$startcord];
-				$ends{$tields[$endcord]} = $tields[$endcord];
-			}
-	#		print "founf microstring in reverse\n: @microstring\n"; <STDIN>;
-			chomp @microstring;
-	#		print "sending reversed sequence\n";
-			my $clusterresult = (find_clusters(@microstring, $rev_sequence ) ); 	
-			@sets2 = split("\=", $clusterresult);
-			my @temp = split(/_/,$sets2[0]) ; $microscanned+= scalar(@temp);
-		}	#if (exists $micros{$key}){
-	
-		my @popout1 = ();
-		my @popout2 = ();
-		my @forwardset = ();
-		if (exists $sets2[1] ){
-			if(exists $sets1[0]) {
-				push (@popout1, $sets1[0],$sets2[1]);
-				my @forwardset = split("=", popOuter(@popout1, $rev_sequence ));#		
-				print OUTF join("\n",split("_", $forwardset[0])), "\n";
-				my @localmerged = split("_", $forwardset[1]);
-				my $sequence = $sields[$sequencepos];
-				$sequence =~ s/ //g;
-				for my $j (0 ... $#localmerged){
-					$localmerged[$j] = 	invert_justCoordinates ($localmerged[$j], length($sequence));
-				}
-				
-				push (@merged, @localmerged);
-				
-			}
-			else{
-				my @localmerged = split("_", $sets2[1]);
-				my $sequence = $sields[$sequencepos];
-				$sequence =~ s/ //g;
-				for my $j (0 ... $#localmerged){
-					$localmerged[$j] = 	invert_justCoordinates ($localmerged[$j], length($sequence));
-				}
-				
-				push (@merged, @localmerged);
-			}
-		}
-		elsif (exists $sets1[0]){
-			print OUTF join("\n",split("_", $sets1[0])), "\n";
-		}
-		
-		my @reverseset= ();
-		if (exists $sets1[1]){
-			if (exists $sets2[0]){
-				push (@popout2, $sets2[0],$sets1[1]);
-			#	print "popout2 = @popout2\n";
-				my @reverseset = split("=", popOuter(@popout2, $sields[$sequencepos]));
-				#print "reverseset = $reverseset[1] < --- reverseset1\n"; 
-				print OUTR join("\n",split("_", $reverseset[0])), "\n";
-				push(@merged,  (split("_", $reverseset[1])));
-			}
-			else{
-				push(@merged,  (split("_", $sets1[1])));
-			}
-		}
-		elsif (exists $sets2[0]){
-			print OUTR join("\n",split("_", $sets2[0])), "\n";
-		
-		}
-		
-		if (scalar @merged > 0){
-			my @filtered_merged = split("__",(filterDuplicates_merged(@merged)));
-			print MER join("\n", @filtered_merged),"\n"; 
-		}	
-	#		<STDIN> if $sine =~ /16349128/;
-	
-	}
-	close(SEQ);
-	close(INF);
-	close(INR);
-	close(OUTF);
-	close(OUTR);
-	close(MER);
-
-}
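-# merge_interruptedMicrosats keys both strands' microsat records by their
-# alignment-block coordinates, drops records that duplicate a start/end pair,
-# clusters neighboring records with find_clusters, and reconciles forward
-# against reverse-strand hits via popOuter before writing the *_separate and
-# *_merged outputs.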
-sub find_clusters{
-	my @input = @_;
-	my $sequence = pop(@input);
-	$sequence =~ s/ //g;
-	my @microstring0 = @input;
-#	print "IN: find_clusters:\n";
-	my %microstart=();
-	my %microend=();
-	my @nonmerged = ();
-	my @mergedSet = ();
-#		print "set of microsats = @microstring \n";
-	my @microstring = map { $_->[0] } sort custom map { [$_, split /\t/ ] } @microstring0; 
-#	print "microstring = ", join("\n",@microstring0) ," \n---->\n", join("\n", @microstring),"\n ,,+." if $printer == 1; 
-	#<STDIN> if $printer == 1; 
-	my @tempmicrostring = @microstring;
-	foreach my $line (@tempmicrostring){
-		my @fields = split(/\t/,$line);
-		my $start = $fields[$startcord];
-		my $end = $fields[$endcord];
-		next if $start !~ /[0-9]+/ || $end !~ /[0-9]+/;
-	#		print " starts >>> start: $start = $fields[11] - $fields[10] || $end = $fields[13] - $fields[10]\n";
-		push (@{$microstart{$start}},$line);
-		push (@{$microend{$end}},$line);	
-	}
-	my $firstflag = 'down';
-	while( my $line =shift(@microstring)){
-#		print "-----------\nline = $line \n" if $printer == 1;
-		chomp $line;
-		my @fields = split(/\t/,$line);
-		my $start = $fields[$startcord];
-		my $end = $fields[$endcord];
-		next if $start !~ /[0-9]+/ || $end !~ /[0-9]+/ || $distance !~ /[0-9]+/ ;
-		my $startmicro = $line;
-		my $endmicro = $line;
-#		print "start: $start = $fields[11] - $fields[10] || $end = $fields[13] - $fields[10]\n";
-
-		delete ($microstart{$start});
-		delete ($microend{$end});
-		my $flag = 'down';	
-		my $startflag = 'down';
-		my $endflag = 'down';
-		my $prestart = $start - $distance;
-		my $postend = $end + $distance;
-		my @compoundlines = ();
-		my %compoundhash = ();
-		push (@compoundlines, $line);
-		push (@{$compoundhash{$line}},$line);
-		my $startrank = 1;
-		my $endrank = 1;
-
-		while( ($startflag eq "down") || ($endflag eq "down") ){
-#			print "prestart=$prestart, post end =$postend.. seqlen =", length($sequence)," firstflag = $firstflag \n" if $printer == 1;
-			if ( (($prestart < 0) && $firstflag eq "up") || (($postend > length($sequence) && $firstflag eq "up")) ){
-#				print "coming to the end of sequence,post end = $postend and sequence length =", length($sequence)," so exiting\n" if $printer == 1;
-				last;
-			}
-			
-			$firstflag = "up";
-			if ($startflag eq "down"){		
-				for my $i ($prestart ... $end){
-					if(exists $microend{$i}){	
-						chomp $microend{$i}[0];
-						if(exists $compoundhash{$microend{$i}[0]}) {next;}
-						chomp $microend{$i}[0];
-						push(@compoundlines, $microend{$i}[0]);
-						my @tields = split(/\t/,$microend{$i}[0]);
-						$startmicro = $microend{$i}[0];
-						chomp $startmicro;
-						$flag = 'down';
-						$startrank++;
-#						print "deleting $microend{$i}[0] and $microstart{$tields[$startcord]}[0]\n" if $printer == 1;
-						delete $microend{$i};
-						delete $microstart{$tields[$startcord]};
-						$end = $tields[$endcord];
-						$startflag = 'down';
-						$prestart = $tields[$startcord] - $distance;
-						last;
-					}
-					else{
-						$flag = 'up';
-						$startflag = 'up';
-					}
-				}
-			}
-	
-			if ($endflag eq "down"){	
-
-				for my $i ($start ... $postend){
-#					print "$start ----> $i -----> $postend\n" if $printer == 1;
-					if(exists $microstart{$i} ){
-						chomp $microstart{$i}[0];
-						if(exists $compoundhash{$microstart{$i}[0]}) {next;}	
-						chomp $microstart{$i}[0];
-						push(@compoundlines, $microstart{$i}[0]);
-						my @tields = split(/\t/,$microstart{$i}[0]);
-						$endmicro = $microstart{$i}[0];
-						$endrank++;
-						chomp $endmicro;
-						$flag = 'down';
-#						print "deleting $microend{$tields[$endcord]}[0]\n" if $printer == 1;
-
-						delete $microstart{$i} if exists $microstart{$i} ;
-						delete $microend{$tields[$endcord]} if exists $microend{$tields[$endcord]};
-#						print "done\n" if $printer == 1;
-
-						shift @microstring;
-						$end = $tields[$endcord];
-						$postend = $tields[$endcord] + $distance;
-						$endflag = 'down';
-						last;
-					}
-					else{
-						$flag = 'up';
-						$endflag = 'up';
-					}
-#					print "out of the if\n" if $printer == 1;
-				}
-#				print "out of the for\n" if $printer == 1;
-
-			}
-#			print "for next turn, flag status: startflag = $startflag and endflag = $endflag \n";
-		} 														#end while( $flag eq "down")
-#			print "compoundlines = @compoundlines \n" if $printer == 1;
-
-		if (scalar (@compoundlines) == 1){
-			push(@nonmerged, $line);		
-
-		}
-		if (scalar (@compoundlines) > 1){
-#			print "FROM CLUSTERER\n"  if $printer == 1;
-			push(@mergedSet,merge_microsats(@compoundlines, $sequence) );
-		}
-	} #end foreach my $line (@microstring){
-#	print join("\n",@mergedSet),"<-----mergedSet\n"  if $printer == 1;
-#<STDIN> if scalar(@mergedSet) > 0;
-#	print "EXIT: find_clusters\n";
-return (join("_",@nonmerged). "=".join("_",@mergedSet));
-}
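-# A minimal, self-contained sketch of the distance-based interval clustering
-# that find_clusters performs, assuming plain [start, end] pairs instead of
-# full tab-separated microsat records (hypothetical helper, never called):
-sub _example_cluster_intervals {
-	my ($dist, @intervals) = @_;          # $dist plays the role of the global $distance
-	@intervals = sort { $a->[0] <=> $b->[0] } @intervals;
-	my (@clusters, $reach);
-	for my $iv (@intervals) {
-		if (@clusters && $iv->[0] <= $reach + $dist) {
-			push @{$clusters[-1]}, $iv;   # within $dist of the current cluster: extend it
-			$reach = $iv->[1] if $iv->[1] > $reach;
-		}
-		else {
-			push @clusters, [$iv];        # too far away: start a new cluster
-			$reach = $iv->[1];
-		}
-	}
-	return @clusters;                     # singleton clusters correspond to @nonmerged above
-}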
-
-sub custom {
-	$a->[$startcord+1] <=> $b->[$startcord+1];
-}
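-# custom is meant for a Schwartzian transform (map { [$_, split /\t/ ] }),
-# which prepends the untouched line to its own fields, so record column
-# $startcord sits at index $startcord+1 of the temporary array ref.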
-
-sub popOuter {
-#	print "\nIN: popOuter @_\n";
-	my @all = split ("_",$_[0]);
-#	<STDIN> if !defined $_[0];
-	my @merged = split ("_",$_[1]);
-	my $sequence = $_[2];
-	my $seqlen = length($sequence);
-#	print "\nIN: popOuter @_\n" if scalar(@_) != 3;
-#	<STDIN> if scalar(@_) != 3;
-	my %microstart=();
-	my %microend=();
-	my @mergedSet = ();
-	my @nonmerged = ();
-#	print "\n\n\n all = @all\n<--all\n";
-	
-	foreach my $line (@all){
-		my @fields = split(/\t/,$line);
-		my $start = $seqlen - $fields[$startcord]+ 1;
-		my $end = $seqlen - $fields[$endcord] + 1;
-		push (@{$microstart{$start}},$line);
-		push (@{$microend{$end}},$line);	
-	}
-	my $firstflag = 'down';
-
-	my %forPopouting = ();
-
-	while( my $line =shift(@merged)){
-#		print "\n MErgedline: $line \n" if $printer == 1;
-		chomp $line;
-		my @fields = split(/\t/,$line);
-		my $start = $fields[$startcord];
-		my $end = $fields[$endcord];
-		my $startmicro = $line;
-		my $endmicro = $line;
-
-		delete ($microstart{$start});
-		delete ($microend{$end});
-		my $flag = 'down';	
-		my $startflag = 'down';
-		my $endflag = 'down';
-		my $prestart = $start - $distance;
-		my $postend = $end + $distance;
-		my @compoundlines = ();
-		my %compoundhash = ();
-		push (@compoundlines, $line);
-		my $startrank = 1;
-		my $endrank = 1;
-		
-	#	print "\nstart = $start, end = $end\n";
-	#	<STDIN>;
-		for my $i ($start ... $end){
-			if(exists $microend{$i}){	
-		#		print "\nmicrosat exists: $microend{$i}[0] microsat exists\n"; 
-				chomp $microend{$i}[0];
-				my @fields = split(/\t/,$microend{$i}[0]);
-				delete $microstart{$seqlen - $fields[$startcord] + 1};
-				my $invertseq = $sequence;
-				$invertseq =~ s/ //g;
-				push(@compoundlines, invert_microsat($microend{$i}[0] , $invertseq ));
-				delete $microend{$i};
-				
-			}
-
-			if(exists $microstart{$i} ){
-		#		print "\nmicrosat exists: $microstart{$i}[0] microsat exists\n"; 
-
-				chomp $microstart{$i}[0];
-				my @fields = split(/\t/,$microstart{$i}[0]);
-				delete $microend{$seqlen - $fields[$endcord] + 1};
-				my $invertseq = $sequence;
-				$invertseq =~ s/ //g;
-				push(@compoundlines, invert_microsat($microstart{$i}[0], $invertseq) );						
-				delete $microstart{$i};
-			}
-		}			
-	
-		if (scalar (@compoundlines) == 1){
-			push(@mergedSet,join("\t",@compoundlines) );
-		}
-		else {
-#			print "FROM POPOUTER\n" if $printer == 1;
-			push(@mergedSet, merge_microsats(@compoundlines, $sequence) );
-		}
-	} 
-	
-	foreach my $key (sort keys %microstart) {
-    	push(@nonmerged,$microstart{$key}[0]);
-	}
-	
-	return (join("_",@nonmerged). "=".join("_",@mergedSet) );
-}
-
-
-
-sub invert_justCoordinates{
-	my $microsat = $_[0];
-#	print "IN invert_justCoordinates\n" if $printer == 1;
-	chomp $microsat;
-	my $seqLength = $_[1];
-	my @fields = split(/\t/,$microsat);
-	my $start = $seqLength - $fields[$endcord] + 1;
-	my $end = $seqLength - $fields[$startcord] + 1;
-	$fields[$startcord] = $start;
-	$fields[$endcord] = $end;
-	$fields[$microsatcord] = reverse_micro($fields[$microsatcord]);
-#	print "RETURNIG: ", join("\t",@fields), "\n" if $printer == 1;
-	return join("\t",@fields); 	
-}
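-# Coordinate mirroring: on a sequence of length L, a feature spanning [s, e]
-# on one strand maps to [L - e + 1, L - s + 1] on the other; for example,
-# with L = 100 the span [10, 20] becomes [81, 91].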
-
-sub largest_number{
-	my $counter = 0;
-	my($max) = shift(@_);
-    foreach my $temp (@_) {
-    	#print "finding largest array: $maxcounter \n";
-    	if($temp > $max){
-        	$max = $temp;
-        }
-    }
-    return($max);
-}
-sub smallest_number{
-	my $counter = 0;
-	my($min) = shift(@_);
-    foreach my $temp (@_) {
-    	#print "finding largest array: $maxcounter \n";
-    	if($temp < $min){
-        	$min = $temp;
-        }
-    }
-    return($min);
-}
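-# largest_number and smallest_number are hand-rolled equivalents of max/min
-# from the core List::Util module, which could replace them directly:
-#   use List::Util qw(max min);
-#   my $start = min(@mergestart);
-#   my $end   = max(@mergeend);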
-
-
-sub filterDuplicates_merged{
-	my @merged = @_;
-	my %revmerged = ();
-	my @fmerged = ();
-	foreach my $micro (@merged) {
-		my @fields = split(/\t/,$micro);
-		if ($fields[3] =~ /chr[A-Z0-9a-z]+r/){
-			my $key = join("_",$fields[1], $fields[$startcord], $fields[$endcord]);
-	#		print "adding  ... \n$key\n$micro\n";
-			push(@{$revmerged{$key}}, $micro);
-		}
-		else{
-		#	print "pushing.. $micro\n";
-			push(@fmerged, $micro);
-		}
-	}
-#	print "\n";
-	foreach my $micro (@fmerged) {
-			my @fields = split(/\t/,$micro);
-			my $key = join("_",$fields[1], $fields[$startcord], $fields[$endcord]);
-	#		print "searching for key $key\n";
-			if (exists $revmerged{$key}){
-		#		print "deleting $revmerged{$key}[0]\n";
-				delete $revmerged{$key};
-			}
-	}
-	foreach my $key (sort keys %revmerged) {
-    	push(@fmerged,$revmerged{$key}[0]);
-	}
-#	print "returning ", join("\n", @fmerged),"\n" ;
-	return join("__", @fmerged);
-}
-
-sub invert_microsat{
-	my $micro = $_[0];
-	chomp $micro;
-	if ($micro =~ /chr[A-Z0-9a-z]+r/) { $micro =~  s/chr([0-9a-zA-Z]+)r/chr$1/g ;}
-	else {  $micro =~  s/chr([0-9a-zA-Z]+)/chr$1r/g ; }
-	my $sequence = $_[1];
-	$sequence =~ s/ //g;
-	my $seqlen = length($sequence);
-	my @fields = split(/\t/,$micro);
-	my $start = $seqlen - $fields[$endcord] +1;
-	my $end = $seqlen - $fields[$startcord] +1;
-	$fields[$startcord]  = $start;
-	$fields[$endcord] = $end;
-	$fields[$motifcord] = reverse_micro($fields[$motifcord]);
-	$fields[$microsatcord] = reverse_micro($fields[$microsatcord]);
-	if ($fields[$typecord] ne "compound" && exists $fields[$no_of_interruptionscord] ){
-		my @intertypes = split(/,/,$fields[$interrtypecord]);
-		my @inters = split(/,/,$fields[$interrcord]);
-		my @interposes = split(/,/,$fields[$interr_poscord]);
-		$fields[$interrtypecord] = join(",",reverse(@intertypes));
-		$fields[$no_of_interruptionscord] = scalar(@interposes);
-		for my $i (0 ... $fields[$no_of_interruptionscord]-1){
-			if (exists $inters[$i] && $inters[$i] =~ /[a-zA-Z]/){
-				$inters[$i] = reverse($inters[$i]);
-				$interposes[$i] = $interposes[$i] + length($inters[$i]) - 1;
-			}	
-			else{
-				$inters[$i] = "";
-				$interposes[$i] = $interposes[$i] - 1;
-			}
-			$interposes[$i] = ($end - $start + 1) - $interposes[$i] + 1;
-		}
-		
-		$fields[$interrcord] = join(",",reverse(@inters));
-		$fields[$interr_poscord] = join(",",reverse(@interposes));
-	}
-	
-	my $finalmicrosat = join("\t", @fields);
-	return $finalmicrosat;			
-
-}
-sub reverse_micro{
-	my $micro = reverse($_[0]);
-	my @strand = split(/\s*/,$micro);
-	for my $i (0 ... $#strand){
-		if ($strand[$i] =~ /\[/i) {$strand[$i] = "]";next;}
-		if ($strand[$i] =~ /\]/i) {$strand[$i] = "[";next;}
-	}
-	return join("",@strand);
-}
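-# reverse_micro reverses the string and swaps "[" with "]" so bracketed runs
-# stay well-formed, e.g. "[ACAC]g[AT]" becomes "[TA]g[CACA]".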
-
-#xxxxxxxxxxxxxx merge_interruptedMicrosats xxxxxxxxxxxxxx  merge_interruptedMicrosats xxxxxxxxxxxxxx  merge_interruptedMicrosats xxxxxxxxxxxxxx 
-
-
-#xxxxxxxxxxxxxx forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx  forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx  forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx 
-
-sub forward_reverse_sputoutput_comparer	{
-#	print "IN forward_reverse_sputoutput_comparer: @_\n";
-	my $input0 = $_[0];  ###### the *nogap_unrand_match file
-	my $input1 = $_[1];  ###### the real file, *sput* data
-	my $input2 = $_[2];  ###### the reverse file, *sput* data
-	my $output1 = $_[3]; ###### microsats different in real file
-	my $output2 = $_[4]; ###### microsats missing in real file
-	my $output3 = $_[5]; ###### microsats common among real and reverse file
-	my $no_of_species = $_[6];
-	
-	$infocord = 2 + (4*$no_of_species) - 1;
-	$typecord = 2 + (4*$no_of_species) + 1 - 1;
-	$startcord = 2 + (4*$no_of_species) + 2 - 1;
-	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
-	$endcord = 2 + (4*$no_of_species) + 4 - 1;
-	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
-	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
-	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
-	$interrtypecord = $motifcord + 1;
-	$interrcord = $motifcord + 2;
-	$interr_poscord = $motifcord + 3;
-	$no_of_interruptionscord = $motifcord + 4;
-	$mergestarts  = $no_of_interruptionscord+ 1;
-	$mergeends = $no_of_interruptionscord+ 2;
-	$mergemicros = $no_of_interruptionscord+ 3;
-	
-	
-	open(SEQ,"<$input0") or die "Cannot open file $input0 $!";
-	open(INF,"<$input1") or die "Cannot open file $input1 $!";
-	open(INR,"<$input2") or die "Cannot open file $input2 $!";
-	
-	open(DIFF,">$output1") or die "Cannot open file $output1 $!";
-	#open(MISS,">$output2") or die "Cannot open file $output2 $!";
-	open(SAME,">$output3") or die "Cannot open file $output3 $!";
-		
-		
-#	print "opened files \n";
-	my $linecounter = 0;
-	my $fcounter = 0;
-	my $rcounter = 0;
-	
-	$printer = 0;
-	#---------------------------------------------------------------------------------------------------
-	# NOW ADDING FORWARD MICROSATELLITES TO HASH
-	my %fmicros = ();
-	my $microcounter=0;
-	while (my $line = <INF>){
-		$linecounter++;
-		if ($line =~ /([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
-			my $key = join("\t",$1, $3, $4, $5, $7, $8, $9, $11, $12);
-		#	print $key, "#-#-#-#-#-#-#-#\n";
-			push (@{$fmicros{$key}},$line);	
-			$microcounter++;
-		}
-		else {
-		#print $line;
-		}
-	}
-#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
-	close INF;
-	my @deletedlines = ();
-#	print "done forward hash \n";
-	$linecounter = 0;
-	#---------------------------------------------------------------------------------------------------
-	# NOW ADDING REVERSE MICROSATELLITES TO HASH
-	my %rmicros = ();
-	$microcounter=0;
-	while (my $line = <INR>){
-		$linecounter++;
-		if ($line =~ /([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
-			my $key = join("\t",$1, $3, $4, $5, $7, $8, $9, $11, $12);
-		#	print $key, "#-#-#-#-#-#-#-#\n";
-			push (@{$rmicros{$key}},$line);	
-			$microcounter++;
-		}
-		else {}
-	}
-#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
-	close INR;
-#	print "done reverse hash \n";
-	$linecounter = 0;
-	#---------------------------------------------------------------------------------------------------
-	#---------------------------------------------------------------------------------------------------
-	# NOW READING THE SEQUENCE FILE
-	while(my $sine = <SEQ>){
-		my %microstart=();
-		my %microend=();
-		my @sields = split(/\t/,$sine);
-		my $key = ();
-		if ($sine =~ /([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
-			$key = join("\t",$1, $3, $4, $5, $7, $8, $9, $11, $12);
-		}
-		else {
-			next;
-		}
-		$printer = 0;
-		my $sequence = $sields[$sequencepos];
-		chomp $sequence;
-		$sequence =~ s/ //g;
-		my @localfs = ();
-		my @localrs = ();
-		
-		if (exists $fmicros{$key}){
-			@localfs = @{$fmicros{$key}};
-			delete $fmicros{$key};		
-		}
-	
-		my %forwardstarts = ();
-		my %forwardends = ();
-	
-		foreach my $f (@localfs){
-			my @fields = split(/\t/,$f);
-			push (@{$forwardstarts{$fields[$startcord]}},$f);
-			push (@{$forwardends{$fields[$endcord]}},$fields[$startcord]);
-		}
-	
-		if (exists $rmicros{$key}){
-			@localrs = @{$rmicros{$key}};
-			delete $rmicros{$key};		
-		}
-		else{
-		}
-	
-		foreach my $r (@localrs){
-			chomp $r;
-			my @rields = split(/\t/,$r);
-#			print "rields = @rields\n" if $printer == 1;
-			my $reciprocalstart = length($sequence) - $rields[$endcord] + 1;
-			my $reciprocalend = length($sequence) - $rields[$startcord] + 1;
-#			print "reciprocal start = $reciprocalstart = ",length($sequence)," - $rields[$endcord] + 1\n" if $printer == 1;
-			my $microsat = reverse_micro(all_caps($rields[$microsatcord]));
-			my @localcollection=();
-			for my $i ($reciprocalstart+1 ... $reciprocalend-1){
-				if (exists $forwardstarts{$i}){
-					push(@localcollection, $forwardstarts{$i}[0] );
-					delete $forwardstarts{$i}; 
-				}
-				if (exists $forwardends{$i}){
-					next if !exists $forwardstarts{$forwardends{$i}[0]};
-					push(@localcollection, $forwardstarts{$forwardends{$i}[0]}[0] );
-				}
-			}
-			if (exists $forwardstarts{$reciprocalstart} && exists $forwardends{$reciprocalend}) {push(@localcollection,$forwardstarts{$reciprocalstart}[0]);}
-			
-			if (scalar(@localcollection) == 0){
-				print SAME invert_microsat($r,($sequence) ), "\n";
-			}
-	
-			elsif (scalar(@localcollection) == 1){
-#				print "f microsat = $localcollection[0]\n"  if $printer == 1;
-				my @lields = split(/\t/,$localcollection[0]);
-				$lields[$microsatcord]=all_caps($lields[$microsatcord]);
-#				print "comparing: $microsat and $lields[$microsatcord]\n" if $printer == 1;
-#				print "coordinates are: $lields[$startcord]-$lields[$endcord] and $reciprocalstart-$reciprocalend\n" if $printer == 1;
-				if ($microsat eq $lields[$microsatcord]){
-					chomp $localcollection[0];
-					print SAME $localcollection[0], "\n";
-				}
-				if ($microsat ne $lields[$microsatcord]){
-					chomp $localcollection[0];
-					my $newmicro = microsatChooser(join("\t",@lields), join("\t",@rields), $sequence);
-#					print "newmicro = $newmicro\n"  if $printer == 1;
-					if ($newmicro =~ /[a-zA-Z]/){
-						print SAME $newmicro,"\n";
-					}
-					else{
-					print DIFF join("\t",$localcollection[0],"-->",$rields[$typecord],$reciprocalstart,$reciprocalend, $rields[$microsatcord], reverse_micro($rields[$motifcord]), @rields[$motifcord+1 ... $#rields] ),"\n";
-#					print join("\t",$localcollection[0],"-->",$rields[$typecord],$reciprocalstart,$reciprocalend, $rields[$microsatcord], reverse_micro($rields[$motifcord]), @rields[$motifcord+1 ... $#rields] ),"\n" if $printer == 1;
-#					print "@rields\n@lields\n" if $printer == 1;
-					}
-				}			
-			}
-			else{
-#				print "multiple found for $r --> ", join("\t",@localcollection),"\n" if $printer == 1;
-			}
-		}
-	}	
-		
-	close(SEQ);
-	close(INF);
-	close(INR);
-	close(DIFF);
-	close(SAME);
-	
-}
-sub all_caps{
-	my @strand = split(/\s*/,$_[0]);
-	for my $i (0 ... $#strand){
-		if ($strand[$i] =~ /c/) {$strand[$i] = "C";next;}
-		if ($strand[$i] =~ /a/) {$strand[$i] = "A";next;}
-		if ($strand[$i] =~ /t/) { $strand[$i] = "T";next;}
-		if ($strand[$i] =~ /g/) {$strand[$i] = "G";next;}
-	}
-	return join("",@strand);
-}
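-# all_caps amounts to a single transliteration, $seq =~ tr/actg/ACTG/, which
-# uppercases only the four bases and leaves other characters untouched.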
-sub microsatChooser{
-	my $forward = $_[0];
-	my $reverse = $_[1];
-	my $sequence = $_[2];
-	my $seqLength = length($sequence);
-	$sequence =~ s/ //g;
-	my @fields = split(/\t/,$forward);
-	my @rields = split(/\t/,$reverse);	
-	my $r_start = $seqLength - $rields[$endcord] + 1;
-	my $r_end = $seqLength - $rields[$startcord] + 1;
-	
-	
-	my $f_microsat = $fields[$microsatcord];
-	my $r_microsat = $rields[$microsatcord];
-	
-	if ($fields[$typecord] =~ /\./ && $rields[$typecord] =~ /\./) {
-		return $forward if length($f_microsat) >= length($r_microsat);
-		return invert_microsat($reverse, $sequence) if length($f_microsat) < length($r_microsat);
-	}
-	return $forward if all_caps($fields[$motifcord]) eq all_caps($rields[$motifcord]) && $fields[$startcord] == $rields[$startcord] && $fields[$endcord] == $rields[$endcord];	
-
-	my $f_microsat_copy = $f_microsat;
-	my $r_microsat_copy = $r_microsat;
-	$f_microsat_copy =~ s/^\[|\]$//g;
-	$r_microsat_copy =~ s/^\[|\]$//g;
-
-	my @f_microields = split(/\][a-zA-Z]*\[/,$f_microsat_copy);
-	my @r_microields = split(/\][a-zA-Z]*\[/,$r_microsat_copy);
-	my @f_intields = split(/\][a-zA-Z]*\[/,$f_microsat_copy);
-	my @r_intields = split(/\][a-zA-Z]*\[/,$r_microsat_copy);
-	
-	my $f_motif = $fields[$motifcord];
-	my $r_motif = $rields[$motifcord];
-	my $f_motif_copy = $f_motif;
-	my $r_motif_copy = $r_motif;
-	$f_motif_copy =~ s/^\[|\]$//g;
-	$r_motif_copy =~ s/^\[|\]$//g;
-
-	my @f_motields = split(/\]\[/,$f_motif_copy);
-	my @r_motields = split(/\]\[/,$r_motif_copy);
-
-	my $f_purestretch = join("",@f_microields);
-	my $r_purestretch = join("",@r_microields);
-
-	if ($fields[$typecord]=~/nucleotide/ && $rields[$typecord]=~/nucleotide/){
-#		print "now.. studying $forward\n$reverse\n" if $printer == 1;
-		if ($fields[$typecord] eq $rields[$typecord]){
-#			print "comparing motifs::", all_caps($fields[$motifcord]) ," and ", all_caps(reverse_micro($rields[$motifcord])), "\n" if $printer == 1;
-
-			if(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 1){
-				my $subset_answer = isSubset($forward, $reverse, $seqLength);
-#				print "subset answer = $subset_answer\n" if $printer == 1;
-				return $forward if $subset_answer == 1; 
-				return invert_microsat($reverse, $sequence) if $subset_answer == 2; 
-				return $forward if $subset_answer == 0 && length($f_purestretch) >= length($r_purestretch);
-				return invert_microsat($reverse, $sequence) if $subset_answer == 0 && length($f_purestretch) < length($r_purestretch);
-				return $forward if $subset_answer == 3 && slided_microsat($forward, $reverse, $seqLength) == 0 && length($f_purestretch) >= length($r_purestretch);
-				return invert_microsat($reverse, $sequence) if $subset_answer == 3 && slided_microsat($forward, $reverse, $seqLength) == 0 && length($f_purestretch) < length($r_purestretch);
-				return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence) if $subset_answer == 3 ;			
-			}
-			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 0){
-				return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
-			}
-			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 2){
-				return $forward;
-			}
-			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 3){
-				return invert_microsat($reverse, $sequence);
-			}
-		}
-		else{
-			my $fmotlen = ();
-			my $rmotlen = ();
-	 		$fmotlen =1 if $fields[$typecord] eq "mononucleotide";
-	 		$fmotlen =2 if $fields[$typecord] eq "dinucleotide";
-	 		$fmotlen =3 if $fields[$typecord] eq "trinucleotide";
-	 		$fmotlen =4 if $fields[$typecord] eq "tetranucleotide";
-	 		$rmotlen =1 if $rields[$typecord] eq "mononucleotide";
-	 		$rmotlen =2 if $rields[$typecord] eq "dinucleotide";
-	 		$rmotlen =3 if $rields[$typecord] eq "trinucleotide";
-	 		$rmotlen =4 if $rields[$typecord] eq "tetranucleotide";
-			
-			if ($fmotlen < $rmotlen){
-				if (abs($fields[$startcord] -  $r_start) <= $fmotlen || abs($fields[$endcord] -  $r_end) <= $fmotlen ){
-					return $forward;
-				}
-				else{
-					return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
-				}
-			}
-			if ($fmotlen > $rmotlen){
-				if (abs($fields[$startcord] -  $r_start) <= $rmotlen || abs($fields[$endcord] -  $r_end) <= $rmotlen){
-					return invert_microsat($reverse, $sequence);
-				}
-				else{
-					return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
-				}
-			}
-		}
-	}
-	if ($fields[$typecord] eq "compound" && $rields[$typecord] eq "compound"){
-#			print "comparing compound motifs::", all_caps($fields[$motifcord]) ," and ", all_caps(reverse_micro($rields[$motifcord])), "\n" if $printer == 1;
-			if(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 1){
-				my $subset_answer = isSubset($forward, $reverse, $seqLength);
-#				print "subset answer = $subset_answer\n" if $printer == 1;
-				return $forward if $subset_answer == 1; 
-				return invert_microsat($reverse, $sequence) if $subset_answer == 2; 
-#				print length($f_purestretch) ,">", length($r_purestretch)," \n" if $printer == 1;
-				return $forward if $subset_answer == 0 && length($f_purestretch) >= length($r_purestretch);
-				return invert_microsat($reverse, $sequence) if $subset_answer == 0 && length($f_purestretch) < length($r_purestretch);
-				if ($subset_answer == 3){
-					if ($fields[$startcord] < $r_start || $fields[$endcord] > $r_end){
-						if (abs($fields[$startcord] -  $r_start) < length($f_motields[0]) || abs($fields[$endcord] -  $r_end)  < length($f_motields[$#f_motields]) ){
-							return $forward;
-						}
-						else{
-							return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
-						}
-					}
-					if ($fields[$startcord] > $r_start || $fields[$endcord] < $r_end){
-						if (abs($fields[$startcord] -  $r_start) < length($r_motields[0]) || abs($fields[$endcord] -  $r_end) < length($r_motields[$#r_motields]) ){
-							return invert_microsat($reverse, $sequence);
-						}
-						else{
-							return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
-						}
-					}
-				}
-			}
-			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 0){
-				return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
-			}
-			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 2){
-				return $forward;
-			}
-			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 3){
-				return invert_microsat($reverse, $sequence);
-			}
-		
-	}
-	
-	if ($fields[$typecord] eq "compound" && $rields[$typecord] =~ /nucleotide/){
-#		print "one compound, one nucleotide\n" if $printer == 1; 
-		return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
-	}
-	if ($fields[$typecord] =~ /nucleotide/ && $rields[$typecord] eq "compound"){
-#		print "one compound, one nucleotide\n" if $printer == 1; 
-		return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
-	}
-}
-
-sub isSubset{
-	my $forward = $_[0]; 	my @fields = split(/\t/,$forward);
-	my $reverse = $_[1];	my @rields = split(/\t/,$reverse);
-	my $seqLength = $_[2];
-	my $r_start = $seqLength - $rields[$endcord] + 1;
-	my $r_end = $seqLength - $rields[$startcord] + 1;
-#	print "we have $fields[$startcord] -> $fields[$endcord] && $r_start -> $r_end\n" if $printer == 1;
-	return "0" if $fields[$startcord] == $r_start && $fields[$endcord] == $r_end;
-	return "1" if $fields[$startcord] <= $r_start && $fields[$endcord] >= $r_end;
-	return "2" if $r_start <= $fields[$startcord] && $r_end >= $fields[$endcord];
-	return "3";
-}
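-# isSubset return codes: 0 = identical span, 1 = forward contains reverse,
-# 2 = reverse contains forward, 3 = partial overlap or disjoint spans.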
-
-sub motifBYmotif_match{
-	my $forward = $_[0];
-	my $reverse = $_[1];
-	$forward =~ s/^\[|\]$//g;
-	$reverse =~ s/^\[|\]$//g;
-	my @f_motields=split(/\]\[/, $forward);
-	my @r_motields=split(/\]\[/, $reverse);
-	my $finalresult = 0;
-
-	if (scalar(@f_motields) != scalar(@r_motields)){
-		my $subresult = 0;
-		my @mega = (); my @sub = ();
-		# @mega holds the longer motif list, @sub the shorter
-		@mega = @f_motields if scalar(@f_motields) > scalar(@r_motields);
-		@sub = @r_motields if scalar(@f_motields) > scalar(@r_motields);
-		@mega = @r_motields if scalar(@f_motields) < scalar(@r_motields);
-		@sub = @f_motields if scalar(@f_motields) < scalar(@r_motields);
-		
-		for my $i (0 ... $#sub){
-			my $probe = $sub[$i].$sub[$i];
-#			print "probing $probe and $mega[$i]\n" if $printer == 1;
-			if ($probe =~ /$mega[$i]/) {$subresult = 1; }
-			else {$subresult = 0; last; }
-		}
-		
-		return 0 if $subresult == 0;
-		return 2 if $subresult == 1 && scalar(@f_motields) > scalar(@r_motields); # r is subset of f
-		return 3 if $subresult == 1 && scalar(@f_motields) < scalar(@r_motields);  # ^reverse
-		
-	}
-	else{
-		for my $i (0 ... $#f_motields){
-			my $probe = $f_motields[$i].$f_motields[$i];
-			if ($probe =~ /$r_motields[$i]/) {$finalresult = 1 ;}
-			else {$finalresult = 0 ;last;}
-		}
-	}
-#	print "finalresult = $finalresult\n" if $printer == 1;
-	return $finalresult;
-}
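-# The doubled-motif probe above is the standard cyclic-rotation test: a motif
-# $n of the same length as $m occurs in $m.$m exactly when $n is a rotation of
-# $m (e.g. "ACGACG" contains both "CGA" and "GAC"). A standalone sketch of
-# just that check (hypothetical helper, never called):
-sub _example_is_rotation {
-	my ($m, $n) = @_;
-	return 0 if length($m) != length($n);
-	return index($m . $m, $n) >= 0 ? 1 : 0;
-}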
-
-sub merge_microsats{
-	my @input = @_;
-	my $sequence = pop(@input);
-	$sequence =~ s/ //g;
-	my @seq_string = @input;
-#	print "IN: merge_microsats\n";
-#	print "recieved for merging: ", join("\n", @seq_string), "\nsequence = $sequence\n"; 
-	my $start;
-	my $end;
-	my @micros = map { $_->[0] } sort custom map { [$_, split /\t/ ] } @seq_string; 
-#	print "\nrearranged into @micros \n";
-	my (@motifs, @microsats, @interruptiontypes, @interruptions, @interrposes, @no_of_interruptions, @types, @starts, @ends, @mergestart, @mergeend, @mergemicro) = ();
-	my @fields = ();
-	for my $i (0 ... $#micros){
-		chomp $micros[$i];
-		@fields = split(/\t/,$micros[$i]);
-		push(@types, $fields[$typecord]);
-		push(@motifs, $fields[$motifcord]);		
-
-		if (exists $fields[$interrtypecord]){ push(@interruptiontypes, $fields[$interrtypecord]);}
-			else { push(@interruptiontypes, "NA"); }
-		if (exists $fields[$interrcord]) {push(@interruptions, $fields[$interrcord]);}
-			else { push(@interruptions, "NA"); }
-		if (exists $fields[$interr_poscord]) { push(@interrposes, $fields[$interr_poscord]);}	
-			else { push(@interrposes, "NA"); }
-		if (exists $fields[$no_of_interruptionscord]) {push(@no_of_interruptions, $fields[$no_of_interruptionscord]);}
-			else { push(@no_of_interruptions, "NA"); }
-		if(exists $fields[$mergestarts]) { @mergestart = (@mergestart, split(/\./,$fields[$mergestarts]));}
-			else { push(@mergestart, $fields[$startcord]); }
-		if(exists $fields[$mergeends]) { @mergeend = (@mergeend, split(/\./,$fields[$mergeends]));}
-			else { push(@mergeend, $fields[$endcord]); }
-		if(exists $fields[$mergemicros]) { push(@mergemicro, $fields[$mergemicros]);}
-			else { push(@mergemicro, $fields[$microsatcord]); }
-			
-
-	}
-	$start = smallest_number(@mergestart);
-	$end = largest_number(@mergeend);
-	my $microsat_entry = "[".substr( $sequence, $start-1, ($end - $start + 1) )."]";
-	my $microsat = join("\t", @fields[0 ... $infocord], join(".", @types), $start, $fields[$strandcord], $end, $microsat_entry , join(".", @motifs), join(".", @interruptiontypes),join(".", @interruptions),join(".", @interrposes),join(".", @no_of_interruptions), join(".", @mergestart), join(".", @mergeend) , join(".", @mergemicro));
-	return $microsat;
-}
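-# The merged record joins each constituent's type, motif and interruption
-# fields with "." and replaces the microsat column with the bracketed
-# substring of the sequence running from the smallest start to the largest end.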
-
-sub slided_microsat{
-	my $forward = $_[0]; 	my @fields = split(/\t/,$forward);
-	my $reverse = $_[1];	my @rields = split(/\t/,$reverse);
-	my $seqLength = $_[2];
-	my $r_start = $seqLength - $rields[$endcord] + 1;
-	my $r_end = $seqLength - $rields[$startcord] + 1;
-	my $motlen =();
-	 $motlen =1 if $fields[$typecord] eq "mononucleotide";
-	 $motlen =2 if $fields[$typecord] eq "dinucleotide";
-	 $motlen =3 if $fields[$typecord] eq "trinucleotide";
-	 $motlen =4 if $fields[$typecord] eq "tetranucleotide";
-	
-	if (abs($fields[$startcord] - $r_start) < $motlen || abs($fields[$endcord] - $r_end) < $motlen ) {
-		return 0;
-	}
-	else{
-		return 1;
-	}
-	
-}
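-# slided_microsat returns 0 when the forward and reverse calls differ by less
-# than one motif length at either boundary (one call is a slid copy of the
-# other), and 1 otherwise.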
-
-#xxxxxxxxxxxxxx forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx  forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx  forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx 
-
-
-
-#xxxxxxxxxxxxxx new_multispecies_t10 xxxxxxxxxxxxxx  new_multispecies_t10 xxxxxxxxxxxxxx  new_multispecies_t10 xxxxxxxxxxxxxx 
-sub new_multispecies_t10{
-	my $input1 = $_[0];  #gap_op_unrand_match
-	my $input2 = $_[1];  #sput
-	my $output = $_[2];  #output
-	my $bin = $output."_bin";
-	my $orgs = join("|",split(/\./,$_[3]));
-	my @organisms = split(/\./,$_[3]);
-	my $no_of_species = scalar(@organisms); #3
-	my $t10info = $output."_info";
-	$prinkter = 0;
-	
-	open (MATCH, "<$input1");
-	open (SPUT, "<$input2");
-	open (OUT, ">$output");
-	open (INFO, ">$t10info");
-
-
-	sub microsat_bracketer;
-	sub custom;
-	my %seen = ();
-	$infocord = 2 + (4*$no_of_species) - 1;
-	$typecord = 2 + (4*$no_of_species) + 1 - 1;
-	$startcord = 2 + (4*$no_of_species) + 2 - 1;
-	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
-	$endcord = 2 + (4*$no_of_species) + 4 - 1;
-	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
-	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
-	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
-	#---------------------------------------------------------------------------------------------------------------#
-	#	MAKING A HASH FROM SPUT, WITH HASH KEYS GENERATED BELOW AND SEQUENCES STORED AS VALUES	#
-	#---------------------------------------------------------------------------------------------------------------#
-	my $linecounter = 0;
-	my $microcounter = 0;
-	while (my $line = <SPUT>){
-		chomp $line;
-	#	print "$org\t(chr[0-9]+)\t([0-9]+)\t([0-9])+\t \n";
-		next if $line !~ /[0-9a-z]+/;
-		$linecounter++;
-	#		my $key = join("\t",$1 , $2, $4, $5, $6, $8, $9, $10, $12, $13);
-	#		print $key, "#-#-#-#-#-#-#-#\n";
-		if ($line =~ /([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
-			my $key = join("\t",$1, $2, $3, $4, $5);
-#			print "key = $key\n" if $prinkter == 1;
-			push (@{$seen{$key}},$line);	
-			$microcounter++;
-		}
-		else {		print "could not make ker in SPUT : \n$line \n"; 
-		}
-	}
-#	print "done hash.. linecounter = $linecounter, microcounter = $microcounter and total keys entered = ",scalar(keys %seen),"\n";
-#	print INFO  "done hash.. linecounter = $linecounter, microcounter = $microcounter and total keys entered = ",scalar(keys %seen),"\n";
-	close SPUT;
-	
-	#----------------------------------------------------------------------------------------------------------------
-	
-	#-------------------------------------------------------------------------------------------------------#
-	#	THE ENTIRE CODE BELOW IS DEVOTED TO GENERATING HASH KEYS FROM MATCH FOLLOWED BY			#
-	#	USING THESE HASH KEYS TO CORRESPOND EACH SEQUENCE IN FIRST FILE TO ITS MICROSAT REPEATS IN			#
-	#   SECOND FILE FOLLOWED BY																				#
-	#	FINDING THE EXACT LOCATION OF EACH MICROSAT REPEAT WITHIN EACH SEQUENCE USING THE 'index' FUNCTION	#
-	#-------------------------------------------------------------------------------------------------------#		
-	my $ref = 0;
-	my $ref2 = 0;
-	my $ref3 = 0;
-	my $ref4 = 0;
-	my $deletes= 0;
-	my $duplicates = 0;
-	my $neighbors = 0;
-	my $tooshort = 0;
-	my $prevmicrol=();
-	my $startnotfound = 0;
-	my $matchkeysformed = 0;
-	my $keysused = 0;
-	
-	while (my $line = <MATCH>)	{
-#		print    colored ['magenta'], $line  if $prinkter == 1;
-		next if $line !~ /[a-zA-Z0-9]/;
-		chomp $line;	
-		my @fields2 = split(/\t/,$line);
-		my $key2 = ();
-	#		$key2 = join("\t",$1 , $2, $4, $5, $6, $8, $9, $10, $12, $13);
-		if ($line =~ /([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
-			$matchkeysformed++;
-			$key2 = join("\t",$1, $2, $3, $4, $5);
-#			print "key = $key2 \n" if $prinkter == 1; 
-		}
-		else{
-#			print "could not make ker in SEQ : $line\n";
-			next;
-		}
-		my $sequence = $fields2[$sequencepos];
-		$sequence =~ s/\*/-/g;
-		my $count = 0;
-		if (exists $seen{$key2}){
-			$keysused++;
-			my @unsorted_raw = @{$seen{$key2}};
-			delete $seen{$key2};
-			my @sequencearr = split(/\s*/, $sequence);
-			
-#			print "sequencearr = @sequencearr\n" if $prinkter == 1;
-			
-			my $counter;
-	
-			my %start_database = ();
-			my %end_database = ();
-			foreach my $uns (@unsorted_raw){
-				my @uields = split(/\t/,$uns);
-				$start_database{$uields[$startcord]} = $uns;
-				$end_database{$uields[$endcord]} = $uns;
-			}
-			
-			my @unsorted = ();
-			my %starts = (); my %ends = ();
-#			print colored ['yellow'],"unsorted, unfiltered microats = \n" if $prinkter == 1; foreach (@unsorted_raw) {print colored ['blue'],$_,"\n" if $prinkter == 1;}
-			for my $u (0 ... $#unsorted_raw){
-				my @tields = split(/\t/,$unsorted_raw[$u]);
-				next if exists $starts{$tields[$startcord]} && exists $ends{$tields[$endcord]};
-				push(@unsorted, $unsorted_raw[$u]);
-				$starts{$tields[$startcord]} = $unsorted_raw[$u];
-#				print "in starts : $tields[$startcord] -> $unsorted_raw[$u]\n" if $prinkter == 1;
-			}
-			
-			my $basecounter= 0;
-			my $gapcounter = 0;
-			my $poscounter = 0;
-			
-			for my $s (@sequencearr){
-				
-				$poscounter++;
-				if ($s eq "-"){
-					$gapcounter++; next;
-				}
-				else{
-					$basecounter++;
-				}
-				
-				
-				#print "s = $s, poscounter = $poscounter, basecounter = $basecounter, gapcpunter = $gapcounter\n" if $prinkter == 1;
-				#print "s = $s, basecounter = $basecounter, gapcpunter = $gapcounter\n" if $prinkter == 1;
-				#print "s = $s, gapcpunter = $gapcounter\n" if $prinkter == 1;
-				
-				if (exists $starts{$basecounter}){
-					my $locus = $starts{$basecounter};
-#					print "locus identified = $locus\n" if $prinkter == 1;
-					my @fields3 = split(/\t/,$locus);
-					my $start = $fields3[$startcord];
-					my $end = $fields3[$endcord];
-					my $motif = $fields3[$motifcord];
-					my $microsat = $fields3[$microsatcord];				
-					my @leftbracketpos = ();			
-					my @rightbracketpos = (); 
-					my $bracket_picker = 'no';
-					my $leftbrackets=();
-					my $rightbrackets = ();
-					my $micro_cpy = $microsat;
-#					print "microsat = $microsat\n" if $prinkter == 1;
-					while($microsat =~ m/\[/g) {push(@leftbracketpos, (pos($microsat)));  $leftbrackets = join("__",@leftbracketpos);$bracket_picker='yes';}
-					while($microsat =~ m/\]/g) {push(@rightbracketpos, (pos($microsat))); $rightbrackets = join("__",@rightbracketpos);}
-					$microsat =~ s/[\[\]\-\*]//g;
-#					print "microsat = $microsat\n" if $prinkter == 1;
-					my $human_search = join '-*', split //, $microsat;
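-					# $human_search tolerates alignment gaps by allowing "-*"
-					# between consecutive bases, e.g. "ACA" becomes "A-*C-*A"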
-					my $temp = substr($sequence, $poscounter-1);
-#					print "with poscounter = $poscounter\n" if $prinkter == 1;
-					my $search_result = ();
-					my $posnow  = ();
-					while ($temp =~ /($human_search)/gi){
-						$search_result = $1;
-					#	$posnow  = pos($temp);
-						last;
-					}
-					
-					my @gapspos = ();
-					while($search_result =~ m/-/g) {push(@gapspos, (pos($search_result))); }
-					my $gaps  = join("__",@gapspos);
-	
-					my $final_microsat = $search_result;
-					if ($bracket_picker eq "yes"){
-						$final_microsat = microsat_bracketer($search_result, $gaps,$leftbrackets,$rightbrackets);
-					}
-					
-					my $outsentence = join("\t",join ("\t",@fields3[0 ... $infocord]),$fields3[$typecord],$fields3[$motifcord],$gapcounter,$poscounter,$fields3[$strandcord],$poscounter + length($search_result) -1 ,$final_microsat);
-					
-					if ($bracket_picker eq "yes") {
-						$outsentence = $outsentence."\t".join("\t",@fields3[($motifcord+1) ... $#fields3]);
-					}
-					print OUT $outsentence,"\n";	
-				}			
-			}
-		}
-	}
-	my $unusedkeys = scalar(keys %seen);
-	print INFO "in hash = $ref, looped = $ref4, captured = $ref3\n REMOVED: \nmicrosats with too long gaps = $deletes\n";
-	print INFO "exact duplicated removed = $duplicates \nmicrosats removed due to multiple microsats defined  in +-10 bp neighboring region: $neighbors \n";
-	print INFO "microsatellites too short = $tooshort\n";
-	print INFO "keysused = $keysused...starts not found = $startnotfound ... matchkeysformed=$matchkeysformed ... unusedkeys=$unusedkeys\n";
-	
-	#print  "in hash = $ref, looped = $ref4, captured = $ref3\n REMOVED: \nmicrosats with too long gaps = $deletes\n";
-	#print  "exact duplicated removed = $duplicates \nmicrosats removed due to multiple microsats defined  in +-10 bp neighboring region: $neighbors \n";
-	#print  "microsatellites too short = $tooshort\n";
-	#print  "keysused = $keysused...starts not found = $startnotfound ... matchkeysformed=$matchkeysformed ... unusedkeys=$unusedkeys\n";
-	#print "unused keys = \n",join("\n", (keys %seen)),"\n";
-	close (MATCH);
-	close (SPUT);
-	close (OUT);
-	close (INFO);
-}	
-
-sub microsat_bracketer{
-#	print "in bracketer: @_\n";
-	my ($microsat, $gapspos, $leftbracketpos, $rightbracketpos) = @_;
-	my @gaps = split(/__/,$gapspos);
-	my @lefts = split(/__/,$leftbracketpos);
-	my @rights = split(/__/,$rightbracketpos);
-	my @new=();
-	my $pure = $microsat;
-	$pure =~ s/-//g;
-	my $off = 0;
-	my $finallength  = length($microsat) + scalar(@lefts)+scalar(@rights);
-	push(@gaps, 0);
-	push(@lefts,0);
-	push(@rights,0);
-	
-	for my $i (1 ... $finallength){
-#		print "1 current i = >$i<>, right = >$rights[0]<  gap = $gaps[0] left = >$lefts[0]< and $rights[0] == $i\n";
-		if($rights[0] == $i){
-	#		print "pushed a ]\n";
-			push(@new, "]");
-			shift(@rights);
-			push(@rights,0);
-			for my $j (0 ... scalar(@gaps)-1) {$gaps[$j]++;}	
-			next;
-		}
-		if($gaps[0] == $i){
-	#		print "pushed a -\n";
-			push(@new, "-");
-			shift(@gaps);
-			push(@gaps, 0);
-			for my $j (0 ... scalar(@rights)-1) {$rights[$j]++;}	
-			for my $j (0 ... scalar(@lefts)-1) {$lefts[$j]++;}	
-
-			next;
-		}
-		if($lefts[0] == $i){
-#			print "pushed a [\n";
-			push(@new, "[");
-			shift(@lefts);
-			push(@lefts,0);
-			for my $j (0 ... scalar(@gaps)-1) {$gaps[$j]++;}	
-			next;
-		}
-		else{
-			my $pushed = substr($pure,$off,1);
-			$off++;
-			push(@new,$pushed );
-#			print "pushed an alphabet, now new = @new, pushed = $pushed\n";
-			next;
-		}
-	}
-	my $returnmicrosat = join("",@new);
-#	print "final microsat = $returnmicrosat \n";
-	return($returnmicrosat);
-}
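-# microsat_bracketer re-threads the stored "[" and "]" offsets (1-based pos()
-# values from the original bracketed microsat) plus the gap offsets into the
-# gap-containing match, bumping the remaining stored offsets right by one each
-# time a bracket or gap is emitted so later insertions stay aligned.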
-
-#xxxxxxxxxxxxxx new_multispecies_t10 xxxxxxxxxxxxxx  new_multispecies_t10 xxxxxxxxxxxxxx  new_multispecies_t10 xxxxxxxxxxxxxx 
-
-
-#xxxxxxxxxxxxxx multiSpecies_orthFinder4 xxxxxxxxxxxxxx  multiSpecies_orthFinder4 xxxxxxxxxxxxxx  multiSpecies_orthFinder4 xxxxxxxxxxxxxx 
-sub multiSpecies_orthFinder4{
-	#print "IN multiSpecies_orthFinder4: @_\n";
-	my @handles = ();
-	#1 SEPT 30TH 2008
-	#2 THIS CODE (multiSpecies_orthFinder4.pl)  IS BEING MADE SO THAT IN THE REMOVAL OF MICROSATELLITES THAT ARE CLOSER TO EACH OTHER
-	#3 THAN 50 BP (THE 50BP RADIUS OF EXCLUSION), WE ARE LOOKING ACROSS ALIGNMENT BLOCKS.. AND NOT JUST LOOKING WITHIN THE ALIGNMENT BLOCKS. THIS WILL
-	#4 POTENTIALLY REMOVE EVEN MORE MICROSATELLITES THAN BEFORE, BUT THIS WILL RESCUE THOSE MICROSATELLITES THAT WERE LOST
-	#5 DUE TO OUR PREVIOUS REQUIREMENT FROM VERSION 3, THAT MICROSATELLITES THAT ARE CLOSER TO THE BOUNDARY THAN 25 BP NEED TO BE REMOVED
-	#6 SUCH A REQUIREMENT WAS A CRUDE WAY TO IMPOSE THE ABOVE 50 BP RADIUS OF EXCLUSION ACROSS THE ALIGNMENT BLOCKS WITHOUT ACTUALLY
-	#7 CHECKING COORDINATES OF THE EXCLUDED MICROSATELLITES.
-	#8 IN ORDER TO TAKE CARE OF THE CASES WHERE MICROSATELLITES ARE PERILOUSLY CLOSE TO ENDS OF THE ALIGNMENT BLOCKS, WE IMPOSE HERE
-	#9 A NEW REQUIREMENT THAT FOR A MICROSATELLITE TO BE CONSIDERED, ALL THE SPECIES NEED TO HAVE AT LEAST 10 BP OF NON-MICROSATELLITE SEQUENCE
-	#10 ON EITHER SIDE OF IT.. GAPLESS. THIS INFORMATION IS STORED IN THE VARIABLE: $FLANK_SUPPORT. THIS PART, INSTEAD OF BEING INCLUDED IN
-	#11 THIS CODE, WILL BE INCLUDED IN A NEW CODE THAT WE WILL BE WRITING AS PART OF THE PIPELINE: multiSpecies_microsatSetSelector.pl
-	
-	#1 trial run:
-	#2 perl ../../../codes/multiSpecies_orthFinder4.pl /gpfs/home/ydk104/work/rhesus_microsat/axtNet/hg18.panTro2.ponAbe2.rheMac2.calJac1/chr22.hg18.panTro2.ponAbe2.rheMac2.calJac1.net.axt H.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:C.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:O.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:R.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:M.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2 orth22 hg18:panTro2:ponAbe2:rheMac2:calJac1 50
-	
-	$prinkter=0;
-	
-	#############
-	my $CLUSTER_DIST = $_[4];
-	#############
-	
-	
-	my $aligns = $_[0];
-	my @micros = split(/:/, $_[1]);
-	my $orth = $_[2];
-	#my $not_orth = "notorth";
-	@tags = split(/:/, $_[3]);
-	
-	$no_of_species=scalar(@tags);
-	my $junkfile = $orth."_junk";
-	#open(JUNK,">$junkfile");
-	
-	#my $info = $output1."_info";
-	#print "inputs are : \n"; foreach(@micros){print $_,"\n";} 
-	#print "info = @_\n";
-	
-	
-	open (BO, "<$aligns") or die "Cannot open alignment file: $aligns: $!";
-	open (ORTH, ">$orth");
-	my $output=$orth."_out";
-	open (OUTP, ">$output");
-	
-	
-	#open (NORTH, ">$not_orth");
-	#open (INF, ">$info");
-	my $i = 0;
-	foreach my $path (@micros){
-		$handles[$i] = IO::Handle->new();
-		open ($handles[$i], "<$path") or die "Can't open microsat file $path : $!";
-		$i++;
-	}
-	
-	#print "Opened files\n";
-	
-	
-	$infocord = 2 + (4*$no_of_species) - 1;
-	$typecord = 2 + (4*$no_of_species) + 1 - 1;
-	$motifcord = $typecord + 1;
-	$gapcord = $motifcord+1;
-	$startcord = $gapcord + 1;
-	$strandcord = $startcord + 1;
-	$endcord = $strandcord + 1;
-	$microsatcord = $endcord + 1;
-	$sequencepos = 2 + (4*$no_of_species) + 1 -1 ; 
-	#$sequencepos = 17;
-	#	GENERATING HASHES CONTAINING CHIMP AND HUMAN DATA FROM ABOVE FILES
-	#----------------------------------------------------------------------------------------------------------------
-	my @hasharr = ();
-	foreach my $path (@micros){
-		open(READ, "<$path") or die "Cannot open file $path :$!";
-		my %single_hash = ();
-		my $key = ();
-		my $counter = 0;
-		while (my $line = <READ>){
-			$counter++;
-		#	print $line;
-			chomp $line;
-			my @fields1 = split(/\t/,$line);
-			if ($line =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
-				$key = join("\t",$1, $2,  $4, $5);
-	
-#					print "key =  : $key\n" if $prinkter == 1;
-				
-#				print $line if $prinkter == 1;
-				push (@{$single_hash{$key}},$line);
-			}
-			else{
-			#	print "microsat line incompatible\n";
-			}
-		}
-		push @hasharr, {%single_hash};
-	#	print "@{$single_hash{$key}} \n";
-#		print "done $path: counter = $counter\n" if $prinkter == 1;
-		close READ;
-	}	
-#	print "Done hashes\n";
-	#----------------------------------------------------------------------------------------------------------------
-	my $question=();
-	#----------------------------------------------------------------------------------------------------------------
-	my @contigstarts = ();
-	my @contigends = ();
-	
-	my %contigclusters = ();
-	my %contigclustersFirstStartOnly=();
-	my %contigclustersLastEndOnly=();
-	my %contigclustersLastEndLengthOnly=();
-	my %contigclustersFirstStartLengthOnly=();
-	my %contigpath=();
-	my $dotcounter = 0;
-	while (my $line = <BO>){
-#		print "x" x 60, "\n" if $prinkter == 1;
-		$dotcounter++;
-		
-	
-		
-#		print "." if $dotcounter % 100 ==0;
-#		print "\n" if $dotcounter % 5000 ==0;
-		next if $line !~ /^[0-9]+/;
-#		print $line if $prinkter == 1;
-		chomp $line;	
-		my @fields2 = split(/\t/,$line);
-		my $key2 = ();
-		if ($line =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
-			$key2 = join("\t",$1, $2,  $4, $5);
-		}
-		else {
-#			print "seq line $line incompatible\n" if $prinkter == 1; 
-			next;}
-		
-		
-		
-		
-		
-		
-		my @sequences = ();
-		for (0 ... $#tags){
-			my $seq = <BO>;
-	#		print $seq;
-			chomp $seq;
-			push(@sequences , " ".$seq);
-		}
-		my @origsequences = @sequences;
-		my $seqcopy = $sequences[0];
-		my @strings = ();
-		$seqcopy =~ s/[a-zA-Z]|-/x/g;
-		my @string = split(/\s*/,$seqcopy);
-	
-		for my $s (0 ... $#tags){
-			$sequences[$s] =~ s/-//g;
-			$sequences[$s] =~ s/[a-zA-Z]/x/g;		
-	#		print "length  of sequence = ",length($sequences[$s]),"\n";
-			my @tempstring = split(/\s*/,$sequences[$s]);
-			push(@strings, [@tempstring]);
-			
-		}
-	
-		my @species_list = ();
-		my @micro_count = 0;
-		my @starthash = ();
-		my $stopper = 1;
-		my @endhash = ();
-		
-		my @currentcontigstarts=();
-		my @currentcontigends=();
-		my @currentcontigchrs=();
-		
-		for my $i (0 ... $#tags){
-#			print "searching for : if exists  hasharr: $i : $tags[$i] : $key2 \n" if $prinkter == 1;
-			my @temparr = (); 
-	
-			if (exists $hasharr[$i]{$key2}){
-				@temparr =  @{$hasharr[$i]{$key2}};
-				
-#				print "in line $line, trying to hunt for: $tags[$i]\\s([a-zA-Z0-9])+\\s([0-9]+)\\s([0-9]+) \n" if $prinkter == 1;
-				$line =~ /$tags[$i]\s([a-zA-Z0-9]+)\s([0-9]+)\s([0-9]+)/;
-#				print "org = $tags[$i], and chr = $1, start = $2, end =$3 \n" if $prinkter == 1; 
-				my $startkey = $1."_".$2; print "adding start key for this alignment block: $startkey to species $tags[$i]\n" if $prinkter == 1;
-				my $endkey = $1."_".$3; print "adding end key for this alignment block: $endkey to species $tags[$i]\n" if $prinkter == 1;
-				$contigstarts[$i]{$startkey}= $key2;
-				$contigends[$i]{$endkey}= $key2;
-#				print "confirming existance: \n" if $prinkter == 1;
-#				print "present \n" if exists $contigends[$i]{$endkey} && $prinkter == 1;
-#				print "absent \n" if !exists $contigends[$i]{$endkey} && $prinkter == 1;			
-				$currentcontigchrs[$i]=$1;
-				$currentcontigstarts[$i]=$2;
-				$currentcontigends[$i]=$3;
-				
-			} # print "exists: @{$hasharr[$i]{$key2}}[0]\n"}
-			else {
-				push (@starthash, {0 => "0"});
-				push (@endhash, {0 => "0"});
-				$currentcontigchrs[$i] = 0;
-				next;
-			}
-			$stopper = 0;
-	#		print "exists: @temparr\n" if $prinkter == 1;
-			push(@micro_count, scalar(@temparr));
-			push(@species_list, [@temparr]);	
-			my @tempstart = (); my @tempend = ();
-			my %localends = ();
-			my %localhash = ();
-	#		print "---------------------------\n";
-	
-			foreach my $templine (@temparr){
-#				print "templine = $templine\n" if $prinkter == 1;
-				my @tields = split(/\t/,$templine);
-				my $start = $tields[$startcord]; # - $tields[$gapcord];
-				my $end = $tields[$endcord]; #- $tields[$gapcord];
-				my $realstart = $tields[$startcord]- $tields[$gapcord];
-				my $gapsinmicrosat = ($tields[$microsatcord] =~ s/-/-/g);
-				$gapsinmicrosat = 0 if $gapsinmicrosat !~ /[0-9]+/;
-	#			print "infocord = $infocord  typecord = $typecord  motifcord = $motifcord  gapcord = $gapcord  startcord = $startcord strandcord = $strandcord endcord = $endcord   microsatcord = $microsatcord  sequencepos = $sequencepos\n";
-				my $realend = $tields[$endcord]- $tields[$gapcord]- $gapsinmicrosat;
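-				# realstart/realend index into the gap-stripped sequence: both subtract the alignment
-				# gaps upstream of the microsat ($tields[$gapcord]); realend additionally subtracts
-				# the gaps inside the microsat itself ($gapsinmicrosat).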
-		#		print "real start = $realstart, realend = $realend \n";
-				for my $pos ($realstart ... $realend){  $strings[$i][$pos] = $strings[$i][$pos].",".$i.":".$start."-".$end;}
-				push(@tempstart, $start);
-				push(@tempend, $end);
-				$localhash{$start."-".$end} = $templine;
-				}
-			push @starthash, {%localhash};
-			my $foundclusters  =findClusters(join("!",@{$strings[$i]}), $CLUSTER_DIST);
-	
-			my @clusters = split(/_/,$foundclusters);
-			
-			my $clustno = 0;
-			
-			foreach my $cluster (@clusters) {
-				my @constituents = split(/,/,$cluster);
-#				print "clusters returned: @constituents\n" if $prinkter == 1;
-			}
-	
-			@string = split("_",stringPainter(join("_",@string),$foundclusters));
-			
-			
-		}
-		next if $stopper == 1;
-	
-#		print colored ['blue'],"FINAL:\n" if $prinkter == 1;
-		my $finalclusters  =findClusters(join("!",@string), 1);
-#		print colored ['blue'],"----------------------\n" if $prinkter == 1;
-		my @clusters = split(/,/,$finalclusters);
-#			print "@string\n" if $prinkter == 1;
-#			print "@clusters\n" if $prinkter == 1;
-#			print "------------------------------------------------------------------\n" if $prinkter == 1;
-		
-		my $clustno = 0;
-		
-	#	foreach my $cluster (@clusters) {
-	#		my @constituents = split(/,/,$cluster);
-	#		print "clusters returned: @constituents\n";
-	#	}
-	
-		next if (scalar @clusters == 0);
-	
-		my @contigcluster=();
-		my $clusterno=0;
-		my @contigClusterstarts=();
-		my @contigClusterends = ();
-		
-		foreach my $clust (@clusters){
-	 #		print "cluster: $clust\n";   		
-			$clusterno++;
-			my @localclust = split(/\./, $clust);
-			my @result = ();
-			my @starts = ();
-			my @ends = ();
-	 
-			for my $i (0 ... $#localclust){
-	 #			print "localclust[$i]: $localclust[$i]\n";   		
-				my @pattern = split(/:/, $localclust[$i]);
-				my @cords = split(/-/, $pattern[1]);
-				push (@starts, $cords[0]);
-				push (@ends, $cords[1]);
-			}
-	
-			my $extremestart = smallest_number(@starts);
-			my $extremeend = largest_number(@ends);
-			push(@contigClusterstarts, $extremestart);
-			push(@contigClusterends, $extremeend);
-#			print "cluster starts from $extremestart and ends at $extremeend \n" if $prinkter == 1 ;
-					
-			foreach my $clustparts (@localclust){
-				my @pattern = split(/:/, $clustparts);
-	# 			print "printing from pattern: $pattern[1]: $starthash[$pattern[0]]{$pattern[1]}\n";
-				push (@result, $starthash[$pattern[0]]{$pattern[1]});
-			}
-			push(@contigcluster, join("\t", @result));
-#			print join("\t", @result),"<-result \n" if $prinkter == 1 ;
-		}	
-		
-	
-		my $firstclusterstart = smallest_number(@contigClusterstarts);
-		my $lastclusterend = largest_number(@contigClusterends);
-		
-		
-		$contigclustersFirstStartOnly{$key2}=$firstclusterstart;	
-		$contigclustersLastEndOnly{$key2} = $lastclusterend;
-		$contigclusters{$key2}=[ @contigcluster ];
-#		print "currentcontigchr are @currentcontigchrs , firstclusterstart = $firstclusterstart, lastclusterend = $lastclusterend\n " if $prinkter == 1;
-		for my $i (0 ... $#tags){
-			#1 check whether an adjacent alignment block exists wrt the coordinates of this species.
-			next if $currentcontigchrs[$i] eq "0"; #1 this means that there are no microsats in this species in this alignment block,
-												 #2	so there is no need to worry about proximity to anything in an adjacent block!
-			
-			#1 BELOW, the following really calculates the distance between the end coordinate of the
-			#2 cluster and the end of the gap-free sequence of each species. This is so that if an
-			#3 adjacent alignment block is found later on, the exact distance between the potentially
-			#4 adjacent microsat clusters can be found here. The exact start coordinate will be used
-			#5 immediately below.
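-			#6 a hedged worked example (illustrative numbers, not from a real run): if this block's
-			#7 first cluster starts at alignment column 480 and its last cluster ends at column 490,
-			#8 then species_startsubstring holds columns 0..479 and species_endsubstring holds the
-			#9 columns after 490; their gap-free lengths are what get stored below.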
-	#		print "full sequence = $origsequences[$i] and its length = ",length($origsequences[$i])," \n" if $prinkter == 1;		
-			
-			my $species_startsubstring = substr($origsequences[$i], 0, $firstclusterstart);
-			my $species_endsubstring = ();
-			
-			if (length ($origsequences[$i]) <= $lastclusterend+1){ $species_endsubstring = "";}
-			else{  $species_endsubstring = substr($origsequences[$i], $lastclusterend+1);}
-			
-#			print "\nnot defined species_endsubstring...\n"  if !defined $species_endsubstring && $prinkter == 1;
-#			print "for species: $tags[$i]: \n" if $prinkter == 1;
-			
-			$species_startsubstring =~ s/-| //g;
-			$species_endsubstring =~ s/-| //g;
-			$contigclustersLastEndLengthOnly{$key2}[$i]=length($species_endsubstring);
-			$contigclustersFirstStartLengthOnly{$key2}[$i]=length($species_startsubstring);
-	
-	
-	
-#			print "species_startsubstring = $species_startsubstring, and its length =",length($species_startsubstring)," \n" if $prinkter == 1;
-#			print "species_endsubstring = $species_endsubstring, and its length =",length($species_endsubstring)," \n" if $prinkter == 1;
-#			print "attaching to contigclustersLastEndOnly: $key2: $i\n" if $prinkter == 1;	
-			
-#			print "just confirming: $contigclustersLastEndLengthOnly{$key2}[$i] \n" if $prinkter == 1;
-	
-		}
-	
-	
-	}
-#	print "\ndone the job of filling... \n";
-	#///////////////////////////////////////////////////////////////////////////////////////
-	#///////////////////////////////////////////////////////////////////////////////////////
-	#///////////////////////////////////////////////////////////////////////////////////////
-	#///////////////////////////////////////////////////////////////////////////////////////
-	$prinkter=0;
-	open (BO, "<$aligns") or die "Cannot open alignment file: $aligns: $!";
-	
-	my %clusteringpaths=();
-	my %clustersholder=();
-	my %foundkeys=();
-	my %clusteringpathsRev=();
-	
-	
-	my $totalcount=();
-	my $founkeys_enteredcount=();
-	my $transfered=0;
-	my $complete_transfered=0;
-	my $plain_transfered=0;
-	my $existing_removed=0;
-	
-	while (my $line = <BO>){
-#		print "x" x 60, "\n" if $prinkter == 1;
-		next if $line !~ /^[0-9]+/;
-		#print $line;
-		chomp $line;	
-		my @fields2 = split(/\t/,$line);
-		my $key2 = ();
-		if ($line =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
-			$key2 = join("\t",$1, $2,  $4, $5);
-		}
-		
-		else {print "seq line $line incompatible\n"; next;}
-#		print "KEY =  : $key2\n" if $prinkter == 1;
-	
-	
-		my @currentcontigstarts=();
-		my @currentcontigends=();
-		my @currentcontigchrs=();
-		my @clusters = ();
-		my @clusterscopy=();
-		if (exists $contigclusters{$key2}){
-			@clusters =  @{$contigclusters{$key2}};
-			@clusterscopy=@clusters;
-			for my $i (0 ... $#tags){
-	#			print "in line $line, trying to hunt for: $tags[$i]\\s([a-zA-Z0-9])+\\s([0-9]+)\\s([0-9]+) \n" if $prinkter == 1;
-				if ($line =~ /$tags[$i]\s([a-zA-Z0-9]+)\s([0-9]+)\s([0-9]+)/){
-	#				print "org = $tags[$i], and chr = $1, start = $2, end =$3 \n" if $prinkter == 1; 
-				my $startkey = $1."_".$2; #print "adding start key for this alignment block: $startkey to species $tags[$i]\n" if $prinkter == 1;
-				my $endkey = $1."_".$3; #print "adding end key for this alignment block: $endkey to species $tags[$i]\n" if $prinkter == 1;
-					$currentcontigchrs[$i]=$1;
-					$currentcontigstarts[$i]=$2;
-					$currentcontigends[$i]=$3;			
-				}
-				else {
-					$currentcontigchrs[$i] = 0;
-	#				print "no microsat clusters for $key2\n" if $prinkter == 1; next;	
-				}
-			}
-		} # print "exists: @{$hasharr[$i]{$key2}}[0]\n"}
-		
-		my @sequences = ();
-		for (0 ... $#tags){
-			my $seq = <BO>;
-	#		print $seq;
-			chomp $seq;
-			push(@sequences , " ".$seq);
-		}
-		
-		next if scalar @currentcontigchrs == 0;
-		
-	#	print "contigchrs= @currentcontigchrs \n" if $prinkter == 1;
-		my %visitedcontigs=();
-	
-		for my $i (0 ... $#tags){
-			#1 check whether an adjacent alignment block exists wrt the coordinates of this species.
-			next if $currentcontigchrs[$i] eq "0"; #1 this means that there are no microsats in this species in this alignment block,
-													#2	so there is no need to worry about proximity to anything in an adjacent block!
-			@clusters=@clusterscopy;
-			#1 BELOW, the following really calculates the distance between the end coordinate of the
-			#2 cluster and the end of the gap-free sequence of each species. This is so that if an
-			#3 adjacent alignment block is found later on, the exact distance between the potentially
-			#4 adjacent microsat clusters can be found here. The exact start coordinate will be used
-			#5 immediately below.
-			my $firstclusterstart = $contigclustersFirstStartOnly{$key2};
-			my $lastclusterend = $contigclustersLastEndOnly{$key2};
-					
-			my $key3 = $currentcontigchrs[$i]."_".($currentcontigstarts[$i]);
-#			print  "check if exists $key3 in  contigends for $i\n" if $prinkter == 1; 
-			
-			if (exists($contigends[$i]{$key3}) && !exists $visitedcontigs{$contigends[$i]{$key3}}){
-				$visitedcontigs{$contigends[$i]{$key3}} = $contigends[$i]{$key3}; #1 this hash keeps track of adjacent contigs that we have already visited, saving computational time and avoiding potential redundancies
-	#			print "just checking the hash visitedcontigs: ",$visitedcontigs{$contigends[$i]{$key3}} ,"\n" if $prinkter == 1;
-	
-				#1 extract coordinates of the last cluster of this found alignment block
-#				print "key of the found alignment block = ", $contigends[$i]{$key3},"\n" if $prinkter == 1;
-	#			print "we are trying to mine: contigclustersAllLastEndLengthOnly_raw: $contigends[$i]{$key3}: $i \n" if $prinkter == 1;
-	#			print "EXISTS\n" if exists $contigclusters{$contigends[$i]{$key3}} && $prinkter == 1;
-	#			print "does NOT EXIST\n" if !exists $contigclusters{$contigends[$i]{$key3}} && $prinkter == 1;
-				my @contigclustersAllFirstStartLengthOnly_raw=@{$contigclustersFirstStartLengthOnly{$key2}};
-				my @contigclustersAllLastEndLengthOnly_raw=@{$contigclustersLastEndLengthOnly{$contigends[$i]{$key3}}};
-				my @contigclustersAllFirstStartLengthOnly=(); my @contigclustersAllLastEndLengthOnly=();
-				
-				for my $val (0 ... $#contigclustersAllFirstStartLengthOnly_raw){
-	#				print "val = $val\n" if $prinkter == 1;
-					if (defined $contigclustersAllFirstStartLengthOnly_raw[$val]){
-						push(@contigclustersAllFirstStartLengthOnly, $contigclustersAllFirstStartLengthOnly_raw[$val]) if $contigclustersAllFirstStartLengthOnly_raw[$val] =~ /[0-9]+/;
-					}
-				}
-	#			print "-----\n" if $prinkter == 1;
-				for my $val (0 ... $#contigclustersAllLastEndLengthOnly_raw){
-	#				print "val = $val\n" if $prinkter == 1;
-					if (defined $contigclustersAllLastEndLengthOnly_raw[$val]){
-						push(@contigclustersAllLastEndLengthOnly, $contigclustersAllLastEndLengthOnly_raw[$val]) if  $contigclustersAllLastEndLengthOnly_raw[$val] =~ /[0-9]+/;
-					}
-				}
-				
-				
-	#			print "our two arrays are: starts = <@contigclustersAllFirstStartLengthOnly> ......... and ends = <@contigclustersAllLastEndLengthOnly>\n" if $prinkter == 1;
-	#			print "the last cluster's end in that one is: ",smallest_number(@contigclustersAllFirstStartLengthOnly) + smallest_number(@contigclustersAllLastEndLengthOnly)," = ", smallest_number(@contigclustersAllFirstStartLengthOnly)," + ",smallest_number(@contigclustersAllLastEndLengthOnly),"\n" if $prinkter == 1; 
-				
-	#			if ($contigclustersFirstStartLengthOnly{$key2}[$i] + $contigclustersLastEndLengthOnly{$contigends[$i]{$key3}}[$i] < 50){
-				if (smallest_number(@contigclustersAllFirstStartLengthOnly) + smallest_number(@contigclustersAllLastEndLengthOnly) < $CLUSTER_DIST){
-					my @regurgitate = @{$contigclusters{$contigends[$i]{$key3}}}; 
-					$regurgitate[$#regurgitate]=~s/\n//g;
-					$regurgitate[$#regurgitate] = $regurgitate[$#regurgitate]."\t".shift(@clusters);
-					delete $contigclusters{$contigends[$i]{$key3}};
-					$contigclusters{$contigends[$i]{$key3}}=[ @regurgitate ];
-					delete $contigclusters{$key2};
-					$contigclusters{$key2}= [ @clusters ] if scalar(@clusters) >0;
-					$contigclusters{$key2}= [ "" ] if scalar(@clusters) ==0;
-					
-					if (scalar(@clusters) < 1){
-				#		print "$key2-> $clusteringpaths{$key2} in the loners\n" if exists $foundkeys{$key2};
-						$clusteringpaths{$key2}=$contigends[$i]{$key3};	
-						$clusteringpathsRev{$contigends[$i]{$key3}}=$key2;				
-						print OUTP "$contigends[$i]{$key3} -> $clusteringpathsRev{$contigends[$i]{$key3}}\n";
-	#					print " clusteringpaths $key2 -> $contigends[$i]{$key3}\n";
-						$founkeys_enteredcount-- if exists $foundkeys{$key2};
-						$existing_removed++  if exists $foundkeys{$key2};
-#						print "$key2->",@{$contigclusters{$key2}},"->>$foundkeys{$key2}\n" if exists $foundkeys{$key2} && $prinkter == 1;
-						delete $foundkeys{$key2} if exists $foundkeys{$key2};
-						$complete_transfered++;
-					}
-					else{
-						print OUTP "$key2-> 0 not so lonely\n"  if !exists $clusteringpathsRev{$key2};
-						$clusteringpaths{$key2}=$key2  if !exists $clusteringpaths{$key2};
-						$clusteringpathsRev{$key2}=0 if !exists $clusteringpathsRev{$key2};
-						
-						$founkeys_enteredcount++ if !exists $foundkeys{$key2};
-						$foundkeys{$key2} = $key2 if !exists $foundkeys{$key2};
-	#					print "adding foundkeys entry $foundkeys{$key2}\n";
-						$transfered++;				
-					}
-					#$contigclusters{$key2}=[ @contigcluster ];
-				}
-			}
-			else{
-	#					print "adjacent block with species $tags[$i] does not exist\n" if $prinkter == 1; 
-						$plain_transfered++;
-						print OUTP "$key2-> 0 , going straight\n"  if exists $contigclusters{$key2} && !exists $clusteringpathsRev{$key2};
-						$clusteringpaths{$key2}=$key2 if exists $contigclusters{$key2} && !exists $clusteringpaths{$key2};
-						$clusteringpathsRev{$key2}=0  if exists $contigclusters{$key2} && !exists $clusteringpathsRev{$key2};
-						$founkeys_enteredcount++ if !exists $foundkeys{$key2} && exists $contigclusters{$key2};
-						$foundkeys{$key2} = $key2 if !exists $foundkeys{$key2} && exists $contigclusters{$key2};
-	#					print "adding foundkeys entry $foundkeys{$key2}\n";
-				
-			}
-			$totalcount++;
-	
-		}
-		
-		
-	}
-	close BO;
-	#close (NORTH);
-	#///////////////////////////////////////////////////////////////////////////////////////
-	#///////////////////////////////////////////////////////////////////////////////////////
-	#///////////////////////////////////////////////////////////////////////////////////////
-	#///////////////////////////////////////////////////////////////////////////////////////
-	
-	my $founkeys_count=();
-	my $nopath_count=();
-	my $pathed_count=0; 
-	foreach my $key2 (keys %foundkeys){
-		#print "x" x 60, "\n";
-#		print "x" if $dotcounter % 100 ==0;
-#		print "\n" if $dotcounter % 5000 ==0;
-		$founkeys_count++;
-		my $key = $key2;
-#		print "$key2 -> $clusteringpaths{$key2}\n" if $prinkter == 1;
-		if ($clusteringpaths{$key} eq $key){
-#			print "printing hit the alignment block immediately... no path needed\n" if $prinkter == 1;
-			$nopath_count++;
-			delete $foundkeys{$key2};
-			print ORTH join ("\n",@{$contigclusters{$key2}}),"\n";
-		}
-		else{
-			my @pool=();
-			my $key3=();
-			$pathed_count++;
-#			print "going reverse... clusteringpathsRev, $key = $clusteringpathsRev{$key}\n" if exists $clusteringpathsRev{$key} && $prinkter == 1;
-#			print "going reverse... clusteringpathsRev  $key does not exist\n" if !exists $clusteringpathsRev{$key} && $prinkter == 1;
-			if ($clusteringpathsRev{$key} eq "0") {
- 				next;
-			}
-			else{
-				my $yek3 = $clusteringpathsRev{$key};
-				my $yek = $key;
-#				print "caught in the middle of a path, now goin down from $yek to $yek3, which is $clusteringpathsRev{$key} \n" if $prinkter == 1;
-				while ($yek3 ne "0"){
-#					print "$yek->$yek3," if $prinkter == 1;
-					$yek = $yek3;
-					$yek3 = $clusteringpathsRev{$yek};
-				}
-#				print "\nfinally reached the end of path: $yek3, and the next in line is $yek, and its up-route is  $clusteringpaths{$yek}\n" if $prinkter == 1; 
-				$key3 = $clusteringpaths{$yek};
-				$key = $yek;
-			}
-		
-#			print "now that we are at bottom of the path, lets start climbing up again\n" if $prinkter == 1;
-			
-			while($key ne $key3){
-#				print "KEEY $key->$key3\n" if $prinkter == 1;
-#				print "our contigcluster = @{$contigclusters{$key}}\n----------\n" if $prinkter == 1;
-				
-				if (scalar(@{$contigclusters{$key}}) > 0) {push @pool, @{$contigclusters{$key}}; print "now pool = @pool\n" if $prinkter == 1;}
-				delete $foundkeys{$key3};
-				$key=$key3;
-				$key3=$clusteringpaths{$key};
-			}
-#			print "\nfinally, adding the first element of path: @{$contigclusters{$key}}\n AND printing the contents:\n" if $prinkter == 1;
-			my @firstcontig= @{$contigclusters{$key}};
-			delete $foundkeys{$key2} if exists $foundkeys{$key2} ;
-			delete $foundkeys{$key} if exists $foundkeys{$key};
-	
-			unshift @pool, pop @firstcontig;
-#			print join("\t",@pool),"\n" if $prinkter == 1;
-			print ORTH join ("\n",@firstcontig),"\n" if scalar(@firstcontig) > 0;
-			print ORTH join ("\t",@pool),"\n";		
-		#	join();
-		}
-	
-	}
-	#close (NORTH);
-#	print "founkeys_entered =$founkeys_enteredcount, plain_transfered=$plain_transfered,existing_removed=$existing_removed,founkeys_count =$founkeys_count, nopath_count =$nopath_count, transfered = $transfered, complete_transfered = $complete_transfered, totalcount = $totalcount, pathed=$pathed_count\n" if $prinkter == 1;
-	close (BO);
-	close (ORTH);
-	close (OUTP);
-	return 1;
-	
-}
-sub stringPainter{
-	my @string  = split(/_/,$_[0]);
-#	print $_[0], " <- in stringPainter\n";
-#	print $_[1], " <- in clusters\n";
-	
-	my @clusters = split(/,/, $_[1]);
-	for my $i (0 ... $#clusters){
-		my $cluster = $clusters[$i];
-#		print "cluster = $cluster\n";
-		my @parts = split(/\./,$cluster);
-		my @cord = split(/:|-/,shift(@parts));
-		my $minstart = $cord[1];
-		my $maxend = $cord[2];
-#		print "minstart = $minstart , maxend = $maxend\n";
-		
-		for my $j (0 ... $#parts){
-#			print "oing thri $parts[$j]\n";
-			my @cord = split(/:|-/,$parts[$j]);
-			$minstart = $cord[1] if $cord[1] < $minstart;
-			$maxend = $cord[2] if $cord[2] > $maxend;
-		}
-#		print "minstart = $minstart , maxend = $maxend\n";
-		for my $pos ($minstart ... $maxend){ $string[$pos] = $string[$pos].",".$cluster;}
-				
-		
-	}
-#	print "@string <-done from function stringPainter\n";
-	return join("_",@string);
-}
-
-sub findClusters{
-	my $continue = 0;
-	my @mapped_clusters = ();	
-	my $clusterdist = $_[1];
-	my $previous = 'x';
-	my @localcluster = ();
-	my $cluster_starts = ();
-	my $cluster_ends = ();
-	my $localcluster_start = ();
-	my $localcluster_end = ();
-	my @record_cluster = ();
-	my @string = split(/\!/, $_[0]);
-	my $zerolength=0;
-	
-	for my $pos_pos (1 ... $#string){
-			my $pos = $string[$pos_pos];
-#			print $pos, "\n";
-			if ($continue == 0 && $pos eq "x") {next;}
-			
-			if ($continue == 1 && $pos eq "x" && $zerolength <= $clusterdist){ 
-				if ($zerolength == 0) {$localcluster_end = $pos_pos-1};
-				$zerolength++; 
-				$continue = 1; 
-			}
-
-			if ($continue == 1 && $pos eq "x" && $zerolength > $clusterdist) { 
-				$zerolength = 0; 
-				$continue = 0; 
-				my %seen;
-				my @uniqed = grep !$seen{$_}++, @localcluster;
-#				print "caught cluster : @uniqed \n";
-				push(@mapped_clusters, [@uniqed]);
-#				print "clustered:\n@uniqed\n";
-				@localcluster = ();
-				@record_cluster = ();
-				
-			}
-			
-			if ($pos ne "x"){
-				$zerolength = 0;
-				$continue = 1;
-				$pos =~ s/x,//g;
-				my @entries = split(/,/,$pos);
-				$localcluster_end = 0;
-				$localcluster_start = 0;
-				push(@record_cluster,$pos);
-			
-				if ($continue == 0){
-					@localcluster = ();
-					@localcluster = (@localcluster, @entries);
-					$localcluster_start = $pos_pos;
-				}
-			
-				if ($continue == 1 ) {
-					@localcluster = (@localcluster, @entries);
-				}
-			}
-	}
-	
-	if (scalar(@localcluster) > 0){
-		my %seen;
-		my @uniqed = grep !$seen{$_}++, @localcluster;
-	#	print "caught cluster : @uniqed \n";
-		push(@mapped_clusters, [@uniqed]);
-	#	print "clustered:\n@uniqed\n";
-		@localcluster = ();
-		@record_cluster = ();
-	}
-
-	my @returner = ();
-	
-	foreach my $clust (@mapped_clusters){
-		my @localclust = @$clust;
-		my @result = ();
-		foreach my $clustparts (@localclust){
-			push(@result,$clustparts);
-		}
-		push(@returner , join(".",@result));
-	}	
-#	print "returnig: ", join(",",@returner), "\n";
-	return join(",",@returner);
-}
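-# a sketch of findClusters' input/output (illustrative values, not from the original data):
-# each "!"-separated position is either "x" (no microsat) or "x" plus ",i:start-end" labels;
-# e.g. findClusters("x!x,0:5-8!x,0:5-8!x", 2) returns "0:5-8" -- labelled positions separated
-# by at most 2 unlabelled columns are merged, members of a cluster are joined by "." and
-# distinct clusters are joined by ",".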
-#xxxxxxxxxxxxxx multiSpecies_orthFinder4 xxxxxxxxxxxxxx  multiSpecies_orthFinder4 xxxxxxxxxxxxxx  multiSpecies_orthFinder4 xxxxxxxxxxxxxx 
--- a/tools/regVariation/multispecies_MicrosatDataGenerator_interrupted_GALAXY.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-<tool id="multispecies_orthologous_microsats" name="Extract orthologous microsatellites" version="1.0.1">
-  <description> for multiple (>2) species alignments</description>
-  <command interpreter="perl">
-    multispecies_MicrosatDataGenerator_interrupted_GALAXY.pl   	
-    $input1 
-  	$out_file1 
-  	$thresholds 
-  	$species 
-  	"$treedefinition"
-  	$separation 
-	
-  </command>
-  <inputs>
-    <page>
-        <param format="maf" name="input1" type="data" label="Select MAF alignments"/>
-       	<param name="separation" size="10" type="integer" value="10" label="Minimum base pair distance between adjacent microsatellite blocks"
-    	help="A value of 10 means: Adjacent microsatellites separated by less than 10 base pairs will be excluded from the output."/>
-    	<param name="thresholds" size="15" type="text" value="9,10,12,12" label="Minimum Threshold for the number of repeats for microsatellites"
-    	help="A value of 9,10,12,12 means: All monos having fewer than 9 repeats, dis having fewer than 5 repeats, tris having fewer than 4 repeats, tetras having fewer than 3 repeats will be excluded from the output."/>
-        <param name="species" type="select" label="Select species" display="checkboxes" multiple="true" help="NOTE: Currently users are requested to select one of these three combinations: hg18-panTro2-ponAbe2, hg18-panTro2-ponAbe2-rheMac2 or hg18-panTro2-ponAbe2-rheMac2-calJac1">
-      	<options>
-        	<filter type="data_meta" ref="input1" key="species" />
-      	</options>
-    	</param>
-    	<param name="treedefinition" size="200" type="text" value = "((((hg18,panTro2),ponAbe2),rheMac2),calJac1)" label="Tree definition of all species above whether or not selected for microsatellite extraction" 
-    	help="For example: ((((hg18,panTro2),ponAbe2),rheMac2),calJac1)"/>
-    </page>
-  </inputs>
-  <outputs>
-    <data format="txt" name="out_file1" metadata_source="input1"/>
-  </outputs>
-  <requirements>
-     <requirement type="binary">sputnik</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="input1" value="chr22_5sp.maf"/>
-      <param name="thresholds" value="9,10,12,12"/>
-      <param name="species" value="hg18,panTro2,ponAbe2,rheMac2,calJac1"/>
-      <param name="treedefinition" value="((((hg18, panTro2), ponAbe2), rheMac2), calJac1)"/>
-      <param name="separation" value="10"/>
-      <output name="out_file1" file="chr22_5sp.microraw.tabular"/>
-    </test>
-  </tests>
-
- <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This tool finds orthologous microsatellite blocks between aligned species.
-  
-</help>  
-
-
-</tool>
--- a/tools/regVariation/parseMAF_smallIndels.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,698 +0,0 @@
-#!/usr/bin/perl -w
-# a program to get indels
-# input is a MAF format 3-way alignment file
-# indels are called from 3-way blocks only at this time
-# seq2, seq3, etc. coordinates are translated to + if the alignment orientation is reverse complement
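-#
-# example invocation (hypothetical file names):
-#   perl parseMAF_smallIndels.pl chr22_3way.maf chr22_smallIndels_summary.out rheMac2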
- 
-use strict;
-use warnings;
-
-# declare and initialize variables
-my $fh; # variable to store filehandle
-my $record;
-my $offset;
-my $library = $ARGV[0]; 
-my $count = 0;
-my $count2 = 0;
-my $count3 = 0;
-my $count4 = 0;
-my $start1 = my $start2 = my $start3 = my $start4 = my $start5 = my $start6 = 0;
-my $orient = "";
-my $outgroup = $ARGV[2];
-my $ingroup1 = my $ingroup2 = "";
-my $count_seq1insert = my $count_seq1delete = 0;
-my $count_seq2insert = my $count_seq2delete = 0;
-my $count_seq3insert = my $count_seq3delete = 0;
-my @seq1_insert_lengths = my @seq1_delete_lengths = ();
-my @seq2_insert_lengths = my @seq2_delete_lengths = ();
-my @seq3_insert_lengths = my @seq3_delete_lengths = ();
-my @seq1_insert =  my @seq1_delete =  my @seq2_insert =  my @seq2_delete =  my @seq3_insert =  my @seq3_delete = ();
-my @seq1_insert_startOnly = my @seq1_delete_startOnly = my @seq2_insert_startOnly = my @seq2_delete_startOnly = ();
-my @seq3_insert_startOnly = my @seq3_delete_startOnly = ();
-my @indels = (); 
-
-# check to make sure correct files
-my $usage = "usage: parseMAF_smallIndels.pl [MAF.in] [small_Indels_summary.out] [outgroup]\n";
-die $usage unless @ARGV == 3;
-
-# perform some standard subroutines 
-$fh = open_file($library);
-
-$offset = tell($fh);
-
-#my $ofile = $ARGV[2];
-#unless (open(OFILE, ">$ofile")){
-#	 print "Cannot open output file \"$ofile\"\n\n";
-#	 exit;
-#}
-
-my $ofile2 = $ARGV[1];
-unless (open(OFILE2, ">$ofile2")){
-         print "Cannot open output file \"$ofile2\"\n\n";
-         exit;
-}
-
-
-# header line for output files
-#print OFILE "# small indel events, parsed from MAF 3-way alignment file, coords are translated from (-) to (+) if necessary\n";
-#print OFILE "#align\tingroup1\tingroup1_coord\tingroup1_orient\tingroup2\tingroup2_coord\tingroup2_orient\toutgroup\toutgroup_coord\toutgroup_orient\tindel_type\n";
-
-#print OFILE2 "# small indels summary, parsed from MAF 3-way alignment file, coords are translated from (-) to (+) if necessary\n";
-print OFILE2 "#block\tindel_type\tindel_length\tingroup1\tingroup1_start\tingroup1_end\tingroup1_alignSize\tingroup1_orient\tingroup2\tingroup2_start\tingroup2_end\tingroup2_alignSize\tingroup2_orient\toutgroup\toutgroup_start\toutgroup_end\toutgroup_alignSize\toutgroup_orient\n";
-
-# main body of program
-while ($record = get_next_record($fh) ){
-	if ($record=~ m/\s*##maf(.*)\s*# maf/s){
-		next;
-	}
-
-	my @sequences = get_sequences_within_block($record);
-	my @seq_info = get_indels_within_block(@sequences);
-	get_indels_lengths(@seq_info);
-	
-	$offset = tell($fh);
-        $count++;
-        
-}
-
-get_starts_only(@seq1_insert);
-get_starts_only(@seq1_delete);
-get_starts_only(@seq2_insert);
-get_starts_only(@seq2_delete);
-get_starts_only(@seq3_insert);
-get_starts_only(@seq3_delete);
-
-# print some things to keep track of progress
-#print "# $library\n";
-#print "# number of records = $count\n";
-#print "# number of sequence \"s\" lines = $count2\n";
-if ($count3 != 0){
-	print "Skipped $count3 blocks with only 2 seqs;\n";
-}
-#print "# number of records with only h-m = $count4\n\n";
-
-print "Ingroup1 = $ingroup1; Ingroup2 = $ingroup2; Outgroup = $outgroup;\n";
-print "# of ingroup1 inserts = $count_seq1insert;\n";
-print "# of ingroup1 deletes = $count_seq1delete;\n";
-print "# of ingroup2 inserts = $count_seq2insert;\n";
-print "# of ingroup2 deletes = $count_seq2delete;\n";
-print "# of outgroup3 inserts = $count_seq3insert;\n";
-print "# of outgroup3 deletes = $count_seq3delete\n";
-
-
-#close OFILE;
-
-if ($count == $count3){
-	print STDERR "Skipped all blocks since none of them contain 3-way alignments.\n";
-  	exit -1;
-}
-
-###################SUBROUTINES#####################################
-
-# subroutine to open file
-sub open_file {
-        my($filename) = @_;
-        my $fh;
-
-        unless (open($fh, $filename)){
-                print "Cannot open file $filename\n";
-                exit;
-        }
-        return $fh;
-}
-
-# get next record
-sub get_next_record {
-        my ($fh) = @_;
-        my ($offset);
-        my ($record) = "";
-        my ($save_input_separator) = $/;
-	
-	$/ = "a score";
-
-        $record = <$fh>;
-
-        $/ = $save_input_separator;
-        return $record;
-}
-
-# get header info
-sub get_sequences_within_block{
-	my (@alignment) = @_;
-	my @lines = ();
-	
-	my @sequences = ();
-		
-	@lines = split ("\n", $record);
-	foreach (@lines){
-		chomp($_);
-		if (m/^\s*$/){
-			next;
-		}
-		elsif (m/^\s*=(\d+\.*\d*)/){
-
-		}elsif (m/^\s*s(.*)$/){
-			$count2++;
-			
-			push (@sequences,$_);
-		}
-	}
-	return @sequences;
-}
-	
-sub get_indels_within_block{
-	my (@sequences) = @_;	
-	my $line1 = my $line2 = my $line3 = "";
-    my @line1 = my @line2 = my @line3 = ();
-	my $score = 0;
-        my $start1 = my $align_length1 = my $end1 = my $seq_length1 = 0;
-        my $start2 = my $align_length2 = my $end2 = my $seq_length2 = 0;
-        my $start3 = my $align_length3 = my $end3 = my $seq_length3 = 0;
-        my $seq1 = my $orient1 = "";
-        my $seq2 = my $orient2 = "";
-        my $seq3 = my $orient3 = "";
-        my $start1_plus = my $end1_plus = 0;
-        my $start2_plus = my $end2_plus = 0;
-        my $start3_plus = my $end3_plus = 0;
-   	my @test = ();
-        my $test = "";
-        my $header = "";
-        my @header = ();
-        my $sequence1 = my $sequence2 = my $sequence3 ="";
-	my @array_return = ();	
-	my $test1 = 0;
-	my $line1_stat = my $line2_stat = my $line3_stat = "";
-    
-	# process 3-way blocks only
-	if (scalar(@sequences) == 3){
-		$line1 = $sequences[0]; 
-		chomp ($line1);
-		$line2 = $sequences[1]; 
-		chomp ($line2);
-		$line3 = $sequences[2];	
-		chomp ($line3);
-		# check the order of sequences and assign them uniformly to ingroup1, ingroup2 and outgroup (e.g. seq1 = human, seq2 = chimp, seq3 = macaque)
-		if ($line1 =~ m/$outgroup/){
-			$line1_stat = "out";
-			$line2=~ s/^\s*//;
-            $line2 =~ s/\s+/\t/g;
-            @line2 = split(/\t/, $line2);
-			if (($ingroup1 eq "") || ($line2[1] =~ m/$ingroup1/)){
-			 $line2_stat = "in1";
-			 $line3_stat = "in2";
-			 }
-			 else{
-             $line3_stat = "in1";
-             $line2_stat = "in2";
-             }
-			}
-		elsif ($line2 =~ m/$outgroup/){
-			$line2_stat = "out";
-			$line1=~ s/^\s*//;
-            $line1 =~ s/\s+/\t/g;
-            @line1 = split(/\t/, $line1);
-            if (($ingroup1 eq "") || ($line1[1] =~ m/$ingroup1/)){
-             $line1_stat = "in1";
-             $line3_stat = "in2";
-             }
-             else{
-             $line3_stat = "in1";
-             $line1_stat = "in2";
-             }
-            }
-		elsif ($line3 =~ m/$outgroup/){
-			$line3_stat = "out";
-			$line1=~ s/^\s*//;
-            $line1 =~ s/\s+/\t/g;
-            @line1 = split(/\t/, $line1);
-            if (($ingroup1 eq "") || ($line1[1] =~ m/$ingroup1/)){
-             $line1_stat = "in1";
-             $line2_stat = "in2";
-             }
-             else{
-             $line2_stat = "in1";
-             $line1_stat = "in2";
-             }
-			}
-
-		#print "# l1 = $line1_stat\n";
-		#print "# l2 = $line2_stat\n";
-		#print "# l3 = $line3_stat\n";
-		my $linei1 = my $linei2 = my $lineo = "";
-		my @linei1 = my @linei2 = my @lineo = ();
-		
-        if ($line1_stat eq "out"){
-            $lineo = $line1;
-        }
-        elsif ($line1_stat eq "in1"){
-            $linei1 = $line1;
-        }
-        else{
-            $linei2 = $line1;
-        }
-        
-        if ($line2_stat eq "out"){
-            $lineo = $line2;
-        }
-        elsif ($line2_stat eq "in1"){
-            $linei1 = $line2;
-        }
-        else{
-            $linei2 = $line2;
-        }
-        
-        if ($line3_stat eq "out"){
-            $lineo = $line3;
-        }
-        elsif ($line3_stat eq "in1"){
-            $linei1 = $line3;
-        }
-        else{
-            $linei2 = $line3;
-        }
-        
-        $linei1=~ s/^\s*//;
-        $linei1 =~ s/\s+/\t/g;
-        @linei1 = split(/\t/, $linei1);
-        $end1 =($linei1[2]+$linei1[3]-1);
-        $seq1 = $linei1[1].":".$linei1[3];
-        $ingroup1 = (split(/\./, $seq1))[0];
-        $start1 = $linei1[2];
-        $align_length1 = $linei1[3];
-        $orient1 = $linei1[4];
-        $seq_length1 = $linei1[5];
-        $sequence1 = $linei1[6];
-        $test1 = length($sequence1);
-        my $total_length1 = $test1+$start1;
-        my @array1 = ($start1,$end1,$orient1,$seq_length1);
-        ($start1_plus, $end1_plus) =  convert_coords(@array1);
-           
-        $linei2=~ s/^\s*//;
-        $linei2 =~ s/\s+/\t/g;
-        @linei2 = split(/\t/, $linei2);
-        $end2 =($linei2[2]+$linei2[3]-1);               
-        $seq2 = $linei2[1].":".$linei2[3];
-        $ingroup2 = (split(/\./, $seq2))[0];
-        $start2 = $linei2[2];
-        $align_length2 = $linei2[3];
-        $orient2 = $linei2[4];
-        $seq_length2 = $linei2[5];
-        $sequence2 = $linei2[6];
-        my $test2 = length($sequence2);
-        my $total_length2 = $test2+$start2;
-        my @array2 = ($start2,$end2,$orient2,$seq_length2);
-        ($start2_plus, $end2_plus) = convert_coords(@array2); 
-            
-        $lineo=~ s/^\s*//;
-        $lineo =~ s/\s+/\t/g;
-        @lineo = split(/\t/, $lineo);
-        $end3 =($lineo[2]+$lineo[3]-1);
-        $seq3 = $lineo[1].":".$lineo[3];
-        $start3 = $lineo[2];
-        $align_length3 = $lineo[3];
-        $orient3 = $lineo[4];
-        $seq_length3 = $lineo[5];
-        $sequence3 = $lineo[6];
-        my $test3 = length($sequence3);
-        my $total_length3 = $test3+$start3;
-        my @array3 = ($start3,$end3,$orient3,$seq_length3);
-        ($start3_plus, $end3_plus) = convert_coords(@array3);
-        
-        #print "# l1 = $ingroup1\n";
-		#print "# l2 = $ingroup2\n";
-		#print "# l3 = $outgroup\n";
-	
-		my $ABC = "";
-		my $coord1 = my $coord2 = my $coord3 = 0;
-                $coord1 = $start1_plus;
-                $coord2 = $start2_plus;
-                $coord3 = $start3_plus;
-		
-		for (my $position = 0; $position < $test1; $position++) {
-			my $indelType = "";
-			my $indel_line = "";	
-			# seq1 deletes
-			 if ((substr($sequence1,$position,1) eq "-")
-				&& (substr($sequence2,$position,1) !~ m/[-*\#\$?^@]/)
-				&& (substr($sequence3,$position,1) !~ m/[-*\#\$?^@]/)){
-					$ABC = join("",($ABC,"X"));
-					my @s = split(/:/, $seq1);
-					$indelType = $s[0]."_delete";
-
-					#print OFILE "$count\t$seq1\t$coord1\t$orient1\t$seq2\t$coord2\t$orient2\t$seq3\t$coord3\t$orient3\t$indelType\n";	
-					$indel_line = join("\t",($count,$seq1,$coord1,$orient1,$seq2,$coord2,$orient2,$seq3,$coord3,$orient3,$indelType));
-					push (@indels,$indel_line);
-					push (@seq1_delete,$indel_line);
-		 			$coord2++; $coord3++;
-	       		}	
-			# seq2 deletes
-			elsif ((substr($sequence1,$position,1) !~ m/[-*\#\$?^@]/)
-				&& (substr($sequence2,$position,1) eq "-")
-				&& (substr($sequence3,$position,1) !~ m/[-*\#\$?^@]/)){
-					$ABC = join("",($ABC,"Y"));
-					my @s = split(/:/, $seq2);
-					$indelType = $s[0]."_delete";
-					#print OFILE "$count\t$seq1\t$coord1\t$orient1\t$seq2\t$coord2\t$orient2\t$seq3\t$coord3\t$orient3\t$indelType\n";
-					$indel_line = join("\t",($count,$seq1,$coord1,$orient1,$seq2,$coord2,$orient2,$seq3,$coord3,$orient3,$indelType));
-                                        push (@indels,$indel_line);
-					push (@seq2_delete,$indel_line);
-					$coord1++;
-					$coord3++;
-
-			}
-			# seq1 inserts
-			elsif ((substr($sequence1,$position,1) !~ m/[-*\#\$?^@]/)
-				&& (substr($sequence2,$position,1) eq "-")
-				&& (substr($sequence3,$position,1) eq "-")){
-					$ABC = join("",($ABC,"Z"));
-					my @s = split(/:/, $seq1);
-					$indelType = $s[0]."_insert";
-					#print OFILE "$count\t$seq1\t$coord1\t$orient1\t$seq2\t$coord2\t$orient2\t$seq3\t$coord3\t$orient3\t$indelType\n";
-					$indel_line = join("\t",($count,$seq1,$coord1,$orient1,$seq2,$coord2,$orient2,$seq3,$coord3,$orient3,$indelType));
-					push (@indels,$indel_line);
-					push (@seq1_insert,$indel_line);
-					$coord1++;
-			}
-			# seq2 inserts	
-			elsif ((substr($sequence1,$position,1) eq "-")
-				&& (substr($sequence2,$position,1) !~ m/[-*\#\$?^@]/)
-				&& (substr($sequence3,$position,1) eq "-")){
-					$ABC = join("",($ABC,"W"));
-					my @s = split(/:/, $seq2);
-					$indelType = $s[0]."_insert";
-					#print OFILE "$count\t$seq1\t$coord1\t$orient1\t$seq2\t$coord2\t$orient2\t$seq3\t$coord3\t$orient3\t$indelType\n";
-					$indel_line = join("\t",($count,$seq1,$coord1,$orient1,$seq2,$coord2,$orient2,$seq3,$coord3,$orient3,$indelType));
-					push (@indels,$indel_line);
-					push (@seq2_insert,$indel_line);
-					$coord2++;
-			}
-			# seq3 deletes
-			elsif ((substr($sequence1,$position,1) !~ m/[-*\#\$?^@]/)
-				&& (substr($sequence2,$position,1) !~ m/[-*\#\$?^@]/)
-				&& (substr($sequence3,$position,1) eq "-")){
-					$ABC = join("",($ABC,"S"));
-					my @s = split(/:/, $seq3);
-					$indelType = $s[0]."_delete";
-					#print OFILE "$count\t$seq1\t$coord1\t$orient1\t$seq2\t$coord2\t$orient2\t$seq3\t$coord3\t$orient3\t$indelType\n";
-					$indel_line = join("\t",($count,$seq1,$coord1,$orient1,$seq2,$coord2,$orient2,$seq3,$coord3,$orient3,$indelType));
-					push (@indels,$indel_line);
-					push (@seq3_delete,$indel_line);
-					$coord1++; $coord2++;
-			}
-			# seq3 inserts
-			elsif ((substr($sequence1,$position,1) eq "-")
-				&& (substr($sequence2,$position,1) eq "-")
-				&& (substr($sequence3,$position,1) !~ m/[-*\#\$?^@]/)){
-					$ABC = join("",($ABC,"T"));
-					my @s = split(/:/, $seq3);
-					$indelType = $s[0]."_insert";
-					#print OFILE "$count\t$seq1\t$coord1\t$orient1\t$seq2\t$coord2\t$orient2\t$seq3\t$coord3\t$orient3\t$indelType\n";
-					$indel_line = join("\t",($count,$seq1,$coord1,$orient1,$seq2,$coord2,$orient2,$seq3,$coord3,$orient3,$indelType));
-					push (@indels,$indel_line);
-					push (@seq3_insert,$indel_line);
-					$coord3++;
-			}else{
-				$ABC = join("",($ABC,"N"));
-				$coord1++; $coord2++; $coord3++;
-			}
-
-		}
-		@array_return=($seq1,$seq2,$seq3,$ABC);
-		return (@array_return); 
-
-	}
-	# ignore pairwise cases for now, just count the number of blocks
-	elsif (scalar(@sequences) == 2){
-		my $ABC = "";
-		my $coord1 = my $coord2 = my $coord3 = 0;
-		$count3++;
-
-		$line1 = $sequences[0];
-		$line2 = $sequences[1];
-		chomp ($line1);
-		chomp ($line2);
-
-		if ($line2 !~ m/$ingroup2/){
-		       $count4++;
-		}
-	}
-}
-
-		
-sub get_indels_lengths{
-	my (@array) = @_;
-	if (scalar(@array) == 4){
-		my $seq1 = $array[0]; my $seq2 = $array[1]; my $seq3 = $array[2]; my $ABC = $array[3];
-
-		while ($ABC =~ m/(X+)/g) {
-			push (@seq1_delete_lengths,length($1));
-			$count_seq1delete++;
-		}
-		
-		while ($ABC =~ m/(Y+)/g) {
-			push (@seq2_delete_lengths,length($1));
-			$count_seq2delete++;
-		}
-		while ($ABC =~ m/(S+)/g) {
-			push (@seq3_delete_lengths,length($1));
-			$count_seq3delete++;
-		}
-		while ($ABC =~ m/(Z+)/g) {	
-			push (@seq1_insert_lengths,length($1));
-			$count_seq1insert++;
-		}
-		while ($ABC =~ m/(W+)/g) {
-			push(@seq2_insert_lengths,length($1));
-			$count_seq2insert++;
-		}
-		while ($ABC =~ m/(T+)/g) {
-			push (@seq3_insert_lengths,length($1));
-			$count_seq3insert++;
-		}
-	}
-	elsif (scalar(@array) == 0){
-		return;
-	}
-		
-}
-# convert to coordinates to + strand if align orient = -
-sub convert_coords{
-	my (@array) = @_;
-	my $s = $array[0];
-	my $e = $array[1];
-	my $o = $array[2];
-	my $l = $array[3];
-	my $start_plus = my $end_plus = 0;
-
-	if ($o eq "-"){
-		$start_plus = ($l - $e);
-		$end_plus = ($l - $s);
-	}elsif ($o eq "+"){
-		$start_plus = $s;
-		$end_plus = $e;
-	}
-
-	return ($start_plus, $end_plus);
-}
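-# worked example (illustrative numbers): a "-" strand interval with s = 10, e = 20 in a
-# source sequence of length l = 100 maps to "+" strand coordinates (100-20, 100-10) = (80, 90).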
-
-# get first line only for each event
-sub get_starts_only{
-	my (@test) = @_;
-        my $seq1 = my $seq2 = my $seq3 = my $indelType = my $old_seq1 = my $old_seq2 = my $old_seq3 = my $old_indelType = my $old_line = "";
-        my $coord1 = my $coord2 = my $coord3 = my $old_coord1 = my $old_coord2 = my $old_coord3 = 0;
-
-        my @matches = ();
-        my @seq1_insert = my @seq1_delete = my @seq2_insert = my @seq2_delete = my @seq3_insert = my @seq3_delete = ();
-				
-	
-       	foreach my $line (@test){
-                chomp($line);
-                $line =~ s/^\s*//;
-                $line =~ s/\s+/\t/g;
-				my @line1 = split(/\t/, $line);
-                 $seq1 = $line1[1];
-                 $coord1 = $line1[2];
-                $seq2 = $line1[4];
-                 $coord2 = $line1[5];
-                 $seq3 = $line1[7];
-                 $coord3 = $line1[8];
-                 $indelType = $line1[10];
-                if ($indelType =~ m/$ingroup1/ && $indelType =~ m/insert/){
-           		if ($coord1 != $old_coord1+1 || ($coord2 != $old_coord2 || $coord3 != $old_coord3)){
-	       			$start1++;
-                              	push (@seq1_insert_startOnly,$line);
-                     	}
-	     	}
-                elsif ($indelType =~ m/$ingroup1/ && $indelType =~ m/delete/){
-		        if ($coord1 != $old_coord1 || ($coord2 != $old_coord2+1 || $coord3 != $old_coord3+1)){
-		                $start2++;
-		                push(@seq1_delete_startOnly,$line);
-		        }
-		}
-                elsif ($indelType =~ m/$ingroup2/ && $indelType =~ m/insert/){
-	                if ($coord2 != $old_coord2+1 || ($coord1 != $old_coord1 || $coord3 != $old_coord3)){
-		                $start3++;
-		                push(@seq2_insert_startOnly,$line);
-		        }
-		}
-                elsif ($indelType =~ m/$ingroup2/ && $indelType =~ m/delete/){
-                        if ($coord2 != $old_coord2 || ($coord1 != $old_coord1+1 || $coord3 != $old_coord3+1)){
-                                $start4++;
-                                push(@seq2_delete_startOnly,$line);
-                        }
-                }
-                elsif ($indelType =~ m/$outgroup/ && $indelType =~ m/insert/){
-                        if ($coord3 != $old_coord3+1 || ($coord1 != $old_coord1 || $coord2 != $old_coord2)){
-                                $start5++;
-                                push(@seq3_insert_startOnly,$line);
-                        }
-                }
-                elsif ($indelType =~ m/$outgroup/ && $indelType =~ m/delete/){
-                        if ($coord3 != $old_coord3 || ($coord1 != $old_coord1+1 ||$coord2 != $old_coord2+1)){
-                                $start6++;
-                                push(@seq3_delete_startOnly,$line);
-                        }
-                }
-                 $old_indelType = $indelType;
-                 $old_seq1 = $seq1;
-                 $old_coord1 = $coord1;
-                 $old_seq2 = $seq2;
-                 $old_coord2 = $coord2;
-                 $old_seq3 = $seq3;
-                 $old_coord3 = $coord3;
-                 $old_line = $line;
-        }
-}
-# append lengths to each event start line
-my $counter1; my $counter2; my $counter3; my $counter4; my $counter5; my $counter6; 
-my @final1 = my @final2 = my @final3 = my @final4 = my @final5 = my @final6 = ();
-my $final_line1 = my $final_line2 = my $final_line3 = my $final_line4 = my $final_line5 = my $final_line6 = "";
-
-
-for ($counter1 = 0; $counter1 < @seq3_insert_startOnly; $counter1++){
-	$final_line1 = join("\t",($seq3_insert_startOnly[$counter1],$seq3_insert_lengths[$counter1]));
-	push (@final1,$final_line1);
-}
-
-for ($counter2 = 0; $counter2 < @seq3_delete_startOnly; $counter2++){
-        $final_line2 =  join("\t",($seq3_delete_startOnly[$counter2],$seq3_delete_lengths[$counter2]));
-        push(@final2,$final_line2);
-}
-
-for ($counter3 = 0; $counter3 < @seq2_insert_startOnly; $counter3++){
-    $final_line3 =  join("\t",($seq2_insert_startOnly[$counter3],$seq2_insert_lengths[$counter3]));
-	push(@final3,$final_line3);
-}
-
-for ($counter4 = 0; $counter4 < @seq2_delete_startOnly; $counter4++){
-        $final_line4 =  join("\t",($seq2_delete_startOnly[$counter4],$seq2_delete_lengths[$counter4]));
-        push(@final4,$final_line4);
-}
-		
-for ($counter5 = 0; $counter5 < @seq1_insert_startOnly; $counter5++){
-        $final_line5 =  join("\t",($seq1_insert_startOnly[$counter5],$seq1_insert_lengths[$counter5]));
-        push(@final5,$final_line5);
-}
-
-for ($counter6 = 0; $counter6 < @seq1_delete_startOnly; $counter6++){
-        $final_line6 =  join("\t",($seq1_delete_startOnly[$counter6],$seq1_delete_lengths[$counter6]));
-        push(@final6,$final_line6);
-}       
-
-# format final output
-# for inserts, coords increase over the inserted sequence; the other sequences give coords of the 5' and 3' bases flanking the gap
-# for deletes, coords increase over the other 2 sequences; the deleted one gives coords of the 5' and 3' bases flanking the gap
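-# e.g. (hypothetical coordinates): an ingroup1 insert of length 3 reported at ingroup1 coord 100
-# spans 100..102 in ingroup1, while ingroup2 and the outgroup each report the two bases flanking
-# the gap, (their coord - 1)..(their coord).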
-
-get_final_format(@final5);
-get_final_format(@final6);
-get_final_format(@final3);
-get_final_format(@final4);
-get_final_format(@final1);
-get_final_format(@final2);
-
-sub get_final_format{
-	my (@final) = @_;
-	foreach (@final){
-		my $event_line = $_;
-		my @events = split(/\t/, $event_line);
-		my $event_type = $events[10];
-		my @name_align1 = split(/:/, $events[1]);
-		my @name_align2 = split(/:/, $events[4]);
-		my @name_align3 = split(/:/, $events[7]);
-		my $seq1_event_start = my $seq1_event_end = my $seq2_event_start = my $seq2_event_end = my $seq3_event_start = my $seq3_event_end = 0;
-		my $final_event_line = "";	
-		# seq1_insert
-		if ($event_type =~ m/$ingroup1/ && $event_type =~ m/insert/){
-			# only increase the coord for ingroup1
-			# remember that in the other two sequences, the gap spans (coord - 1) --> coord
-			$seq1_event_start = ($events[2]);
-			$seq1_event_end = ($events[2]+$events[11]-1);
-			$seq2_event_start = ($events[5]-1);
-			$seq2_event_end = ($events[5]);
-			$seq3_event_start = ($events[8]-1);
-			$seq3_event_end = ($events[8]);
-			$final_event_line = join("\t",($events[0],$event_type,$events[11],$name_align1[0],$seq1_event_start,$seq1_event_end,$name_align1[1],$events[3],$name_align2[0],$seq2_event_start,$seq2_event_end,$name_align2[1],$events[6],$name_align3[0],$seq3_event_start,$seq3_event_end,$name_align3[1],$events[9]));
-		}
-		# seq1_delete
-		elsif ($event_type =~ m/$ingroup1/ && $event_type =~ m/delete/){
-			# only increase coords for seq2 and seq3
-			# remember for seq1, the gap spans (coord - 1) --> coord
-			$seq1_event_start = ($events[2]-1);
-			$seq1_event_end = ($events[2]);
-                        $seq2_event_start = ($events[5]);
-                        $seq2_event_end = ($events[5]+$events[11]-1);
-                        $seq3_event_start = ($events[8]);
-                        $seq3_event_end = ($events[8]+$events[11]-1);
-			$final_event_line = join("\t",($events[0],$event_type,$events[11],$name_align1[0],$seq1_event_start,$seq1_event_end,$name_align1[1],$events[3],$name_align2[0],$seq2_event_start,$seq2_event_end,$name_align2[1],$events[6],$name_align3[0],$seq3_event_start,$seq3_event_end,$name_align3[1],$events[9]));
-		}
-		# seq2_insert
-		elsif ($event_type =~ m/$ingroup2/ && $event_type =~ m/insert/){	
-			# only increase coords for seq2 
-			# remember that in the other two sequences, the gap spans (coord - 1) --> coord
-                        $seq1_event_start = ($events[2]-1);
-                        $seq1_event_end = ($events[2]);
-			$seq2_event_start = ($events[5]);
-                        $seq2_event_end = ($events[5]+$events[11]-1);
-                        $seq3_event_start = ($events[8]-1);
-			$seq3_event_end = ($events[8]);			
-			$final_event_line = join("\t",($events[0],$event_type,$events[11],$name_align1[0],$seq1_event_start,$seq1_event_end,$name_align1[1],$events[3],$name_align2[0],$seq2_event_start,$seq2_event_end,$name_align2[1],$events[6],$name_align3[0],$seq3_event_start,$seq3_event_end,$name_align3[1],$events[9]));
-		}
-		# seq2_delete
-		elsif ($event_type =~ m/$ingroup2/ && $event_type =~ m/delete/){
-			# only increase coords for seq1 and seq3
-			# remember for seq2, the gap spans (coord - 1) --> coord
-                        $seq1_event_start = ($events[2]);
-			$seq1_event_end = ($events[2]+$events[11]-1);	
-                        $seq2_event_start = ($events[5]-1);
-	                $seq2_event_end = ($events[5]);
-                        $seq3_event_start = ($events[8]);
-                        $seq3_event_end = ($events[8]+$events[11]-1);
-			$final_event_line = join("\t",($events[0],$event_type,$events[11],$name_align1[0],$seq1_event_start,$seq1_event_end,$name_align1[1],$events[3],$name_align2[0],$seq2_event_start,$seq2_event_end,$name_align2[1],$events[6],$name_align3[0],$seq3_event_start,$seq3_event_end,$name_align3[1],$events[9]));
-		}	
-		# start testing w/seq3_insert
-		elsif ($event_type =~ m/$outgroup/ && $event_type =~ m/insert/){
-			# only increase the coord for the outgroup
-			# remember that in the other two sequences, the gap spans (coord - 1) --> coord
-			$seq1_event_start = ($events[2]-1);
-			$seq1_event_end = ($events[2]);
-			$seq2_event_start = ($events[5]-1);
-			$seq2_event_end = ($events[5]);
-			$seq3_event_start = ($events[8]);
-			$seq3_event_end = ($events[8]+$events[11]-1);
-			$final_event_line = join("\t",($events[0],$event_type,$events[11],$name_align1[0],$seq1_event_start,$seq1_event_end,$name_align1[1],$events[3],$name_align2[0],$seq2_event_start,$seq2_event_end,$name_align2[1],$events[6],$name_align3[0],$seq3_event_start,$seq3_event_end,$name_align3[1],$events[9]));
-		}
-		# seq3_delete
-		elsif ($event_type =~ m/$outgroup/ && $event_type =~ m/delete/){
-			# only increase coords for seq1 and seq2
-			# remember for seq3, the gap spans (coord - 1) --> coord
-			$seq1_event_start = ($events[2]);
-			$seq1_event_end = ($events[2]+$events[11]-1);
-			$seq2_event_start = ($events[5]);
-			$seq2_event_end = ($events[5]+$events[11]-1);
-			$seq3_event_start = ($events[8]-1);
-			$seq3_event_end = ($events[8]);
-			$final_event_line = join("\t",($events[0],$event_type,$events[11],$name_align1[0],$seq1_event_start,$seq1_event_end,$name_align1[1],$events[3],$name_align2[0],$seq2_event_start,$seq2_event_end,$name_align2[1],$events[6],$name_align3[0],$seq3_event_start,$seq3_event_end,$name_align3[1],$events[9]));
-
-		}
-		
-		print OFILE2 "$final_event_line\n";
-	}
-}
-close OFILE2;
--- a/tools/regVariation/quality_filter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,245 +0,0 @@
-#!/usr/bin/env python
-#Guruprasad Ananda
-"""
-Filter based on nucleotide quality (PHRED score).
-
-usage: %prog input out_file primary_species mask_species qual_cutoff mask_char mask_region mask_region_length loc_file
-"""
-
-
-from __future__ import division
-from galaxy import eggs
-import pkg_resources 
-pkg_resources.require( "bx-python" )
-pkg_resources.require( "lrucache" )
-try:
-    pkg_resources.require("numpy")
-except:
-    pass
-
-import psyco_full
-import sys
-import os, os.path
-from UserDict import DictMixin
-from bx.binned_array import BinnedArray, FileBinnedArray
-from bx.bitset import *
-from bx.bitset_builders import *
-from fpconst import isNaN
-from bx.cookbook import doc_optparse
-from galaxy.tools.exception_handling import *
-import bx.align.maf
-
-class FileBinnedArrayDir( DictMixin ):
-    """
-    Adapter that makes a directory of FileBinnedArray files look like
-    a regular dict of BinnedArray objects. 
-    """
-    def __init__( self, dir ):
-        self.dir = dir
-        self.cache = dict()
-    def __getitem__( self, key ):
-        value = None
-        if key in self.cache:
-            value = self.cache[key]
-        else:
-            fname = os.path.join( self.dir, "%s.qa.bqv" % key )
-            if os.path.exists( fname ):
-                value = FileBinnedArray( open( fname ) )
-                self.cache[key] = value
-        if value is None:
-            raise KeyError( "File does not exist: " + fname )
-        return value
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-def load_scores_ba_dir( dir ):
-    """
-    Return a dict-like object (keyed by chromosome) that returns 
-    FileBinnedArray objects created from "key.ba" files in `dir`
-    """
-    return FileBinnedArrayDir( dir )
-
-def bitwise_and ( string1, string2, maskch ):
-    result=[]
-    for i,ch in enumerate(string1):
-        try:
-            ch = int(ch)
-        except:
-            pass
-        if string2[i] == '-':
-            ch = 1
-        if ch and string2[i]:
-            result.append(string2[i])
-        else:
-            result.append(maskch)
-    return ''.join(result)
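-# e.g. bitwise_and('101', 'AC-', '#') -> 'A#-': positions whose status bit is 0 are masked
-# with the mask character, while alignment gaps ('-') are always kept.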
-
-def main():   
-    # Parsing Command Line here
-    options, args = doc_optparse.parse( __doc__ )
-    
-    try:
-        #chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
-        inp_file, out_file, pri_species, mask_species, qual_cutoff, mask_chr, mask_region, mask_length, loc_file = args
-        qual_cutoff = int(qual_cutoff)
-        mask_chr = int(mask_chr)
-        mask_region = int(mask_region)
-        if mask_region != 3:
-            mask_length = int(mask_length)
-        else:
-            mask_length_r = int(mask_length.split(',')[0])
-            mask_length_l = int(mask_length.split(',')[1])
-    except:
-        stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )
-    
-    if pri_species == 'None':
-        stop_err( "No primary species selected, try again by selecting at least one primary species." )
-    if mask_species == 'None':
-        stop_err( "No mask species selected, try again by selecting at least one species to mask." )
-
-    mask_chr_count = 0
-    mask_chr_dict = {0:'#', 1:'$', 2:'^', 3:'*', 4:'?', 5:'N'}
-    mask_reg_dict = {0:'Current pos', 1:'Current+Downstream', 2:'Current+Upstream', 3:'Current+Both sides'}
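-    # e.g. mask_region = 1 with mask_length = 2 masks the low-quality base plus the next
-    # two downstream bases (see the per-position loop over each alignment block below).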
-
-    #ensure dbkey is present in the twobit loc file
-    filepath = None
-    try:
-        pspecies_all = pri_species.split(',')
-        pspecies_all2 = pri_species.split(',')
-        pspecies = []
-        filepaths = []
-        for line in open(loc_file):
-            if pspecies_all2 == []:    
-                break
-            if line[0:1] == "#":
-                continue
-            fields = line.split('\t')
-            try:
-                build = fields[0]
-                for i,dbkey in enumerate(pspecies_all2):
-                    if dbkey == build:
-                        pspecies.append(build)
-                        filepaths.append(fields[1])
-                        del pspecies_all2[i]        
-                    else:
-                        continue
-            except:
-                pass
-    except Exception, exc:
-        stop_err( 'Initialization error: %s' % str( exc ) )
-    
-    if len(pspecies) == 0:
-        stop_err( "Quality scores are not available for the following genome builds: %s" % ( pspecies_all2 ) )
-    if len(pspecies) < len(pspecies_all):
-        print "Quality scores are not available for the following genome builds: %s" %(pspecies_all2)
-    
-    scores_by_chrom = []
-    #Get scores for all the primary species
-    for file in filepaths:
-        scores_by_chrom.append(load_scores_ba_dir( file.strip() ))
-    
-    try:
-        maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
-        maf_writer = bx.align.maf.Writer( open(out_file,'w') )
-    except Exception, e:
-        stop_err( "Your MAF file appears to be malformed: %s" % str( e ) )
-    
-    maf_count = 0
-    for block in maf_reader:
-        status_strings = []
-        for seq in range (len(block.components)):
-            src = block.components[seq].src
-            dbkey = src.split('.')[0]
-            chr = src.split('.')[1]
-            if not (dbkey in pspecies):
-                continue
-            else:    #enter if the species is a primary species
-                index = pspecies.index(dbkey)
-                sequence = block.components[seq].text
-                s_start = block.components[seq].start
-                size = len(sequence)    #this includes the gaps too
-                status_str = '1'*size
-                status_list = list(status_str)
-                if status_strings == []:
-                    status_strings.append(status_str)
-                ind = 0
-                s_end = block.components[seq].end
-                #Get scores for the entire sequence
-                try:
-                    scores = scores_by_chrom[index][chr][s_start:s_end]
-                except:
-                    continue
-                pos = 0
-                while pos < (s_end-s_start):    
-                    if sequence[ind] == '-':    #No score for GAPS
-                        ind += 1
-                        continue
-                    score = scores[pos]
-                    if score < qual_cutoff:
-                        score = 0
-                        
-                    if not(score):
-                        if mask_region == 0:    #Mask Corresponding position only
-                            status_list[ind] = '0'
-                            ind += 1
-                            pos += 1
-                        elif mask_region == 1:    #Mask Corresponding position + downstream neighbors
-                            for n in range(mask_length+1):
-                                try:
-                                    status_list[ind+n] = '0'
-                                except:
-                                    pass
-                            ind = ind + mask_length + 1
-                            pos = pos + mask_length + 1
-                        elif mask_region == 2:    #Mask Corresponding position + upstream neighbors
-                            for n in range(mask_length+1):
-                                try:
-                                    status_list[ind-n] = '0'
-                                except:
-                                    pass
-                            ind += 1
-                            pos += 1
-                        elif mask_region == 3:    #Mask Corresponding position + neighbors on both sides
-                            for n in range(-mask_length_l,mask_length_r+1):
-                                try:
-                                    status_list[ind+n] = '0'
-                                except:
-                                    pass
-                            ind = ind + mask_length_r + 1
-                            pos = pos + mask_length_r + 1
-                    else:
-                        pos += 1
-                        ind += 1
-                    
-                status_strings.append(''.join(status_list))
-        
-        if status_strings == []:    #this block has no primary species
-            continue
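-        #AND the per-species status strings together, so a column is masked
-        #('0') if any primary species falls below the quality cutoff there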
-        output_status_str = status_strings[0]
-        for stat in status_strings[1:]:
-            try:
-                output_status_str = bitwise_and (output_status_str, stat, '0')
-            except Exception, e:
-                break
-            
-        for seq in range (len(block.components)):
-            src = block.components[seq].src
-            dbkey = src.split('.')[0]
-            if dbkey not in mask_species.split(','):
-                continue
-            sequence = block.components[seq].text
-            sequence = bitwise_and (output_status_str, sequence, mask_chr_dict[mask_chr])
-            block.components[seq].text = sequence
-            mask_chr_count += output_status_str.count('0')
-        maf_writer.write(block)
-        maf_count += 1
-        
-    maf_reader.close()
-    maf_writer.close()
-    print "No. of blocks = %d; No. of masked nucleotides = %s; Mask character = %s; Mask region = %s; Cutoff used = %d" %(maf_count, mask_chr_count, mask_chr_dict[mask_chr], mask_reg_dict[mask_region], qual_cutoff)
-    
-    
-if __name__ == "__main__":
-    main()
--- a/tools/regVariation/quality_filter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-<tool id="qualityFilter" name="Filter nucleotides" version="1.0.1">
-  <description> based on quality scores</description>
-  <command interpreter="python">
-  	quality_filter.py 
-  	$input 
-  	$out_file1 
-  	$primary_species 
-  	$mask_species 
-  	$score 
-  	$mask_char 
-  	${mask_region.region} 
-  	#if $mask_region.region == "3"
-  		${mask_region.lengthr},${mask_region.lengthl}
-  	#elif $mask_region.region == "0"
-  		1
-  	#else
-  		${mask_region.length}
-	#end if
-  	${GALAXY_DATA_INDEX_DIR}/quality_scores.loc
-  </command>
-  <inputs>
-    <param format="maf" name="input" type="data" label="Select data"/>
-    <param name="primary_species" type="select" label="Use quality scores of" display="checkboxes" multiple="true">
-      <options>
-        <filter type="data_meta" ref="input" key="species" />
-      </options>  
-    </param>
-	<param name="mask_species" type="select" label="Mask Species" display="checkboxes" multiple="true">
-      <options>
-        <filter type="data_meta" ref="input" key="species" />
-      </options>  
-	</param>
-	<param name="score" size="10" type="integer" value="20" label="Quality score cut-off" help="A cut-off value of 20 means mask all nucleotides having a quality score less than 20"/>
-	<param name="mask_char" size="5" type="select" label="Mask character">
-      <option value="0" selected="true">#</option>
-      <option value="1">$</option>
-      <option value="2">^</option>
-      <option value="3">*</option>
-      <option value="4">?</option>
-      <option value="5">N</option>
-    </param>
-	<conditional name="mask_region">
-      <param name="region" type="select" label="Mask region">
-        <option value="0" selected="true">Only the corresponding nucleotide </option>
-        <option value="1">Corresponding column + right-side neighbors</option>
-        <option value="2">Corresponding column + left-side neighbors</option>
-        <option value="3">Corresponding column + neighbors on both sides</option>
-      </param>
-      <when value="0">
-      </when>
-      <when value="1">
-        <param name="length" size="10" type="integer" value="2" label="Number of right-side neighbors"/>
-      </when>
-      <when value="2">
-        <param name="length" size="10" type="integer" value="2" label="Number of left-side neighbors"/>
-      </when>
-      <when value="3">
-        <param name="lengthr" size="10" type="integer" value="2" label="Number of neighbors on right-side" />
-        <param name="lengthl" size="10" type="integer" value="2" label="Number of neighbors on left-side" />
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="maf" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <requirements>
-    <requirement type="python-module">numpy</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="input" value="6.maf"/>
-      <param name="primary_species" value="panTro2"/>
-      <param name="mask_species" value="hg18"/>
-      <param name="score" value="50"/>
-      <param name="mask_char" value="0"/>
-      <param name="region" value="0" />
-      <output name="out_file1" file="6_quality_filter.maf"/>
-    </test>
-  </tests>
- <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This tool takes a MAF file as input and filters nucleotides in every alignment block of the MAF file based on their quality/PHRED scores. 
-
------
-
-.. class:: warningmark
-
-**Note**
-
-Any blocks not containing the primary species (the species whose quality scores are to be used) will be omitted. 
-Also, any primary species whose quality scores are not available in Galaxy will be treated as a non-primary species. This information will appear as a message in the job history panel. 
-
------
-
-**Example**
-
-- For the following alignment block::
-
-   a score=4050.0
-   s hg18.chrX    3719221 48 - 154913754 tattttacatttaaaataaatatgtaaatatatattttatatttaaaa 
-   s panTro2.chrX 3560945 48 - 155361357 tattttatatttaaaataaagatgtaaatatatattttatatttaaaa 
-
-- running this tool with **Primary species as panTro2**, **Mask species as hg18, panTro2**, **Quality cutoff as 20**, **Mask character as #** and **Mask region as only the corresponding position** will return::
-
-   a score=4050.0
-   s hg18.chrX    3719221 48 - 154913754 ###tttac#####a###a#atatgtaaat###tattt#####ttaaaa 
-   s panTro2.chrX 3560945 48 - 155361357 ###tttat#####a###a#agatgtaaat###tattt#####ttaaaa 
-   
-   where the positions containing # correspond to panTro2 nucleotides with quality scores less than 20.
-  </help>  
-</tool>
--- a/tools/regVariation/qv_to_bqv.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Adapted from bx/scripts/qv_to_bqv.py
-
-Convert a qual (qv) file to several BinnedArray files for fast seek.
-This script takes approximately 4 seconds per 1 million base pairs.
-
-The input format is fasta style quality -- fasta headers followed by 
-whitespace separated integers.
-
-usage: %prog qual_file_dir output_file
-"""
-
-import pkg_resources 
-pkg_resources.require( "bx-python" )
-pkg_resources.require( "numpy" )
-import string
-import psyco_full
-import sys, re, os, glob
-from bx.binned_array import BinnedArrayWriter
-from bx.cookbook import *
-import fileinput
-
-def load_scores_ba_dir( dir ):
-    """
-    Return a dict-like object (keyed by chromosome) that returns 
-    FileBinnedArray objects created from "key.ba" files in `dir`
-    """
-    return FileBinnedArrayDir( dir )
-
-def main():
-    args = sys.argv[1:]
-    try:
-        qual_file_dir = args[0]
-        output_file = args[ 1 ]
-        fo = open(output_file,"w")
-    except:
-        print "usage: qual_file output_file"
-        sys.exit()
-    
-    #iterate over every *.qa file in the given directory (sorted, as ls would list them)
-    for qual_file in sorted(glob.glob(os.path.join(qual_file_dir, "*.qa"))):
-        qual = fileinput.FileInput( qual_file )
-        outfile = None
-        outbin = None
-        base_count = 0
-        mega_count = 0
-    
-        for line in qual:
-            line = line.rstrip("\r\n")
-            if line.startswith(">"):
-                # close old
-                if outbin and outfile:
-                    print "\nFinished region " + region + " at " + str(base_count) + " base pairs."
-                    outbin.finish()
-                    outfile.close()
-                # start new file
-                region = line.lstrip(">")
-                #outfname = output_file + "." + region + ".bqv" #CHANGED
-                outfname = qual_file.strip() + ".bqv"
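-                #NOTE: the output name is derived from the input file name, so
-                #each .qa file is assumed to contain a single region (e.g. one chromosome)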
-                print >>fo, "Writing region " + region + " to file " + outfname
-                outfile = open( outfname , "wb")
-                outbin = BinnedArrayWriter(outfile, typecode='b', default=0)
-                base_count = 0
-                mega_count = 0
-            else:
-                if outfile and outbin:
-                    nums = line.split()
-                    for val in nums:
-                        outval = int(val)
-                        assert outval <= 255 and outval >= 0
-                        outbin.write(outval)
-                        base_count += 1
-                    if (mega_count * 1000000) <= base_count:
-                        sys.stdout.write(str(mega_count)+" ")
-                        sys.stdout.flush()
-                        mega_count = base_count // 1000000 + 1
-        if outbin and outfile:
-            print "\nFinished region " + region + " at " + str(base_count) + " base pairs."
-            outbin.finish()
-            outfile.close()
-
-if __name__ == "__main__":
-    main()
--- a/tools/regVariation/qv_to_bqv.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-<tool id="qv2bqv" name="qv2bqv">
-  <description></description>
-  <command interpreter="python">qv_to_bqv.py "$input1" $output</command>
-  <inputs>
-    <param name="input1" type="data" format="interval" help="Directory" />
-   </inputs>
-  <outputs>
-    <data format="text" name="output" metadata_source="input1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <output name="output" file="gops-coverage.dat" />
-    </test>
-  </tests>
-</tool>
\ No newline at end of file
--- a/tools/regVariation/rcve.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,143 +0,0 @@
-#!/usr/bin/env python
-
-from galaxy import eggs
-
-import sys, string
-from rpy import *
-import numpy
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-def sscombs(s):
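-    """
-    Return all non-empty subsequences of the string s (order preserved),
-    e.g. sscombs('01') == ['0', '01', '1'].
-    """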
-    if len(s) == 1:
-        return [s]
-    else:
-        ssc = sscombs(s[1:])
-        return [s[0]] + [s[0]+comb for comb in ssc] + ssc
-
-
-infile = sys.argv[1]
-y_col = int(sys.argv[2])-1
-x_cols = sys.argv[3].split(',')
-outfile = sys.argv[4]
-
-print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
-fout = open(outfile,'w')
-
-for i, line in enumerate( file ( infile )):
-    line = line.rstrip('\r\n')
-    if len( line )>0 and not line.startswith( '#' ):
-        elems = line.split( '\t' )
-        break 
-    if i == 30:
-        break # Hopefully we'll never get here...
-
-if len( elems )<1:
-    stop_err( "The data in your input dataset is either missing or not formatted properly." )
-
-y_vals = []
-x_vals = []
-
-for k,col in enumerate(x_cols):
-    x_cols[k] = int(col)-1
-    x_vals.append([])
-    """
-    try:
-        float( elems[x_cols[k]] )
-    except:
-        try:
-            msg = "This operation cannot be performed on non-numeric column %d containing value '%s'." %( col, elems[x_cols[k]] )
-        except:
-            msg = "This operation cannot be performed on non-numeric data."
-        stop_err( msg )
-    """
-NA = 'NA'
-for ind,line in enumerate( file( infile )):
-    if line and not line.startswith( '#' ):
-        try:
-            fields = line.split("\t")
-            try:
-                yval = float(fields[y_col])
-            except Exception, ey:
-                yval = r('NA')
-                #print >>sys.stderr, "ey = %s" %ey
-            y_vals.append(yval)
-            for k,col in enumerate(x_cols):
-                try:
-                    xval = float(fields[col])
-                except Exception, ex:
-                    xval = r('NA')
-                    #print >>sys.stderr, "ex = %s" %ex
-                x_vals[k].append(xval)
-        except:
-            pass
-
-x_vals1 = numpy.asarray(x_vals).transpose()
-dat= r.list(x=array(x_vals1), y=y_vals)
-
-set_default_mode(NO_CONVERSION)
-try:
-    full = r.lm(r("y ~ x"), data= r.na_exclude(dat))    #full model includes all the predictor variables specified by the user
-except RException, rex:
-    stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain no numeric values.")
-set_default_mode(BASIC_CONVERSION)
-
-summary = r.summary(full)
-fullr2 = summary.get('r.squared','NA')
-
-if fullr2 == 'NA':
-    stop_error("Error in linear regression")
-
-if len(x_vals) < 10:
-    s = ""
-    for ch in range(len(x_vals)):
-        s += str(ch)
-else:
-    stop_err("This tool only works with less than 10 predictors.")
-
-print >>fout, "#Model\tR-sq\tRCVE_Terms\tRCVE_Value"
-all_combos = sorted(sscombs(s), key=len)
-all_combos.reverse()
-for j,cols in enumerate(all_combos):
-    #if len(cols) == len(s):    #Same as the full model above
-    #    continue
-    if len(cols) == 1:
-        x_vals1 = x_vals[int(cols)]
-    else:
-        x_v = []
-        for col in cols:
-            x_v.append(x_vals[int(col)])
-        x_vals1 = numpy.asarray(x_v).transpose()
-    dat= r.list(x=array(x_vals1), y=y_vals)
-    set_default_mode(NO_CONVERSION)
-    red = r.lm(r("y ~ x"), data= dat)    #Reduced model
-    set_default_mode(BASIC_CONVERSION)
-    summary = r.summary(red)
-    redr2 = summary.get('r.squared','NA')
-    try:
-        rcve = (float(fullr2)-float(redr2))/float(fullr2)
-    except:
-        rcve = 'NA'
-    col_str = ""
-    for col in cols:
-        col_str = col_str + str(int(x_cols[int(col)]) + 1) + " "
-    col_str = col_str.strip()
-    rcve_col_str = ""
-    for col in s:
-        if col not in cols:
-            rcve_col_str = rcve_col_str + str(int(x_cols[int(col)]) + 1) + " "
-    rcve_col_str = rcve_col_str.strip()
-    if len(cols) == len(s):    #full model
-        rcve_col_str = "-"
-        rcve = "-"
-    try:
-        redr2 = "%.4f" %(float(redr2))
-    except:
-        pass
-    try:
-        rcve = "%.4f" %(float(rcve))
-    except:
-        pass
-    print >>fout, "%s\t%s\t%s\t%s" %(col_str,redr2,rcve_col_str,rcve)
--- a/tools/regVariation/rcve.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,70 +0,0 @@
-<tool id="rcve1" name="Compute RCVE" version="1.0.0">
-  <description> </description>
-  <command interpreter="python">
-    rcve.py 
-      $input1
-      $response_col
-      $predictor_cols
-      $out_file1
-      1>/dev/null
-  </command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
-    <param name="response_col" label="Response column (Y)" type="data_column" data_ref="input1" />
-    <param name="predictor_cols" label="Predictor columns (X)" type="data_column" data_ref="input1" multiple="true">
-        <validator type="no_options" message="Please select at least one column."/>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input1" />
-  </outputs>
-  <requirements>
-    <requirement type="python-module">rpy</requirement>
-  </requirements>
-  <tests>
-    <!-- Test data with vlid values -->
-  	<test>
-      <param name="input1" value="reg_inp.tab"/>
-      <param name="response_col" value="1"/>
-      <param name="predictor_cols" value="2,3,4"/>
-      <output name="out_file1" file="rcve_out.dat"/>
-    </test>
-    
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Edit Datasets-&gt;Convert characters*
-
------
-
-.. class:: infomark
-
-**What it does**
-
-This tool computes the RCVE (Relative Contribution to Variance) for all possible variable subsets using the following formula:
-
-**RCVE(i) = [R-sq (full: 1,2,..,i..,p-1) - R-sq(without i: 1,2,...,p-1)] / R-sq (full: 1,2,..,i..,p-1)**,
-which denotes the case where the 'i'th predictor is dropped. 
-
-
-In general,
-**RCVE(X+) = [R-sq (full: {X,X+}) - R-sq(reduced: {X})] / R-sq (full: {X,X+})**,
-where,
-
-- {X,X+} denotes the set of all predictors, 
-- X+ is the set of predictors for which we compute RCVE (and therefore drop from the full model to obtain a reduced one), 
-- {X} is the set of the predictors that are left in the reduced model after excluding {X+} 
-
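-For example, if the full model has R-sq = 0.8 and the reduced model (without X+) has R-sq = 0.6, then RCVE(X+) = (0.8 - 0.6)/0.8 = 0.25, i.e. X+ accounts for 25% of the variance explained by the full model.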
-
-The 4 columns in the output are described below:
-
-- Column 1 (Model): denotes the variables present in the model ({X})
-- Column 2 (R-sq): denotes the R-squared value corresponding to the model in Column 1
-- Column 3 (RCVE_Terms): denotes the variable/s for which RCVE is computed ({X+}). These are the variables that are absent in the reduced model in Column 1. A '-' in this column indicates that the model in Column 1 is the Full model.
-- Column 4 (RCVE): denotes the RCVE value corresponding to the variable/s in Column 3. A '-' in this column indicates that the model in Column 1 is the Full model.
-  
-  
-  </help>
-</tool>
--- a/tools/regVariation/substitution_rates.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,118 +0,0 @@
-#!/usr/bin/env python
-#guruprasad Ananda
-"""
-Estimates substitution rates from pairwise alignments using JC69 model.
-"""
-
-from galaxy import eggs
-from galaxy.tools.util.galaxyops import *
-from galaxy.tools.util import maf_utilities
-import bx.align.maf
-from bx.intervals.io import NiceReaderWrapper
-import sys, fileinput
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-if len(sys.argv) < 3:
-        stop_err("Incorrect number of arguments.")    
-    
-inp_file = sys.argv[1]
-out_file = sys.argv[2]
-fout = open(out_file, 'w')
-int_file = sys.argv[3]
-if int_file != "None":     #The user has specified an interval file
-    dbkey_i = sys.argv[4]
-    chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[5] )
-
-
-def rateEstimator(block):
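-    """
-    Count the columns that differ between the first component of the block and
-    every other component, skipping gap and mask characters (-#$^*?).
-    In whole-block mode (no interval file) print one line of counts per block;
-    in interval mode accumulate the counts into the module-level globals.
-    """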
-    global alignlen, mismatches
-
-    src1 = block.components[0].src
-    sequence1 = block.components[0].text
-    start1 = block.components[0].start
-    end1 = block.components[0].end
-    len1 = int(end1)-int(start1)
-    len1_withgap = len(sequence1)
-    mismatch = 0.0
-    
-    for seq in range (1,len(block.components)):
-        src2 = block.components[seq].src
-        sequence2 = block.components[seq].text
-        start2 = block.components[seq].start
-        end2 = block.components[seq].end
-        len2 = int(end2)-int(start2)
-        for nt in range(len1_withgap):
-            if sequence1[nt] not in '-#$^*?' and sequence2[nt] not in '-#$^*?': #Not a gap or masked character
-                if sequence1[nt].upper() != sequence2[nt].upper():
-                    mismatch += 1
-    
-    if int_file == "None":  
-        p = mismatch/min(len1,len2)
-        print >>fout, "%s\t%s\t%s\t%s\t%s\t%s\t%d\t%d\t%.4f" %(src1,start1,end1,src2,start2,end2,min(len1,len2),mismatch,p)
-    else:
-        mismatches += mismatch
-        alignlen += min(len1,len2)
-              
-def main():
-    skipped = 0
-    not_pairwise = 0
-    
-    if int_file == "None":
-        try:
-            maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
-        except:
-            stop_err("Your MAF file appears to be malformed.")
-        print >>fout, "#Seq1\tStart1\tEnd1\tSeq2\tStart2\tEnd2\tL\tN\tp"
-        for block in maf_reader:
-            if len(block.components) != 2:
-                not_pairwise += 1
-                continue
-            try:
-                rateEstimator(block)
-            except:
-                skipped += 1
-    else:
-        index, index_filename = maf_utilities.build_maf_index( inp_file, species = [dbkey_i] )
-        if index is None:
-            print >> sys.stderr, "Your MAF file appears to be malformed."
-            sys.exit()
-        win = NiceReaderWrapper( fileinput.FileInput( int_file ),
-                                chrom_col=chr_col_i,
-                                start_col=start_col_i,
-                                end_col=end_col_i,
-                                strand_col=strand_col_i,
-                                fix_strand=True)
-        species=None
-        mincols = 0
-        global alignlen, mismatches
-        
-        for interval in win:
-            alignlen = 0
-            mismatches = 0.0
-            src = "%s.%s" % ( dbkey_i, interval.chrom )
-            for block in maf_utilities.get_chopped_blocks_for_region( index, src, interval, species, mincols ):
-                if len(block.components) != 2:
-                    not_pairwise += 1
-                    continue
-                try:
-                    rateEstimator(block)
-                except:
-                    skipped += 1
-            if alignlen:
-                p = mismatches/alignlen
-            else:
-                p = 'NA'
-            interval.fields.append(str(alignlen))
-            interval.fields.append(str(mismatches))
-            interval.fields.append(str(p))
-            print >>fout, "\t".join(interval.fields)    
-            #num_blocks += 1
-    
-    if not_pairwise:
-        print "Skipped %d non-pairwise blocks" %(not_pairwise)
-    if skipped:
-        print "Skipped %d blocks as invalid" %(skipped)
-if __name__ == "__main__":
-    main()
--- a/tools/regVariation/substitution_rates.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-<tool id="subRate1" name="Estimate substitution rates " version="1.0.0">
-  <description> for non-coding regions</description>
-  <command interpreter="python">
-  	substitution_rates.py 
-  	$input 
-  	$out_file1
-  	#if $region.type == "win":
-      ${region.input2} ${region.input2.dbkey} ${region.input2.metadata.chromCol},$region.input2.metadata.startCol,$region.input2.metadata.endCol,$region.input2.metadata.strandCol
-    #else:
-      "None"
-    #end if 
-  </command>
-  <inputs>
-    <param format="maf" name="input" type="data" label="Select pair-wise alignment data"/>
-    <conditional name="region">
-	      <param name="type" type="select" label="Estimate rates corresponding to" multiple="false">
-	         <option value="align">Alignment block</option>
-	         <option value="win">Intervals in your history</option>
-	     </param>
-	     <when value="win">
-	      	<param format="interval" name="input2" type="data" label="Choose intervals">
-	      		<validator type="unspecified_build" />
-	    	</param>
-	      </when>
-	      <when value="align" />
-      </conditional>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1" metadata_source="input"/>
-  </outputs>
-  
-  <tests>
-    <test>
-      <param name="input" value="Interval2Maf_pairwise_out.maf"/>
-      <param name="type" value="align"/>
-      <output name="out_file1" file="subRates1.out"/>
-    </test>
-  </tests>
-  
- <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This tool takes a pairwise MAF file as input and estimates the substitution rate under the Jukes-Cantor (JC69) model. The 3 new columns appended to the output are explained below:
-
-- L: number of nucleotides compared
-- N: number of different nucleotides
-- p = N/L
-
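-Note that p is the raw proportion of mismatching sites; under the JC69 model the corrected substitution distance can be obtained from it as d = -(3/4)ln(1 - (4/3)p).
-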
------
-
-.. class:: warningmark
-
-**Note**
-
-Any blocks not containing exactly two sequences will be omitted. 
-
-  </help>  
-</tool>
\ No newline at end of file
--- a/tools/regVariation/substitutions.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-#!/usr/bin/env python
-#Guruprasad Ananda
-"""
-Fetches substitutions from pairwise alignments.
-"""
-
-from galaxy import eggs
-
-from galaxy.tools.util import maf_utilities
-
-import bx.align.maf
-import sys
-import os, fileinput
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-if len(sys.argv) < 3:
-        stop_err("Incorrect number of arguments.")    
-    
-inp_file = sys.argv[1]
-out_file = sys.argv[2]
-fout = open(out_file, 'w')
-
-def fetchSubs(block):
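-    """
-    For each non-reference component of the block, write out in ungapped
-    source coordinates every run of consecutive substitutions relative to the
-    first component, skipping gap and mask characters (-#$^*?).
-    """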
-    
-    src1 = block.components[0].src
-    sequence1 = block.components[0].text
-    start1 = block.components[0].start
-    end1 = block.components[0].end
-    len1 = int(end1)-int(start1)
-    len1_withgap = len(sequence1)
-    
-    for seq in range (1,len(block.components)):
-        src2 = block.components[seq].src
-        sequence2 = block.components[seq].text
-        start2 = block.components[seq].start
-        end2 = block.components[seq].end
-        len2 = int(end2)-int(start2)
-        sub_begin = None
-        sub_end = None
-        begin = False
-        
-        for nt in range(len1_withgap):
-            if sequence1[nt] not in '-#$^*?' and sequence2[nt] not in '-#$^*?': #Not a gap or masked character
-                if sequence1[nt].upper() != sequence2[nt].upper():
-                    if not(begin):
-                        sub_begin = nt
-                        begin = True
-                    sub_end = nt
-                else:
-                    if begin:
-                        print >>fout, "%s\t%s\t%s" %(src1,start1+sub_begin-sequence1[0:sub_begin].count('-'),start1+sub_end-sequence1[0:sub_end].count('-'))
-                        print >>fout, "%s\t%s\t%s" %(src2,start2+sub_begin-sequence2[0:sub_begin].count('-'),start2+sub_end-sequence2[0:sub_end].count('-'))    
-                        begin = False
-
-            else:
-                if begin:
-                    print >>fout, "%s\t%s\t%s" %(src1,start1+sub_begin-sequence1[0:sub_begin].count('-'),end1+sub_end-sequence1[0:sub_end].count('-'))
-                    print >>fout, "%s\t%s\t%s" %(src2,start2+sub_begin-sequence2[0:sub_begin].count('-'),end2+sub_end-sequence2[0:sub_end].count('-'))    
-                    begin = False
-                    ended = False
-    
-              
-def main():
-    skipped = 0
-    not_pairwise = 0
-    try:
-        maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
-    except:
-        stop_err("Your MAF file appears to be malformed.")
-    print >>fout, "#Chr\tStart\tEnd"
-    for block in maf_reader:
-        if len(block.components) != 2:
-            not_pairwise += 1
-            continue
-        try:
-            fetchSubs(block)
-        except:
-            skipped += 1
-    
-    if not_pairwise:
-        print "Skipped %d non-pairwise blocks" %(not_pairwise)
-    if skipped:
-        print "Skipped %d blocks" %(skipped)
-if __name__ == "__main__":
-    main()
--- a/tools/regVariation/substitutions.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-<tool id="substitutions1" name="Fetch substitutions " version="1.0.0">
-  <description> from pairwise alignments</description>
-  <command interpreter="python">
-  	substitutions.py 
-  	$input 
-  	$out_file1
-  </command>
-  <inputs>
-    <param format="maf" name="input" type="data" label="Select pair-wise alignment data"/>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1" metadata_source="input"/>
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" value="Interval2Maf_pairwise_out.maf"/>
-      <output name="out_file1" file="subs.out"/>
-    </test>
-  </tests>
- <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This tool takes a pairwise MAF file as input and fetches substitutions per alignment block.
-
------
-
-.. class:: warningmark
-
-**Note**
-
-Any blocks not containing exactly two sequences will be omitted. 
-
-  </help>  
-</tool>
\ No newline at end of file
--- a/tools/regVariation/t_test_two_samples.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,109 +0,0 @@
-# A program to implement the two-sample t-test, pooled or non-pooled, where the alternative hypothesis is two-sided or one-sided. 
-# The first input file is a TABULAR format file representing the first sample and consisting of one column only.
-# The second input file is a TABULAR format file representing the second sample and consisting of one column only.
-# The third input is the sidedness of the t-test: either two-sided or, one-sided with m1 less than m2 or, 
-# one-sided with m1 greater than m2. 
-# The fourth input is the equality status of the standard deviations of both populations
-# The output file is a TXT file representing the result of the two sample t-test.
-
-use strict;
-use warnings;
-
-#variable to handle the motif information
-my $motif;
-my $motifName = "";
-my $motifNumber = 0;
-my $totalMotifsNumber = 0;
-my @motifNamesArray = ();
-
-# check to make sure having correct files
-my $usage = "usage: non_pooled_t_test_two_samples_galaxy.pl [TABULAR.in] [TABULAR.in] [testSidedness] [standardDeviationEquality] [TXT.out] \n";
-die $usage unless @ARGV == 5;
-
-#get the input arguments
-my $firstSampleInputFile = $ARGV[0];
-my $secondSampleInputFile = $ARGV[1];
-my $testSidedness = $ARGV[2];
-my $standardDeviationEquality = $ARGV[3]; 
-my $outputFile = $ARGV[4];
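-#expected option values (as defined in t_test_two_samples.xml):
-#  testSidedness: "two-sided", "one-sided:_m1_less_than_m2" or "one-sided:_m1_greater_than_m2"
-#  standardDeviationEquality: "equal" or "unequal"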
-
-#open the input files
-open (INPUT1, "<", $firstSampleInputFile) || die("Could not open file $firstSampleInputFile \n"); 
-open (INPUT2, "<", $secondSampleInputFile) || die("Could not open file $secondSampleInputFile \n"); 
-open (OUTPUT, ">", $outputFile) || die("Could not open file $outputFile \n");
-
-
-#variables to store the name of the R script file
-my $r_script;
-	
-# R script to implement the two-sample t-test
-#construct an R script file and save it in the same directory where the perl file is located
-$r_script = "non_pooled_t_test_two_samples.r";
-
-open(Rcmd,">", $r_script) or die "Cannot open $r_script \n\n";
-print Rcmd "
-        sampleTable1 <- read.table(\"$firstSampleInputFile\", header=FALSE);
-		sample1 <- sampleTable1[, 1];
-		
-		sampleTable2 <- read.table(\"$secondSampleInputFile\", header=FALSE);
-		sample2 <- sampleTable2[, 1];
-		
-		testSideStatus <- \"$testSidedness\";
-		STEqualityStatus <- \"$standardDeviationEquality\";
-		
-		#open the output a text file
-		sink(file = \"$outputFile\");
-		
-		#check if the t-test is two-sided
-		if (testSideStatus == \"two-sided\"){
-			
-			#check if the standard deviations are equal in both populations
-			if (STEqualityStatus == \"equal\"){
-				#two-sample t-test where standard deviations are assumed to be equal, the test is two-sided
-				testResult <- t.test(sample1, sample2, var.equal = TRUE);	
-			} else{
-				#two-sample t-test where standard deviations are assumed to be unequal, the test is two-sided
-				testResult <- t.test(sample1, sample2, var.equal = FALSE);
-			}
-		} else{  #the t-test is one sided	
-			
-			#check if the t-test is one-sided with m1 < m2
-			if (testSideStatus == \"one-sided:_m1_less_than_m2\"){
-				
-				#check if the standard deviations are equal in both populations
-				if (STEqualityStatus == \"equal\"){
-					#two-sample t-test where standard deviations are assumed to be equal, the test is one-sided, H_alt: m1 < m2
-					testResult <- t.test(sample1, sample2, var.equal = TRUE, alternative = \"less\");
-				} else{
-					#two-sample t-test where standard deviations are assumed to be unequal, the test is one-sided, H_alt: m1 < m2
-					testResult <- t.test(sample1, sample2, var.equal = FALSE, alternative = \"less\");
-				}
-			} else{   #the t-test is one-sided with m1 > m2
-				#check if the standard deviations are equal in both populations
-				if (STEqualityStatus == \"equal\"){
-					#two-sample t-test where standard deviations are assumed to be equal, the test is one-sided, H_alt: m1 > m2
-					testResult <- t.test(sample1, sample2, var.equal = TRUE, alternative = \"greater\");
-				} else{
-					#two-sample t-test where standard deviations are assumed to be unequal, the test is one-sided, H_alt: m1 > m2
-					testResult <- t.test(sample1, sample2, var.equal = FALSE, alternative = \"greater\");
-				}
-			}
-		}
-		
-		#save the output of the t-test into the output text file
-		testResult;
-		
-		#close the output text file
-		sink();
-		
-		#eof" . "\n";
-		
-close Rcmd;	
-
-system("R --no-restore --no-save --no-readline < $r_script > $r_script.out");
-
-#close the input and output files
-close(OUTPUT);
-close(INPUT2);
-close(INPUT1);
-
--- a/tools/regVariation/t_test_two_samples.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,160 +0,0 @@
-<tool id="t_test_two_samples" name="T Test for Two Samples" version="1.0.0">
-  <description></description>
-  
-  <command interpreter="perl">
-  	t_test_two_samples.pl $inputFile1 $inputFile2 $inputTestSidedness3 $inputStandardDeviationEquality4 $outputFile1
-  </command>
-
-  <inputs>
-  	<param format="tabular" name="inputFile1" type="data" label="Select the first sample tabular file"/>
-  	<param format="tabular" name="inputFile2" type="data" label="Select the second sample tabular file"/>
-  	
-    <param name="inputTestSidedness3" type="select" label="Choose the test sidedness:">
-    	<option value="two-sided">Two-sided</option>
-      	<option value="one-sided:_m1_less_than_m2">One-sided: m1 less than m2</option>
-      	<option value="one-sided:_m1_greater_than_m2">One-sided: m1 greater than m2</option>
-    </param>
-    
-    <param name="inputStandardDeviationEquality4" type="select" label="Choose the standard deviation equality status of the two populations:">
-    	<option value="equal">Equal</option>
-      	<option value="unequal">Unequal</option>
-    </param>
-  </inputs>
-  
-  <outputs>
-    <data format="text" name="outputFile1"/>
-  </outputs>
-  
-  <tests>
-  	<test>
-  		<param name="inputFile1" value="sample1.tabular" ftype="tabular" />
-  		<param name="inputFile2" value="sample2.tabular" ftype="tabular" />
-    	<param name="inputTestSidedness3" value="two-sided" />
-    	<param name="inputStandardDeviationEquality4" value="equal" />
-    	<output name="outputFile1" file="t_test_result1.text" />
-  	</test>
-  	
-  	<test>
-  		<param name="inputFile1" value="sample1.tabular" ftype="tabular" />
-  		<param name="inputFile2" value="sample2.tabular" ftype="tabular" />
-    	<param name="inputTestSidedness3" value="two-sided" />
-    	<param name="inputStandardDeviationEquality4" value="unequal" />
-    	<output name="outputFile1" file="t_test_result2.text" />
-  	</test>
-  	
-  	<test>
-  		<param name="inputFile1" value="sample1.tabular" ftype="tabular" />
-  		<param name="inputFile2" value="sample2.tabular" ftype="tabular" />
-    	<param name="inputTestSidedness3" value="one-sided:_m1_less_than_m2" />
-    	<param name="inputStandardDeviationEquality4" value="equal" />
-    	<output name="outputFile1" file="t_test_result3.text" />
-  	</test>
-  	
-  	<test>
-  		<param name="inputFile1" value="sample1.tabular" ftype="tabular" />
-  		<param name="inputFile2" value="sample2.tabular" ftype="tabular" />
-    	<param name="inputTestSidedness3" value="one-sided:_m1_less_than_m2" />
-    	<param name="inputStandardDeviationEquality4" value="unequal" />
-    	<output name="outputFile1" file="t_test_result4.text" />
-  	</test>
-  	
-  	<test>
-  		<param name="inputFile1" value="sample1.tabular" ftype="tabular" />
-  		<param name="inputFile2" value="sample2.tabular" ftype="tabular"/>
-    	<param name="inputTestSidedness3" value="one-sided:_m1_greater_than_m2" />
-    	<param name="inputStandardDeviationEquality4" value="equal" />
-    	<output name="outputFile1" file="t_test_result5.text" />
-  	</test>
-  	
-  	<test>
-  		<param name="inputFile1" value="sample1.tabular" ftype="tabular" />
-  		<param name="inputFile2" value="sample2.tabular" ftype="tabular" />
-    	<param name="inputTestSidedness3" value="one-sided:_m1_greater_than_m2" />
-    	<param name="inputStandardDeviationEquality4" value="unequal" />
-    	<output name="outputFile1" file="t_test_result6.text" />
-  	</test>
-  </tests>
-
-
-  <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This program implements the two-sample t-test, where the alternative hypothesis is two-sided or one-sided and the population variances may be assumed equal or unequal. The program takes four inputs and produces one output:
-
-- The first input file is a TABULAR format file representing the first sample and consisting of one column only.
-- The second input file is a TABULAR format file representing the second sample and consisting of one column only.
-- The third input is the sidedness of the t-test: either two-sided or, one-sided with m1 less than m2 or, one-sided with m1 greater than m2. 
-- The fourth input is the equality status of the standard deviations of both populations.
-- The output file is a TXT file representing the result of the two-sample t-test.
-
-
-**Example**
-
-Let us have the first input file representing the first sample as follows::
-
-	5
-	4
-	8
-	6
-	7
-	2
-	1
-	1
-	0
-	6
-	4
-	5
-	7
-	5
-	3
-	2
-	5
-	8
-	7
-	6
-	4
-
-And the second input file representing the second sample as follows::
-
-	2
-	3
-	5
-	1
-	2
-	7
-	5
-	4
-	3
-	2
-	7
-	6
-	0
-	8
-	4
-	6
-	9
-	2
-	4
-	5
-	6
-
-Running the program and choosing "Two-sided" and "Equal" as parameters will give the following output::
-
-	Two Sample t-test
-
-	data:  sample1 and sample2 
-	t = 0.3247, df = 40, p-value = 0.7471
-	alternative hypothesis: true difference in means is not equal to 0 
-	95 percent confidence interval:
- 	-1.243839  1.720030 
-	sample estimates:
-	mean of x mean of y 
- 	4.571429  4.333333 
-
-
-  </help>  
-  
-</tool>
--- a/tools/regVariation/windowSplitter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,85 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Split into windows.
-
-usage: %prog input size out_file
-   -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
-"""
-
-import sys, re, os
-
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-from galaxy.tools.util.galaxyops import *
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def main():   
-    # Parsing Command Line here
-    options, args = doc_optparse.parse( __doc__ )
-    
-    try:
-        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
-        inp_file, winsize, out_file, makesliding, offset = args
-        winsize = int(winsize)
-        offset = int(offset)
-        makesliding = int(makesliding)
-        if strand_col_1 <= 0:
-            strand = "+"        #if strand is not defined, default it to +
-    except:
-        stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )
-    
-    fo = open(out_file,'w')
-
-    skipped_lines = 0
-    first_invalid_line = 0
-    invalid_line = None
-    if offset == 0:
-        makesliding = 0
-
-    for i, line in enumerate( file( inp_file ) ):
-        line = line.strip()
-        if line and line[0:1] != "#":
-            try:
-                elems = line.split('\t')
-                if strand_col_1 != -1:
-                    strand = elems[strand_col_1]
-                start = int(elems[start_col_1])
-                end = int(elems[end_col_1])
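-                #fixed windows advance by winsize, sliding windows by offset;
-                #integer division drops any trailing partial window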
-                if makesliding == 0:
-                    numwin = (end - start)/winsize
-                else:
-                    numwin = (end - start)/offset
-                if numwin > 0:
-                    for win in range(numwin):
-                        elems_1 = elems
-                        elems_1[start_col_1] = str(start)
-                        elems_1[end_col_1] = str(start + winsize)
-                        fo.write( "%s\n" % '\t'.join( elems_1 ) )
-                        if makesliding == 0:
-                            start = start + winsize
-                        else:
-                            start = start + offset
-                            if start+winsize > end:
-                                break
-            except:
-                skipped_lines += 1
-                if not invalid_line:
-                    first_invalid_line = i + 1
-                    invalid_line = line
-    
-    fo.close()
-
-    if makesliding == 1:                
-        print 'Window size=%d, Sliding=Yes, Offset=%d' %(winsize, offset)
-    else:
-        print 'Window size=%d, Sliding=No' %(winsize)
-    if skipped_lines > 0:
-        print 'Skipped %d invalid lines starting with #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line )             
-    
-if __name__ == "__main__":
-    main()
--- a/tools/regVariation/windowSplitter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-<tool id="winSplitter" name="Make windows">
-  <description></description>
-  <command interpreter="python">windowSplitter.py $input $size $out_file1 ${wintype.choice} ${wintype.offset} -l ${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol}</command>
-  <inputs>
-    <!--<param label="Genome" name="dbkey" type="genomebuild"/>-->
-    <param format="interval" name="input" type="data" label="Select data"/>
-    <param name="size" size="10" type="integer" value="500" label="Window size"/>
-    <conditional name="wintype">
-	    <param name="choice" type="select" label="Make sliding windows?">
-	    	<option value="0" selected="true">No</option>
-	    	<option value="1">Yes</option>
-		</param>
-		<when value="0">
-    		<param name="offset" type="hidden" value="0" />
-    	</when>
-    	<when value="1">
-    		<param name="offset" size="10" type="integer" value="10" label="Offset size"/>
-    	</when>
-	</conditional>
-  </inputs>
-  <outputs>
-    <data format="interval" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="4.bed"/>
-      <param name="size" value="5000"/>
-      <param name="choice" value="1"/>
-      <param name="offset" value="4000"/>
-      <output name="out_file1" file="4_windows.bed"/>
-    </test>
-  </tests>
- <help> 
-
-.. class:: infomark
-
-**What it does**
-
-This tool splits the intervals in the input file into smaller intervals based on the specified window-size and window type.
-
------
-
-.. class:: warningmark
-
-**Note**
-
-Positions at the end of an input interval that do not fill a complete window of the requested size are omitted from the output.
-
------
-
-.. class:: infomark
-
-**About formats**
-
-**BED format** Browser Extensible Data format was designed at UCSC for displaying data tracks in the Genome Browser. It has three required fields and several additional optional ones:
-
-The first three BED fields (required) are::
-
-    1. chrom - The name of the chromosome (e.g. chr1, chrY_random).
-    2. chromStart - The starting position in the chromosome. (The first base in a chromosome is numbered 0.)
-    3. chromEnd - The ending position in the chromosome, plus 1 (i.e., a half-open interval).
-
-The additional BED fields (optional) are::
-
-    4. name - The name of the BED line.
-    5. score - A score between 0 and 1000.
-    6. strand - Defines the strand - either '+' or '-'.
-    7. thickStart - The starting position where the feature is drawn thickly at the Genome Browser.
-    8. thickEnd - The ending position where the feature is drawn thickly at the Genome Browser.
-    9. reserved - This should always be set to zero.
-   10. blockCount - The number of blocks (exons) in the BED line.
-   11. blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount.
-   12. blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount.
-   13. expCount - The number of experiments.
-   14. expIds - A comma-separated list of experiment ids. The number of items in this list should correspond to expCount.
-   15. expScores - A comma-separated list of experiment scores. All of the expScores should be relative to expIds. The number of items in this list should correspond to expCount.
-
------
-
-**Example**
-
-- For the following dataset::
-
-   chr22  1000  4700  NM_174568 0 +
-
-- running this tool with **Window size as 1000**, will return::
-
-   chr22  1000  2000  NM_174568 0 +
-   chr22  2000  3000  NM_174568 0 +
-   chr22  3000  4000  NM_174568 0 +
-   
-- running this tool to make **Sliding windows** of **size 1000** and **offset 500**, will return::
-
-   chr22  1000  2000  NM_174568 0 +
-   chr22  1500  2500  NM_174568 0 +
-   chr22  2000  3000  NM_174568 0 +
-   chr22  2500  3500  NM_174568 0 +
-   chr22  3000  4000  NM_174568 0 +
-   chr22  3500  4500  NM_174568 0 +
-  
-  </help>  
-
-
-</tool>
\ No newline at end of file
--- a/tools/rgenetics/listFiles.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,227 +0,0 @@
-#Provides Upload tool with access to list of available files
-import glob,sys
-import galaxy.app as thisapp
-import galaxy.util
-
-from elementtree.ElementTree import XML
-
-librepos = '/usr/local/galaxy/data/rg'
-myrepos = '/home/rerla/galaxy'
-marchinirepos = '/usr/local/galaxy/data/rg/snptest'
-
-from galaxy.tools.parameters import DataToolParameter
-
-#Provides Upload tool with access to list of available builds
-
-builds = []
-#Read build names and keys from galaxy.util
-for dbkey, build_name in galaxy.util.dbnames:
-    builds.append((build_name,dbkey,False))
-
-#Return available builds
-def get_available_builds(defval='hg18'):
-    for i,x in enumerate(builds):
-        if x[1] == defval:
-           x = list(x)
-           x[2] = True
-           builds[i] = tuple(x)
-    return builds
-
-
-
-def get_tabular_cols( input, outformat='gg' ):
-    """numeric only other than rs for strict genome graphs
-    otherwise tabular. Derived from galaxy tool source around August 2007 by Ross"""
-    columns = []
-    seenCnames = {}
-    elems = []
-    colnames = ['Col%d' % x for x in range(input.metadata.columns+1)]
-    strict = (outformat=='gg')
-    for i, line in enumerate( file ( input.file_name ) ):
-        if line and not line.startswith( '#' ): 
-            line = line.rstrip('\r\n')
-            elems = line.split( '\t' )
-    
-            """
-            Strict gg note:
-            Since this tool requires users to select only those columns
-            that contain numerical values, we'll restrict the column select
-            list appropriately other than the first column which must be a marker
-            """
-            if len(elems) > 0:
-                for col in range(1, input.metadata.columns+1):
-                    isFloat = False # short circuit common result
-                    try:
-                        val = float(elems[col-1])
-                        isFloat = True
-                    except:
-                        val = elems[col-1]
-                        if val:
-                            if i == 0: # header row
-                               colnames[col] = val
-                    if isFloat or (not strict) or (col == 1): # all in if not GG
-                        option = colnames[col]
-                        if not seenCnames.get(option,None): # new
-                              columns.append((option,str(col),False))
-                              seenCnames[option] = option
-            #print 'get_tab: %d=%s. Columns=%s' % (i,line,str(columns))
-            if len(columns) > 0 and i > 10:
-                """
-                We have our select list built, so we can break out of the outer most for loop
-                """
-                break 
-        if i == 30:
-            break # Hopefully we never get here...
-    for option in range(min(5,len(columns))):
-      (x,y,z) = columns[option]
-      columns[option] = (x,y,True)
-    return columns # sorted select options
-
-def get_marchini_dir():
-    """return the filesystem directory for snptest style files"""
-    return marchinirepos
-
-
-def get_lib_SNPTESTCaCofiles():
-    """return a list of file names - without extensions - available for caco studies
-    These have a common file name with both _1 and _2 suffixes"""
-    d = get_marchini_dir()
-    testsuffix = '.gen_1' # glob these
-    flist = glob.glob('%s/*%s' % (d,testsuffix))
-    flist = [x.split(testsuffix)[0] for x in flist] # leaves with a list of file set names
-    if len(flist) > 0:
-        dat = [(flist[0],flist[0],True),]
-        dat += [(x,x,False) for x in flist[1:]]
-    else:
-        dat = [('No Marchini CaCo files found in %s - convert some using the Marchini converter tool' % d,'None',True),]
-    return dat
-
-def getChropt():
-    """return dynamic chromosome select options
-    """
-    c = ['X','Y']
-    c += ['%d' % x for x in range(1,23)]
-    dat = [(x,x,False) for x in c]
-    x,y,z = dat[3]
-    dat[3] = (x,y,True)
-    return dat
-
-
-def get_phecols(fname=''):
-   """ return a list of phenotype columns for a multi-select list
-   prototype:
-   foo = ('fake - not yet implemented','not implemented','False')
-   dat = [foo for x in range(5)]
-   return dat
-   """
-   try:
-        header = file(fname,'r').next().split()
-   except:
-        return [('get_phecols unable to open file %s' % fname,'None',False),]
-   dat = [(x,x,False) for x in header]
-   return dat
-
-#Return various kinds of files
-
-def get_lib_pedfiles():
-    dat = glob.glob('%s/ped/*.ped' % librepos)
-    dat += glob.glob('%s/ped/*.ped' % myrepos)
-    dat.sort()
-    if len(dat) > 0:
-        dat = [x.split('.ped')[0] for x in dat]
-        dat = [(x,x,True) for x in dat]
-    else:
-        dat = [('No ped files - add some to %s/ped or %s/ped' % (librepos,myrepos),'None',True),]
-    return dat
-
-def get_lib_phefiles():
-    ext = 'phe'
-    dat = glob.glob('%s/pheno/*.%s' % (librepos,ext))
-    dat += glob.glob('%s/pheno/*.%s' % (myrepos,ext))
-    dat.sort()
-    if len(dat) > 0:
-        dat = [(x,x,False) for x in dat]
-    else:
-        dat = [('No %s files - add some to %s/pheno or %s/pheno' % (ext,librepos,myrepos),'None',True),]
-    return dat
-
-def get_lib_bedfiles():
-    dat = glob.glob('%s/plinkbed/*.bed' % librepos)
-    dat += glob.glob('%s/plinkbed/*.bed' % myrepos)
-    dat.sort()
-    if len(dat) > 0:
-        dat = [x.split('.bed')[0] for x in dat]
-        dat = [(x,x,False) for x in dat]
-    else:
-        dat = [('No bed files - Please import some to %s/plinkbed or %s/plinkbed' % (librepos,myrepos),'None',True),]
-    return dat
-
-def get_lib_fbatfiles():
-    dat = glob.glob('%s/plinkfbat/*.ped' % librepos)
-    dat += glob.glob('%s/plinkfbat/*.ped' % myrepos)
-    dat.sort()
-    if len(dat) > 0:
-        dat = [(x,x,False) for x in dat]
-    else:
-        dat = [('No fbat bed files - Please import some to %s/plinkfbat or %s/plinkfbat' % (librepos,myrepos),'None',True),]
-    return dat
-
-def get_lib_mapfiles():
-    dat = glob.glob('%s/ped/*.map' % librepos)
-    dat += glob.glob('%s/ped/*.map' % myrepos)
-    dat.sort()
-    if len(dat) > 0:
-        dat = [(x,x,False) for x in dat]
-    else:
-        dat = [('No map files - add some to %s/ped' % librepos,'None',True),]
-    return dat
-
-def get_my_pedfiles():
-    dat = glob.glob('%s/*.ped' % myrepos)
-    if len(dat) > 0:
-        dat = [(x,x,False) for x in dat]
-    else:
-        dat = [('No ped files - add some to %s' % myrepos,'None',True),]
-    return dat
-
-def get_my_mapfiles():
-    dat = glob.glob('%s/*.map' % myrepos)
-    if len(dat) > 0:
-        dat = [(x,x,True) for x in dat]
-    else:
-        dat = [('No map files - add some to %s' % myrepos,'None',True),]
-    return dat
-
-def get_lib_xlsfiles():
-    dat = glob.glob('%s/*.xls' % librepos)
-    if len(dat) > 0:
-        dat = [(x,x,False) for x in dat]
-    else:
-        dat = [('No xls files - add some to %s' % librepos,'None',True),]
-    return dat
-
-def get_lib_htmlfiles():
-    dat = glob.glob('%s/*.html' % librepos)
-    if len(dat) > 0:
-        dat = [(x,x,False) for x in dat]
-    else:
-        dat = [('No html files - add some to %s' % librepos,'None',True),]
-    return dat
-
-def get_my_xlsfiles():
-    dat = glob.glob('%s/*.xls' %  myrepos)
-    if len(dat) > 0:
-        dat = [(x,x,False) for x in dat]
-    else:
-        dat = [('No xls files - add some to %s' % myrepos,'None',True),]
-    return dat
-
-def get_my_htmlfiles():
-    dat = glob.glob('%s/*.html' % myrepos)
-    if len(dat) > 0:
-        dat = [(x,x,False) for x in dat]
-    else:
-        dat = [('No html files - add some to %s' % myrepos,'None',True),]
-    return dat
-
-
--- a/tools/rgenetics/plinkbinJZ.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,868 +0,0 @@
-#!/usr/bin/env python2.4
-"""
-"""
-
-import optparse,os,subprocess,gzip,struct,time,commands
-from array import array
-
-#from AIMS import util
-#from pga import util as pgautil
-
-__FILE_ID__ = '$Id: plinkbinJZ.py,v 1.14 2009/07/13 20:16:50 rejpz Exp $'
-
-VERBOSE = True
-
-MISSING_ALLELES = set(['N', '0', '.', '-',''])
-
-AUTOSOMES = set(range(1, 23) + [str(c) for c in range(1, 23)])
-
-MAGIC_BYTE1 = '00110110'
-MAGIC_BYTE2 = '11011000'
-FORMAT_SNP_MAJOR_BYTE = '10000000'
-FORMAT_IND_MAJOR_BYTE = '00000000'
-MAGIC1 = (0, 3, 1, 2)
-MAGIC2 = (3, 1, 2, 0)
-FORMAT_SNP_MAJOR = (2, 0, 0, 0)
-FORMAT_IND_MAJOR = (0, 0, 0, 0)
-HEADER_LENGTH = 3
-
-HOM0 = 3
-HOM1 = 0
-MISS = 2
-HET  = 1
-HOM0_GENO = (0, 0)
-HOM1_GENO = (1, 1)
-HET_GENO = (0, 1)
-MISS_GENO = (-9, -9)
-
-GENO_TO_GCODE = {
-    HOM0_GENO: HOM0, 
-    HET_GENO: HET, 
-    HOM1_GENO: HOM1, 
-    MISS_GENO: MISS, 
-    }
-
-CHROM_REPLACE = {
-    'X': '23',
-    'Y': '24',
-    'XY': '25',
-    'MT': '26',
-    'M': '26',
-}
-
-MAP_LINE_EXCEPTION_TEXT = """
-One or more lines in the *.map file has fewer than four fields.
-The line was:
-
-%s
-
-If you are running rgGRR through EPMP, this is usually a
-sign that you are using an old version of the map file.
-You can correct the problem by re-running Subject QC.  If
-you have already tried this, please contact the developers,
-or file a bug.
-"""
-
-INT_TO_GCODE = {
-     0: array('i', (0, 0, 0, 0)),   1: array('i', (2, 0, 0, 0)),   2: array('i', (1, 0, 0, 0)),   3: array('i', (3, 0, 0, 0)), 
-     4: array('i', (0, 2, 0, 0)),   5: array('i', (2, 2, 0, 0)),   6: array('i', (1, 2, 0, 0)),   7: array('i', (3, 2, 0, 0)), 
-     8: array('i', (0, 1, 0, 0)),   9: array('i', (2, 1, 0, 0)),  10: array('i', (1, 1, 0, 0)),  11: array('i', (3, 1, 0, 0)), 
-    12: array('i', (0, 3, 0, 0)),  13: array('i', (2, 3, 0, 0)),  14: array('i', (1, 3, 0, 0)),  15: array('i', (3, 3, 0, 0)), 
-    16: array('i', (0, 0, 2, 0)),  17: array('i', (2, 0, 2, 0)),  18: array('i', (1, 0, 2, 0)),  19: array('i', (3, 0, 2, 0)), 
-    20: array('i', (0, 2, 2, 0)),  21: array('i', (2, 2, 2, 0)),  22: array('i', (1, 2, 2, 0)),  23: array('i', (3, 2, 2, 0)), 
-    24: array('i', (0, 1, 2, 0)),  25: array('i', (2, 1, 2, 0)),  26: array('i', (1, 1, 2, 0)),  27: array('i', (3, 1, 2, 0)), 
-    28: array('i', (0, 3, 2, 0)),  29: array('i', (2, 3, 2, 0)),  30: array('i', (1, 3, 2, 0)),  31: array('i', (3, 3, 2, 0)), 
-    32: array('i', (0, 0, 1, 0)),  33: array('i', (2, 0, 1, 0)),  34: array('i', (1, 0, 1, 0)),  35: array('i', (3, 0, 1, 0)), 
-    36: array('i', (0, 2, 1, 0)),  37: array('i', (2, 2, 1, 0)),  38: array('i', (1, 2, 1, 0)),  39: array('i', (3, 2, 1, 0)), 
-    40: array('i', (0, 1, 1, 0)),  41: array('i', (2, 1, 1, 0)),  42: array('i', (1, 1, 1, 0)),  43: array('i', (3, 1, 1, 0)), 
-    44: array('i', (0, 3, 1, 0)),  45: array('i', (2, 3, 1, 0)),  46: array('i', (1, 3, 1, 0)),  47: array('i', (3, 3, 1, 0)), 
-    48: array('i', (0, 0, 3, 0)),  49: array('i', (2, 0, 3, 0)),  50: array('i', (1, 0, 3, 0)),  51: array('i', (3, 0, 3, 0)), 
-    52: array('i', (0, 2, 3, 0)),  53: array('i', (2, 2, 3, 0)),  54: array('i', (1, 2, 3, 0)),  55: array('i', (3, 2, 3, 0)), 
-    56: array('i', (0, 1, 3, 0)),  57: array('i', (2, 1, 3, 0)),  58: array('i', (1, 1, 3, 0)),  59: array('i', (3, 1, 3, 0)), 
-    60: array('i', (0, 3, 3, 0)),  61: array('i', (2, 3, 3, 0)),  62: array('i', (1, 3, 3, 0)),  63: array('i', (3, 3, 3, 0)), 
-    64: array('i', (0, 0, 0, 2)),  65: array('i', (2, 0, 0, 2)),  66: array('i', (1, 0, 0, 2)),  67: array('i', (3, 0, 0, 2)), 
-    68: array('i', (0, 2, 0, 2)),  69: array('i', (2, 2, 0, 2)),  70: array('i', (1, 2, 0, 2)),  71: array('i', (3, 2, 0, 2)), 
-    72: array('i', (0, 1, 0, 2)),  73: array('i', (2, 1, 0, 2)),  74: array('i', (1, 1, 0, 2)),  75: array('i', (3, 1, 0, 2)), 
-    76: array('i', (0, 3, 0, 2)),  77: array('i', (2, 3, 0, 2)),  78: array('i', (1, 3, 0, 2)),  79: array('i', (3, 3, 0, 2)), 
-    80: array('i', (0, 0, 2, 2)),  81: array('i', (2, 0, 2, 2)),  82: array('i', (1, 0, 2, 2)),  83: array('i', (3, 0, 2, 2)), 
-    84: array('i', (0, 2, 2, 2)),  85: array('i', (2, 2, 2, 2)),  86: array('i', (1, 2, 2, 2)),  87: array('i', (3, 2, 2, 2)), 
-    88: array('i', (0, 1, 2, 2)),  89: array('i', (2, 1, 2, 2)),  90: array('i', (1, 1, 2, 2)),  91: array('i', (3, 1, 2, 2)), 
-    92: array('i', (0, 3, 2, 2)),  93: array('i', (2, 3, 2, 2)),  94: array('i', (1, 3, 2, 2)),  95: array('i', (3, 3, 2, 2)), 
-    96: array('i', (0, 0, 1, 2)),  97: array('i', (2, 0, 1, 2)),  98: array('i', (1, 0, 1, 2)),  99: array('i', (3, 0, 1, 2)), 
-   100: array('i', (0, 2, 1, 2)), 101: array('i', (2, 2, 1, 2)), 102: array('i', (1, 2, 1, 2)), 103: array('i', (3, 2, 1, 2)), 
-   104: array('i', (0, 1, 1, 2)), 105: array('i', (2, 1, 1, 2)), 106: array('i', (1, 1, 1, 2)), 107: array('i', (3, 1, 1, 2)), 
-   108: array('i', (0, 3, 1, 2)), 109: array('i', (2, 3, 1, 2)), 110: array('i', (1, 3, 1, 2)), 111: array('i', (3, 3, 1, 2)), 
-   112: array('i', (0, 0, 3, 2)), 113: array('i', (2, 0, 3, 2)), 114: array('i', (1, 0, 3, 2)), 115: array('i', (3, 0, 3, 2)), 
-   116: array('i', (0, 2, 3, 2)), 117: array('i', (2, 2, 3, 2)), 118: array('i', (1, 2, 3, 2)), 119: array('i', (3, 2, 3, 2)), 
-   120: array('i', (0, 1, 3, 2)), 121: array('i', (2, 1, 3, 2)), 122: array('i', (1, 1, 3, 2)), 123: array('i', (3, 1, 3, 2)), 
-   124: array('i', (0, 3, 3, 2)), 125: array('i', (2, 3, 3, 2)), 126: array('i', (1, 3, 3, 2)), 127: array('i', (3, 3, 3, 2)), 
-   128: array('i', (0, 0, 0, 1)), 129: array('i', (2, 0, 0, 1)), 130: array('i', (1, 0, 0, 1)), 131: array('i', (3, 0, 0, 1)), 
-   132: array('i', (0, 2, 0, 1)), 133: array('i', (2, 2, 0, 1)), 134: array('i', (1, 2, 0, 1)), 135: array('i', (3, 2, 0, 1)), 
-   136: array('i', (0, 1, 0, 1)), 137: array('i', (2, 1, 0, 1)), 138: array('i', (1, 1, 0, 1)), 139: array('i', (3, 1, 0, 1)), 
-   140: array('i', (0, 3, 0, 1)), 141: array('i', (2, 3, 0, 1)), 142: array('i', (1, 3, 0, 1)), 143: array('i', (3, 3, 0, 1)), 
-   144: array('i', (0, 0, 2, 1)), 145: array('i', (2, 0, 2, 1)), 146: array('i', (1, 0, 2, 1)), 147: array('i', (3, 0, 2, 1)), 
-   148: array('i', (0, 2, 2, 1)), 149: array('i', (2, 2, 2, 1)), 150: array('i', (1, 2, 2, 1)), 151: array('i', (3, 2, 2, 1)), 
-   152: array('i', (0, 1, 2, 1)), 153: array('i', (2, 1, 2, 1)), 154: array('i', (1, 1, 2, 1)), 155: array('i', (3, 1, 2, 1)), 
-   156: array('i', (0, 3, 2, 1)), 157: array('i', (2, 3, 2, 1)), 158: array('i', (1, 3, 2, 1)), 159: array('i', (3, 3, 2, 1)), 
-   160: array('i', (0, 0, 1, 1)), 161: array('i', (2, 0, 1, 1)), 162: array('i', (1, 0, 1, 1)), 163: array('i', (3, 0, 1, 1)), 
-   164: array('i', (0, 2, 1, 1)), 165: array('i', (2, 2, 1, 1)), 166: array('i', (1, 2, 1, 1)), 167: array('i', (3, 2, 1, 1)), 
-   168: array('i', (0, 1, 1, 1)), 169: array('i', (2, 1, 1, 1)), 170: array('i', (1, 1, 1, 1)), 171: array('i', (3, 1, 1, 1)), 
-   172: array('i', (0, 3, 1, 1)), 173: array('i', (2, 3, 1, 1)), 174: array('i', (1, 3, 1, 1)), 175: array('i', (3, 3, 1, 1)), 
-   176: array('i', (0, 0, 3, 1)), 177: array('i', (2, 0, 3, 1)), 178: array('i', (1, 0, 3, 1)), 179: array('i', (3, 0, 3, 1)), 
-   180: array('i', (0, 2, 3, 1)), 181: array('i', (2, 2, 3, 1)), 182: array('i', (1, 2, 3, 1)), 183: array('i', (3, 2, 3, 1)), 
-   184: array('i', (0, 1, 3, 1)), 185: array('i', (2, 1, 3, 1)), 186: array('i', (1, 1, 3, 1)), 187: array('i', (3, 1, 3, 1)), 
-   188: array('i', (0, 3, 3, 1)), 189: array('i', (2, 3, 3, 1)), 190: array('i', (1, 3, 3, 1)), 191: array('i', (3, 3, 3, 1)), 
-   192: array('i', (0, 0, 0, 3)), 193: array('i', (2, 0, 0, 3)), 194: array('i', (1, 0, 0, 3)), 195: array('i', (3, 0, 0, 3)), 
-   196: array('i', (0, 2, 0, 3)), 197: array('i', (2, 2, 0, 3)), 198: array('i', (1, 2, 0, 3)), 199: array('i', (3, 2, 0, 3)), 
-   200: array('i', (0, 1, 0, 3)), 201: array('i', (2, 1, 0, 3)), 202: array('i', (1, 1, 0, 3)), 203: array('i', (3, 1, 0, 3)), 
-   204: array('i', (0, 3, 0, 3)), 205: array('i', (2, 3, 0, 3)), 206: array('i', (1, 3, 0, 3)), 207: array('i', (3, 3, 0, 3)), 
-   208: array('i', (0, 0, 2, 3)), 209: array('i', (2, 0, 2, 3)), 210: array('i', (1, 0, 2, 3)), 211: array('i', (3, 0, 2, 3)), 
-   212: array('i', (0, 2, 2, 3)), 213: array('i', (2, 2, 2, 3)), 214: array('i', (1, 2, 2, 3)), 215: array('i', (3, 2, 2, 3)), 
-   216: array('i', (0, 1, 2, 3)), 217: array('i', (2, 1, 2, 3)), 218: array('i', (1, 1, 2, 3)), 219: array('i', (3, 1, 2, 3)), 
-   220: array('i', (0, 3, 2, 3)), 221: array('i', (2, 3, 2, 3)), 222: array('i', (1, 3, 2, 3)), 223: array('i', (3, 3, 2, 3)), 
-   224: array('i', (0, 0, 1, 3)), 225: array('i', (2, 0, 1, 3)), 226: array('i', (1, 0, 1, 3)), 227: array('i', (3, 0, 1, 3)), 
-   228: array('i', (0, 2, 1, 3)), 229: array('i', (2, 2, 1, 3)), 230: array('i', (1, 2, 1, 3)), 231: array('i', (3, 2, 1, 3)), 
-   232: array('i', (0, 1, 1, 3)), 233: array('i', (2, 1, 1, 3)), 234: array('i', (1, 1, 1, 3)), 235: array('i', (3, 1, 1, 3)), 
-   236: array('i', (0, 3, 1, 3)), 237: array('i', (2, 3, 1, 3)), 238: array('i', (1, 3, 1, 3)), 239: array('i', (3, 3, 1, 3)), 
-   240: array('i', (0, 0, 3, 3)), 241: array('i', (2, 0, 3, 3)), 242: array('i', (1, 0, 3, 3)), 243: array('i', (3, 0, 3, 3)), 
-   244: array('i', (0, 2, 3, 3)), 245: array('i', (2, 2, 3, 3)), 246: array('i', (1, 2, 3, 3)), 247: array('i', (3, 2, 3, 3)), 
-   248: array('i', (0, 1, 3, 3)), 249: array('i', (2, 1, 3, 3)), 250: array('i', (1, 1, 3, 3)), 251: array('i', (3, 1, 3, 3)), 
-   252: array('i', (0, 3, 3, 3)), 253: array('i', (2, 3, 3, 3)), 254: array('i', (1, 3, 3, 3)), 255: array('i', (3, 3, 3, 3)), 
-   }
-
-GCODE_TO_INT = dict([(tuple(v),k) for (k,v) in INT_TO_GCODE.items()])
-
-### Exceptions
-class DuplicateMarkerInMapFile(Exception): pass
-class MapLineTooShort(Exception): pass
-class ThirdAllele(Exception): pass
-class PedError(Exception): pass
-class BadMagic(Exception):
-    """ Raised when one of the MAGIC bytes in a bed file does not match
-    """
-    pass
-class BedError(Exception):
-    """ Raised when parsing a bed file runs into problems
-    """
-    pass
-class UnknownGenocode(Exception):
-    """ Raised when we get a 2-bit genotype that is undecipherable (is it possible?)
-    """
-    pass
-class UnknownGeno(Exception): pass
-
-### Utility functions
-
-def timenow():
-    """return current time as a string
-    """
-    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
-
-def ceiling(n, k):
-    ''' Return the least multiple of k which is greater than or equal to n
-    '''
-    m = n % k
-    if m == 0:
-        return n
-    else:
-        return n + k - m
-
-def nbytes(n):
-    ''' Return the number of bytes required for n subjects
-    '''
-    return 2*ceiling(n, 4)/8
-
-### Primary module functionality
-class LPed:
-    """ The uber-class for processing the Linkage-format *.ped/*.map files
-    """
-    def __init__(self,  base):
-        self.base = base
-        self._ped = Ped('%s.ped' % (self.base))
-        self._map = Map('%s.map' % (self.base))
-
-        self._markers = {}
-        self._ordered_markers = []
-        self._marker_allele_lookup = {}
-        self._autosomal_indices = set()
-        
-        self._subjects = {}
-        self._ordered_subjects = []
-        
-        self._genotypes = []
-
-    def parse(self):
-        """
-        """
-        if VERBOSE: print 'plinkbinJZ: Analysis started: %s' % (timenow())
-        self._map.parse()
-        self._markers = self._map._markers
-        self._ordered_markers = self._map._ordered_markers
-        self._autosomal_indices = self._map._autosomal_indices
-        
-        self._ped.parse(self._ordered_markers)
-        self._subjects = self._ped._subjects
-        self._ordered_subjects = self._ped._ordered_subjects
-        self._genotypes = self._ped._genotypes
-        self._marker_allele_lookup = self._ped._marker_allele_lookup
-        
-        ### Adjust self._markers based on the allele information
-        ### we got from parsing the ped file
-        for m,  name in enumerate(self._ordered_markers):
-            a1,  a2 = self._marker_allele_lookup[m][HET]
-            self._markers[name][-2] = a1
-            self._markers[name][-1] = a2
-        if VERBOSE: print 'plinkbinJZ: Analysis finished: %s' % (timenow())
-
-    def getSubjectInfo(self, fid, oiid):
-        """
-        """
-        return self._subject_info[(fid, oiid)]
-
-    def getSubjectInfoByLine(self, line):
-        """
-        """
-        return self._subject_info[self._ordered_subjects[line]]
-    
-    def getGenotypesByIndices(self, s, mlist, format):
-        """ needed for rgGRR if the input is lped - deprecated but retained
-        """
-        mlist = dict(zip(mlist,[True,]*len(mlist))) # hash quicker than 'in' ?
-        raw_array = array('i', [row[s] for m,row in enumerate(self._genotypes) if mlist.get(m,None)])            
-        if format == 'raw':
-            return raw_array
-        elif format == 'ref':
-            result = array('i', [0]*len(mlist))
-            for m, gcode in enumerate(raw_array):
-                if gcode == HOM0:
-                    nref = 3
-                elif gcode == HET:
-                    nref = 2
-                elif gcode == HOM1:
-                    nref = 1
-                else:
-                    nref = 0
-                result[m] = nref
-            return result
-        else:
-            result = []
-            for m, gcode in enumerate(raw_array):
-                result.append(self._marker_allele_lookup[m][gcode])
-            return result
-    
-    def writebed(self, base):
-        """
-        """
-        dst_name = '%s.fam' % (base)        
-        print 'Writing pedigree information to [ %s ]' % (dst_name)
-        dst = open(dst_name, 'w')
-        for skey in self._ordered_subjects:            
-            (fid, iid, did, mid, sex, phe, sid, d_sid, m_sid) = self._subjects[skey]
-            dst.write('%s %s %s %s %s %s\n' % (fid, iid, did, mid, sex, phe))
-        dst.close()
-
-        dst_name = '%s.bim' % (base)        
-        print 'Writing map (extended format) information to [ %s ]' % (dst_name)
-        dst = open(dst_name, 'w')        
-        for m, marker in enumerate(self._ordered_markers):
-            chrom, name, genpos, abspos,  a1,  a2 = self._markers[marker]
-            dst.write('%s\t%s\t%s\t%s\t%s\t%s\n' % (chrom, name, genpos, abspos, a1, a2))
-        dst.close()
-
-        bed_name = '%s.bed' % (base)        
-        print 'Writing genotype bitfile to [ %s ]' % (bed_name)
-        print 'Using (default) SNP-major mode'
-        bed = open(bed_name, 'w')
-
-        ### Write the 3 header bytes
-        bed.write(struct.pack('B', int(''.join(reversed(MAGIC_BYTE1)), 2)))
-        bed.write(struct.pack('B', int(''.join(reversed(MAGIC_BYTE2)), 2)))
-        bed.write(struct.pack('B', int(''.join(reversed(FORMAT_SNP_MAJOR_BYTE)), 2)))
-        
-        ### Calculate how many "pad bits" we should add after the last subject
-        nsubjects = len(self._ordered_subjects)
-        nmarkers = len(self._ordered_markers)
-        total_bytes = nbytes(nsubjects)
-        nbits = nsubjects  * 2
-        pad_nibbles = ((total_bytes * 8) - nbits)/2
-        pad = array('i', [0]*pad_nibbles)
-
-        ### And now write genotypes to the file
-        for m in xrange(nmarkers):
-            geno = self._genotypes[m]
-            geno.extend(pad)
-            bytes = len(geno)/4
-            for b in range(bytes):
-                idx = b*4
-                gcode = tuple(geno[idx:idx+4])
-                try:
-                    byte = struct.pack('B', GCODE_TO_INT[gcode])
-                except KeyError:
-                    print m, b, gcode
-                    raise
-                bed.write(byte)
-        bed.close()
-        
-    def autosomal_indices(self):
-        """ Return the indices of markers in this ped/map that are autosomal.
-            This is used by rgGRR so that it can select a random set of markers
-            from the autosomes (sex chroms screw up the plot)
-        """
-        return self._autosomal_indices
-
-class Ped:
-    def __init__(self, path):
-        self.path = path
-        self._subjects = {}
-        self._ordered_subjects = []
-        self._genotypes = []
-        self._marker_allele_lookup = {}
-        
-    def lineCount(self,infile):
-        """ count the number of lines in a file - efficiently using wc -l
-        """
-        return int(commands.getoutput('wc -l %s' % (infile)).split()[0])    
-         
-
-    def parse(self,  markers):
-        """ Parse a given file -- this needs to be memory-efficient so that large
-            files can be parsed (~1 million markers on ~5000 subjects?).  It
-            should also be fast, if possible.
-        """
-                
-        ### Find out how many lines are in the file so we can ...
-        nsubjects = self.lineCount(self.path)
-        ### ... Pre-allocate the genotype arrays
-        nmarkers = len(markers)
-        _marker_alleles = [['0', '0'] for _ in xrange(nmarkers)]
-        self._genotypes = [array('i', [-1]*nsubjects) for _ in xrange(nmarkers)]
-
-        if self.path.endswith('.gz'):
-            pfile = gzip.open(self.path, 'r')
-        else:
-            pfile = open(self.path, 'r')
-
-        for s, line in enumerate(pfile):
-            line = line.strip()
-            if not line:
-                continue
-            
-            fid, iid, did, mid, sex, phe, genos = line.split(None, 6)
-            sid = iid.split('.')[0]
-            d_sid = did.split('.')[0]
-            m_sid = mid.split('.')[0]
-            
-            skey = (fid, iid)            
-            self._subjects[skey] = (fid, iid, did, mid, sex, phe, sid, d_sid, m_sid)
-            self._ordered_subjects.append(skey)
-
-            genotypes = genos.split()
-            
-            for m, marker in enumerate(markers):
-                idx = m*2
-                a1, a2 = genotypes[idx:idx+2] # Alleles for subject s, marker m
-                s1, s2 = seen = _marker_alleles[m] # Alleles seen for marker m
-                
-                ### FIXME: I think this can still be faster, and simpler to read
-                # Two pieces of logic intertwined here:  first, we need to code
-                # this genotype as HOM0, HOM1, HET or MISS.  Second, we need to
-                # keep an ongoing record of the genotypes seen for this marker
-                if a1 == a2:
-                    if a1 in MISSING_ALLELES:
-                        geno = MISS_GENO
-                    else:
-                        if s1 == '0':
-                            seen[0] = a1
-                        elif s1 == a1 or s2 == a2:
-                            pass
-                        elif s2 == '0':
-                            seen[1] = a1
-                        else:
-                            raise ThirdAllele('a1=a2=%s, seen=%s?' % (a1, str(seen)))
-                    
-                        if a1 == seen[0]:
-                            geno = HOM0_GENO
-                        elif a1 == seen[1]:
-                            geno = HOM1_GENO
-                        else:
-                            raise PedError('Cannot assign geno for a1=a2=%s from seen=%s' % (a1, str(seen)))
-                elif a1 in MISSING_ALLELES or a2 in MISSING_ALLELES:
-                    geno = MISS_GENO
-                else:
-                    geno = HET_GENO
-                    if s1 == '0':
-                        seen[0] = a1
-                        seen[1] = a2
-                    elif s2 == '0':
-                        if s1 == a1:
-                            seen[1] = a2
-                        elif s1 == a2:
-                            seen[1] = a1
-                        else:
-                            raise ThirdAllele('a1=%s, a2=%s, seen=%s?' % (a1, a2, str(seen)))
-                    else:
-                        if sorted(seen) != sorted((a1, a2)):
-                            raise ThirdAllele('a1=%s, a2=%s, seen=%s?' % (a1, a2, str(seen)))
-                                        
-                gcode = GENO_TO_GCODE.get(geno, None)
-                if gcode is None:
-                    raise UnknownGeno(str(geno))
-                self._genotypes[m][s] = gcode
-
-        # Build the _marker_allele_lookup table
-        for m,  alleles in enumerate(_marker_alleles):                
-            if len(alleles) == 2:
-                a1,  a2 = alleles
-            elif len(alleles) == 1:
-                a1 = alleles[0]
-                a2 = '0'
-            else:
-                raise PedError('All alleles blank for %s: %s' % (m, str(alleles)))
-
-            self._marker_allele_lookup[m] = {
-                HOM0: (a2, a2),
-                HOM1: (a1, a1),
-                HET : (a1, a2),
-                MISS: ('0','0'),
-                }
-
-        if VERBOSE: print '%s(%s) individuals read from [ %s ]' % (len(self._subjects),  nsubjects,  self.path)
-        
-class Map:
-    def __init__(self, path=None):
-        self.path = path
-        self._markers = {}
-        self._ordered_markers = []
-        self._autosomal_indices = set()
-
-    def __len__(self):
-        return len(self._markers)
-
-    def parse(self):
-        """ Parse a Linkage-format map file
-        """
-        if self.path.endswith('.gz'):
-            fh = gzip.open(self.path, 'r')
-        else:
-            fh = open(self.path, 'r')
-            
-        for i, line in enumerate(fh):
-            line = line.strip()
-            if not line:
-                continue
-
-            fields = line.split()
-            if len(fields) < 4:
-                raise MapLineTooShort(MAP_LINE_EXCEPTION_TEXT % (str(line)))
-            else:
-                chrom, name, genpos, abspos = fields
-            if name in self._markers:
-                raise DuplicateMarkerInMapFile('Marker %s was found twice in map file %s' % (name, self.path))
-            abspos = int(abspos)
-            if abspos < 0:
-                continue
-            if chrom in AUTOSOMES:
-                self._autosomal_indices.add(i)
-            chrom = CHROM_REPLACE.get(chrom, chrom)
-            self._markers[name] = [chrom, name, genpos, abspos,  None,  None]
-            self._ordered_markers.append(name)
-        fh.close()
-        if VERBOSE: print '%s (of %s) markers to be included from [ %s ]' % (len(self._ordered_markers), i+1, self.path)
-
-class BPed:
-    """ The uber-class for processing Plink's Binary Ped file format *.bed/*.bim/*.fam
-    """
-    def __init__(self,  base):
-        self.base = base
-        self._bed = Bed('%s.bed' % (self.base))
-        self._bim = Bim('%s.bim' % (self.base))
-        self._fam = Fam('%s.fam' % (self.base))
-
-        self._markers = {}
-        self._ordered_markers = []
-        self._marker_allele_lookup = {}
-        self._autosomal_indices = set()
-        
-        self._subjects = {}
-        self._ordered_subjects = []
-        
-        self._genotypes = []
-        
-    def parse(self,  quick=False):
-        """
-        """
-        self._quick = quick
-        
-        self._bim.parse()
-        self._markers = self._bim._markers
-        self._ordered_markers = self._bim._ordered_markers
-        self._marker_allele_lookup = self._bim._marker_allele_lookup
-        self._autosomal_indices = self._bim._autosomal_indices
-        
-        self._fam.parse()
-        self._subjects = self._fam._subjects
-        self._ordered_subjects = self._fam._ordered_subjects
-
-        self._bed.parse(self._ordered_subjects,  self._ordered_markers,  quick=quick)
-        self._bedf = self._bed._fh
-        self._genotypes = self._bed._genotypes
-        self.nsubjects = len(self._ordered_subjects)
-        self.nmarkers = len(self._ordered_markers)
-        self._bytes_per_marker = nbytes(self.nsubjects)
-
-    def writeped(self, path=None):
-        """
-        """
-        path = self.path = path or '%s.bed' % self.base
-        
-        map_name = '%s.map' % self.path.replace('.bed', '')
-        print 'Writing map file [ %s ]' % (map_name)
-        dst = open(map_name, 'w')
-        for m in self._ordered_markers:
-            chrom, snp, genpos, abspos, a1, a2 = self._markers[m]
-            dst.write('%s\t%s\t%s\t%s\n' % (chrom, snp, genpos, abspos))
-        dst.close()
-
-        ped_name = '%s.ped' % self.path.replace('.bed', '')
-        print 'Writing ped file [ %s ]' % (ped_name)
-        ped = open(ped_name, 'w')
-        firstyikes = False
-        for s, skey in enumerate(self._ordered_subjects):
-            idx = s*2
-            (fid, iid, did, mid, sex, phe, oiid, odid, omid) = self._subjects[skey]
-            ped.write('%s %s %s %s %s %s' % (fid, iid, odid, omid, sex, phe))
-            genotypes_for_subject = self.getGenotypesForSubject(s)
-            for m, snp in enumerate(self._ordered_markers):
-                #a1, a2 = self.getGenotypeByIndices(s, m)
-                a1,a2 = genotypes_for_subject[m]
-                ped.write(' %s %s' % (a1, a2))
-            ped.write('\n')
-        ped.close()
-
-    def getGenotype(self, subject, marker):
-        """ Retrieve a genotype for a particular subject/marker pair
-        """
-        m = self._ordered_markers.index(marker)
-        s = self._ordered_subjects.index(subject)
-        return self.getGenotypeByIndices(s, m)
-
-    def getGenotypesForSubject(self, s, raw=False):
-        """ Returns list of genotypes for all m markers
-            for subject s.  If raw==True, then an array
-            of raw integer gcodes is returned instead
-        """
-        if self._quick:
-            nmarkers = len(self._markers)
-            raw_array = array('i', [0]*nmarkers)
-            seek_nibble = s % 4
-            for m in xrange(nmarkers):
-                seek_byte = m * self._bytes_per_marker + s/4 + HEADER_LENGTH
-                self._bedf.seek(seek_byte)
-                geno = struct.unpack('B', self._bedf.read(1))[0]
-                quartet = INT_TO_GCODE[geno]
-                gcode = quartet[seek_nibble]
-                raw_array[m] = gcode
-        else:
-            raw_array = array('i', [row[s] for row in self._genotypes])
-            
-        if raw:
-            return raw_array
-        else:
-            result = []
-            for m, gcode in enumerate(raw_array):
-                result.append(self._marker_allele_lookup[m][gcode])
-            return result
-        
-    def getGenotypeByIndices(self, s, m):
-        """
-        """
-        if self._quick:
-            # Determine which byte we need to seek to, and
-            # which nibble within the byte we need
-            seek_byte = m * self._bytes_per_marker + s/4 + HEADER_LENGTH
-            seek_nibble = s % 4
-            self._bedf.seek(seek_byte)
-            geno = struct.unpack('B', self._bedf.read(1))[0]
-            quartet = INT_TO_GCODE[geno]
-            gcode = quartet[seek_nibble]
-        else:
-            # Otherwise, just grab the genotypes from the
-            # list of arrays
-            genos_for_marker = self._genotypes[m]
-            gcode = genos_for_marker[s]
-
-        return self._marker_allele_lookup[m][gcode]
-
-    def getGenotypesByIndices(self, s, mlist, format):
-        """
-        """
-        if self._quick:
-            raw_array = array('i', [0]*len(mlist))
-            seek_nibble = s % 4
-            for i,m in enumerate(mlist):
-                seek_byte = m * self._bytes_per_marker + s/4 + HEADER_LENGTH
-                self._bedf.seek(seek_byte)
-                geno = struct.unpack('B', self._bedf.read(1))[0]
-                quartet = INT_TO_GCODE[geno]
-                gcode = quartet[seek_nibble]
-                raw_array[i] = gcode
-            mlist = set(mlist)
-        else:
-            mlist = set(mlist)
-            raw_array = array('i', [row[s] for m,row in enumerate(self._genotypes) if m in mlist])
-            
-        if format == 'raw':
-            return raw_array
-        elif format == 'ref':
-            result = array('i', [0]*len(mlist))
-            for m, gcode in enumerate(raw_array):
-                if gcode == HOM0:
-                    nref = 3
-                elif gcode == HET:
-                    nref = 2
-                elif gcode == HOM1:
-                    nref = 1
-                else:
-                    nref = 0
-                result[m] = nref
-            return result
-        else:
-            result = []
-            for m, gcode in enumerate(raw_array):
-                result.append(self._marker_allele_lookup[m][gcode])
-            return result
-    
-    def getSubject(self, s):
-        """
-        """
-        skey = self._ordered_subjects[s]
-        return self._subjects[skey]
-    
-    def autosomal_indices(self):
-        """ Return the indices of markers in this ped/map that are autosomal.
-            This is used by rgGRR so that it can select a random set of markers
-            from the autosomes (sex chroms screw up the plot)
-        """
-        return self._autosomal_indices
-
-class Bed:
-
-    def __init__(self, path):
-        self.path = path
-        self._genotypes = []
-        self._fh = None
-
-    def parse(self, subjects,  markers,  quick=False):
-        """ Parse the bed file, indicated either by the path parameter,
-            or as the self.path indicated in __init__.  If quick is
-            True, then just parse the bim and fam, then genotypes will
-            be looked up dynamically by indices
-        """
-        self._quick = quick
-        
-        ordered_markers = markers
-        ordered_subjects = subjects
-        nsubjects = len(ordered_subjects)
-        nmarkers = len(ordered_markers)
-        
-        bed = open(self.path, 'rb')
-        self._fh = bed
-
-        byte1 = bed.read(1)
-        byte2 = bed.read(1)
-        byte3 = bed.read(1)
-        format_flag = struct.unpack('B', byte3)[0]
-
-        h1 = tuple(INT_TO_GCODE[struct.unpack('B', byte1)[0]])
-        h2 = tuple(INT_TO_GCODE[struct.unpack('B', byte2)[0]])
-        h3 = tuple(INT_TO_GCODE[format_flag])
-
-        if h1 != MAGIC1 or h2 != MAGIC2:
-            raise BadMagic('One or both MAGIC bytes is wrong: %s==%s or %s==%s' % (h1, MAGIC1, h2, MAGIC2))
-        if format_flag:
-            print 'Detected that binary PED file is v1.00 SNP-major mode (%s, "%s")\n' % (format_flag, h3)
-        else:
-            raise BedError('BAD_FORMAT_FLAG? (%s, "%s")' % (format_flag, h3))
-
-        print 'Parsing binary ped file for %s markers and %s subjects' % (nmarkers, nsubjects)
-
-        ### If quick mode was specified, we're done ...
-        self._quick = quick
-        if quick:
-            return
-            
-        ### ... Otherwise, parse genotypes into an array, and append that
-        ### array to self._genotypes
-        ngcodes = ceiling(nsubjects, 4)
-        bytes_per_marker = nbytes(nsubjects)
-        for m in xrange(nmarkers):
-            genotype_array = array('i', [-1]*(ngcodes))
-            for byte in xrange(bytes_per_marker):
-                intval = struct.unpack('B', bed.read(1))[0]
-                idx = byte*4
-                genotype_array[idx:idx+4] = INT_TO_GCODE[intval]
-            self._genotypes.append(genotype_array)
-        
-class Bim:
-    def __init__(self, path):
-        """
-        """
-        self.path = path
-        self._markers = {}
-        self._ordered_markers = []
-        self._marker_allele_lookup = {}
-        self._autosomal_indices = set()
-
-    def parse(self):
-        """
-        """
-        print 'Reading map (extended format) from [ %s ]' % (self.path)
-        bim = open(self.path, 'r')
-        for m, line in enumerate(bim):
-            chrom, snp, gpos, apos, a1, a2 = line.strip().split()
-            self._markers[snp] = (chrom, snp, gpos, apos, a1, a2)
-            self._marker_allele_lookup[m] = {
-                HOM0: (a2, a2),
-                HOM1: (a1, a1),
-                HET : (a1, a2),
-                MISS: ('0','0'),
-                }
-            self._ordered_markers.append(snp)
-            if chrom in AUTOSOMES:
-                self._autosomal_indices.add(m)
-        bim.close()
-        print '%s markers to be included from [ %s ]' % (m+1, self.path)
-
-class Fam:
-    def __init__(self, path):
-        """
-        """
-        self.path = path
-        self._subjects = {}
-        self._ordered_subjects = []
-
-    def parse(self):
-        """
-        """
-        print 'Reading pedigree information from [ %s ]' % (self.path)
-        fam = open(self.path, 'r')
-        for s, line in enumerate(fam):
-            fid, iid, did, mid, sex, phe = line.strip().split()
-            sid = iid.split('.')[0]
-            d_sid = did.split('.')[0]
-            m_sid = mid.split('.')[0]
-            skey = (fid, iid)
-            self._ordered_subjects.append(skey)
-            self._subjects[skey] = (fid, iid, did, mid, sex, phe, sid, d_sid, m_sid)
-        fam.close()
-        print '%s individuals read from [ %s ]' % (s+1, self.path)
-
-### Command-line functionality and testing
-def test(arg):
-    '''
-    '''
-    
-    import time
-
-    if arg == 'CAMP_AFFY.ped':
-        print 'Testing bed.parse(quick=True)'
-        s = time.time()
-        bed = BPed(arg.replace('.ped', ''))
-        bed.parse(quick=True)
-        print bed.getGenotype(('400118', '10300283'), 'rs2000467')
-        print bed.getGenotype(('400118', '10101384'), 'rs2294019')
-        print bed.getGenotype(('400121', '10101149'), 'rs2294019')        
-        print bed.getGenotype(('400123', '10200290'), 'rs2294019')        
-        assert bed.getGenotype(('400118', '10101384'), 'rs2294019') == ('4','4')
-        e = time.time()
-        print 'e-s = %s\n' % (e-s)
-    
-    print 'Testing bed.parse'
-    s = time.time()
-    bed = BPed(arg)
-    bed.parse(quick=False)
-    e = time.time()
-    print 'e-s = %s\n' % (e-s)
-
-    print 'Testing bed.writeped'
-    s = time.time()
-    outname = '%s_BEDTEST' % (arg)
-    bed.writeped(outname)
-    e = time.time()
-    print 'e-s = %s\n' % (e-s)
-    del(bed)
-
-    print 'Testing ped.parse'
-    s = time.time()
-    ped = LPed(arg)
-    ped.parse()
-    e = time.time()
-    print 'e-s = %s\n' % (e-s)
-
-    print 'Testing ped.writebed'
-    s = time.time()
-    outname = '%s_PEDTEST' % (arg)
-    ped.writebed(outname)
-    e = time.time()
-    print 'e-s = %s\n' % (e-s)
-    del(ped)
-    
-def profile_bed(arg):
-    """
-    """
-    bed = BPed(arg)
-    bed.parse(quick=False)
-    outname = '%s_BEDPROFILE' % (arg)
-    bed.writeped(outname)
-
-def profile_ped(arg):
-    """
-    """
-    ped = LPed(arg)
-    ped.parse()
-    outname = '%s_PEDPROFILE' % (arg)
-    ped.writebed(outname)
-
-if __name__ == '__main__':
-    """ Run as a command-line, this script should get one or more arguments,
-        each one a ped file to be parsed with the PedParser (unit tests?)
-    """
-    op = optparse.OptionParser()
-    op.add_option('--profile-bed', action='store_true', default=False)
-    op.add_option('--profile-ped', action='store_true', default=False)
-    opts, args = op.parse_args()
-    
-    if opts.profile_bed:
-        import profile
-        import pstats
-        profile.run('profile_bed(args[0])', 'fooprof')
-        p = pstats.Stats('fooprof')
-        p.sort_stats('cumulative').print_stats(10)
-    elif opts.profile_ped:
-        import profile
-        import pstats
-        profile.run('profile_ped(args[0])', 'fooprof')
-        p = pstats.Stats('fooprof')
-        p.sort_stats('cumulative').print_stats(10)
-    else:
-        for arg in args:
-            test(arg)
-    
-    ### Code used to generate the INT_TO_GCODE dictionary
-    #print '{\n  ',
-    #for i in range(256):
-    #   b = INT2BIN[i]
-    #    ints = []
-    #    s = str(i).rjust(3)
-    #    #print b
-    #    for j in range(4):
-    #        idx = j*2
-    #        #print i, j, idx, b[idx:idx+2], int(b[idx:idx+2], 2)
-    #        ints.append(int(b[idx:idx+2], 2))
-    #    print '%s: array(\'i\', %s),' % (s,tuple(ints)),
-    #    if i > 0 and (i+1) % 4 == 0:
-    #        print '\n  ',
-    #print '}'
-
-
Binary file tools/rgenetics/plinkbinJZ.pyc has changed
--- a/tools/rgenetics/rgCaCo.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,271 +0,0 @@
-#!/usr/local/bin/python
-# hack to run and process a plink case control association
-# expects args as  
-# bfilepath jobname outfile logfile logfilepath gffout
-# ross lazarus 
-# for wig files, we need annotation so look for map file or complain
-"""
-Parameters for wiggle track definition lines
-All options are placed in a single line separated by spaces:
-
-  track type=wiggle_0 name=track_label description=center_label \
-        visibility=display_mode color=r,g,b altColor=r,g,b \
-        priority=priority autoScale=on|off \
-        gridDefault=on|off maxHeightPixels=max:default:min \
-        graphType=bar|points viewLimits=lower:upper \
-        yLineMark=real-value yLineOnOff=on|off \
-        windowingFunction=maximum|mean|minimum smoothingWindow=off|2-16
-"""
-
-import sys,math,shutil,subprocess,os,time,tempfile,string
-from os.path import abspath
-from rgutils import timenow, plinke
-imagedir = '/static/rg' # if needed for images
-myversion = 'V000.1 April 2007'
-verbose = False
-
-def makeGFF(resf='',outfname='',logf=None,twd='.',name='track name',description='track description',topn=1000):
-    """
-    score must be scaled to 0-1000
-    
-    Want to make some wig tracks from each analysis
-    Best n -log10(p). Make top hit the window.
-    we use our tab output which has
-    rs	chrom	offset	ADD_stat	ADD_p	ADD_log10p
-    rs3094315	1	792429	1.151	0.2528	0.597223
-
-    """
-
-    def is_number(s):
-        try:
-            float(s)
-            return True
-        except ValueError:
-            return False
-    header = 'track name=%s description="%s" visibility=2 useScore=1 color=0,60,120\n' % (name,description)          
-    column_names = [ 'Seqname', 'Source', 'Feature', 'Start', 'End', 'Score', 'Strand', 'Frame', 'Group' ]
-    halfwidth=100
-    resfpath = os.path.join(twd,resf)
-    resf = open(resfpath,'r')
-    resfl = resf.readlines() # dumb but convenient for millions of rows
-    resfl = [x.split() for x in resfl]
-    headl = resfl[0]
-    resfl = resfl[1:]
-    headl = [x.strip().upper() for x in headl]
-    headIndex = dict(zip(headl,range(0,len(headl))))
-    whatwewant = ['CHR','RS','OFFSET','LOG10ARMITAGEP']
-    wewant = [headIndex.get(x,None) for x in whatwewant]
-    if None in wewant: # missing something
-        logf.write('### Error missing a required header from %s in makeGFF - headIndex=%s\n' % (whatwewant,headIndex))
-        return
-    ppos = wewant[3] # last in list
-    resfl = [x for x in resfl if x[ppos] > '' and x[ppos] != 'NA']
-    resfl = [(float(x[ppos]),x) for x in resfl] # decorate
-    resfl.sort()
-    resfl.reverse() # using -log10 so larger is better
-    pvals = [x[0] for x in resfl] # need to scale
-    resfl = [x[1] for x in resfl] # drop decoration  
-    resfl = resfl[:topn] # truncate
-    maxp = max(pvals) # need to scale
-    minp = min(pvals)
-    prange = abs(maxp-minp) + 0.5 # fudge
-    scalefact = 1000.0/prange
-    logf.write('###maxp=%f,minp=%f,prange=%f,scalefact=%f\n' % (maxp,minp,prange,scalefact))
-    for i,row in enumerate(resfl):
-        row[ppos] = '%d' % (int(scalefact*pvals[i])) 
-        resfl[i] = row # replace
-    outf = file(outfname,'w')
-    outf.write(header)
-    outres = [] # need to resort into chrom offset order
-    for i,lrow in enumerate(resfl):
-        chrom,snp,offset,p, = [lrow[x] for x in wewant]
-        gff = ('chr%s' % chrom,'rgCaCo','variation','%d' % (int(offset)-halfwidth),
-               '%d' % (int(offset)+halfwidth),p,'.','.','%s logp=%1.2f' % (snp,pvals[i]))
-        outres.append(gff)
-    outres = [(x[0],int(x[3]),x) for x in outres] # decorate
-    outres.sort() # into chrom offset
-    outres=[x[2] for x in outres] # undecorate
-    outres = ['\t'.join(x) for x in outres]    
-    outf.write('\n'.join(outres))
-    outf.write('\n')
-    outf.close()
-
-
-def plink_assocToGG(plinkout="hm",tag='test'):
-   """ plink --assoc output looks like this
-   #  CHR         SNP   A1      F_A      F_U   A2        CHISQ            P           OR 
-   #   1   rs3094315    G   0.6685   0.1364    A        104.1    1.929e-24        12.77 
-   # write as a genegraph input file
-   """
-   inf = file('%s.assoc' % plinkout,'r')
-   outf = file('%sassoc.xls' % plinkout,'w')
-   res = ['rs\tlog10p%s\tFakeInvOR%s\tRealOR%s' % (tag,tag,tag),] # output header for ucsc genome graphs
-   head = inf.next()
-   for l in inf:
-    ll = l.split()
-    if len(ll) >= 8:
-      p = ll[7]
-      if p != 'NA': # plink writes NA where a test could not be run
-          logp = '%9.9f' % -math.log10(float(p))
-      else:
-          logp = 'NA'
-      try:
-         orat = ll[8]
-      except IndexError:
-         orat = 'NA'
-      orat2 = orat
-      # invert odds ratios below 1 so both directions plot on the same scale
-      if orat != 'NA' and 0.0 < float(orat) < 1.0:
-         orat2 = '%9.9f' % (1.0/float(orat))
-      outl = [ll[1],logp, orat2, orat]
-      res.append('\t'.join(outl))
-   outf.write('\n'.join(res))
-   outf.write('\n')
-   outf.close()
-   inf.close()
-
-def xformModel(infname='',resf='',outfname='',
-               name='foo',mapf='/usr/local/galaxy/data/rg/ped/x.bim',flog=None):
-    """munge a plink .model file into either a ucsc track or an xls file
-    rerla@meme ~/plink]$ head hmYRI_CEU.model 
-    CHR         SNP     TEST            AFF          UNAFF        CHISQ   DF            P
-    1   rs3094315     GENO       41/37/11        0/24/64           NA   NA           NA
-    1   rs3094315    TREND         119/59         24/152        81.05    1    2.201e-19
-    1   rs3094315  ALLELIC         119/59         24/152        104.1    1    1.929e-24
-    1   rs3094315      DOM          78/11          24/64           NA   NA           NA
-
-    bim file has
-[rerla@beast pbed]$ head plink_wgas1_example.bim
-1	rs3094315	0.792429	792429	G	A
-1	rs6672353	0.817376	817376	A	G
-    """
-    if verbose:
-        print 'Rgenetics rgCaCo.xformModel got resf=%s,  outfname=%s' % (resf,outfname)
-    res = []
-    rsdict = {}       
-    map = file(mapf,'r')
-    for l in map: # plink map 
-        ll = l.strip().split()
-        if len(ll) >= 3:
-            rs=ll[1].strip()
-            chrom = ll[0]
-            if chrom.lower() == 'x':
-                chrom='23'
-            elif chrom.lower() == 'y':
-                chrom = '24'
-            elif chrom.lower() == 'mito':
-                chrom = '25'
-            offset = ll[3]
-            rsdict[rs] = (chrom,offset)
-    res.append('rs\tChr\tOffset\tGenop\tlog10Genop\tArmitagep\tlog10Armitagep\tAllelep\tlog10Allelep\tDomp\tlog10Domp') 
-    f = open(resf,'r')
-    headl = f.readline()
-    if headl.find('\t') != -1:
-        headl = headl.split('\t')
-        delim = '\t'
-    else:
-        headl = headl.split()
-        delim = None
-    whatwewant = ['CHR','SNP','TEST','AFF','UNAFF','CHISQ','P']
-    wewant = [headl.index(x) for x in whatwewant]
-    llen = len(headl)
-    lnum = anum = 0
-    lastsnp = None # so we know when to write out a gg line
-    outl = {}
-    f.seek(0)
-    for lnum,l in enumerate(f):
-        if lnum == 0:
-            continue
-        ll = l.split()
-        if delim:
-           ll = l.split(delim)
-        if len(ll) >= llen: # valid line
-            chr,snp,test,naff,nuaff,chi,p = [ll[x] for x in wewant]
-            snp = snp.strip()
-            chrom,offset = rsdict.get(snp,(None,None))
-            anum += 1
-            fp = 1.0 # if NA
-            lp = 0.0
-            try:
-                fp = float(p)
-                if fp > 0:
-                  lp = -math.log10(fp)
-                else:
-                    fp = 9e-100
-                    flog.write('### WARNING - Plink calculated %s for %s p value!!! 9e-100 substituted!\n' % (p,test))
-                    flog.write('### offending line #%d in %s = %s' % (lnum,resf,l))
-            except:
-                pass
-            if snp != lastsnp:
-                if len(outl.keys()) > 3:
-                    sl = [outl.get(x,'?') for x in ('snp','chrom','offset','GENO','TREND','ALLELIC','DOM')]
-                    res.append('\t'.join(sl)) # last snp line
-                outl = {'snp':snp,'chrom':chrom,'offset':offset} # first 3 cols for gg line
-                lastsnp = snp # reset for next marker
-            #if p == 'NA':
-            #      p = 1.0 
-            # let's pass downstream for handling R is fine?
-            outl[test] = '%s\t%f' % (p,lp)
-    if len(outl.keys()) > 3:
-        l = [outl.get(x,'?') for x in ('snp','chrom','offset','GENO','TREND','ALLELIC','DOM')]
-        res.append('\t'.join(l)) # last snp line
-    f = file(outfname,'w')
-    res.append('')
-    f.write('\n'.join(res))
-    f.close()
-
-
-
-                
-if __name__ == "__main__":
-    """
-    # called as 
-    <command interpreter="python">
-        rgCaCo.py '$i.extra_files_path/$i.metadata.base_name' "$name" 
-        '$out_file1' '$logf' '$logf.files_path' '$gffout'
-    </command>
-    """
-    if len(sys.argv) < 7:
-       s = 'rgCaCo.py needs 6 params - got %s \n' % (sys.argv)
-       print >> sys.stdout, s
-       sys.exit(1)
-    bfname = sys.argv[1]
-    name = sys.argv[2]
-    killme = string.punctuation + string.whitespace
-    trantab = string.maketrans(killme,'_'*len(killme))
-    name = name.translate(trantab)
-    outfname = sys.argv[3]
-    logf = sys.argv[4]
-    logoutdir = sys.argv[5]
-    gffout = sys.argv[6]
-    topn = 1000
-    try:
-        os.makedirs(logoutdir)
-    except:
-        pass
-    map_file = None
-    me = sys.argv[0]
-    amapf = '%s.bim' % bfname # to decode map in xformModel
-    flog = file(logf,'w')
-    logme = []
-    cdir = os.getcwd()
-    s = 'Rgenetics %s http://rgenetics.org Galaxy Tools, rgCaCo.py started %s\n' % (myversion,timenow())
-    print >> sys.stdout, s # so will appear as blurb for file
-    logme.append(s)
-    if verbose:
-        s = 'rgCaCo.py:  bfname=%s, logf=%s, argv = %s\n' % (bfname, logf, sys.argv) 
-        print >> sys.stdout, s # so will appear as blurb for file
-        logme.append(s)
-    twd = tempfile.mkdtemp(suffix='rgCaCo') # make sure plink doesn't spew log file into the root!
-    tname = os.path.join(twd,name)
-    vcl = [plinke,'--noweb','--bfile',bfname,'--out',name,'--model']
-    p=subprocess.Popen(' '.join(vcl),shell=True,stdout=flog,cwd=twd)
-    retval = p.wait()
-    resf = '%s.model' % tname # plink output is here we hope
-    xformModel(bfname,resf,outfname,name,amapf,flog) # leaves the desired summary file
-    makeGFF(resf=outfname,outfname=gffout,logf=flog,twd=twd,name='rgCaCo_TopTable',description=name,topn=topn)
-    flog.write('\n'.join(logme))
-    flog.close() # close the log used
-    #shutil.copytree(twd,logoutdir)
-    shutil.rmtree(twd) # clean up
-
--- a/tools/rgenetics/rgCaCo.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,103 +0,0 @@
-<tool id="rgCaCo1" name="Case Control:">
-    <description>for unrelated subjects</description>
-    <command interpreter="python">
-        rgCaCo.py '$i.extra_files_path/$i.metadata.base_name' "$title"  '$out_file1' '$logf' '$logf.files_path' '$gffout'
-    </command>
-    <inputs>
-      <param name="i"  type="data" label="RGenetics genotype data from your current history"
-      format="pbed" />
-       <param name='title' type='text' size="132" value='CaseControl' label="Title for this job"/>
-
-    </inputs>
-
-   <outputs>
-       <data format="tabular" name="out_file1" label="${title}_rgCaCo.xls" />
-       <data format="txt" name="logf" label="${title}_rgCaCo.log"/>
-       <data format="gff" name="gffout" label="${title}_rgCaCoTop.gff" />
-   </outputs>
-<tests>
- <test>
- <param name='i' value='tinywga' ftype='pbed' >
-   <metadata name='base_name' value='tinywga' />
-   <composite_data value='tinywga.bim' />
-   <composite_data value='tinywga.bed' />
-   <composite_data value='tinywga.fam' />
-   <edit_attributes type='name' value='tinywga' /> 
- </param>
- <param name='title' value='rgCaCotest1' />
- <output name='out_file1' file='rgCaCotest1_CaCo.xls' ftype='tabular' compare='diff' />
- <output name='logf' file='rgCaCotest1_CaCo_log.txt' ftype='txt' compare='diff' lines_diff='20' />
- <output name='gffout' file='rgCaCotest1_CaCo_topTable.gff' ftype='gff' compare='diff' />
- </test>
-</tests>
-<help>
-
-.. class:: infomark
-
-**Syntax**
-
-- **Genotype file** is the input case control data chosen from available library Plink binary files
-- **Map file** is the linkage format .map file corresponding to the genotypes in the Genotype file
-- **Type of test** is the kind of test statistic to report such as Armitage trend test or genotype test
-- **Format** determines how your data will be returned to your Galaxy workspace
-
------
-
-**Summary**
-
-This tool will perform some standard statistical tests comparing subjects designated as
-affected (cases) and unaffected subjects (controls). To avoid bias, it is important that
-controls would have been eligible for sampling as cases had they become affected. This may seem
-odd, but it requires that the cases and controls are drawn from the same sampling frame.
-
-The Armitage trend test is robust to departures from HWE and so is very attractive - after all, a real disease
-mutation may well result in distorted HWE at least in cases. All the others are susceptible to
-bias in the presence of HWE departures.
-
-All of these tests are exquisitely sensitive to non-differential population stratification in cases
-compared to controls and this must be tested before believing any results here. Use the PCA method for
-100k markers or more.
-
-If you don't see the genotype data set you want here, it can be imported using one of the methods available from
-the Galaxy Get Data tool page.
-
-Output format can be UCSC .bed if you want to see your
-results as a fully fledged UCSC track. A map file containing the chromosome and offset for each marker is required for
-writing this kind of output.
-Alternatively you can use .gg for the UCSC Genome Graphs tool which has all of the advantages
-of the .bed track, plus a neat, visual front end that displays a lot of useful clues.
-Either of these are a very useful way of quickly getting a look
-at your data in full genomic context.
-
-Finally, if you can't live without
-spreadsheet data, choose the .xls tab delimited format. It's not a stupid binary excel file. Just a plain old tab delimited
-one with a header. Fortunately excel is dumb enough to open these without much protest.
-
-
------
-
-.. class:: infomark
-
-**Attribution**
-
-This Galaxy tool relies on Plink (see Plinksrc_) to test Case Control association models. 
-
-So, we rely on the author (Shaun Purcell) for the documentation you need specific to those settings - they are very nicely documented - see
-DOC_
-
-Tool and Galaxy datatypes originally designed and written for the Rgenetics
-series of whole genome scale statistical genetics tools by ross lazarus (ross.lazarus@gmail.com)
-
-Copyright Ross Lazarus March 2007
-This Galaxy wrapper is released licensed under the LGPL_ but is about as useful as a chocolate teapot without Plink which is GPL.
-
-I'm no lawyer, but it looks like you got GPL if you use this software. Good luck.
-
-.. _Plinksrc: http://pngu.mgh.harvard.edu/~purcell/plink/ 
-
-.. _LGPL: http://www.gnu.org/copyleft/lesser.html
-
-.. _DOC: http://pngu.mgh.harvard.edu/~purcell/plink/anal.shtml#cc
-
-</help>
-</tool>
--- a/tools/rgenetics/rgClean.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,160 +0,0 @@
-"""
-# galaxy tool xml files can define a galaxy supplied output filename
-# that must be passed to the tool and used to return output
-# here, the plink log file is copied to that file and removed
-# took a while to figure this out!
-# use exec_before_job to give files sensible names
-#
-# ross april 14 2007
-# plink cleanup script
-# ross lazarus March 2007 for camp illumina whole genome data
-# note problems with multiple commands being ignored - eg --freq --missing --mendel 
-# only the first seems to get done...
-#
-##Summary statistics versus inclusion criteria
-##
-##Feature                         As summary statistic    As inclusion criteria
-##Missingness per individual      --missing               --mind N
-##Missingness per marker          --missing               --geno N        
-##Allele frequency                --freq                  --maf N
-##Hardy-Weinberg equilibrium      --hardy                 --hwe N
-##Mendel error rates              --mendel                --me N M
-#
-# call as plinkClean.py $i $o $mind $geno $hwe $maf $mef $mei $outfile 
-# note plinkClean_code.py does some renaming before the job starts
-
-    
-    <command interpreter="python2.4">
-        rgClean.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title' '$mind' '$geno' '$hwe' '$maf' 
-        '$mef' '$mei' '$out_file1' '$out_file1.files_path' '$userId' 
- 
-  
-"""
-import sys,shutil,os,subprocess, glob, string, tempfile, time
-from rgutils import galhtmlprefix, timenow, plinke
-prog = os.path.split(sys.argv[0])[-1]
-myversion = 'January 4 2010'
-verbose=False
-
-
-def fixoutaff(outpath='',newaff='1'):
-    """ quick way to create test data sets - set all aff to 1 or 2 for
-    some hapmap data and then merge
-    [rerla@beast galaxy]$ head tool-data/rg/library/pbed/affyHM_CEU.fam
-    1341 14 0 0 2 1
-    1341 2 13 14 2 1
-    1341 13 0 0 1 1
-    1340 9 0 0 1 1
-    1340 10 0 0 2 1
-    """
-    nchanged = 0
-    fam = '%s.fam' % outpath
-    famf = open(fam,'r')
-    fl = famf.readlines()
-    famf.close()
-    for i,row in enumerate(fl):
-        lrow = row.split()
-        if lrow[-1] != newaff:
-            lrow[-1] = newaff
-            fl[i] = ' '.join(lrow)
-            fl[i] += '\n'
-            nchanged += 1
-    fo = open(fam,'w')
-    fo.write(''.join(fl))
-    fo.close()
-    return nchanged
-            
-
-
-def clean():
-    """
-    """
-    if len(sys.argv) < 16:
-        print >> sys.stdout, '## %s expected 15 params in sys.argv, got %d - %s' % (prog,len(sys.argv),sys.argv)
-        print >> sys.stdout, """this script will filter a linkage format ped
-        and map file containing genotypes. It takes 15 parameters - the plink input path, base name and title,
-        then a new filename root for the output clean data, followed by the mind, geno, hwe, maf, mef and mei
-        thresholds documented in the plink docs, plus the output file and path to be returned to Galaxy
-        called as:
-        <command interpreter="python">
-        rgClean.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title' '$mind'
-        '$geno' '$hwe' '$maf' '$mef' '$mei' '$out_file1' '$out_file1.files_path'
-        '$relfilter' '$afffilter' '$sexfilter' '$fixaff'
-        </command>
-
-        """
-        sys.exit(1)
-    plog = []
-    inpath = sys.argv[1]
-    inbase = sys.argv[2]
-    killme = string.punctuation + string.whitespace
-    trantab = string.maketrans(killme,'_'*len(killme))
-    title = sys.argv[3].translate(trantab)
-    mind = sys.argv[4]
-    geno = sys.argv[5]
-    hwe = sys.argv[6]
-    maf = sys.argv[7]
-    me1 = sys.argv[8]
-    me2 = sys.argv[9]
-    outfname = sys.argv[10]
-    outfpath = sys.argv[11]
-    relf = sys.argv[12]
-    afff = sys.argv[13]
-    sexf = sys.argv[14]
-    fixaff = sys.argv[15]
-    output = os.path.join(outfpath,outfname)
-    outpath = os.path.join(outfpath,title)
-    outprunepath = os.path.join(outfpath,'ldprune_%s' % title)
-    try:
-      os.makedirs(outfpath)
-    except:
-      pass
-    bfile = os.path.join(inpath,inbase)
-    outf = file(outfname,'w')
-    vcl = [plinke,'--noweb','--bfile',bfile,'--make-bed','--out',
-          outpath,'--set-hh-missing','--mind',mind,
-          '--geno',geno,'--maf',maf,'--hwe',hwe,'--me',me1,me2]
-    # yes - the --me parameter takes 2 values - mendels per snp and per family
-    if relf == 'oo': # plink filters are what they leave...
-        vcl.append('--filter-nonfounders') # leave only offspring
-    elif relf == 'fo':
-        vcl.append('--filter-founders')
-    if afff == 'affonly':
-        vcl.append('--filter-controls')
-    elif afff == 'unaffonly':
-        vcl.append('--filter-cases')
-    if sexf == 'fsex':
-        vcl.append('--filter-females')
-    elif sexf == 'msex':
-        vcl.append('--filter-males')        
-    p=subprocess.Popen(' '.join(vcl),shell=True,cwd=outfpath)
-    retval = p.wait()
-    plog.append('%s started, called as %s' % (prog,' '.join(sys.argv)))
-    outf.write(galhtmlprefix % prog)
-    outf.write('<ul>\n')
-    plogf = '%s.log' % os.path.join(outfpath,title)
-    try:
-        plogl = file(plogf,'r').readlines()
-        plog += [x.strip() for x in plogl]
-    except:
-        plog += ['###Cannot open plink log file %s' % plogf,]
-    # if fixaff, want to 'fix' the fam file
-    if fixaff != '0':
-        nchanged = fixoutaff(outpath=outpath,newaff=fixaff)
-        plog += ['## fixaff was requested  %d subjects affection status changed to %s' % (nchanged,fixaff)] 
-    pf = file(plogf,'w')
-    pf.write('\n'.join(plog))
-    pf.close()
-    globme = os.path.join(outfpath,'*')
-    flist = glob.glob(globme)
-    flist.sort()
-    for i, data in enumerate( flist ):
-        outf.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1]))
-    outf.write('</ul>\n')
-    outf.write("<br/></div></body></html>")
-    outf.close()
-
-
-if __name__ == "__main__":
-    clean()
-
--- a/tools/rgenetics/rgClean.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,154 +0,0 @@
-<tool id="rgClean1" name="Clean genotypes:">
-    <description>filter markers, subjects</description>
-
-    <command interpreter="python">
-        rgClean.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title' '$mind'
-        '$geno' '$hwe' '$maf' '$mef' '$mei' '$out_file1' '$out_file1.files_path'
-        '$relfilter' '$afffilter' '$sexfilter' '$fixaff'
-    </command>
-
-    <inputs>
-       <param name="input_file"  type="data" label="RGenetics genotype library file in compressed Plink format"
-         size="120" format="pbed" />
-       <param name="title" type="text" size="80" label="Descriptive title for cleaned genotype file" value="Cleaned_data"/>
-       <param name="geno"  type="text" label="Maximum Missing Fraction: Markers" value="0.05" />
-       <param name="mind" type="text" value="0.1" label="Maximum Missing Fraction: Subjects"/>
-       <param name="mef"  type="text" label="Maximum Mendel Error Rate: Family" value="0.05"/>
-       <param name="mei"  type="text" label="Maximum Mendel Error Rate: Marker" value="0.05"/>
-       <param name="hwe" type="text" value="0" label="Smallest HWE p value (set to 0 for all)" />
-       <param name="maf" type="text" value="0.01"
-       label="Smallest Minor Allele Frequency (set to 0 for all)"/>
-       <param name='relfilter' label = "Filter on pedigree relatedness" type="select"
-   	     optional="false" size="132"
-         help="Optionally remove related subjects if pedigree identifies founders and their offspring">
-         <option value="all" selected='true'>No filter on relatedness</option>
-         <option value="fo" >Keep Founders only (pedigree m/f ID = "0")</option>
-         <option value="oo" >Keep Offspring only (one randomly chosen if >1 sibs in family)</option>
-   		</param>
-       <param name='afffilter' label = "Filter on affection status" type="select"
-   	     optional="false" size="132"
-         help="Optionally remove affected or non affected subjects">
-         <option value="allaff" selected='true'>No filter on affection status</option>
-         <option value="affonly" >Keep Controls only (affection='1')</option>
-         <option value="unaffonly" >Keep Cases only (affection='2')</option>
-   		</param>
-       <param name='sexfilter' label = "Filter on gender" type="select"
-   	     optional="false" size="132"
-         help="Optionally remove all male or all female subjects">
-         <option value="allsex" selected='true'>No filter on gender status</option>
-         <option value="msex" >Keep Males only (pedigree gender='1')</option>
-         <option value="fsex" >Keep Females only (pedigree gender='2')</option>
-   		</param>
-       <param name="fixaff" type="text" value="0"
-          label = "Change ALL subjects affection status to (0=no change,1=unaff,2=aff)"
-          help="Use this option to switch the affection status to a new value for all output subjects" />
-   </inputs>
-
-   <outputs>
-       <data format="pbed" name="out_file1" metadata_source="input_file" label="${title}_rgClean.pbed"  />
-   </outputs>
-
-<tests>
- <test>
-    <param name='input_file' value='tinywga' ftype='pbed' >
-    <metadata name='base_name' value='tinywga' />
-    <composite_data value='tinywga.bim' />
-    <composite_data value='tinywga.bed' />
-    <composite_data value='tinywga.fam' />
-    <edit_attributes type='name' value='tinywga' /> 
-    </param>
-    <param name='title' value='rgCleantest1' />
-    <param name="geno" value="1" />
-    <param name="mind" value="1" />
-    <param name="mef" value="0" />
-    <param name="mei" value="0" />
-    <param name="hwe" value="0" />
-    <param name="maf" value="0" />
-    <param name="relfilter" value="all" />
-    <param name="afffilter" value="allaff" />
-    <param name="sexfilter" value="allsex" />
-    <param name="fixaff" value="0" />
-    <output name='out_file1' file='rgtestouts/rgClean/rgCleantest1.pbed' compare="diff" lines_diff="25" >
-    <extra_files type="file" name='rgCleantest1.bim' value="rgtestouts/rgClean/rgCleantest1.bim" compare="diff" />
-    <extra_files type="file" name='rgCleantest1.fam' value="rgtestouts/rgClean/rgCleantest1.fam" compare="diff" />
-    <extra_files type="file" name='rgCleantest1.bed' value="rgtestouts/rgClean/rgCleantest1.bed" compare="diff" />
-    </output>
- </test>
-</tests>
-<help>
-
-.. class:: infomark
-
-**Syntax**
-
-- **Genotype data** is the input genotype file chosen from your current history
-- **Descriptive title** is the name to use for the filtered output file
-- **Missfrac threshold: subjects** is the threshold for missingness by subject. Subjects with more than this fraction missing will be excluded from the import
-- **Missfrac threshold: markers** is the threshold for missingness by marker. Markers with more than this fraction missing will be excluded from the import
-- **MaxMendel Individuals** Mendel error fraction above which to exclude subjects with more than the specified fraction of mendelian errors in transmission (for family data only)
-- **MaxMendel Families** Mendel error fraction above which to exclude families with more than the specified fraction of mendelian errors in transmission (for family data only)
-- **HWE** is the threshold for HWE test p values below which the marker will not be imported. Set this to -1 and all markers will be imported regardless of HWE p value
-- **MAF** is the threshold for minor allele frequency - SNPs with lower MAF will be excluded
-- **Filters** for founders/offspring or affected/unaffected or males/females are optionally available if needed
-- **Change Affection** is only needed if you want to change the affection status for creating new analysis datasets
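-
-As a rough, illustrative starting point (not a recommendation), the form defaults correspond to::
-
-    mind=0.1  geno=0.05  maf=0.01  hwe=0  mef=0.05  mei=0.05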
-
------
-
-**Attribution**
-
-This tool relies on the work of many people. It uses Plink http://pngu.mgh.harvard.edu/~purcell/plink/,
-and the R http://cran.r-project.org/ and
-Bioconductor http://www.bioconductor.org/ projects.
-
-In particular, http://pngu.mgh.harvard.edu/~purcell/plink/
-has excellent documentation describing the parameters you can set here.
-
-This implementation is a Galaxy tool wrapper around these third party applications.
-It was originally designed and written for family based data from the CAMP Illumina run of 2007 by
-ross lazarus (ross.lazarus@gmail.com) and incorporated into the rgenetics toolkit.
-
-Rgenetics merely exposes them, wrapping Plink so you can use it in Galaxy.
-
------
-
-**Summary**
-
-Reliable statistical inference depends on reliable data. Poor quality samples and markers
-may add more noise than signal, decreasing statistical power. Removing the worst of them
-can be done by setting thresholds for some of the commonly used technical quality measures
-for genotype data. Of course discordant replicate calls are also very informative but are not
-in scope here.
-
-Marker cleaning: Filters are available to remove markers below a minor allele
-frequency threshold, beyond a Hardy-Weinberg p-value threshold,
-or above a missingness threshold. If family data are available, thresholds for Mendelian
-error can be set.
-
-Subject cleaning: Filters are available to remove subjects with many missing calls. Subjects and markers for family data can be filtered by proportions
-of Mendelian errors in observed transmission. Use the QC reporting tool to
-generate a comprehensive series of reports for quality control.
-
-Note that ancestry and cryptic relatedness should also be checked using the relevant tools.
-
------
-
-.. class:: infomark
-
-**Tip**
-
-You can check that you got what you asked for by running the QC tool to ensure that the distributions
-are truncated the way you expect. Note that the thresholds will not be matched exactly -
-some bad assays and subjects fail multiple QC measures, so the number of
-samples or markers removed may differ from what each threshold alone would suggest. Finally, the ordering
-of operations matters, and Plink is somewhat restrictive about what it will do on each pass
-of the data. At least the ordering is fixed.
-
------
-
-This Galaxy tool was written by Ross Lazarus for the Rgenetics project.
-It uses Plink for most calculations and some custom python code - for full Plink attribution,
-source code and documentation, please see http://pngu.mgh.harvard.edu/~purcell/plink/
-
-</help>
-</tool>
--- a/tools/rgenetics/rgClustalw.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-"""
-rgclustalw.py
-wrapper for clustalw necessitated by bad choice of output path for .dnd file based on input file. Naughty.
-Copyright ross lazarus march 2011
-All rights reserved
-Licensed under the LGPL
-"""
-
-import sys,optparse,os,subprocess,tempfile,shutil
-
-class Clustrunner:
-    """
-    """
-    def __init__(self,opts=None):
-        self.opts = opts
-        self.iname = 'infile_copy'
-        shutil.copy(self.opts.input,self.iname) 
-
-    def run(self):
-        tlf = open(self.opts.outlog,'w')
-        cl = ['clustalw2 -INFILE=%s -OUTFILE=%s -OUTORDER=%s -TYPE=%s -OUTPUT=%s' % (self.iname,self.opts.output,self.opts.out_order,self.opts.dnarna,self.opts.outform)]
-        if self.opts.seq_range_end <> None and self.opts.seq_range_start <> None:
-            cl.append('-RANGE=%s,%s' % (self.opts.seq_range_start,self.opts.seq_range_end))
-        if self.opts.outform=='CLUSTAL' and self.opts.outseqnos <> None:
-            cl.append('-SEQNOS=ON')
-        process = subprocess.Popen(' '.join(cl), shell=True, stderr=tlf, stdout=tlf)
-        rval = process.wait()
-        dndf = '%s.dnd' % self.iname
-        if os.path.exists(dndf):
-            tlf.write('\nClustal created the following dnd file for your information:\n')
-            dnds = open('%s.dnd' % self.iname,'r').readlines()
-            for row in dnds:
-                tlf.write(row)
-            tlf.write('\n')
-        tlf.close()
-        os.unlink(self.iname)
-    
-
-
-if __name__ == "__main__":
-    op = optparse.OptionParser()
-    op.add_option('-i', '--input', default=None)
-    op.add_option('-o', '--output', default=None)
-    op.add_option('-t', '--outname', default="rgClustal")
-    op.add_option('-s', '--out_order', default='ALIGNMENT')
-    op.add_option('-f', '--outform', default='CLUSTAL')
-    op.add_option('-e', '--seq_range_end',default=None)
-    op.add_option('-b', '--seq_range_start',default=None)
-    op.add_option('-l','--outlog',default='rgClustalw.log')
-    op.add_option('-q', '--outseqnos',default=None)    
-    op.add_option('-d', '--dnarna',default='DNA')    
-    
-    opts, args = op.parse_args()
-    assert opts.input <> None
-    assert os.path.isfile(opts.input)
-    c = Clustrunner(opts)
-    c.run()
-    
-            
-
--- a/tools/rgenetics/rgClustalw.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,128 +0,0 @@
-<tool id="clustalw" name="ClustalW" version="0.1">
-   <description>multiple sequence alignment program for DNA or proteins</description>
-   <command interpreter="python"> 
-    rgClustalw.py -i "$input" -o "$output" -s "$out_order" -l "$outlog" -t "$outname" -d "$dnarna"
-    #if   ($range.mode=="part")
--b "$range.seq_range_start" -e "$range.seq_range_end"
-    #end if
-    #if ($outcontrol.outform=="clustal")
--f "CLUSTAL"
-    #if ($outcontrol.out_seqnos=="ON")
--q "ON"
-    #end if
-    #end if
-    #if ($outcontrol.outform=="phylip")
--f "PHYLIP"
-    #end if
-    #if ($outcontrol.outform=="fasta")
--f "FASTA"
-    #end if
-   </command>
-  <inputs>
-   <page>
-    <param format="fasta" name="input" type="data" label="Fasta File" />
-    <param name="outname" label="Name for output files to make it easy to remember what you did" type="text" size="50" value="Clustal_run" />
-    <param name="dnarna" type="select" label="Data Type">
-      <option value="DNA" selected="True">DNA nucleotide sequences</option>
-      <option value="PROTEIN">Protein sequences</option>
-    </param>
-    <conditional name="outcontrol">
-      <param name="outform" type="select" label="Output alignment format">
-        <option value="clustal" selected="True">Native Clustal output format</option>
-        <option value="phylip">Phylip format</option>
-        <option value="fasta">Fasta format</option>
-      </param>
-      <when value="fasta" />
-      <when value="phylip" />
-      <when value="clustal">
-       <param name="out_seqnos" type="select" label="Show residue numbers in clustal format output">
-         <option value="ON">yes</option>
-         <option value="OFF" selected="true">no</option>
-       </param>
-      </when>
-    </conditional>
-    <param name="out_order" type="select" label="Output Order">
-      <option value="ALIGNED">aligned</option>
-      <option value="INPUT">same order as input file</option>
-    </param>
-
-    <conditional name="range">
-        <param name="mode" type="select" label="Output complete alignment (or specify part to output)">
-          <option value="complete">complete alignment</option>
-          <option value="part">only part of the alignment</option>
-        </param>
-        <when value="complete">
-        </when>
-        <when value="part">    
-           <param name="seq_range_start" size="5" type="integer" value="1" label="start point" help="sequence range to write">
-           </param>
-           <param name="seq_range_end" size="5" type="integer" value="99999" label="end point" >
-           </param> 
-        </when>
-    </conditional>
-   </page>
-  </inputs>
-  <outputs>
-    <data format="clustal" name="output"  label="${outname}_output.${outcontrol.outform}">
-       <change_format>
-           <when input="outcontrol.outform" value="phylip" format="phylip" />
-           <when input="outcontrol.outform" value="fasta" format="fasta" />
-       </change_format>
-    </data>
-    <data format="txt" name="outlog"  label="${outname}_clustal_log.txt"/>
-  </outputs>
-  <tests>
-     <test>
-        <param name="input" value="rgClustal_testin.fasta" />
-      <param name = "outname" value="" />
-      <param name = "outform" value="fasta" />
-      <param name = "dnarna" value="DNA" />
-      <param name = "mode" value="complete" />
-      <param name = "out_order" value="ALIGNED" />
-      <output name="output" file="rgClustal_testout.fasta" ftype="fasta" />
-      <output name="outlog" file="rgClustal_testout.log" ftype="txt" lines_diff="5" />
-     </test>
-  </tests>
-  <help>
-
-**Note**
-
-This tool allows you to run a multiple sequence alignment with ClustalW2 (see Clustsrc_) using the default options.
- 
-For a tutorial introduction, see ClustalW2_
-
-You can align DNA or protein sequences - the input file should contain all the sequences to be aligned, in fasta format.
-
-A log will be output to your history showing the output Clustal would normally write to standard output.
-
-The alignments will appear as a clustal format file or optionally, as phylip or fasta format files in your history. If you choose fasta as 
-the output format, you can create a 'Logo' image using the Sequence Logo tool.
-
-If Clustal format is chosen, you have the option of adding residue numbers to the output.
-
-A subsequence of the alignment can be output by choosing to output only part of the alignment and defining the start and end points of the subsequence to be output.
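-
-Under the hood, the wrapper assembles a clustalw2 command along these lines (illustrative)::
-
-    clustalw2 -INFILE=infile_copy -OUTFILE=out.aln -OUTORDER=ALIGNED -TYPE=DNA -OUTPUT=CLUSTAL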
-
-----
-
-**Attribution**
-
-Clustal attribution and associated documentation are available at Clustsrc_
-
-The first iteration of this Galaxy wrapper was written by Hans-Rudolf Hotz - see Clustfirst_
-
-It was modified by Ross Lazarus for the rgenetics project - tests and some additional parameters were added
-
-This wrapper is released licensed under the LGPL_
-
-.. _ClustalW2: http://www.ebi.ac.uk/2can/tutorials/protein/clustalw.html  
-
-.. _Clustsrc: http://www.clustal.org
-
-.. _Clustfirst: http://lists.bx.psu.edu/pipermail/galaxy-dev/2010-November/003732.html
-
-.. _LGPL: http://www.gnu.org/copyleft/lesser.html
-
-  </help>
-
-</tool>
-
--- a/tools/rgenetics/rgEigPCA.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,342 +0,0 @@
-"""
-run smartpca
-
-This uses galaxy code developed by Dan to deal with
-arbitrary output files using an html dataset with its own
-subdirectory containing the arbitrary files
-We create that html file and add all the links we need
-
-Note that we execute the smartpca.perl program in the output subdirectory
-to avoid having to clear out the job directory after running
-
-Code to convert linkage format ped files into eigenstratgeno format is left here
-in case we decide to autoconvert
-
-Added a plot in R with better labels than the default eigensoft plot december 26 2007
-
-DOCUMENTATION OF smartpca program:
-
-smartpca runs Principal Components Analysis on input genotype data and
-  outputs principal components (eigenvectors) and eigenvalues.
-  The method assumes that samples are unrelated.  (However, a small number
-  of cryptically related individuals is usually not a problem in practice
-  as they will typically be discarded as outliers.)
-
-5 different input formats are supported.  See ../CONVERTF/README
-for documentation on using the convertf program to convert between formats.
-
-The syntax of smartpca is "../bin/smartpca -p parfile".  We illustrate
-how parfile works via a toy example (see example.perl in this directory).
-This example takes input in EIGENSTRAT format.  The syntax of how to take input
-in other formats is analogous to the convertf program, see ../CONVERTF/README.
-
-The smartpca program prints various statistics to standard output.
-To redirect this information to a file, change the above syntax to
-"../bin/smartpca -p parfile >logfile".  For a description of these
-statistics, see the documentation file smartpca.info in this directory.
-
-Estimated running time of the smartpca program is
-  2.5e-12 * nSNP * NSAMPLES^2 hours            if not removing outliers.
-  2.5e-12 * nSNP * NSAMPLES^2 hours * (1+m)    if m outlier removal iterations.
-Thus, under the default of up to 5 outlier removal iterations, running time is
-  up to 1.5e-11 * nSNP * NSAMPLES^2 hours.
-
-------------------------------------------------------------------------
-
-DESCRIPTION OF EACH PARAMETER in parfile for smartpca:
-
-genotypename: input genotype file (in any format: see ../CONVERTF/README)
-snpname:      input snp file      (in any format: see ../CONVERTF/README)
-indivname:    input indiv file    (in any format: see ../CONVERTF/README)
-evecoutname:  output file of eigenvectors.  See numoutevec parameter below.
-evaloutname:  output file of all eigenvalues
-
-OPTIONAL PARAMETERS:
-
-numoutevec:     number of eigenvectors to output.  Default is 10.
-numoutlieriter: maximum number of outlier removal iterations.
-  Default is 5.  To turn off outlier removal, set this parameter to 0.
-numoutlierevec: number of principal components along which to
-  remove outliers during each outlier removal iteration.  Default is 10.
-outliersigmathresh: number of standard deviations which an individual must
-  exceed, along one of the top (numoutlierevec) principal components, in
-  order for that individual to be removed as an outlier.  Default is 6.0.
-outlieroutname: output logfile of outlier individuals removed. If not specified,
-  smartpca will print this information to stdout, which is the default.
-usenorm: Whether to normalize each SNP by a quantity related to allele freq.
-  Default is YES.  (When analyzing microsatellite data, should be set to NO.
-  See Patterson et al. 2006.)
-altnormstyle: Affects very subtle details in normalization formula.
-  Default is YES (normalization formulas of Patterson et al. 2006)
-  To match EIGENSTRAT (normalization formulas of Price et al. 2006), set to NO.
-missingmode: If set to YES, then instead of doing PCA on # reference alleles,
-  do PCA on whether each data point is missing or nonmissing.  Default is NO.
-  May be useful for detecting informative missingness (Clayton et al. 2005).
-nsnpldregress: If set to a positive integer, then LD correction is turned on,
-  and input to PCA will be the residual of a regression involving that many
-  previous SNPs, according to physical location.  See Patterson et al. 2006.
-  Default is 0 (no LD correction).  If desiring LD correction, we recommend 2.
-maxdistldregress: If doing LD correction, this is the maximum genetic distance
-  (in Morgans) for previous SNPs used in LD correction.  Default is no maximum.
-poplistname:   If wishing to infer eigenvectors using only individuals from a
-  subset of populations, and then project individuals from all populations
-  onto those eigenvectors, this input file contains a list of population names,
-  one population name per line, which will be used to infer eigenvectors.
-  It is assumed that the population of each individual is specified in the
-  indiv file.  Default is to use individuals from all populations.
-phylipoutname: output file containing an fst matrix which can be used as input
-  to programs in the PHYLIP package, such as the "fitch" program for
-  constructing phylogenetic trees.
-noxdata:    if set to YES, all SNPs on X chr are excluded from the data set.
-  The smartpca default for this parameter is YES, since different variances
-  for males vs. females on X chr may confound PCA analysis.
-nomalexhet: if set to YES, any het genotypes on X chr for males are changed
-  to missing data.  The smartpca default for this parameter is YES.
-badsnpname: specifies a list of SNPs which should be excluded from the data set.
-  Same format as example.snp.  Cannot be used if input is in
-  PACKEDPED or PACKEDANCESTRYMAP format.
-popsizelimit: If set to a positive integer, the result is that only the first
-  popsizelimit individuals from each population will be included in the
-  analysis. It is assumed that the population of each individual is specified
-  in the indiv file.  Default is to use all individuals in the analysis.
-
-The next 5 optional parameters allow the user to output genotype, snp and
-  indiv files which will be identical to the input files except that:
-    Any individuals set to Ignore in the input indiv file will be
-      removed from the data set (see ../CONVERTF/README)
-    Any data excluded or set to missing based on noxdata, nomalexhet and
-      badsnpname parameters (see above) will be removed from the data set.
-    The user may decide to output these files in any format.
-outputformat:    ANCESTRYMAP,  EIGENSTRAT, PED, PACKEDPED or PACKEDANCESTRYMAP
-genotypeoutname: output genotype file
-snpoutname:      output snp file
-indivoutname:    output indiv file
-outputgroup: see documentation in ../CONVERTF/README
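-
-For example, a minimal parfile might look like this (illustrative values only):
-
-genotypename: example.eigenstratgeno
-snpname:      example.snp
-indivname:    example.ind
-evecoutname:  example.evec
-evaloutname:  example.eval
-numoutevec:   4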
-"""
-import sys,os,time,subprocess,string,glob
-from rgutils import RRun, galhtmlprefix, galhtmlpostfix, timenow, smartpca, rexe, plinke
-verbose = False
-
-def makePlot(eigpca='test.pca',title='test',pdfname='test.pdf',h=8,w=10,nfp=None,rexe=''):
-    """
-    the eigenvec file has a # row with the eigenvectors, then subject ids, eigenvecs and lastly
-    the subject class
-    Rpy not being used here. Write a real R script and run it. Sadly, this means putting numbers
-    somewhere - like in the code as monster R vector constructor c(99.3,2.14) strings
-    At least you have the data and the analysis in one single place. Highly reproducible little
-    piece of research.
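-    A data row in the input eigenvec file looks roughly like (illustrative):
-        subjectID   0.0123  -0.0456  ...  groupLabel
-    i.e. subject id, one column per eigenvector, and the subject class last.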
-    """
-    debug=False
-    f = file(eigpca,'r')
-    R = []
-    if debug:
-      R.append('sessionInfo()')
-      R.append("print('dir()=:')")
-      R.append('dir()')
-      R.append("print('pdfname=%s')" % pdfname)
-    gvec = []
-    pca1 = []
-    pca2 = []
-    groups = {}
-    glist = [] # list for legend
-    ngroup = 1 # increment for each new group encountered for pch vector
-    for n,row in enumerate(f):
-        if n > 1:
-            rowlist = row.strip().split()
-            group = rowlist[-1]
-            v1 = rowlist[1]
-            v2 = rowlist[2]
-            try:
-                v1 = float(v1)
-            except:
-                v1 = 0.0
-            try:
-                v2 = float(v2)
-            except:
-                v2 = 0.0
-            if not groups.get(group,None):
-                groups[group] = ngroup
-                glist.append(group)
-                ngroup += 1 # for next group
-            gvec.append(groups[group]) # lookup group number
-            pca1.append('%f' % v1)
-            pca2.append('%f' % v2)
-    # now have vectors of group,pca1 and pca2
-    llist = [x.encode('ascii') for x in glist] # remove label unicode - eesh
-    llist = ['"%s"' % x for x in llist] # need to quote for R
-    R.append('llist=c(%s)' % ','.join(llist))
-
-    plist = range(2,len(llist)+2) # pch - avoid black circles
-    R.append('glist=c(%s)' % ','.join(['%d' % x for x in plist]))
-    pgvec = ['%d' % (plist[i-1]) for i in gvec] # plot symbol/colour for each point
-    R.append("par(lab=c(10,10,10))") # so our grid is denser than the default 5
-    R.append("par(mai=c(1,1,1,0.5))")
-    maint = title
-    R.append('pdf("%s",h=%d,w=%d)' % (pdfname,h,w))
-    R.append("par(lab=c(10,10,10))")
-    R.append('pca1 = c(%s)' % ','.join(pca1))
-    R.append('pca2 = c(%s)' % ','.join(pca2))
-    R.append('pgvec = c(%s)' % ','.join(pgvec))
-    s = "plot(pca1,pca2,type='p',main='%s', ylab='Second ancestry eigenvector'," % maint
-    s += "xlab='First ancestry eigenvector',col=pgvec,cex=0.8,pch=pgvec)"
-    R.append(s)
-    R.append('legend("top",legend=llist,pch=glist,col=glist,title="Sample")')
-    R.append('grid(nx = 10, ny = 10, col = "lightgray", lty = "dotted")')
-    R.append('dev.off()')
-    R.append('png("%s.png",h=%d,w=%d,units="in",res=72)' % (pdfname,h,w))
-    s = "plot(pca1,pca2,type='p',main='%s', ylab='Second ancestry eigenvector'," % maint
-    s += "xlab='First ancestry eigenvector',col=pgvec,cex=0.8,pch=pgvec)"
-    R.append(s)
-    R.append('legend("top",legend=llist,pch=glist,col=glist,title="Sample")')
-    R.append('grid(nx = 10, ny = 10, col = "lightgray", lty = "dotted")')
-    R.append('dev.off()')
-    rlog,flist = RRun(rcmd=R,title=title,outdir=nfp)
-    print >> sys.stdout, '\n'.join(R)
-    print >> sys.stdout, rlog
-
-
-def getfSize(fpath,outpath):
-    """
-    format a nice file size string
-    """
-    size = ''
-    fp = os.path.join(outpath,fpath)
-    if os.path.isfile(fp):
-        n = float(os.path.getsize(fp))
-        if n > 2**20:
-            size = ' (%1.1f MB)' % (n/2**20)
-        elif n > 2**10:
-            size = ' (%1.1f KB)' % (n/2**10)
-        elif n > 0:
-            size = ' (%d B)' % (int(n))
-    return size
-
-
-def runEigen():
-    """ run the smartpca prog - documentation follows
-
-    smartpca.perl -i fakeped_100.eigenstratgeno -a fakeped_100.map -b fakeped_100.ind -p fakeped_100 -e fakeped_100.eigenvals -l
-        fakeped_100.eigenlog -o fakeped_100.eigenout
-
-DOCUMENTATION OF smartpca.perl program:
-
-This program calls the smartpca program (see ../POPGEN/README).
-For this to work, the bin directory containing smartpca MUST be in your path.
-See ./example.perl for a toy example.
-
-../bin/smartpca.perl
--i example.geno  : genotype file in EIGENSTRAT format (see ../CONVERTF/README)
--a example.snp   : snp file   (see ../CONVERTF/README)
--b example.ind   : indiv file (see ../CONVERTF/README)
--k k             : (Default is 10) number of principal components to output
--o example.pca   : output file of principal components.  Individuals removed
-                   as outliers will have all values set to 0.0 in this file.
--p example.plot  : prefix of output plot files of top 2 principal components.
-                   (labeling individuals according to labels in indiv file)
--e example.eval  : output file of all eigenvalues
--l example.log   : output logfile
--m maxiter       : (Default is 5) maximum number of outlier removal iterations.
-                   To turn off outlier removal, set -m 0.
--t topk          : (Default is 10) number of principal components along which
-                   to remove outliers during each outlier removal iteration.
--s sigma         : (Default is 6.0) number of standard deviations which an
-                   individual must exceed, along one of topk top principal
-                   components, in order to be removed as an outlier.
-
-    now uses https://www.bx.psu.edu/cgi-bin/trac.cgi/galaxy/changeset/1832
-
-All files can be viewed however, by making links in the primary (HTML) history item like:
-<img src="display_child?parent_id=2&designation=SomeImage?" alt="Some Image"/>
-<a href="display_child?parent_id=2&designation=SomeText?">Some Text</a>
-
-    <command interpreter="python">
-    rgEigPCA.py "$i.extra_files_path/$i.metadata.base_name" "$title" "$out_file1"
-    "$out_file1.files_path" "$k" "$m" "$t" "$s" "$pca"
-    </command>
-
-    """
-    if len(sys.argv) < 10:
-        print 'Need an input genotype file root, a title, an output file and the output file path,'
-        print ' the 4 integer tuning parameters k,m,t and s, and a pca output file, in order. Given that, will run smartpca for eigensoft'
-        sys.exit(1)
-    else:
-        print >> sys.stdout, 'rgEigPCA.py got %s' % (' '.join(sys.argv))
-    skillme = ' %s' % string.punctuation
-    trantab = string.maketrans(skillme,'_'*len(skillme))
-    ofname = sys.argv[5]
-    progname = os.path.basename(sys.argv[0])
-    infile = sys.argv[1]
-    infpath,base_name = os.path.split(infile) # now takes precomputed or autoconverted ldreduced dataset
-    title = sys.argv[2].translate(trantab) # must replace all of these for urls containing title
-    outfile1 = sys.argv[3]
-    newfilepath = sys.argv[4]
-    try:
-       os.makedirs(newfilepath)
-    except:
-       pass
-    op = os.path.split(outfile1)[0]
-    try: # for test - needs this done
-        os.makedirs(op)
-    except:
-        pass
-    eigen_k = sys.argv[5]
-    eigen_m = sys.argv[6]
-    eigen_t = sys.argv[7]
-    eigen_s = sys.argv[8]
-    eigpca = sys.argv[9] # path to new dataset for pca results - for later adjustment
-    eigentitle = os.path.join(newfilepath,title)
-    explanations=['Samples plotted in first 2 eigenvector space','Principal components','Eigenvalues',
-    'Smartpca log (contents shown below)']
-    rplotname = 'PCAPlot.pdf'
-    eigenexts = [rplotname, "pca.xls", "eval.xls"]
-    newfiles = ['%s_%s' % (title,x) for x in eigenexts] # produced by eigenstrat
-    rplotout = os.path.join(newfilepath,newfiles[0]) # for R plots
-    eigenouts = [x for x in newfiles]
-    eigenlogf = '%s_log.txt' % title
-    newfiles.append(eigenlogf) # so it will also appear in the links
-    lfname = outfile1
-    lf = file(lfname,'w')
-    lf.write(galhtmlprefix % progname)
-    try:
-        os.makedirs(newfilepath)
-    except:
-        pass
-    smartCL = '%s -i %s.bed -a %s.bim -b %s.fam -o %s -p %s -e %s -l %s -k %s -m %s -t %s -s %s' % \
-          (smartpca,infile, infile, infile, eigenouts[1],'%s_eigensoftplot.pdf' % title,eigenouts[2],eigenlogf, \
-           eigen_k, eigen_m, eigen_t, eigen_s)
-    env = os.environ
-    p=subprocess.Popen(smartCL,shell=True,cwd=newfilepath)
-    retval = p.wait()
-    # copy the eigenvector output file needed for adjustment to the user's eigenstrat library directory
-    elog = file(os.path.join(newfilepath,eigenlogf),'r').read()
-    eeigen = os.path.join(newfilepath,'%s.evec' % eigenouts[1]) # need these for adjusting
-    try:
-        eigpcaRes = file(eeigen,'r').read()
-    except:
-        eigpcaRes = ''
-    file(eigpca,'w').write(eigpcaRes)
-    makePlot(eigpca=eigpca,pdfname=newfiles[0],title=title,nfp=newfilepath,rexe=rexe)
-    s = 'Output from %s run at %s<br/>\n' % (progname,timenow())
-    lf.write('<h4>%s</h4>\n' % s)
-    lf.write('newfilepath=%s, rexe=%s' % (newfilepath,rexe))
-    lf.write('(click on the image below to see a much higher quality PDF version)')
-    thumbnail = '%s.png' % newfiles[0] # foo.pdf.png - who cares?
-    if os.path.exists(os.path.join(newfilepath,thumbnail)):
-        lf.write('<table border="0" cellpadding="10" cellspacing="10"><tr><td>\n')
-        lf.write('<a href="%s"><img src="%s" alt="%s" hspace="10" align="left" /></a></td></tr></table><br/>\n' \
-            % (newfiles[0],thumbnail,explanations[0]))
-    allfiles = os.listdir(newfilepath)
-    allfiles.sort()
-    sizes = [getfSize(x,newfilepath) for x in allfiles]
-    lallfiles = ['<li><a href="%s">%s %s</a></li>\n' % (x,x,sizes[i]) for i,x in enumerate(allfiles)] # html list
-    lf.write('<div class="document">All Files:<ol>%s</ol></div>' % ''.join(lallfiles))
-    lf.write('<div class="document">Log %s contents follow below<p/>' % eigenlogf)
-    lf.write('<pre>%s</pre></div>' % elog) # the eigenlog
-    s = 'If you need to rerun this analysis, the command line used was\n%s\n<p/>' % (smartCL)
-    lf.write(s)
-    lf.write(galhtmlpostfix) # end galhtmlprefix div
-    lf.close()
-
-
-if __name__ == "__main__":
-   runEigen()
--- a/tools/rgenetics/rgEigPCA.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,167 +0,0 @@
-<tool id="rgEigPCA1" name="Eigensoft:">
-    <description>PCA Ancestry using SNP</description>
-
-    <command interpreter="python">
-    rgEigPCA.py "$i.extra_files_path/$i.metadata.base_name" "$title" "$out_file1"
-    "$out_file1.files_path" "$k" "$m" "$t" "$s" "$pca"
-    </command>
-
-    <inputs>
-
-       <param name="i"  type="data" label="Input genotype data file"
-          size="120" format="ldindep" />
-       <param name="title"  type="text" value="Ancestry PCA" label="Title for outputs from this run"
-          size="80"  />
-       <param name="k"  type="integer" value="4" label="Number of principal components to output"
-          size="3"  />
-       <param name="m"  type="integer" value="0" label="Max. outlier removal iterations"
-          help="To turn on outlier removal, set m=5 or so. Do this if you plan on adjusting any analyses"
-          size="3"  />
-       <param name="t"  type="integer" value="5" label="# principal components used for outlier removal"
-          size="3"  />
-       <param name="s"  type="integer" value="6" label="#SDs for outlier removal"
-          help = "Any individual with SD along one of k top principal components > s will be removed as an outlier."
-          size="3"  />
-
-   </inputs>
-
-   <outputs>
-       <data name="out_file1" format="html" label="${title}_rgEig.html"/>
-       <data name="pca" format="txt" label="${title}_rgEig.txt"/>
-   </outputs>
-
-<tests>
- <test>
-   <param name='i' value='tinywga' ftype='ldindep' >
-   <metadata name='base_name' value='tinywga' />
-   <composite_data value='tinywga.bim' />
-   <composite_data value='tinywga.bed' />
-   <composite_data value='tinywga.fam' />
-   <edit_attributes type='name' value='tinywga' /> 
-   </param>
-    <param name='title' value='rgEigPCAtest1' />
-    <param name="k" value="4" />
-    <param name="m" value="2" />
-    <param name="t" value="2" />
-    <param name="s" value="2" />
-    <output name='out_file1' file='rgtestouts/rgEigPCA/rgEigPCAtest1.html' ftype='html' compare='diff' lines_diff='195'>
-    <extra_files type="file" name='rgEigPCAtest1_PCAPlot.pdf' value="rgtestouts/rgEigPCA/rgEigPCAtest1_PCAPlot.pdf" compare="sim_size" delta="3000"/>
-    </output>
-    <output name='pca' file='rgtestouts/rgEigPCA/rgEigPCAtest1.txt' compare='diff'/>
- </test>
-</tests>
-
-<help>
-
-
-**Syntax**
-
-- **Genotype data** is an input genotype dataset in Plink lped (http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml) format. See below for notes
-- **Title** is used to name the output files so you can remember what the outputs are for
-- **Tuning parameters** are documented in the Eigensoft (http://genepath.med.harvard.edu/~reich/Software.htm) documentation - see below 
-
-
------
-
-**Summary**
-
-Eigensoft requires ld-reduced genotype data. 
-Galaxy has an automatic converter for genotype data in Plink linkage pedigree (lped) format.
-For details of this generic genotype format, please see the Plink documentation at 
-http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml
-
-Reading that documentation, you'll see that the linkage pedigree format is really two related files with the same 
-file base name - a map and ped file - eg 'mygeno.ped' and 'mygeno.map'.
-The map file has one row per marker giving the chromosome, snp name, genetic offset and physical offset
-corresponding to each genotype stored as separate alleles in the ped file. The ped file has family id, individual id, father id (or 0), mother id
-(or 0), gender (1=male, 2=female, 0=unknown) and affection (1=unaffected, 2=affected, 0=unknown), 
-then two separate allele columns for each genotype. 
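-
-For example, a single marker and subject might look like this (illustrative)::
-
-    mygeno.map:  1  rs3094315  0  792429
-    mygeno.ped:  FAM1  SUBJ1  0  0  1  2  A  G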
-
-Once you have your data in the right format, you can upload those into your Galaxy history using the "upload" tool.
-
-To upload your lped data in the upload tool, choose 'lped' as the 'file format'. The tool form will change to 
-allow you to navigate to and select each member of the pair of  ped and map files stored on your local computer
-(or available at a public URL for Galaxy to grab). 
-Give the dataset a meaningful name (replace rgeneticsData with something more useful!) and click execute. 
-
-When the upload is done, your new lped format dataset will appear in your history and then, 
-when you choose the ancestry tool, that history dataset will be available as input.
-
-**Warning for the Impatient**
-
-When you execute the tool, it will look like it has not started running for a while as the automatic converter 
-reduces the amount of LD - otherwise eigenstrat gives biased results.
-
-
-**Attribution**
-
-This tool runs and relies on the work of many others, including the
-maintainers of the Eigensoft program, and the R and
-Bioconductor projects. For full attribution, source code and documentation, please see
-http://genepath.med.harvard.edu/~reich/Software.htm, http://cran.r-project.org/
-and http://www.bioconductor.org/ respectively.
-
-This implementation is a Galaxy tool wrapper around these third party applications.
-It was originally designed and written for family based data from the CAMP Illumina run of 2007 by
-ross lazarus (ross.lazarus@gmail.com) and incorporated into the rgenetics toolkit.
-
-copyright Ross Lazarus 2007
-Licensed under the terms of the LGPL as documented http://www.gnu.org/licenses/lgpl.html
-but is about as useful as a sponge boat without EIGENSOFT pca code.
-
-**README from eigensoft2 distribution at http://genepath.med.harvard.edu/~reich/Software.htm**
-
-[rerla@beast eigensoft2]$ cat README
-EIGENSOFT version 2.0, January 2008 (for Linux only)
-
-This is the same as our EIGENSOFT 2.0 BETA release with a few recent changes
-as described at http://genepath.med.harvard.edu/~reich/New_In_EIGENSOFT.htm.
-
-Features of EIGENSOFT version 2.0 include:
--- Keeping track of ref/var alleles in all file formats: see CONVERTF/README
--- Handling data sets up to 8 billion genotypes: see CONVERTF/README
--- Output SNP weightings of each principal component: see POPGEN/README
-
-The EIGENSOFT package implements methods from the following 2 papers:
-Patterson N. et al. 2006 PLoS Genetics in press (population structure)
-Price A.L. et al. 2006 NG 38:904-9 (EIGENSTRAT stratification correction)
-
-See POPGEN/README for documentation of population structure programs.
-
-See EIGENSTRAT/README for documentation of EIGENSTRAT programs.
-
-See CONVERTF/README for documentation of programs for converting file formats.
-
-
-Executables and source code:
-----------------------------
-All C executables are in the bin/ directory.
-
-We have placed source code for all C executables in the src/ directory,
-for users who wish to modify and recompile our programs.  For example, to
-recompile the eigenstrat program, type
-"cd src"
-"make eigenstrat"
-"mv eigenstrat ../bin"
-
-Note that some of our software will only compile if your system has the
-lapack package installed.  (This package is used to compute eigenvectors.)
-Some users may need to change "blas-3" to "blas" in the Makefile,
-depending on how blas and lapack are installed.
-
-If cc is not available on your system, try "cp Makefile.alt Makefile"
-and then recompile.
-
-If you have trouble compiling and running our code, try compiling and
-running the pcatoy program in the src directory:
-"cd src"
-"make pcatoy"
-"./pcatoy"
-If you are unable to run the pcatoy program successfully, please contact
-your system administrator for help, as this is a systems issue which is
-beyond our scope.  Your system administrator will be able to troubleshoot
-your systems issue using this trivial program.  [You can also try running
-the pcatoy program in the bin directory, which we have already compiled.]
-</help>
-</tool>
-
--- a/tools/rgenetics/rgFastQC.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,149 +0,0 @@
-"""
-wrapper for fastqc
-
-called as
-  <command interpreter="python">
-    rgFastqc.py -i $input_file -d $html_file.files_path -o $html_file -n "$out_prefix"
-  </command>
-
-
-
-Current release seems overly intolerant of sam/bam header strangeness
-Author notified...
-
-
-"""
-
-import os,sys,subprocess,optparse,shutil,tempfile
-from rgutils import getFileString
-
-class FastQC():
-    """wrapper
-    """
-    
-    
-    def __init__(self,opts=None):
-        assert opts <> None
-        self.opts = opts
-        
-        
-    def run_fastqc(self):
-        """
-        In batch mode fastqc behaves not very nicely - will write to a new folder in
-        the same place as the infile called [infilebasename]_fastqc
-    rlazarus@omics:/data/galaxy/test$ ls FC041_1_sequence_fastqc
-    duplication_levels.png  fastqc_icon.png          per_base_n_content.png         per_sequence_gc_content.png       summary.txt
-    error.png               fastqc_report.html       per_base_quality.png           per_sequence_quality.png          tick.png
-    fastqc_data.txt         per_base_gc_content.png  per_base_sequence_content.png  sequence_length_distribution.png  warning.png
-
-        """
-        dummy,tlog = tempfile.mkstemp(prefix='rgFastQClog')
-        sout = open(tlog, 'w')
-        fastq = os.path.basename(self.opts.input)
-        cl = [self.opts.executable,'-o %s' % self.opts.outputdir]
-        if self.opts.informat in ['sam','bam']:
-            cl.append('-f %s' % self.opts.informat)
-        if self.opts.contaminants <> None :
-            cl.append('-c %s' % self.opts.contaminants)
-        cl.append(self.opts.input)
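-        # the assembled command might look like this (illustrative):
-        #   fastqc -o <outputdir> -f bam -c contaminants.txt reads.bam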
-        p = subprocess.Popen(' '.join(cl), shell=True, stderr=sout, stdout=sout, cwd=self.opts.outputdir)
-        return_value = p.wait()
-        sout.close()
-        runlog = open(tlog,'r').readlines()
-        os.unlink(tlog)
-        flist = os.listdir(self.opts.outputdir) # fastqc plays games with its output directory name. eesh
-        odpath = None
-        for f in flist:
-            d = os.path.join(self.opts.outputdir,f)
-            if os.path.isdir(d):
-                if d.endswith('_fastqc'):
-                    odpath = d 
-        hpath = None
-        if odpath <> None:
-            try: 
-                hpath = os.path.join(odpath,'fastqc_report.html')
-                rep = open(hpath,'r').readlines() # for our new html file - we will insert our stuff just before the closing </body> tag
-            except:
-                pass
-        if hpath == None:        
-            res =  ['## odpath=%s: No output found in %s. Output for the run was:<pre>\n' % (odpath,hpath),]
-            res += runlog
-            res += ['</pre>\n',
-                   'Please read the above for clues<br/>\n',
-                   'If you selected a sam/bam format file, it might not have headers or they may not start with @HD?<br/>\n',
-                   'It is also possible that the log shows that fastqc is not installed?<br/>\n',
-                   'If that is the case, please tell the relevant Galaxy administrator that it can be snarfed from<br/>\n',
-                   'http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/<br/>\n',]
-            return res
-        self.fix_fastqcimages(odpath)
-        flist = os.listdir(self.opts.outputdir) # these have now been fixed
-        excludefiles = ['tick.png','warning.png','fastqc_icon.png','error.png']
-        flist = [x for x in flist if not x in excludefiles]
-        for i in range(len(rep)): # need to fix links to Icons and Image subdirectories in latest fastqc code - ugh
-            rep[i] = rep[i].replace('Icons/','')
-            rep[i] = rep[i].replace('Images/','')
-
-        html = self.fix_fastqc(rep,flist,runlog)
-        return html
-        
-
-        
-    def fix_fastqc(self,rep=[],flist=[],runlog=[]):
-        """ add some of our stuff to the html
-        """
-        bs = '</body></html>\n' # hope they don't change this
-        try:
-            bodyindex = rep.index(bs) # hope they don't change this
-        except:
-            bodyindex = len(rep) - 1
-        res = []
-        res.append('<table>\n')
-        flist.sort()
-        for i,f in enumerate(flist):
-             if not(os.path.isdir(f)):
-                 fn = os.path.split(f)[-1]
-                 res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,getFileString(fn, self.opts.outputdir)))
-        res.append('</table><p/>\n') 
-        res.append('<a href="http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/">FastQC documentation and full attribution is here</a><br/><hr/>\n')
-        res.append('FastQC was run by Galaxy using the rgenetics rgFastQC wrapper - see http://rgenetics.org for details and licensing\n')
-        fixed = rep[:bodyindex] + res + rep[bodyindex:]
-        return fixed # with our additions
-
-
-    def fix_fastqcimages(self,odpath):
-        """ Galaxy wants everything in the same files_dir
-        """
-        icpath = os.path.join(odpath,'Icons')
-        impath = os.path.join(odpath,'Images')
-        for adir in [icpath,impath,odpath]:
-            if os.path.exists(adir):
-                flist = os.listdir(adir) # get all files created
-                for f in flist:
-                   if not os.path.isdir(os.path.join(adir,f)):
-                       sauce = os.path.join(adir,f)
-                       dest = os.path.join(self.opts.outputdir,f)
-                       shutil.move(sauce,dest)
-                os.rmdir(adir)
-
-    
-
-if __name__ == '__main__':
-    op = optparse.OptionParser()
-    op.add_option('-i', '--input', default=None)
-    op.add_option('-o', '--htmloutput', default=None)
-    op.add_option('-d', '--outputdir', default="/tmp/shortread")
-    op.add_option('-f', '--informat', default='fastq')
-    op.add_option('-n', '--namejob', default='rgFastQC')
-    op.add_option('-c', '--contaminants', default=None)
-    op.add_option('-e', '--executable', default='fastqc')
-    opts, args = op.parse_args()
-    assert opts.input <> None
-    assert os.path.isfile(opts.executable),'##rgFastQC.py error - cannot find executable %s' % opts.executable
-    if not os.path.exists(opts.outputdir): 
-        os.makedirs(opts.outputdir)
-    f = FastQC(opts)
-    html = f.run_fastqc()
-    f = open(opts.htmloutput, 'w')
-    f.write(''.join(html))
-    f.close()
-    
--- a/tools/rgenetics/rgFastQC.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,88 +0,0 @@
-<tool name="Fastqc: Fastqc QC" id="fastqc" version="0.1">
-  <description>using FastQC from Babraham</description>
-  <command interpreter="python">
-    rgFastQC.py -i $input_file -d $html_file.files_path -o $html_file -n "$out_prefix" -f $input_file.ext -e ${GALAXY_DATA_INDEX_DIR}/shared/jars/FastQC/fastqc
-#if $contaminants.dataset and str($contaminants) > ''
--c "$contaminants"
-#end if
-  </command>
-  <requirements>
-    <requirement type="package">FastQC</requirement>
-  </requirements>
-  <inputs>
-    <param format="fastqsanger,fastq,bam,sam" name="input_file" type="data" label="Short read data from your current history" />
-    <param name="out_prefix" value="FastQC" type="text" label="Title for the output file - to remind you what the job was for" size="80" />
-    <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list" 
-           help="tab delimited file with 2 columns: name and sequence.  For example: Illumina Small RNA RT Primer	CAAGCAGAAGACGGCATACGA"/>
-  </inputs>
-  <outputs>
-    <data format="html" name="html_file"  label="${out_prefix}.html" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input_file" value="1000gsample.fastq" />
-      <param name="out_prefix" value="fastqc_out" />
-      <param name="contaminants" value="fastqc_contaminants.txt" ftype="tabular" />
-      <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="100"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**Purpose**
-
-FastQC aims to provide a simple way to do some quality control checks on raw
-sequence data coming from high throughput sequencing pipelines. 
-It provides a modular set of analyses which you can use to give a quick
-impression of whether your data has any problems of 
-which you should be aware before doing any further analysis.
-
-The main functions of FastQC are:
-
-- Import of data from BAM, SAM or FastQ files (any variant)
-- Providing a quick overview to tell you in which areas there may be problems
-- Summary graphs and tables to quickly assess your data
-- Export of results to an HTML based permanent report
-- Offline operation to allow automated generation of reports without running the interactive application
-
-**FastQC documentation**
-
-This is a Galaxy interface to the external package FastQC_.
-Specific documentation on FastQC can be found on that site.
-FastQC incorporates the Picard-tools_ libraries for sam/bam processing.
-
- .. _FastQC: http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/
- .. _Picard-tools: http://picard.sourceforge.net/index.shtml
-
-The contaminants file parameter was borrowed from the independently developed
-fastqcwrapper contributed to the Galaxy Community Tool Shed by J. Johnson.
-
------
-
-.. class:: infomark
-
-**Inputs and outputs**
-
-This wrapper will accept any fastq file as well as sam or bam as the primary file to check.
-It will also take an optional file containing a list of contaminants information, in the form of
-a tab-delimited file with 2 columns, name and sequence.
-
-The tool produces a single HTML output file that contains all of the results, including the following:
-
-- Basic Statistics
-- Per base sequence quality
-- Per sequence quality scores
-- Per base sequence content
-- Per base GC content
-- Per sequence GC content
-- Per base N content
-- Sequence Length Distribution
-- Sequence Duplication Levels
-- Overrepresented sequences
-- Kmer Content
-
-All except Basic Statistics and Overrepresented sequences are plots.
-
-</help>
-</tool>
--- a/tools/rgenetics/rgGLM.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,287 +0,0 @@
-#!/usr/local/bin/python
-"""
-# added most of the available options for linear models
-# june 2009 rml
-# hack to run and process a plink quantitative trait
-#
-
-This is a wrapper for Shaun Purcell's Plink linear/logistic models for
-traits, covariates and genotypes.
-
-It requires some judgement to interpret the findings
-We need some better visualizations - manhattan plots are good.
-svg with rs numbers for top 1%?
-
-toptable tools - truncate a gg file down to some low percentile
-
-intersect with other tables - eg gene expression regressions on snps
-
-
-
-"""
-
-import sys,math,shutil,subprocess,os,string,tempfile,commands
-from rgutils import plinke
-
-def makeGFF(resf='',outfname='',logf=None,twd='.',name='track name',description='track description',topn=1000):
-    """
-    score must be scaled to 0-1000
-    
-    Want to make some wig tracks from each analysis
-    Best n -log10(p). Make top hit the window.
-    we use our tab output which has
-    rs	chrom	offset	ADD_stat	ADD_p	ADD_log10p
-    rs3094315	1	792429	1.151	0.2528	0.597223
-
-    """
-
-    def is_number(s):
-        try:
-            float(s)
-            return True
-        except ValueError:
-            return False
-    header = 'track name=%s description="%s" visibility=2 useScore=1 color=0,60,120\n' % (name,description)          
-    column_names = [ 'Seqname', 'Source', 'Feature', 'Start', 'End', 'Score', 'Strand', 'Frame', 'Group' ]
-    halfwidth=100
-    resfpath = os.path.join(twd,resf)
-    resf = open(resfpath,'r')
-    resfl = resf.readlines() # dumb but convenient for millions of rows
-    resfl = [x.split() for x in resfl]
-    headl = resfl[0]
-    resfl = resfl[1:]
-    headl = [x.strip().upper() for x in headl]
-    headIndex = dict(zip(headl,range(0,len(headl))))
-    chrpos = headIndex.get('CHROM',None)
-    rspos = headIndex.get('RS',None)
-    offspos = headIndex.get('OFFSET',None)
-    ppos = headIndex.get('ADD_LOG10P',None)
-    wewant = [chrpos,rspos,offspos,ppos]
-    if None in wewant: # missing something
-       logf.write('### Error missing a required header in makeGFF - headIndex=%s\n' % headIndex)
-       return
-    resfl = [x for x in resfl if x[ppos] > '']
-    resfl = [(float(x[ppos]),x) for x in resfl] # decorate
-    resfl.sort()
-    resfl.reverse() # using -log10 so larger is better
-    resfl = resfl[:topn] # truncate
-    pvals = [x[0] for x in resfl] # need to scale
-    resfl = [x[1] for x in resfl] # drop decoration
-    if len(pvals) == 0:
-        logf.write('### no pvalues found in resfl - %s' % (resfl[:3]))
-        sys.exit(1)
-    maxp = max(pvals) # need to scale
-    minp = min(pvals)
-    prange = abs(maxp-minp) + 0.5 # fudge
-    scalefact = 1000.0/prange
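-    # e.g. if minp=0.5 and maxp=6.0 then prange=6.0 and scalefact~166.7,
-    # so the largest -log10(p) maps to a score of 1000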
-    logf.write('###maxp=%f,minp=%f,prange=%f,scalefact=%f\n' % (maxp,minp,prange,scalefact))
-    for i,row in enumerate(resfl):
-        row[ppos] = '%d' % (int(scalefact*pvals[i]))
-        resfl[i] = row # replace
-    outf = file(outfname,'w')
-    outf.write(header)
-    outres = [] # need to resort into chrom offset order
-    for i,lrow in enumerate(resfl):
-        chrom,snp,offset,p, = [lrow[x] for x in wewant]
-        gff = ('chr%s' % chrom,'rgGLM','variation','%d' % (int(offset)-halfwidth),
-               '%d' % (int(offset)+halfwidth),p,'.','.','%s logp=%1.2f' % (snp,pvals[i]))
-        outres.append(gff)
-    outres = [(x[0],int(x[3]),x) for x in outres] # decorate
-    outres.sort() # into chrom offset
-    outres=[x[2] for x in outres] # undecorate
-    outres = ['\t'.join(x) for x in outres]    
-    outf.write('\n'.join(outres))
-    outf.write('\n')
-    outf.close()
-
-
-
-def xformQassoc(resf='',outfname='',logf=None,twd='.'):
-    """	plink.assoc.linear to gg file
-from the docs
-The output per each SNP might look something like:
-
-    CHR        SNP      BP  A1       TEST   NMISS       OR      STAT         P
-      5   rs000001   10001   A        ADD     664   0.7806    -1.942   0.05216
-      5   rs000001   10001   A     DOMDEV     664   0.9395   -0.3562    0.7217
-      5   rs000001   10001   A       COV1     664   0.9723   -0.7894    0.4299
-      5   rs000001   10001   A       COV2     664    1.159    0.5132    0.6078
-      5   rs000001   10001   A   GENO_2DF     664       NA     5.059    0.0797   
-    need to transform into gg columns for each distinct test
-    or bed for tracks?
-    
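-    The resulting gg file is tab separated, one row per marker, with a
-    stat, p and log10p column for each distinct test, e.g. a header like (illustrative):
-    rs	chrom	offset	ADD_stat	ADD_p	ADD_log10p	DOMDEV_stat	DOMDEV_p	DOMDEV_log10p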
-    """
-    logf.write('xformQassoc got resf=%s, outfname=%s\n' % (resf,outfname))
-    resdict = {}
-    rsdict = {}
-    markerlist = []
-    # plink is "clever" - will run logistic if only 2 categories such as gender
-    resfs = resf.split('.')
-    if resfs[-1] == 'logistic':
-        resfs[-1] = 'linear'
-    else:
-        resfs[-1] = 'logistic'
-    altresf = '.'.join(resfs)
-
-    altresfpath = os.path.join(twd,altresf)
-    resfpath = os.path.join(twd,resf)
-    try:
-        resf = open(resfpath,'r')
-    except:
-        try:
-            resf = open(altresfpath,'r')
-        except:
-            print >> sys.stderr, '## error - no file plink output %s or %s found - cannot continue' % (resfpath, altresfpath)
-            sys.exit(1)
-    for lnum,row in enumerate(resf):
-        if lnum == 0:
-            headl = row.split()
-            headl = [x.strip().upper() for x in headl]
-            headIndex = dict(zip(headl,range(0,len(headl))))
-            chrpos = headIndex.get('CHR',None)
-            rspos = headIndex.get('SNP',None)
-            offspos = headIndex.get('BP',None)
-            nmisspos = headIndex.get('NMISS',None)
-            testpos = headIndex.get('TEST',None)
-            ppos = headIndex.get('P',None)
-            coeffpos = headIndex.get('OR',None)
-            if not coeffpos:
-                coeffpos = headIndex.get('BETA',None)
-            apos = headIndex.get('A1',None)
-            statpos = headIndex.get('STAT',None)
-            wewant = [chrpos,rspos,offspos,testpos,statpos,ppos,coeffpos,apos]
-            if None in wewant: # missing something
-               logf.write('missing a required header in xformQassoc - headIndex=%s\n' % headIndex)
-               return
-            llen = len(headl)        
-        else: # no Nones!
-            ll = row.split()
-            if len(ll) >= llen: # valid line
-                chrom,snp,offset,test,stat,p,coeff,allele = [ll[x] for x in wewant]
-                snp = snp.strip()
-                if p != 'NA' :
-                  lp = 0.0 # default, so lp is always bound even when ffp == 0
-                  try:
-                    ffp = float(p)
-                    if ffp != 0:
-                       lp = -math.log10(ffp)
-                  except ValueError:
-                    lp = 0.0
-                  resdict.setdefault(test,{})
-                  resdict[test][snp] = (stat,p,'%f' % lp)
-                  if rsdict.get(snp,None) == None:
-                      rsdict[snp] = (chrom,offset)
-                      markerlist.append(snp)
-    # now have various tests indexed by rs
-    tk = resdict.keys()
-    tk.sort() # tests
-    ohead = ['rs','chrom','offset']
-    for t in tk: # add headers
-        ohead.append('%s_stat' % t)
-        ohead.append('%s_p' % t)
-        ohead.append('%s_log10p' % t)
-    oheads = '\t'.join(ohead)
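-    # e.g. if plink reported the tests ADD and DOMDEV, oheads would be the
-    # tab separated header (a sketch only):
-    # rs chrom offset ADD_stat ADD_p ADD_log10p DOMDEV_stat DOMDEV_p DOMDEV_log10p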
-    res = [oheads,]
-    for snp in markerlist: # retain original order
-        chrom,offset = rsdict[snp]
-        outl = [snp,chrom,offset]
-        for t in tk:
-            outl += resdict[t][snp] # add stat,p for this test
-        outs = '\t'.join(outl)
-        res.append(outs)
-    f = file(outfname,'w')
-    res.append('')
-    f.write('\n'.join(res))
-    f.close()
-
-                
-if __name__ == "__main__":
-    """
-
-    <command interpreter="python">   
-        rgGLM.py '$i.extra_files_path/$i.metadata.base_name' '$phef.extra_files_path/$phef.metadata.base_name'
-        "$title1" '$predvar' '$covar' '$out_file1' '$logf' '$i.metadata.base_name'
-        '$inter' '$cond' '$gender' '$mind' '$geno' '$maf' '$logistic' '$wigout'
-    </command>
-    """
-    topn = 1000
-    killme = string.punctuation+string.whitespace
-    trantab = string.maketrans(killme,'_'*len(killme))
-    if len(sys.argv) < 17:
-       s = 'rgGLM.py needs 17 params - got %s \n' % (sys.argv)
-       sys.stderr.write(s)
-       sys.exit(1) # exit non-zero so the failure is visible to the caller
-    blurb = 'rgGLM.py called with %s' % sys.argv
-    print >> sys.stdout,blurb
-    bfname = sys.argv[1]
-    phename = sys.argv[2]
-    title = sys.argv[3]
-    title = title.translate(trantab) # str.translate returns a new string, so keep the result
-    predvar = sys.argv[4]
-    covar = sys.argv[5].strip()
-    outfname = sys.argv[6] 
-    logfname = sys.argv[7]
-    op = os.path.split(logfname)[0]
-    try: # for test - needs this done
-        os.makedirs(op)
-    except:
-        pass    
-    basename = sys.argv[8].translate(trantab)
-    inter = sys.argv[9] == '1'
-    cond = sys.argv[10].strip()
-    if cond == 'None':
-        cond = ''
-    gender = sys.argv[11] == '1'
-    mind = sys.argv[12]
-    geno = sys.argv[13]
-    maf = sys.argv[14]
-    logistic = sys.argv[15].strip()=='1'
-    gffout = sys.argv[16]
-    me = sys.argv[0]
-    phepath = '%s.pphe' % phename
-    twd = tempfile.mkdtemp(suffix='rgGLM') # make sure plink doesn't spew log file into the root!
-    tplog = os.path.join(twd,'%s.log' % basename) # should be path to plink log
-    vcl = [plinke,'--noweb','--bfile',bfname,'--pheno-name','"%s"' % predvar,'--pheno',
-           phepath,'--out',basename,'--mind %s' % mind, '--geno %s' % geno,
-           '--maf %s' % maf]
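-    # A sketch of the shell command this builds before the extra flags below
-    # are appended (all names here are illustrative only):
-    # plink --noweb --bfile mydata --pheno-name "sbp" --pheno mydata.pphe \
-    #       --out mydata --mind 0.1 --geno 0.1 --maf 0.01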
-    if logistic:
-        vcl.append('--logistic')
-        resf = '%s.assoc.logistic' % basename # plink output is here we hope
-    else:
-        vcl.append('--linear')
-        resf = '%s.assoc.linear' % basename # plink output is here we hope
-    resf = os.path.join(twd,resf)
-    if gender:
-        vcl.append('--sex')
-    if inter:
-        vcl.append('--interaction')
-    if covar and covar != 'None': # a covariate list was actually supplied
-        vcl += ['--covar',phepath,'--covar-name',covar] # comma sep list of covariates
-    tcfile = None
-    if len(cond) > 0: # plink wants these in a file..
-        dummy,tcfile = tempfile.mkstemp(suffix='condlist') #
-        f = open(tcfile,'w')
-        cl = cond.split()
-        f.write('\n'.join(cl))
-        f.write('\n')
-        f.close()
-        vcl.append('--condition-list %s' % tcfile)
-    p=subprocess.Popen(' '.join(vcl),shell=True,cwd=twd)
-    retval = p.wait()
-    if tcfile:
-        os.unlink(tcfile)
-    plinklog = file(tplog,'r').read()
-    logf = file(logfname,'w')
-    logf.write(blurb)
-    logf.write('\n')
-    logf.write('vcl=%s\n' % vcl)
-    xformQassoc(resf=resf,outfname=outfname,logf=logf,twd=twd) # leaves the desired summary file
-    makeGFF(resf=outfname,outfname=gffout,logf=logf,twd=twd,name='rgGLM_TopTable',description=title,topn=topn)
-    logf.write('\n')
-    logf.write(plinklog)
-    logf.close()
-    #shutil.rmtree(twd) # clean up
-
-
-
-
-
--- a/tools/rgenetics/rgGLM.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,146 +0,0 @@
-<tool id="rgGLM1" name="Linear Models:" version="0.2">
-    <description>for genotype data</description>
-    <code file="rgGLM_code.py"/>
-    <command interpreter="python">
-        rgGLM.py '$i.extra_files_path/$i.metadata.base_name' '$phef.extra_files_path/$phef.metadata.base_name'
-        "$title" '$predvar' '$covar' '$out_file1' '$logf' '$i.metadata.base_name'
-        '$inter' '$cond' '$gender' '$mind' '$geno' '$maf' '$logistic' '$gffout'
-    </command>
-
-    <inputs>
-      <page>
-       <param name='title' label='Title for outputs' type='text' value='GLM' size="80" />
-       <param name="i" type="data" format="pbed" label="Genotype file" size="80"  />
-       <param name="phef"  type="data" format="pphe" label="Phenotype file" size="80"
-       help="Dependent variable and covariates will be chosen from this file on the next page"/>
-       <param name="logistic" type="text" value = "0" label="1=Use a logistic model (trait must be 1/2 coded like affection)"
-       help="Please read the Plink documentation about this option"  />
-       <param name="gender" type="text" value = "0" label="1=Add a gender term to model"  />
-       <param name='inter' label='1=Build an interaction model - please read the docs carefully before using this'
-         type='text' value='0' size="1" />
-       <param name="cond"  type="text"  area='true' size='15x20' value = ""
-       label="condition on this whitespace delimited rs (snp id) list"  />
-       <param name="mind" type="float" value = "0.1" label="Remove subjects with missing genotypes gt (eg 0.1)"
-       help = "Set to 1 to include all subjects in the input file" />
-       <param name="geno"  type="float" value = "0.1" label="Remove markers with missing genotypes gt (eg 0.1)"
-       help = "Set to 1 to include all markers in the input file"  />
-       <param name="maf"  type="float" value = "0.01" label="Remove markers with MAF lt (eg 0.01) "
-       help = "Set to 0 to include all markers in the input file"/>
-      </page>
-      <page>
-       <param name="predvar" size="80"  type="select" label="Dependent Trait"
-       dynamic_options="get_phecols(phef=phef,selectOne=1)"  display="radio" multiple="false"
-       help="Model this characteristic in terms of subject snp genotypes - eg rare allele dosage for additive model" />
-       <param name="covar" size="80"  type="select" label="Covariates"
-       dynamic_options="get_phecols(phef=phef,selectOne=0)" multiple="true" display="checkboxes"
-       help="Use these phenotypes as covariates in models of snp dosage effects on the dependent trait"/>
-      </page>
-   </inputs>
-
-   <outputs>
-       <data format="tabular" name="out_file1" label="${title}_rgGLM.xls"/>
-       <data format="txt" name="logf" label="${title}_rgGLMlog.txt" />
-       <data format="gff" name="gffout"  label="${title}_rgGLM.gff"/>
-   </outputs>
-<tests>
- <test>
-  <param name='i' value='tinywga' ftype='pbed' >
-   <metadata name='base_name' value='tinywga' />
-   <composite_data value='tinywga.bim' />
-   <composite_data value='tinywga.bed' />
-   <composite_data value='tinywga.fam' />
-   <edit_attributes type='name' value='tinywga' /> 
- </param>
- <param name='phef' value='tinywga' ftype='pphe' >
-   <metadata name='base_name' value='tinywga' />
-   <composite_data value='tinywga.pphe' />
-   <edit_attributes type='name' value='tinywga' /> 
- </param>
- <param name='title' value='rgGLMtest1' />
- <param name='predvar' value='c1' />
- <param name='covar' value='None' />
- <param name='inter' value='0' />
- <param name='cond' value='' />
- <param name='gender' value='0' />
- <param name='mind' value='1.0' />
- <param name='geno' value='1.0' />
- <param name='maf' value='0.0' />
- <param name='logistic' value='0' />
- <output name='out_file1' file='rgGLMtest1_GLM.xls' ftype='tabular' compare="diff" />
- <output name='logf' file='rgGLMtest1_GLM_log.txt' ftype='txt' compare="diff" lines_diff='36'/>
- <output name='gffout' file='rgGLMtest1_GLM_topTable.gff' compare="diff" ftype='gff' />
- </test>
-</tests>
-<help>
-
-.. class:: infomark
-
-**Syntax**
-
-Note that this is a two page tool - you will choose the dependent trait and covariates
-on the second page, based on the phenotype file you chose on the first page
-
-- **Genotype file** is the input Plink format compressed genotype (pbed) file
-- **Phenotype file** is the input Plink phenotype (pphe) file with FAMID IID followed by phenotypes
-- **Dependent variable** is the term on the left of the model and is chosen from the pphe columns on the second page
-- **Logistic** if you are (eg) using disease status as the outcome variable (case/control) - otherwise the model is linear.
-- **Covariates** are covariate terms on the right of the model, also chosen on the second page
-- **Interactions** will add interactions - please be careful how you interpret these - see the Plink documentation.
-- **Gender** will add gender as a model term - described in the Plink documentation
-- **Condition** will condition the model on one or more specific SNP rs ids as a whitespace delimited sequence
-- **Format** determines how your data will be returned to your Galaxy workspace
-
------
-
-.. class:: infomark
-
-**Summary**
-
-This tool will test a GLM model for each SNP predicting a dependent phenotype
-variable, with adjustment for specified covariates.
-
-If you don't see the genotype or phenotype data set you want here, it can be imported using
-one of the methods available from the rg get data tool group.
-
-Output format can be UCSC .bed if you want to see one column of your
-results as a fully fledged UCSC genome browser track. A map file containing the chromosome and offset for each marker is
-required for writing this kind of output.
-Alternatively you can use .gg for the UCSC Genome Graphs tool which has all of the advantages
-of the .bed track, plus a neat, visual front end that displays a lot of useful clues.
-Either of these is a very useful way of quickly getting a look
-at your data in full genomic context.
-
-Finally, if you can't live without spreadsheet data, choose the .xls tab delimited format.
-It's not a stupid binary excel file - just a plain old tab delimited one with a header.
-Fortunately excel is dumb enough to open these without much protest.
-
------
-
-.. class:: infomark
-
-**Attribution**
-
-This Galaxy tool relies on Plink (see Plinksrc_) to test GLM models. 
-
-So, we rely on the author (Shaun Purcell) for documentation specific to those settings - they are very nicely documented - see
-DOC_
-
-Tool and Galaxy datatypes originally designed and written for the Rgenetics
-series of whole genome scale statistical genetics tools by ross lazarus (ross.lazarus@gmail.com)
-
-Copyright Ross Lazarus March 2007
-This Galaxy wrapper is released under the LGPL_ but is about as useful as a chocolate teapot without Plink, which is GPL.
-
-I'm no lawyer, but it looks like you got GPL if you use this software. Good luck.
-
-.. _Plinksrc: http://pngu.mgh.harvard.edu/~purcell/plink/ 
-
-.. _LGPL: http://www.gnu.org/copyleft/lesser.html
-
-.. _DOC: http://pngu.mgh.harvard.edu/~purcell/plink/anal.shtml#glm
-
-</help>
-</tool>
-
-
--- a/tools/rgenetics/rgGLM_code.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-# before running the qc, need to rename various output files
-import os,string,time
-from galaxy import datatypes 
-
-
-def get_phecols(phef='',selectOne=0):
-   """return column names """
-   phepath = phef.extra_files_path
-   phename = phef.metadata.base_name
-   phe = os.path.join(phepath,'%s.pphe' % phename)
-   head = open(phe,'r').next()
-   c = head.strip().split()[2:] # first are fid,iid
-   res = [(cname,cname,False) for cname in c]
-   if len(res) >= 1:
-       if selectOne:
-          x,y,z = res[0]
-          res[0] = (x,y,True) # preselect the first phenotype column
-       else:
-          res.insert(0,('None','None',True))
-   else:
-      res = [('None','no phenotype columns found',False),]
-   return res
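-# A minimal sketch of the return value (column names are hypothetical): for a
-# pphe header 'FID IID sbp dbp', get_phecols(phef,selectOne=1) would return
-# [('sbp','sbp',True), ('dbp','dbp',False)], while selectOne=0 would return
-# [('None','None',True), ('sbp','sbp',False), ('dbp','dbp',False)]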
-
--- a/tools/rgenetics/rgGRR.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1089 +0,0 @@
-"""
-# july 2009: Need to see outliers so need to draw them last?
-# could use clustering on the zscores to guess real relationships for unrelateds
-# but definitely need to draw last
-# added MAX_SHOW_ROWS to limit the length of the main report page
-# Changes for Galaxy integration
-# added more robust knuth method for one pass mean and sd
-# no difference really - let's use scipy.mean() and scipy.std() instead...
-# fixed labels and changed to .xls for outlier reports so can open in excel
-# interesting - with a few hundred subjects, 5k gives good resolution
-# and 100k gives better but not by much
-# TODO remove non autosomal markers
-# TODO it would be best if label had the zmean and zsd as these are what matter for
-# outliers rather than the group mean/sd
-# mods to rgGRR.py from channing CVS which John Ziniti has rewritten to produce SVG plots
-# to make a Galaxy tool - we need the table of mean and SD for interesting pairs, the SVG and the log
-# so the result should be an HTML file
-
-# rgIBS.py
-# use a random subset of markers for a quick ibs
-# to identify sample dups and closely related subjects
-# try snpMatrix and plink and see which one works best for us?
-# abecasis grr plots mean*sd for every subject to show clusters
-# mods june 23 rml to avoid non-autosomal markers
-# we seem to be distinguishing parent-child by gender - 2 clouds!
-
-
-snpMatrix from David Clayton has:
-ibs.stats function to calculate the identity-by-state stats of a group of samples
-Description
-Given a snp.matrix-class or a X.snp.matrix-class object with N samples, calculates some statistics
-about the relatedness of every pair of samples within.
-
-Usage
-ibs.stats(x)
-8 ibs.stats
-Arguments
-x a snp.matrix-class or a X.snp.matrix-class object containing N samples
-Details
-No-calls are excluded from consideration here.
-Value
-A data.frame containing N(N - 1)/2 rows, where the row names are the sample name pairs separated
-by a comma, and the columns are:
-Count count of identical calls, excluding no-calls
-Fraction fraction of identical calls compared to actual calls being made in both samples
-Warning
-In some applications, it may be preferable to subset a (random) selection of SNPs first - the
-calculation time increases as N(N - 1)M/2. Typically for N = 800 samples and M = 3000 SNPs, the
-calculation time is about 1 minute. A full GWA scan could take hours, and is quite unnecessary for
-simple applications such as checking for duplicate or related samples.
-Note
-This is mostly written to find mislabelled and/or duplicate samples.
-Illumina indexes their SNPs in alphabetical order so the mitochondrial SNPs come first - for most
-purposes it is undesirable to use these SNPs for IBS purposes.
-TODO: Worst-case S4 subsetting seems to make 2 copies of a large object, so one might want to
-subset before rbind(), etc; a future version of this routine may contain a built-in subsetting facility
-"""
-import sys,os,time,random,string,copy,optparse,math # math is needed by distance() below
-
-try:
-  set
-except NameError:
-  from sets import Set as set # the pre-2.4 compatibility module is lowercase 'sets'
-
-from rgutils import timenow,plinke
-
-import plinkbinJZ
-
-
-opts = None
-verbose = False
-
-showPolygons = False
-
-class NullDevice:
-  def write(self, s):
-    pass
-
-tempstderr = sys.stderr # save
-#sys.stderr = NullDevice()
-# need to avoid blather about deprecation and other strange stuff from scipy
-# the current galaxy job runner assumes that
-# the job is in error if anything appears on sys.stderr
-# grrrrr. James wants to keep it that way instead of using the
-# status flag for some strange reason. Presumably he doesn't use R or (in this case, scipy)
-import numpy
-import scipy
-from scipy import weave
-
-
-sys.stderr=tempstderr
-
-
-PROGNAME = os.path.split(sys.argv[0])[-1]
-X_AXIS_LABEL = 'Mean Alleles Shared'
-Y_AXIS_LABEL = 'SD Alleles Shared'
-LEGEND_ALIGN = 'topleft'
-LEGEND_TITLE = 'Relationship'
-#DEFAULT_SYMBOL_SIZE = 1.0 # original default, superseded by the value below
-DEFAULT_SYMBOL_SIZE = 0.5 # default symbol size
-
-### Some colors for R/rpy
-R_BLACK  = 1
-R_RED    = 2
-R_GREEN  = 3
-R_BLUE   = 4
-R_CYAN   = 5
-R_PURPLE = 6
-R_YELLOW = 7
-R_GRAY   = 8
-
-### ... and some point-styles
-
-###
-PLOT_HEIGHT = 600
-PLOT_WIDTH = 1150
-
-
-#SVG_COLORS = ('black', 'darkblue', 'blue', 'deepskyblue', 'firebrick','maroon','crimson')
-#SVG_COLORS = ('cyan','dodgerblue','mediumpurple', 'fuchsia', 'red','gold','gray')
-SVG_COLORS = ('cyan','dodgerblue','mediumpurple','forestgreen', 'lightgreen','gold','gray')
-# dupe,parentchild,sibpair,halfsib,parents,unrel,unkn
-#('orange', 'red', 'green', 'chartreuse', 'blue', 'purple', 'gray')
-
-OUTLIERS_HEADER_list = ['Mean','Sdev','ZMean','ZSdev','FID1','IID1','FID2','IID2','RelMean_M','RelMean_SD','RelSD_M','RelSD_SD','PID1','MID1','PID2','MID2','Ped']
-OUTLIERS_HEADER = '\t'.join(OUTLIERS_HEADER_list)
-TABLE_HEADER='fid1_iid1\tfid2_iid2\tmean\tsdev\tzmean\tzsdev\tgeno\trelcode\tpid1\tmid1\tpid2\tmid2\n'
-
-
-### Relationship codes, text, and lookups/mappings
-N_RELATIONSHIP_TYPES = 7
-REL_DUPE, REL_PARENTCHILD, REL_SIBS, REL_HALFSIBS, REL_RELATED, REL_UNRELATED, REL_UNKNOWN = range(N_RELATIONSHIP_TYPES)
-REL_LOOKUP = {
-    REL_DUPE:        ('dupe',        R_BLUE,   1),
-    REL_PARENTCHILD: ('parentchild', R_YELLOW, 1),
-    REL_SIBS:        ('sibpairs',    R_RED,    1),
-    REL_HALFSIBS:    ('halfsibs',    R_GREEN,  1),
-    REL_RELATED:     ('parents',     R_PURPLE, 1),
-    REL_UNRELATED:   ('unrelated',   R_CYAN,   1),
-    REL_UNKNOWN:     ('unknown',     R_GRAY,   1),
-    }
-OUTLIER_STDEVS = {
-    REL_DUPE:        2,
-    REL_PARENTCHILD: 2,
-    REL_SIBS:        2,
-    REL_HALFSIBS:    2,
-    REL_RELATED:     2,
-    REL_UNRELATED:   3,
-    REL_UNKNOWN:     2,
-    }
-# note now Z can be passed in
-
-REL_STATES = [REL_LOOKUP[r][0] for r in range(N_RELATIONSHIP_TYPES)]
-REL_COLORS = SVG_COLORS
-REL_POINTS = [REL_LOOKUP[r][2] for r in range(N_RELATIONSHIP_TYPES)]
-
-DEFAULT_MAX_SAMPLE_SIZE = 10000
-
-REF_COUNT_HOM1 = 3
-REF_COUNT_HET  = 2
-REF_COUNT_HOM2 = 1
-MISSING        = 0
-MAX_SHOW_ROWS = 100 # framingham has millions - delays showing output page - so truncate and explain
-MARKER_PAIRS_PER_SECOND_SLOW = 15000000.0
-MARKER_PAIRS_PER_SECOND_FAST = 70000000.0
-
-
-galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
-<title></title>
-<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
-</head>
-<body>
-<div class="document">
-"""
-
-
-SVG_HEADER = '''<?xml version="1.0" standalone="no"?>
-<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.2//EN" "http://www.w3.org/Graphics/SVG/1.2/DTD/svg12.dtd">
-
-<svg width="1280" height="800"
-     xmlns="http://www.w3.org/2000/svg" version="1.2"
-     xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 1280 800" onload="init()">
-
-  <script type="text/ecmascript" xlink:href="/static/scripts/checkbox_and_radiobutton.js"/>
-  <script type="text/ecmascript" xlink:href="/static/scripts/helper_functions.js"/>
-  <script type="text/ecmascript" xlink:href="/static/scripts/timer.js"/>
-  <script type="text/ecmascript">
-    <![CDATA[
-      var checkBoxes = new Array();
-      var radioGroupBandwidth;
-      var colours = ['%s','%s','%s','%s','%s','%s','%s'];
-      function init() {
-          var style = {"font-family":"Arial,Helvetica", "fill":"black", "font-size":12};
-          var dist = 12;
-          var yOffset = 4;
-
-          //A checkBox for each relationship type dupe,parentchild,sibpair,halfsib,parents,unrel,unkn
-          checkBoxes["dupe"] = new checkBox("dupe","checkboxes",20,40,"cbRect","cbCross",true,"Duplicate",style,dist,yOffset,undefined,hideShowLayer);
-          checkBoxes["parentchild"] = new checkBox("parentchild","checkboxes",20,60,"cbRect","cbCross",true,"Parent-Child",style,dist,yOffset,undefined,hideShowLayer);
-          checkBoxes["sibpairs"] = new checkBox("sibpairs","checkboxes",20,80,"cbRect","cbCross",true,"Sib-pairs",style,dist,yOffset,undefined,hideShowLayer);
-          checkBoxes["halfsibs"] = new checkBox("halfsibs","checkboxes",20,100,"cbRect","cbCross",true,"Half-sibs",style,dist,yOffset,undefined,hideShowLayer);
-          checkBoxes["parents"] = new checkBox("parents","checkboxes",20,120,"cbRect","cbCross",true,"Parents",style,dist,yOffset,undefined,hideShowLayer);
-          checkBoxes["unrelated"] = new checkBox("unrelated","checkboxes",20,140,"cbRect","cbCross",true,"Unrelated",style,dist,yOffset,undefined,hideShowLayer);
-          checkBoxes["unknown"] = new checkBox("unknown","checkboxes",20,160,"cbRect","cbCross",true,"Unknown",style,dist,yOffset,undefined,hideShowLayer);
-
-      }
-
-      function hideShowLayer(id, status, label) {
-          var vis = "hidden";
-          if (status) {
-              vis = "visible";
-          }
-          document.getElementById(id).setAttributeNS(null, 'visibility', vis);
-      }
-
-      function showBTT(evt, rel, mm, dm, md, dd, n, mg, dg, lg, hg) {
-    var x = parseInt(evt.pageX)-250;
-    var y = parseInt(evt.pageY)-110;
-        switch(rel) {
-        case 0:
-        fill = colours[rel];
-        relt = "dupe";
-        break;
-        case 1:
-        fill = colours[rel];
-        relt = "parentchild";
-        break;
-        case 2:
-        fill = colours[rel];
-        relt = "sibpairs";
-        break;
-        case 3:
-        fill = colours[rel];
-        relt = "halfsibs";
-        break;
-        case 4:
-        fill = colours[rel];
-        relt = "parents";
-        break;
-        case 5:
-        fill = colours[rel];
-        relt = "unrelated";
-        break;
-        case 6:
-        fill = colours[rel];
-        relt = "unknown";
-        break;
-        default:
-        fill = "cyan";
-        relt = "ERROR_CODE: "+rel;
-    }
-
-    document.getElementById("btRel").textContent = "GROUP: "+relt;
-    document.getElementById("btMean").textContent = "mean="+mm+" +/- "+dm;
-        document.getElementById("btSdev").textContent = "sdev="+dm+" +/- "+dd;
-        document.getElementById("btPair").textContent = "npairs="+n;
-        document.getElementById("btGeno").textContent = "ngenos="+mg+" +/- "+dg+" (min="+lg+", max="+hg+")";
-        document.getElementById("btHead").setAttribute('fill', fill);
-
-        var tt = document.getElementById("btTip");
-    tt.setAttribute("transform", "translate("+x+","+y+")");
-    tt.setAttribute('visibility', 'visible');
-      }
-
-      function showOTT(evt, rel, s1, s2, mean, sdev, ngeno, rmean, rsdev) {
-    var x = parseInt(evt.pageX)-150;
-    var y = parseInt(evt.pageY)-180;
-
-        switch(rel) {
-        case 0:
-        fill = colours[rel];
-        relt = "dupe";
-        break;
-        case 1:
-        fill = colours[rel];
-        relt = "parentchild";
-        break;
-        case 2:
-        fill = colours[rel];
-        relt = "sibpairs";
-        break;
-        case 3:
-        fill = colours[rel];
-        relt = "halfsibs";
-        break;
-        case 4:
-        fill = colours[rel];
-        relt = "parents";
-        break;
-        case 5:
-        fill = colours[rel];
-        relt = "unrelated";
-        break;
-        case 6:
-        fill = colours[rel];
-        relt = "unknown";
-        break;
-        default:
-        fill = "cyan";
-        relt = "ERROR_CODE: "+rel;
-    }
-
-    document.getElementById("otRel").textContent = "PAIR: "+relt;
-    document.getElementById("otS1").textContent = "s1="+s1;
-    document.getElementById("otS2").textContent = "s2="+s2;
-    document.getElementById("otMean").textContent = "mean="+mean;
-        document.getElementById("otSdev").textContent = "sdev="+sdev;
-        document.getElementById("otGeno").textContent = "ngenos="+ngeno;
-        document.getElementById("otRmean").textContent = "relmean="+rmean;
-        document.getElementById("otRsdev").textContent = "relsdev="+rsdev;
-    document.getElementById("otHead").setAttribute('fill', fill);
-
-        var tt = document.getElementById("otTip");
-    tt.setAttribute("transform", "translate("+x+","+y+")");
-    tt.setAttribute('visibility', 'visible');
-      }
-
-      function hideBTT(evt) {
-        document.getElementById("btTip").setAttributeNS(null, 'visibility', 'hidden');
-      }
-
-      function hideOTT(evt) {
-        document.getElementById("otTip").setAttributeNS(null, 'visibility', 'hidden');
-      }
-
-     ]]>
-  </script>
-  <defs>
-    <!-- symbols for check boxes -->
-    <symbol id="cbRect" overflow="visible">
-        <rect x="-5" y="-5" width="10" height="10" fill="white" stroke="dimgray" stroke-width="1" cursor="pointer"/>
-    </symbol>
-    <symbol id="cbCross" overflow="visible">
-        <g pointer-events="none" stroke="black" stroke-width="1">
-            <line x1="-3" y1="-3" x2="3" y2="3"/>
-            <line x1="3" y1="-3" x2="-3" y2="3"/>
-        </g>
-    </symbol>
-  </defs>
-
-<desc>Developer Works Dynamic Scatter Graph Scaling Example</desc>
-
-<!-- Now Draw the main X and Y axis -->
-<g style="stroke-width:1.0; stroke:black; shape-rendering:crispEdges">
-   <!-- X Axis top and bottom -->
-   <path d="M 100 100 L 1250 100 Z"/>
-   <path d="M 100 700 L 1250 700 Z"/>
-
-   <!-- Y Axis left and right -->
-   <path d="M 100  100 L 100  700 Z"/>
-   <path d="M 1250 100 L 1250 700 Z"/>
-</g>
-
-<g transform="translate(100,100)">
-
-  <!-- Grid Lines -->
-  <g style="fill:none; stroke:#dddddd; stroke-width:1; stroke-dasharray:2,2; text-anchor:end; shape-rendering:crispEdges">
-
-    <!-- Vertical grid lines -->
-    <line x1="125" y1="0" x2="115" y2="600" />
-    <line x1="230" y1="0" x2="230" y2="600" />
-    <line x1="345" y1="0" x2="345" y2="600" />
-    <line x1="460" y1="0" x2="460" y2="600" />
-    <line x1="575" y1="0" x2="575" y2="600" style="stroke-dasharray:none;" />
-    <line x1="690" y1="0" x2="690" y2="600"   />
-    <line x1="805" y1="0" x2="805" y2="600"   />
-    <line x1="920" y1="0" x2="920" y2="600"   />
-    <line x1="1035" y1="0" x2="1035" y2="600" />
-
-    <!-- Horizontal grid lines -->
-    <line x1="0" y1="60" x2="1150" y2="60"   />
-    <line x1="0" y1="120" x2="1150" y2="120" />
-    <line x1="0" y1="180" x2="1150" y2="180" />
-    <line x1="0" y1="240" x2="1150" y2="240" />
-    <line x1="0" y1="300" x2="1150" y2="300" style="stroke-dasharray:none;" />
-    <line x1="0" y1="360" x2="1150" y2="360" />
-    <line x1="0" y1="420" x2="1150" y2="420" />
-    <line x1="0" y1="480" x2="1150" y2="480" />
-    <line x1="0" y1="540" x2="1150" y2="540" />
-  </g>
-
-  <!-- Legend -->
-  <g style="fill:black; stroke:none" font-size="12" font-family="Arial" transform="translate(25,25)">
-    <rect width="160" height="270" style="fill:none; stroke:black; shape-rendering:crispEdges" />
-    <text x="5" y="20" style="fill:black; stroke:none;" font-size="13" font-weight="bold">Given Pair Relationship</text>
-    <rect x="120" y="35" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/>
-    <rect x="120" y="55" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/>
-    <rect x="120" y="75" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/>
-    <rect x="120" y="95" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/>
-    <rect x="120" y="115" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/>
-    <rect x="120" y="135" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/>
-    <rect x="120" y="155" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/>
-    <text x="15"  y="195" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore gt 15</text>
-    <circle cx="125" cy="192" r="6" style="stroke:red; fill:gold; fill-opacity:1.0; stroke-width:1;"/>
-    <text x="15" y="215" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore 4 to 15</text>
-    <circle cx="125" cy="212" r="3" style="stroke:gold; fill:gold; fill-opacity:1.0; stroke-width:1;"/>
-    <text x="15" y="235" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore lt 4</text>
-    <circle cx="125" cy="232" r="2" style="stroke:gold; fill:gold; fill-opacity:1.0; stroke-width:1;"/>
-    <g id="checkboxes">
-    </g>
-  </g>
-
-
-   <g style='fill:black; stroke:none' font-size="17" font-family="Arial">
-    <!-- X Axis Labels -->
-    <text x="480" y="660">Mean Alleles Shared</text>
-    <text x="0"    y="630" >1.0</text>
-    <text x="277"  y="630" >1.25</text>
-    <text x="564"  y="630" >1.5</text>
-    <text x="842" y="630" >1.75</text>
-    <text x="1140" y="630" >2.0</text>
-  </g>
-
-  <g transform="rotate(270)" style="fill:black; stroke:none" font-size="17" font-family="Arial">
-    <!-- Y Axis Labels -->
-    <text x="-350" y="-40">SD Alleles Shared</text>
-    <text x="-20" y="-10" >1.0</text>
-    <text x="-165" y="-10" >0.75</text>
-    <text x="-310" y="-10" >0.5</text>
-    <text x="-455" y="-10" >0.25</text>
-    <text x="-600" y="-10" >0.0</text>
-  </g>
-
-<!-- Plot Title -->
-<g style="fill:black; stroke:none" font-size="18" font-family="Arial">
-    <text x="425" y="-30">%s</text>
-</g>
-
-<!-- One group/layer of points for each relationship type -->
-'''
-
-SVG_FOOTER = '''
-<!-- End of Data -->
-</g>
-<g id="btTip" visibility="hidden" style="stroke-width:1.0; fill:black; stroke:none;" font-size="10" font-family="Arial">
-  <rect width="250" height="110" style="fill:silver" rx="2" ry="2"/>
-  <rect id="btHead" width="250" height="20" rx="2" ry="2" />
-  <text id="btRel" y="14" x="85">unrelated</text>
-  <text id="btMean" y="40" x="4">mean=1.5 +/- 0.04</text>
-  <text id="btSdev" y="60" x="4">sdev=0.7 +/- 0.03</text>
-  <text id="btPair" y="80" x="4">npairs=1152</text>
-  <text id="btGeno" y="100" x="4">ngenos=4783 +/- 24 (min=1000, max=5000)</text>
-</g>
-
-<g id="otTip" visibility="hidden" style="stroke-width:1.0; fill:black; stroke:none;" font-size="10" font-family="Arial">
-  <rect width="150" height="180" style="fill:silver" rx="2" ry="2"/>
-  <rect id="otHead" width="150" height="20" rx="2" ry="2" />
-  <text id="otRel" y="14" x="40">sibpairs</text>
-  <text id="otS1" y="40" x="4">s1=fid1,iid1</text>
-  <text id="otS2" y="60" x="4">s2=fid2,iid2</text>
-  <text id="otMean" y="80" x="4">mean=1.82</text>
-  <text id="otSdev" y="100" x="4">sdev=0.7</text>
-  <text id="otGeno" y="120" x="4">ngeno=4487</text>
-  <text id="otRmean" y="140" x="4">relmean=1.85</text>
-  <text id="otRsdev" y="160" x="4">relsdev=0.65</text>
-</g>
-</svg>
-'''
-
-
-DEFAULT_MAX_SAMPLE_SIZE = 5000 # NB: overrides the 10000 set earlier in this module
-
-REF_COUNT_HOM1 = 3
-REF_COUNT_HET  = 2
-REF_COUNT_HOM2 = 1
-MISSING        = 0
-
-MARKER_PAIRS_PER_SECOND_SLOW = 15000000.0 # floats, so the time estimates below stay fractional
-MARKER_PAIRS_PER_SECOND_FAST = 70000000.0
-
-POLYGONS = {
-    REL_UNRELATED:   ((1.360, 0.655), (1.385, 0.730), (1.620, 0.575), (1.610, 0.505)),
-    REL_HALFSIBS:    ((1.630, 0.500), (1.630, 0.550), (1.648, 0.540), (1.648, 0.490)),
-    REL_SIBS:        ((1.660, 0.510), (1.665, 0.560), (1.820, 0.410), (1.820, 0.390)),
-    REL_PARENTCHILD: ((1.650, 0.470), (1.650, 0.490), (1.750, 0.440), (1.750, 0.420)),
-    REL_DUPE:        ((1.970, 0.000), (1.970, 0.150), (2.000, 0.150), (2.000, 0.000)),
-    }
-
-def distance(point1, point2):
-    """ Calculate the distance between two points
-    """
-    (x1,y1) = [float(d) for d in point1]
-    (x2,y2) = [float(d) for d in point2]
-    dx = abs(x1 - x2)
-    dy = abs(y1 - y2)
-    return math.sqrt(dx**2 + dy**2)
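-# A quick sanity check for the helper above: distance((0,0),(3,4)) == 5.0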
-
-def point_inside_polygon(x, y, poly):
-    """ Determine if a point (x,y) is inside a given polygon or not
-        poly is a list of (x,y) pairs.
-
-        Taken from: http://www.ariel.com.au/a/python-point-int-poly.html
-    """
-
-    n = len(poly)
-    inside = False
-
-    p1x,p1y = poly[0]
-    for i in range(n+1):
-        p2x,p2y = poly[i % n]
-        if y > min(p1y,p2y):
-            if y <= max(p1y,p2y):
-                if x <= max(p1x,p2x):
-                    if p1y != p2y:
-                        xinters = (y-p1y)*(p2x-p1x)/(p2y-p1y)+p1x
-                    if p1x == p2x or x <= xinters:
-                        inside = not inside
-        p1x,p1y = p2x,p2y
-    return inside
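-# e.g. for the unit square poly = ((0,0),(1,0),(1,1),(0,1)):
-# point_inside_polygon(0.5, 0.5, poly) -> True
-# point_inside_polygon(1.5, 0.5, poly) -> False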
-
-def readMap(pedfile):
-    """read the plink .map file that accompanies pedfile, returning a list of marker records
-    """
-    mapfile = pedfile.replace('.ped', '.map')
-    marker_list = []
-    if os.path.exists(mapfile): # read inside the check so a missing file cannot raise NameError
-        print 'readMap: %s' % (mapfile)
-        fh = file(mapfile, 'r')
-        for line in fh:
-            marker_list.append(line.strip().split())
-        fh.close()
-    print 'readMap: %s markers' % (len(marker_list))
-    return marker_list
-
-def calcMeanSD(useme):
-    """
-    A numerically stable algorithm is given below. It also computes the mean.
-    This algorithm is due to Knuth,[1] who cites Welford.[2]
-    n = 0
-    mean = 0
-    M2 = 0
-
-    foreach x in data:
-      n = n + 1
-      delta = x - mean
-      mean = mean + delta/n
-      M2 = M2 + delta*(x - mean)      // This expression uses the new value of mean
-    end for
-
-    variance_n = M2/n
-    variance = M2/(n - 1)
-    """
-    mean = 0.0
-    M2 = 0.0
-    sd = 0.0
-    n = len(useme)
-    if n > 1:
-        for i,x in enumerate(useme):
-            delta = x - mean
-            mean = mean + delta/(i+1) # knuth uses n+=1 at start
-            M2 = M2 + delta*(x - mean)      # This expression uses the new value of mean
-        variance = M2/(n-1) # assume is sample so lose 1 DOF
-        sd = pow(variance,0.5)
-    return mean,sd
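-# e.g. calcMeanSD([1.0, 2.0, 3.0]) returns (2.0, 1.0):
-# the mean is 2.0 and the sample variance is (1+0+1)/(3-1) = 1.0, so sd = 1.0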
-
-
-def doIBSpy(ped=None,basename='',outdir=None,logf=None,
-            nrsSamples=10000,title='title',pdftoo=0,Zcutoff=2.0):
-    #def doIBS(pedName, title, nrsSamples=None, pdftoo=False):
-    """ started with snpmatrix but GRR uses actual IBS counts and sd's
-    """
-    repOut = [] # text strings to add to the html display
-    refallele = {}
-    tblf = '%s_table.xls' % (title)
-    tbl = file(os.path.join(outdir,tblf), 'w')
-    tbl.write(TABLE_HEADER)
-    svgf = '%s.svg' % (title)
-    svg = file(os.path.join(outdir,svgf), 'w')
-
-    nMarkers = len(ped._markers)
-    if nMarkers < 5:
-        print >> sys.stderr, '### ERROR - %d is too few markers for reliable estimation in %s - terminating' % (nMarkers,PROGNAME)
-        sys.exit(1)
-    nSubjects = len(ped._subjects)
-    nrsSamples = min(nMarkers, nrsSamples)
-    if opts and opts.use_mito:
-        markers = range(nMarkers)
-        nrsSamples = min(len(markers), nrsSamples)
-        sampleIndexes = sorted(random.sample(markers, nrsSamples))
-    else:
-        autosomals = ped.autosomal_indices()
-        nrsSamples = min(len(autosomals), nrsSamples)
-        sampleIndexes = sorted(random.sample(autosomals, nrsSamples))
-
-    print ''
-    print 'Getting random.sample of %s from %s total' % (nrsSamples, nMarkers)
-    npairs = (nSubjects*(nSubjects-1))/2 # total rows in table
-    newfiles=[svgf,tblf]
-    explanations = ['rgGRR Plot (requires SVG)','Mean by SD alleles shared - %d rows' % npairs]
-    # these go with the output file links in the html file
-    s = 'Reading genotypes for %s subjects and %s markers\n' % (nSubjects, nrsSamples)
-    logf.write(s)
-    minUsegenos = nrsSamples/2 # must have half?
-    nGenotypes = nSubjects*nrsSamples
-    stime = time.time()
-    emptyRows = set()
-    genos = numpy.zeros((nSubjects, nrsSamples), dtype=int)
-    for s in xrange(nSubjects):
-        nValid = 0
-        #getGenotypesByIndices(self, s, mlist, format)
-        genos[s] = ped.getGenotypesByIndices(s, sampleIndexes, format='ref')
-        nValid = sum([1 for g in genos[s] if g])
-        if not nValid:
-            emptyRows.add(s)
-            sub = ped.getSubject(s)
-            print 'All missing for row %d (%s)' % (s, sub)
-            logf.write('All missing for row %d (%s)\n' % (s, sub))
-    rtime = time.time() - stime
-    if verbose:
-        print '@@Read %s genotypes in %s seconds' % (nGenotypes, rtime)
-
-
-    ### Now the expensive part.  For each pair of subjects, we get the mean number
-    ### and standard deviation of shared alleles over all of the markers where both
-    ### subjects have a known genotype.  Identical subjects should have mean shared
-    ### alleles very close to 2.0 with a standard deviation very close to 0.0.
-    tot = nSubjects*(nSubjects-1)/2
-    nprog = tot/10
-    nMarkerpairs = tot * nrsSamples
-    estimatedTimeSlow = nMarkerpairs/MARKER_PAIRS_PER_SECOND_SLOW
-    estimatedTimeFast = nMarkerpairs/MARKER_PAIRS_PER_SECOND_FAST
-
-    pairs = []
-    pair_data = {}
-    means = []    ## Mean IBS for each pair
-    ngenoL = []   ## Count of comparable genotypes for each pair
-    sdevs = []    ## Standard dev for each pair
-    rels  = []    ## A relationship code for each pair
-    zmeans  = [0.0 for x in xrange(tot)]    ## zmean score for each pair for the relgroup
-    zstds  = [0.0 for x in xrange(tot)]   ## zstd score for each pair for the relgrp
-    skip = set()
-    ndone = 0     ## How many have been done so far
-
-    logf.write('Calculating %d pairs...\n' % (tot))
-    logf.write('Estimated time is %2.2f to %2.2f seconds ...\n' % (estimatedTimeFast, estimatedTimeSlow))
-
-    t1sum = 0
-    t2sum = 0
-    t3sum = 0
-    now = time.time()
-    scache = {}
-    _founder_cache = {}
-    C_CODE = """
-    #include "math.h"
-    int i;
-    int sumibs = 0;
-    int ssqibs = 0;
-    int ngeno  = 0;
-    float mean = 0;
-    float M2 = 0;
-    float delta = 0;
-    float sdev=0;
-    float variance=0;
-    for (i=0; i<nrsSamples; i++) {
-        int a1 = g1[i];
-        int a2 = g2[i];
-        if (a1 != 0 && a2 != 0) {
-            ngeno += 1;
-            int shared = 2-abs(a1-a2);
-            delta = shared - mean;
-            mean = mean + delta/ngeno;
-            M2 += delta*(shared-mean);
-            // yes that second time, the updated mean is used see calcmeansd above;
-            //printf("%d %d %d %d %d %d\\n", i, a1, a2, ngeno, shared, squared);
-            }
-    }
-    if (ngeno > 1) {
-        variance = M2/(ngeno-1);
-        sdev = sqrt(variance);
-        //printf("OK: %d %3.2f %3.2f\\n", ngeno, mean, sdev);
-    }
-    //printf("%d %d %d %1.2f %1.2f\\n", ngeno, sumibs, ssqibs, mean, sdev);
-    result[0] = ngeno;
-    result[1] = mean;
-    result[2] = sdev;
-    return_val = ngeno;
-    """
-    started = time.time()
-    for s1 in xrange(nSubjects):
-        if s1 in emptyRows:
-            continue
-        (fid1,iid1,did1,mid1,sex1,phe1,iid1,d_sid1,m_sid1) = scache.setdefault(s1, ped.getSubject(s1))
-
-        isFounder1 = _founder_cache.setdefault(s1, (did1==mid1))
-        g1 = genos[s1]
-
-        for s2 in xrange(s1+1, nSubjects):
-            if s2 in emptyRows:
-                continue
-            t1s = time.time()
-
-            (fid2,iid2,did2,mid2,sex2,phe2,iid2,d_sid2,m_sid2) = scache.setdefault(s2, ped.getSubject(s2))
-
-            g2 = genos[s2]
-            isFounder2 = _founder_cache.setdefault(s2, (did2==mid2))
-
-            # Determine the relationship for this pair
-            relcode = REL_UNKNOWN
-            if (fid2 == fid1):
-                if iid1 == iid2:
-                    relcode = REL_DUPE
-                elif (did2 == did1) and (mid2 == mid1) and did1 != mid1:
-                    relcode = REL_SIBS
-                elif (iid1 == mid2) or (iid1 == did2) or (iid2 == mid1) or (iid2 == did1):
-                    relcode = REL_PARENTCHILD
-                elif (str(did1) != '0' and (did2 == did1)) or (str(mid1) != '0' and (mid2 == mid1)):
-                    relcode = REL_HALFSIBS
-                else:
-                    # People in the same family should be marked as some other
-                    # form of related.  In general, these people will have a
-                    # pretty random spread of similarity. This distinction is
-                    # probably not very useful most of the time
-                    relcode = REL_RELATED
-            else:
-                ### Different families
-                relcode = REL_UNRELATED
-
-            t1e = time.time()
-            t1sum += t1e-t1s
-
-
-            ### Calculate sum(2-abs(a1-a2)) and sum((2-abs(a1-a2))**2) and count
-            ### the number of contributing genotypes.  These values are not actually
-            ### calculated here, but instead are looked up in a table for speed.
-            ### FIXME: This is still too slow ...
-            result = [0.0, 0.0, 0.0]
-            ngeno = weave.inline(C_CODE, ['g1', 'g2', 'nrsSamples', 'result'])
-            if ngeno >= minUsegenos:
-                _, mean, sdev = result
-                means.append(mean)
-                sdevs.append(sdev)
-                ngenoL.append(ngeno)
-                pairs.append((s1, s2))
-                rels.append(relcode)
-            else:
-                skip.add(ndone) # signal no comparable genotypes for this pair
-            ndone += 1
-            t2e = time.time()
-            t2sum += t2e-t1e
-            t3e = time.time()
-            t3sum += t3e-t2e
-
-    logme = [ 'T1:  %s' % (t1sum), 'T2:  %s' % (t2sum), 'T3:  %s' % (t3sum),'TOT: %s' % (t3e-now),
-             '%s pairs with no (or not enough) comparable genotypes (%3.1f%%)' % (len(skip),
-                                                            float(len(skip))/float(tot)*100)]
-    logf.write('%s\n' % '\t'.join(logme))
-    ### Calculate mean and standard deviation of scores on a per relationship
-    ### type basis, allowing us to flag outliers for each particular relationship
-    ### type
-    relstats = {}
-    relCounts = {}
-    outlierFiles = {}
-    for relCode, relInfo in REL_LOOKUP.items():
-        relName, relColor, relStyle = relInfo
-        useme = [means[x] for x in xrange(len(means)) if rels[x] == relCode]
-        relCounts[relCode] = len(useme)
-        mm = scipy.mean(useme)
-        ms = scipy.std(useme)
-        useme = [sdevs[x] for x in xrange(len(sdevs)) if rels[x] == relCode]
-        sm = scipy.mean(useme)
-        ss = scipy.std(useme)
-        relstats[relCode] = {'sd':(sm,ss), 'mean':(mm,ms)}
-        s = 'Relstate %s (n=%d): mean(mean)=%3.2f sdev(mean)=%3.2f, mean(sdev)=%3.2f sdev(sdev)=%3.2f\n' % \
-          (relName,relCounts[relCode], mm, ms, sm, ss)
-        logf.write(s)
-
-    ### now fake z scores for each subject like abecasis recommends max(|zmu|,|zsd|)
-    ### within each group, for each pair, z=(groupmean-pairmean)/groupsd
-    available = len(means)
-    logf.write('%d pairs are available of %d\n' % (available, tot))
-    ### s = '\nOutliers:\nrelationship\tzmean\tzsd\tped1\tped2\tmean\tsd\trmeanmean\trmeansd\trsdmean\trsdsd\n'
-    ### logf.write(s)
-    pairnum   = 0
-    offset    = 0
-    nOutliers = 0
-    cexs      = []
-    outlierRecords = dict([(r, []) for r in range(N_RELATIONSHIP_TYPES)])
-    zsdmax = 0
-    for s1 in range(nSubjects):
-        if s1 in emptyRows:
-            continue
-        (fid1,iid1,did1,mid1,sex1,aff1,ok1,d_sid1,m_sid1) = scache[s1]
-        for s2 in range(s1+1, nSubjects):
-            if s2 in emptyRows:
-                continue
-            if pairnum not in skip:
-                ### Get group stats for this relationship
-                (fid2,iid2,did2,mid2,sex2,aff2,ok2,d_sid2,m_sid2) = scache[s2]
-                try:
-                    r = rels[offset]
-                except IndexError:
-                    logf.write('###OOPS offset %d available %d pairnum %d len(rels) %d\n' % (offset, available, pairnum, len(rels)))
-                notfound = ('?',('?','0','0'))
-                relInfo = REL_LOOKUP.get(r,notfound)
-                relName, relColor, relStyle = relInfo
-                rmm,rmd = relstats[r]['mean'] # group mean, group meansd alleles shared
-                rdm,rdd = relstats[r]['sd'] # group sdmean, group sdsd alleles shared
-
-                try:
-                    zsd = (sdevs[offset] - rdm)/rdd # distance from group mean in group sd units
-                except:
-                    zsd = 1
-                if abs(zsd) > zsdmax:
-                    zsdmax = abs(zsd) # keep for sort scaling - must be positive for the log10 below
-                try:
-                    zmean = (means[offset] - rmm)/rmd # distance from group mean
-                except:
-                    zmean = 1
-                zmeans[offset] = zmean
-                zstds[offset] = zsd
-                pid=(s1,s2)
-                # the raw z distance (floored at 2) is stored in pair_data here;
-                # it is binned into an svg plot radius later, when each point is drawn
-                zrad = max(2,max(zsd,zmean)) # as > 2, z grows
-                pair_data[pid] = (zmean,zsd,r,zrad)
-                mean = means[offset] # look these up here so the table row below
-                sdev = sdevs[offset] # never reuses stale values from a previous pair
-                ngeno = ngenoL[offset]
-                if max(zsd,zmean) > Zcutoff: # is potentially interesting
-                    outlierRecords[r].append((mean, sdev, zmean, zsd, fid1, iid1, fid2, iid2, rmm, rmd, rdm, rdd,did1,mid1,did2,mid2))
-                    nOutliers += 1
-                tbl.write('%s_%s\t%s_%s\t%f\t%f\t%f\t%f\t%d\t%s\t%s\t%s\t%s\t%s\n' % \
-                          (fid1, iid1, fid2, iid2, mean, sdev, zmean,zsd, ngeno, relName, did1,mid1,did2,mid2))
-                offset += 1
-            pairnum += 1
-    logf.write( 'Outliers: %s\n' % (nOutliers))
-
-    ### Write outlier files for each relationship type
-    repOut.append('<h2>Outliers in tab delimited files linked above are also listed below</h2>')
-    lzsd = round(numpy.log10(zsdmax)) + 1
-    scalefactor = 10**lzsd
-    for relCode, relInfo in REL_LOOKUP.items():
-        relName, _, _ = relInfo
-        outliers = outlierRecords[relCode]
-        if not outliers:
-            continue
-        outliers = [(scalefactor*int(abs(x[3]))+ int(abs(x[2])),x) for x in outliers] # decorate
-        outliers.sort()
-        outliers.reverse() # largest deviation first
-        outliers = [x[1] for x in outliers] # undecorate
-        nrows = len(outliers)
-        truncated = 0
-        if nrows > MAX_SHOW_ROWS:
-            s = '<h3>%s outlying pairs (top %d of %d) from %s</h3><table border="0" cellpadding="3">' % \
-               (relName,MAX_SHOW_ROWS,nrows,title)
-            truncated = nrows - MAX_SHOW_ROWS
-        else:
-            s = '<h3>%s outlying pairs (n=%d) from %s</h3><table border="0" cellpadding="3">' % (relName,nrows,title)
-        repOut.append(s)
-        fhname = '%s_rgGRR_%s_outliers.xls' % (title, relName)
-        fhpath = os.path.join(outdir,fhname)
-        fh = open(fhpath, 'w')
-        newfiles.append(fhname)
-        explanations.append('%s Outlier Pairs %s, N=%d, Cutoff SD=%f' % (relName,title,len(outliers),Zcutoff))
-        fh.write(OUTLIERS_HEADER)
-        s = ''.join(['<th>%s</th>' % x for x in OUTLIERS_HEADER_list])
-        repOut.append('<tr align="center">%s</tr>' % s)
-        for n,rec in enumerate(outliers):
-            #(mean, sdev, zmean, zsd, fid1, iid1, fid2, iid2, rmm, rmd, rdm, rdd) = rec
-            s = '%f\t%f\t%f\t%f\t%s\t%s\t%s\t%s\t%f\t%f\t%f\t%f\t%s\t%s\t%s\t%s\t' % tuple(rec)
-            fh.write('%s%s\n' % (s,relName))
-            # (mean, sdev, zmean, zsd, fid1, iid1, fid2, iid2, rmm, rmd, rdm, rdd, did1,mid1,did2,mid2))
-            s = '''<td>%f</td><td>%f</td><td>%f</td><td>%f</td><td>%s</td><td>%s</td>
-            <td>%s</td><td>%s</td><td>%f</td><td>%f</td><td>%f</td><td>%f</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td>''' % tuple(rec)
-            s = '%s<td>%s</td>' % (s,relName)
-            if n < MAX_SHOW_ROWS:
-                repOut.append('<tr align="center">%s</tr>' % s)
-        if truncated > 0:
-            repOut.append('<H2>WARNING: %d rows truncated - see outlier file for all %d rows</H2>' % (truncated,
-                                                                                            nrows))
-        fh.close()
-        repOut.append('</table><p>')
-
-    ### Now, draw the plot in jpeg and svg formats, and optionally in the PDF format
-    ### if requested
-    logf.write('Plotting ...')
-    pointColors = [REL_COLORS[rel] for rel in rels]
-    pointStyles = [REL_POINTS[rel] for rel in rels]
-
-    mainTitle = '%s (%s subjects, %d snp)' % (title, nSubjects, nrsSamples)
-    svg.write(SVG_HEADER % (SVG_COLORS[0],SVG_COLORS[1],SVG_COLORS[2],SVG_COLORS[3],SVG_COLORS[4],
-        SVG_COLORS[5],SVG_COLORS[6],SVG_COLORS[0],SVG_COLORS[0],SVG_COLORS[1],SVG_COLORS[1],
-        SVG_COLORS[2],SVG_COLORS[2],SVG_COLORS[3],SVG_COLORS[3],SVG_COLORS[4],SVG_COLORS[4],
-        SVG_COLORS[5],SVG_COLORS[5],SVG_COLORS[6],SVG_COLORS[6],mainTitle))
-    #rpy.r.jpeg(filename='%s.jpg' % (title), width=1600, height=1200, pointsize=12, quality=100, bg='white')
-    #rpy.r.par(mai=(1,1,1,0.5))
-    #rpy.r('par(xaxs="i",yaxs="i")')
-    #rpy.r.plot(means, sdevs, main=mainTitle, ylab=Y_AXIS_LABEL, xlab=X_AXIS_LABEL, cex=cexs, col=pointColors, pch=pointStyles, xlim=(0,2), ylim=(0,2))
-    #rpy.r.legend(LEGEND_ALIGN, legend=REL_STATES, pch=REL_POINTS, col=REL_COLORS, title=LEGEND_TITLE)
-    #rpy.r.grid(nx=10, ny=10, col='lightgray', lty='dotted')
-    #rpy.r.dev_off()
-
-    ### We will now go through each relationship type to partition plot points
-    ### into "bulk" and "outlier" groups.  Bulk points will represent common
-    ### mean/sdev pairs and will cover the majority of the points in the plot --
-    ### they will use generic tooltip information about all of the pairs
-    ### represented by that point.  "Outlier" points will be uncommon pairs,
-    ### with very specific information in their tooltips.  It would be nice to
-    ### keep the total number of plotted points in the SVG representation to
-    ### ~10000 (certainly less than 100000?)
-    pointMap = {}
-    orderedRels = [y[1] for y in reversed(sorted([(relCounts.get(x, 0),x) for x in REL_LOOKUP.keys()]))]
-    # do we really want this? I want out of zone points last and big
-    for relCode in orderedRels:
-        svgColor = SVG_COLORS[relCode]
-        relName, relColor, relStyle = REL_LOOKUP[relCode]
-        svg.write('<g id="%s" style="stroke:%s; fill:%s; fill-opacity:1.0; stroke-width:1;" cursor="pointer">\n' % (relName, svgColor, svgColor))
-        pMap = pointMap.setdefault(relCode, {})
-        nPoints = 0
-        rpairs=[]
-        rgenos=[]
-        rmeans=[]
-        rsdevs=[]
-        rz = []
-        for x,rel in enumerate(rels): # all pairs
-            if rel == relCode:
-                s1,s2 = pairs[x]
-                pid=(s1,s2)
-                zmean,zsd,r,zrad = pair_data[pid][:4]
-                rpairs.append(pairs[x])
-                rgenos.append(ngenoL[x])
-                rmeans.append(means[x])
-                rsdevs.append(sdevs[x])
-                rz.append(zrad)
-        ### Now add the svg point group for this relationship to the svg file
-        for x in range(len(rmeans)):
-            svgX = '%d' % ((rmeans[x] - 1.0) * PLOT_WIDTH) # changed so mean scale is 1-2
-            svgY = '%d' % (PLOT_HEIGHT - (rsdevs[x] * PLOT_HEIGHT)) # changed so sd scale is 0-1
-            s1, s2 = rpairs[x]
-            (fid1,uid1,did1,mid1,sex1,phe1,iid1,d_sid1,m_sid1) = scache[s1]
-            (fid2,uid2,did2,mid2,sex2,phe2,iid2,d_sid2,m_sid2) = scache[s2]
-            ngenos = rgenos[x]
-            nPoints += 1
-            point = pMap.setdefault((svgX, svgY), [])
-            point.append((rmeans[x], rsdevs[x], fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, ngenos,rz[x]))
-        for (svgX, svgY) in pMap:
-            points = pMap[(svgX, svgY)]
-            svgX = int(svgX)
-            svgY = int(svgY)
-            if len(points) > 1:
-                mmean,dmean = calcMeanSD([p[0] for p in points])
-                msdev,dsdev = calcMeanSD([p[1] for p in points])
-                mgeno,dgeno = calcMeanSD([p[-2] for p in points]) # ngenos is second to last - the last field is zrad
-                mingeno = min([p[-2] for p in points])
-                maxgeno = max([p[-2] for p in points])
-                svg.write("""<circle cx="%d" cy="%d" r="2"
-                onmouseover="showBTT(evt, %d, %1.2f, %1.2f, %1.2f, %1.2f, %d, %d, %d, %d, %d)"
-                onmouseout="hideBTT(evt)" />\n""" % (svgX, svgY, relCode, mmean, dmean, msdev, dsdev, len(points), mgeno, dgeno, mingeno, maxgeno))
-            else:
-                mean, sdev, fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, ngenos, zrad = points[0][:12]
-                rmean = float(relstats[relCode]['mean'][0])
-                rsdev = float(relstats[relCode]['sd'][0])
-                if zrad < 4:
-                    zrad = 2
-                elif 4 < zrad < 9:
-                    zrad = 3 # to 9
-                else: # > 9 5=15+
-                    zrad=zrad/3
-                    zrad = min(zrad,5) # scale limit
-                if zrad <= 3:
-                    svg.write('<circle cx="%d" cy="%d" r="%s" onmouseover="showOTT(evt, %d, \'%s,%s,%s,%s\', \'%s,%s,%s,%s\', %1.2f, %1.2f, %s, %1.2f, %1.2f)" onmouseout="hideOTT(evt)" />\n' % (svgX, svgY, zrad, relCode, fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, mean, sdev, ngenos, rmean, rsdev))
-                else: # highlight pairs a long way from expectation by outlining circle in red
-                    svg.write("""<circle cx="%d" cy="%d" r="%s" style="stroke:red; fill:%s; fill-opacity:1.0; stroke-width:1;"
-                    onmouseover="showOTT(evt, %d, \'%s,%s,%s,%s\', \'%s,%s,%s,%s\', %1.2f, %1.2f, %s, %1.2f, %1.2f)"
-                    onmouseout="hideOTT(evt)" />\n""" % \
-                    (svgX, svgY, zrad, svgColor, relCode, fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, mean, sdev, ngenos, rmean, rsdev))
-        svg.write('</g>\n')
-
-    ### Create a pdf as well if indicated on the command line
-    ### WARNING! for framingham share, with about 50M pairs, this is a 5.5GB pdf!
-##    if pdftoo:
-##        pdfname = '%s.pdf' % (title)
-##        rpy.r.pdf(pdfname, 6, 6)
-##        rpy.r.par(mai=(1,1,1,0.5))
-##        rpy.r('par(xaxs="i",yaxs="i")')
-##        rpy.r.plot(means, sdevs, main='%s, %d snp' % (title, nSamples), ylab=Y_AXIS_LABEL, xlab=X_AXIS_LABEL, cex=cexs, col=pointColors, pch=pointStyles, xlim=(0,2), ylim=(0,2))
-##        rpy.r.legend(LEGEND_ALIGN, legend=REL_STATES, pch=REL_POINTS, col=REL_COLORS, title=LEGEND_TITLE)
-##        rpy.r.grid(nx=10, ny=10, col='lightgray', lty='dotted')
-##        rpy.r.dev_off()
-
-    ### Draw polygons
-    if showPolygons:
-        svg.write('<g id="polygons" cursor="pointer">\n')
-        for rel, poly in POLYGONS.items():
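-            # polygon vertices are given in (mean, sd) space - rescale exactly as for
-            # the points above: mean 1..2 maps to x, sd 0..1 maps to y (y inverted)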
-            points = ' '.join(['%s,%s' % ((p[0]-1.0)*float(PLOT_WIDTH), (PLOT_HEIGHT - p[1]*PLOT_HEIGHT)) for p in poly])
-            svg.write('<polygon points="%s" fill="transparent" style="stroke:%s; stroke-width:1"/>\n' % (points, SVG_COLORS[rel]))
-        svg.write('</g>\n')
-
-
-    svg.write(SVG_FOOTER)
-    svg.close()
-    return newfiles,explanations,repOut
-
-def doIBS(n=100):
-    """parse parameters from galaxy
-    expect 'input pbed path' 'basename' 'outpath' 'title' 'logpath' 'n'
-    <command interpreter="python">
-         rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name"
-        '$out_file1' '$out_file1.files_path' "$title1"  '$n' '$Z' 
-    </command>
-
-    """
-    u="""<command interpreter="python">
-         rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name"
-        '$out_file1' '$out_file1.files_path' "$title1"  '$n' '$Z'
-         </command>
-      """
-
-
-    if len(sys.argv) < 7:
-        print >> sys.stdout, 'Need pbed inpath, basename, out_htmlname, outpath, title, nSNP and an optional Zcutoff on the command line please'
-        print >> sys.stdout, u
-        sys.exit(1)
-    ts = '%s%s' % (string.punctuation,string.whitespace)
-    ptran =  string.maketrans(ts,'_'*len(ts))
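-    # argv layout: 1=pbed dir path, 2=basename, 3=output html file, 4=files_path dir,
-    # 5=title, 6=nSNP to sample, 7=optional Z cutoff (defaults to 2.0 below)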
-    inpath = sys.argv[1]
-    ldinpath = os.path.split(inpath)[0]
-    basename = sys.argv[2]
-    outhtml = sys.argv[3]
-    newfilepath = sys.argv[4]
-    title = sys.argv[5].translate(ptran)
-    logfname = 'Log_%s.txt' % title
-    logpath = os.path.join(newfilepath,logfname) # log was a child - make part of html extra_files_path zoo
-    n = int(sys.argv[6])
-    try:
-        Zcutoff = float(sys.argv[7])
-    except:
-        Zcutoff = 2.0
-    try:
-        os.makedirs(newfilepath)
-    except:
-        pass
-    logf = file(logpath,'w')
-    efp,ibase_name = os.path.split(inpath) # need to use these for outputs in files_path
-    ped = plinkbinJZ.BPed(inpath)
-    ped.parse(quick=True)
-    if ped is None:
-        print >> sys.stderr, '## doIBSpy problem - cannot open %s or %s - cannot run' % (inpath,basename)
-        sys.exit(1)
-    newfiles,explanations,repOut = doIBSpy(ped=ped,basename=basename,outdir=newfilepath,
-                                    logf=logf,nrsSamples=n,title=title,pdftoo=0,Zcutoff=Zcutoff)
-    logf.close()
-    logfs = file(logpath,'r').readlines()
-    lf = file(outhtml,'w')
-    lf.write(galhtmlprefix % PROGNAME)
-    # this is a mess. todo clean up - should each datatype have its own directory? Yes
-    # probably. Then titles are universal - but userId libraries are separate.
-    s = 'Output from %s run at %s' % (PROGNAME,timenow())
-    lf.write('<div><h4>%s</h4>\n' % s)
-    fixed = ["'%s'" % x for x in sys.argv] # add quotes just in case
-    s = 'If you need to rerun this analysis, the command line was\n<pre>%s</pre>\n</div>' % (' '.join(fixed))
-    lf.write(s)
-    # various ways of displaying svg - experiments related to missing svg mimetype on test (!)
-    #s = """<object data="%s" type="image/svg+xml"  width="%d" height="%d">
-    #       <embed src="%s" type="image/svg+xml" width="%d" height="%d" />
-    #       </object>""" % (newfiles[0],PLOT_WIDTH,PLOT_HEIGHT,newfiles[0],PLOT_WIDTH,PLOT_HEIGHT)
-    s = """ <embed src="%s" type="image/svg+xml" width="%d" height="%d" />""" % (newfiles[0],PLOT_WIDTH,PLOT_HEIGHT)
-    #s = """ <iframe src="%s" type="image/svg+xml" width="%d" height="%d" />""" % (newfiles[0],PLOT_WIDTH,PLOT_HEIGHT)
-    lf.write(s)
-    lf.write('<div><h4>Click the links below to save output files and plots</h4><br><ol>\n')
-    for i in range(len(newfiles)):
-        if i == 0:
-            lf.write('<li><a href="%s" type="image/svg+xml" >%s</a></li>\n' % (newfiles[i],explanations[i]))
-        else:
-            lf.write('<li><a href="%s">%s</a></li>\n' % (newfiles[i],explanations[i]))
-    flist = os.listdir(newfilepath)
-    for fname in flist:
-        if fname not in newfiles:
-            lf.write('<li><a href="%s">%s</a></li>\n' % (fname,fname))
-    lf.write('</ol></div>')
-    lf.write('<div>%s</div>' % ('\n'.join(repOut))) # repOut is a list of tables
-    lf.write('<div><hr><h3>Log from this job (also stored in %s)</h3><pre>%s</pre><hr></div>' % (logfname,''.join(logfs)))
-    lf.write('</body></html>\n')
-    lf.close()
-
-if __name__ == '__main__':
-    doIBS()
--- a/tools/rgenetics/rgGRR.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,95 +0,0 @@
-<tool id="rgGRR1" name="GRR:">
-    <description>Pairwise Allele Sharing</description>
-    <command interpreter="python">
-         rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name"
-        '$out_file1' '$out_file1.files_path' "$title"  '$n' '$Z'
-    </command>
-    <inputs>
-      <param name="i"  type="data" label="Genotype data file from your current history"
-      format="ldindep" />
-       <param name='title' type='text' size="80" value='rgGRR' label="Title for this job"/>
-       <param name="n" type="integer" label="N snps to use (0=all)" value="5000" />
-       <param name="Z" type="float" label="Z score cutoff for outliers (eg 2)" value="6"
-       help="2 works but for very large numbers of pairs, you might want to see less than 5%" />
-    </inputs>
-    <outputs>
-       <data format="html" name="out_file1" label="${title}_rgGRR.html"/>
-    </outputs>
-
-<tests>
- <test>
-    <param name='i' value='tinywga' ftype='ldindep' >
-    <metadata name='base_name' value='tinywga' />
-    <composite_data value='tinywga.bim' />
-    <composite_data value='tinywga.bed' />       
-    <composite_data value='tinywga.fam' />
-    <edit_attributes type='name' value='tinywga' /> 
-    </param>
-  <param name='title' value='rgGRRtest1' />
-  <param name='n' value='100' />
-  <param name='Z' value='6' />
-  <param name='force' value='true' />
-  <output name='out_file1' file='rgtestouts/rgGRR/rgGRRtest1.html' ftype='html' compare="diff" lines_diff='350'>
-    <extra_files type="file" name='Log_rgGRRtest1.txt' value="rgtestouts/rgGRR/Log_rgGRRtest1.txt" compare="diff" lines_diff="170"/>
-    <extra_files type="file" name='rgGRRtest1.svg' value="rgtestouts/rgGRR/rgGRRtest1.svg" compare="diff" lines_diff="1000" />
-    <extra_files type="file" name='rgGRRtest1_table.xls' value="rgtestouts/rgGRR/rgGRRtest1_table.xls" compare="diff" lines_diff="100" />
-  </output>
- </test>
-</tests>
-
-
-<help>
-
-.. class:: infomark
-
-**Explanation**
-
-This tool will calculate allele sharing among all subjects, one pair at a time. It outputs the average number of alleles
-shared and a measure of its variability for each pair of subjects, and creates an interactive image where each pair is
-plotted in this mean/variance space. It is based on the GRR windows application available at
-http://www.sph.umich.edu/csg/abecasis/GRR/
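-
-Each point is simply the mean and standard deviation, over the sampled SNPs, of the number of
-alleles (0, 1 or 2) a pair shares identical-by-state. A minimal sketch of that per-pair calculation
-(plain Python for clarity - the genotype list layout and names here are illustrative assumptions;
-the real tool uses numpy with an inline C inner loop)::
-
-    def ibs(a, b):
-        # alleles shared identical-by-state by two genotypes, eg (1,2) vs (1,1) -> 1
-        if sorted(a) == sorted(b):
-            return 2
-        if a[0] in b or a[1] in b:
-            return 1
-        return 0
-
-    def pair_sharing(genos1, genos2):
-        # genos are lists of (allele1, allele2) tuples; allele 0 means missing
-        shared = [ibs(a, b) for a, b in zip(genos1, genos2)
-                  if 0 not in a and 0 not in b]
-        n = float(len(shared))
-        mean = sum(shared) / n
-        sd = (sum((x - mean) ** 2 for x in shared) / n) ** 0.5
-        return mean, sd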
-
-The plot is interactive - you can unselect one of the relationships in the legend to remove all those points
-from the plot, for example. Details of outlier pairs will pop up when you move the pointer over them.
-This relies on a working browser SVG plugin - try getting one installed for your browser if the interactivity is
-broken.
-
------
-
-**Syntax**
-
-- **Genotype file** is the input pedigree data chosen from available library Plink binary files
-- **Title** will be used to name the outputs so make it mnemonic and useful
-- **N** left at 0 uses all SNPs - otherwise a random sample of N SNPs is taken, which is much quicker with little loss of precision above about 5000 SNPs
-
-**Summary**
-
-Warning - this tool works pairwise, so it slows down quadratically with sample size. An LD-reduced dataset is
-strongly recommended as it will give good resolution with relatively few SNPs. Do not use all million SNPs from a whole
-genome chip - it's overkill - 5k is good, and 10k is almost indistinguishable from 100k.
-
-SNPs are sampled randomly from the autosomes - otherwise parent/child pairs would be separated by gender.
-This tool estimates mean pairwise allele sharing among all subjects. Based on the work of Abecasis, it has
-been rewritten so it can run with much larger data sets, produces cross-platform SVG and runs
-on a Galaxy server, instead of being MS Windows only. Written in Python, it uses numpy, and the innermost loop
-is inline C, so it can calculate about 50M SNP pairs/sec on a typical Opteron server.
-
-Setting N to some fraction of the available markers will speed up calculation - the saving matters most for
-large subject N. The real cost is that every subject must be compared to every other one over all genotypes -
-the number of pairs grows quadratically with the number of subjects, as illustrated below.
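-
-As a concrete illustration of that quadratic growth (arithmetic only - these numbers are not tool output)::
-
-    >>> npairs = lambda n: n * (n - 1) // 2
-    >>> npairs(1000)
-    499500
-    >>> npairs(10000)  # 10x the subjects -> ~100x the pairs
-    49995000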
-
-If you don't see the genotype data set you want here, it can be imported using one of the methods available from
-the Rgenetics Get Data tool.
-
------
-
-**Attribution**
-
-Based on an idea from G. Abecasis implemented as GRR (windows only) at http://www.sph.umich.edu/csg/abecasis/GRR/
-
-Ross Lazarus wrote the original pdf writer Galaxy tool version.
-John Ziniti added the C and created the slick svg representation.
-Copyright Ross Lazarus 2007
-Licensed under the terms of the LGPL as documented http://www.gnu.org/licenses/lgpl.html
-</help>
-</tool>
--- a/tools/rgenetics/rgGTOOL.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-#!/usr/local/bin/python
-# hack to run and process a linkage format file into
-# the format used by Marchini's SNPTEST imputed case control association
-# expects args as  
-#         rgGTOOL.py $i $o $discrete $logf $outdir
-# ross lazarus 
-
-import sys,math,shutil,subprocess,os,time
-from os.path import abspath
-imagedir = '/static/rg' # if needed for images
-myversion = 'V000.1 August 2007'
-
-def timenow():
-    """return current time as a string
-    """
-    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
-
-
-                
-if __name__ == "__main__":
-    if len(sys.argv) < 6:
-        s = 'rgGTOOL.py needs 5 params - got %s\n' % (sys.argv)
-        sys.stderr.write(s)
-        sys.exit(1)
-    print 'Rgenetics %s http://rgenetics.org SNPTEST Tools, rgGTOOL.py starting at %s' % (myversion,timenow())
-    pname = sys.argv[1]
-    lpedname = pname.split('.ped')[0] # get file name part
-    outname = sys.argv[2]
-    discrete = sys.argv[3]
-    logf = sys.argv[4]
-    outdir = sys.argv[5]
-    cdir = os.getcwd()
-    me = sys.argv[0]
-    mypath = abspath(os.path.join(cdir,me)) # get abs path to this python script
-    shpath = abspath(os.path.sep.join(mypath.split(os.path.sep)[:-1]))
-    alogf = abspath(os.path.join(cdir,logf)) # absolute paths
-    apedf = abspath(os.path.join(cdir,'%s.ped' % lpedname)) # absolute paths
-    amapf = abspath(os.path.join(cdir,'%s.map' % lpedname)) # absolute paths
-    outg = abspath(os.path.join(outdir,'%s.gen' % outname)) # absolute paths
-    outs = abspath(os.path.join(outdir,'%s.sample' % outname)) # absolute paths
-    workdir = abspath(os.path.sep.join(mypath.split(os.path.sep)[:-1])) # trim end off './database/files/foo.dat' 
-    os.chdir(workdir)
-    tlogname = '%s.logtemp' % outname
-    sto = file(tlogname,'w')
-    sto.write('rgGTOOL.py: called with %s\n' % (sys.argv)) 
-    exme = 'gtool'
-    vcl = [exme,'-P','--ped',apedf,'--map',amapf,'--discrete_phenotype',discrete,'--og',outg,'--os',outs]
-    #'/usr/local/bin/plink','/usr/local/bin/plink',pc1,pc2,pc3)
-    #os.spawnv(os.P_WAIT,plink,vcl)
-    p=subprocess.Popen(' '.join(vcl),shell=True,stdout=sto)
-    retval = p.wait()
-    sto.write('rgGTOOL.py after calling %s: vcl=%s\n' % (exme,vcl)) 
-    sto.close()
-    shutil.move(tlogname,alogf)
-    os.chdir(cdir)
-
-
-
--- a/tools/rgenetics/rgGTOOL.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-<tool id="rgGTOOL1" name="Converter">
-
-  
-    <description>from linkage format to SNPTEST Marchini files</description>
-  
-    <command interpreter="python">
-        rgGTOOL.py $i $o $discrete $logf $outdir
-    </command>
-    
-    <inputs>    
-       <param name="i"  type="select" label="Genotype file" dynamic_options="get_lib_pedfiles()" /> 
-       <param name="discrete" type="select" label="Make Case/Control based on affection 2/1">
-                        <option selected="yes" value="1">Discrete</option>
-                        <option value="0">Continuous</option>
-       </param>
-       <param name="o" type="text" label="Output Marchini format name" value="Marchini"/>
-       <param name="outdir" type="hidden" value="/usr/local/galaxy/data/rg/snptest" />
-   </inputs>
-
-   <outputs>  
-       <data format="txt" name="logf"  />
-   </outputs>
-<help>
-
-
-**Syntax**
-
-- **Genotype file** is the input linkage format pedigree and corresponding map file
-- **Discrete** is the type of phenotype in the affection column 
-- **Output name** is the file name (.gen and .sample will be added) for the new SNPTEST compatible file
-
-**Note on Discrete**
-See the GTOOL_ documentation link below for more details. Briefly, if
-your linkage format pedigree file has 1/2 in column 6 for control/case respectively, setting this to Discrete will create two
-complete sets of output files distinguished by 1 and 2 respectively. Otherwise, affection status is assumed to contain a
-continuous phenotype and a single output set is produced.
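-
-For reference, the wrapper assembles a gtool call along these lines (the file names here are
-illustrative only - the real paths are supplied by Galaxy)::
-
-    gtool -P --ped input.ped --map input.map --discrete_phenotype 1 --og Marchini.gen --os Marchini.sample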
-
-
-**Summary**
-
-Code used here from Jonathan Marchini's group - see documentation at GTOOL_.
-
-.. _GTOOL: http://www.stats.ox.ac.uk/~marchini/software/gwas/gtool.html
-
------
-
-**Attribution**
-Originally designed and written for the Rgenetics
-series of Galaxy tools by ross lazarus (ross.lazarus@gmail.com), who didn't write GTOOL_
-but wishes he had.
-
-</help>
-</tool>
--- a/tools/rgenetics/rgHaploView.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,513 +0,0 @@
-"""
-released under the terms of the LGPL
-copyright ross lazarus August 2007
-for the rgenetics project
-
-Special galaxy tool for the camp2007 data
-Allows grabbing genotypes from an arbitrary region and estimating
-ld using haploview
-
-stoopid haploview won't allow control of dest directory for plots - always end
-up where the data came from - need to futz to get it where it belongs
-
-Needs a mongo results file in the location hardwired below or could be passed in as
-a library parameter - but this file must have a very specific structure
-rs chrom offset float1...floatn
-
-
-"""
-
-
-import sys, array, os, string, tempfile, shutil, subprocess, glob
-from rgutils import galhtmlprefix
-
-progname = os.path.split(sys.argv[0])[1]
-
-javabin = 'java'
-#hvbin = '/usr/local/bin/Haploview.jar'
-#hvbin = '/home/universe/linux-i686/haploview/Haploview.jar'
-# get this from tool as a parameter - can use 
-
-
-
-atrandic = {'A':'1','C':'2','G':'3','T':'4','N':'0','-':'0','1':'1','2':'2','3':'3','4':'4','0':'0'}
-
-class NullDevice:
-    """ a dev/null for ignoring output
-    """
-    def write(self, s):
-        pass
-
-class ldPlot:
-    
-    def __init__(self, argv=[]):
-        """
-        setup
-        """
-        self.args=argv
-        self.parseArgs(argv=self.args)
-        self.setupRegions()
-                
-    def parseArgs(self,argv=[]):
-        """
-        """
-        ts = '%s%s' % (string.punctuation,string.whitespace)
-        ptran =  string.maketrans(ts,'_'*len(ts))
-        ### Figure out what genomic region we are interested in
-        self.region = argv[1]
-        self.orslist = argv[2].replace('X',' ').lower() # galaxy replaces newlines with XX - go figure
-        self.title = argv[3].translate(ptran)
-        # for outputs
-        self.outfile = argv[4]
-        self.logfn = 'Log_%s.txt' % (self.title)
-        self.histextra = argv[5]
-        self.base_name = argv[6]
-        self.pedFileBase = os.path.join(self.histextra,self.base_name)
-        print 'pedfilebase=%s' % self.pedFileBase
-        self.minMaf=argv[7]
-        if self.minMaf:
-            try:
-                self.minMaf = float(self.minMaf)
-            except:
-                self.minMaf = 0.0
-        self.maxDist=argv[8] or None
-        self.ldType=argv[9] or 'RSQ'
-        self.hiRes = (argv[10].lower() == 'hi')
-        self.memSize= argv[11] or '1000'
-        self.memSize = int(self.memSize)
-        self.outfpath = argv[12]
-        self.infotrack = False # note that otherwise this breaks haploview in headless mode 
-        #infotrack = argv[13] == 'info'
-        # this fails in headless mode as at april 2010 with haploview 4.2
-        self.tagr2 = argv[14] or '0.8'
-        hmpanels = argv[15] # eg "['CEU','YRI']"
-        if hmpanels:
-           hmpanels = hmpanels.replace('[','')
-           hmpanels = hmpanels.replace(']','')
-           hmpanels = hmpanels.replace("'",'')
-           hmpanels = hmpanels.split(',')
-        self.hmpanels = hmpanels
-        self.hvbin = argv[16] # added rml june 2008
-        self.bindir = os.path.split(self.hvbin)[0]
-        # jan 2010 - always assume utes are on path to avoid platform problems
-        self.pdfjoin = 'pdfjoin' # os.path.join(bindir,'pdfjoin')
-        self.pdfnup = 'pdfnup' # os.path.join(bindir,'pdfnup')
-        self.mogrify = 'mogrify' # os.path.join(bindir,'mogrify')
-        self.convert = 'convert' # os.path.join(bindir,'convert')
-        self.log_file = os.path.join(self.outfpath,self.logfn)
-        self.MAP_FILE = '%s.map' % self.pedFileBase
-        self.DATA_FILE = '%s.ped' % self.pedFileBase
-        try:
-            os.makedirs(self.outfpath)
-            s = '## made new path %s\n' % self.outfpath
-        except:
-            pass
-        self.lf = file(self.log_file,'w')
-        s = 'PATH=%s\n' % os.environ.get('PATH','?')
-        self.lf.write(s)
-
-    def getRs(self):
-        """parse the region or rs list; also return the chromosome and span
-        parsed from the region ('' and -9 when no region was given)"""
-        chromosome = ''
-        spos = epos = -9
-        useRs = []
-        useRsdict = {}
-        if self.region > '':
-            try: # TODO make a regexp?
-                c,rest = self.region.split(':')
-                chromosome = c.replace('chr','')
-                rest = rest.replace(',','') # remove commas
-                spos,epos = rest.split('-')
-                spos = int(spos)
-                epos = int(epos)
-                s = '## %s parsing chrom %s from %d to %d\n' % (progname,chromosome,spos,epos)
-                self.lf.write(s)
-                self.lf.write('\n')
-                print >> sys.stdout, s
-            except:
-                s = '##! %s unable to parse region %s - MUST look like "chr8:10,000-100,000"\n' % (progname,self.region)
-                print >> sys.stdout, s
-                self.lf.write(s)
-                self.lf.write('\n')
-                self.lf.close()
-                sys.exit(1)
-        else:
-            useRs = self.orslist.split() # galaxy replaces newlines with XX - go figure
-            useRsdict = dict(zip(useRs,useRs))
-        return useRs, useRsdict, chromosome, spos, epos
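-
-    # Hedged sketch of the regexp alternative flagged in the TODO above; parseRegion
-    # is a hypothetical helper shown for illustration and is not called anywhere.
-    def parseRegion(self, region):
-        """return (chrom, spos, epos) from eg 'chr8:10,000-100,000', or None"""
-        import re
-        m = re.match(r'^chr(\w+):([\d,]+)-([\d,]+)$', region.strip())
-        if not m:
-            return None
-        spos = int(m.group(2).replace(',', ''))
-        epos = int(m.group(3).replace(',', ''))
-        return m.group(1), spos, epos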
-
-    
-    def setupRegions(self):
-        """
-        This turns out to be complex because we allow the user
-        flexibility - paste a list of rs or give a region.
-        In most cases, some subset has to be generated correctly before running Haploview
-        """
-        rslist = []
-        rsdict = {}
-        useRs,useRsdict,chromosome,spos,epos = self.getRs()
-        self.useTemp = False
-        try:
-            dfile = open(self.DATA_FILE, 'r')
-        except: # bad input file name?
-            s = '##! RGeno unable to open file %s\n' % (self.DATA_FILE)
-            self.lf.write(s)
-            self.lf.write('\n')
-            self.lf.close()
-            print >> sys.stdout, s
-            raise
-        try:
-            mfile = open(self.MAP_FILE, 'r')
-        except: # bad input file name?
-            s = '##! RGeno unable to open file %s' % (self.MAP_FILE)
-            self.lf.write(s)
-            self.lf.write('\n')
-            self.lf.close()
-            print >> sys.stdout, s
-            raise
-        if len(useRs) > 0 or spos <> -9: # subset region
-            self.useTemp = True
-            ### Figure out which markers are in this region
-            markers = []
-            snpcols = {}
-            chroms = {}
-            minpos = 2**32
-            maxpos = 0
-            for lnum,row in enumerate(mfile):
-                line = row.strip()
-                if not line: continue
-                chrom, snp, genpos, abspos = line.split()
-                try:
-                    ic = int(chrom)
-                except:
-                    ic = None
-                if ic and ic <= 23:
-                    try:
-                        abspos = int(abspos)
-                        if abspos > maxpos:
-                            maxpos = abspos
-                        if abspos < minpos:
-                            minpos = abspos
-                    except:
-                        abspos = epos + 999999999 # so next test fails
-                if useRsdict.get(snp,None) or (spos <> -9 and chrom == chromosome and (spos <= abspos <= epos)):
-                    if chromosome == '':
-                        chromosome = chrom
-                    chroms.setdefault(chrom,chrom)
-                    markers.append((chrom,abspos,snp)) # decorate for sort into genomic
-                    snpcols[snp] = lnum # so we know which col to find genos for this marker
-            markers.sort()
-            rslist = [x[2] for x in markers] # drop decoration
-            rsdict = dict(zip(rslist,rslist))
-            if len(rslist) == 0:
-                s = '##! %s: Found no rs numbers matching %s' % (progname,self.args[1:3])
-                self.lf.write(s)
-                self.lf.write('\n')
-                self.lf.close()
-                print >> sys.stdout, s
-                sys.exit(1)
-            if spos == -9:
-                spos = minpos
-                epos = maxpos
-            s = '## %s looking for %d rs (%s)' % (progname,len(rslist),rslist[:5])
-            self.lf.write(s)
-            print >> sys.stdout, s
-            wewant = [(6+(2*snpcols[x])) for x in rslist] #
-            # column indices of first geno of each marker pair to get the markers into genomic
-            ### ... and then parse the rest of the ped file to pull out
-            ### the genotypes for all subjects for those markers
-            # /usr/local/galaxy/data/rg/1/lped/
-            self.tempMapName = os.path.join(self.outfpath,'%s.info' % self.title)
-            self.tempMap = file(self.tempMapName,'w')
-            self.tempPedName = os.path.join(self.outfpath,'%s.ped' % self.title)
-            self.tempPed = file(self.tempPedName,'w')
-            self.pngpath = '%s.LD.PNG' % self.tempPedName
-            maplines = ['%s\t%s' % (x[2],x[1]) for x in markers] # snp,abspos in genomic order for haploview
-            self.tempMap.write('%s\n' % '\n'.join(maplines))
-            self.tempMap.close()
-            nrows = 0
-            for line in dfile:
-                line = line.strip()
-                if not line:
-                    continue
-                fields = line.split()
-                preamble = fields[:6]
-                g = ['%s %s' % (fields[snpcol], fields[snpcol+1]) for snpcol in wewant]
-                g = ' '.join(g)
-                g = g.split() # we'll get there
-                g = [atrandic.get(x,'0') for x in g] # numeric alleles...
-                self.tempPed.write('%s %s\n' % (' '.join(preamble), ' '.join(g)))
-                nrows += 1
-            self.tempPed.close()
-            s = '## %s: wrote %d markers, %d subjects for region %s\n' % (progname,len(rslist),nrows,self.region)
-            self.lf.write(s)
-            self.lf.write('\n')
-            print >> sys.stdout,s
-        else: # even if using all, must set up haploview info file instead of map
-            markers = []
-            chroms = {}
-            spos = sys.maxint
-            epos = -spos
-            for lnum,row in enumerate(mfile):
-              line = row.strip()
-              if not line: continue
-              chrom, snp, genpos, abspos = line.split()
-              try:
-                ic = int(chrom)
-              except:
-                ic = None
-              if ic and ic <= 23:
-                if chromosome == '':
-                    chromosome = chrom
-                chroms.setdefault(chrom,chrom)
-                try:
-                    p = int(abspos)
-                    if p < spos and p <> 0:
-                        spos = p
-                    if p > epos and p <> 0:
-                        epos = p
-                except:
-                    pass
-                markers.append('%s %s' % (snp,abspos)) # no sort - pass
-            # now have spos and epos for hapmap if hmpanels
-            self.tempMapName = os.path.join(self.outfpath,'%s.info' % self.title)
-            self.tempMap = file(self.tempMapName,'w')
-            self.tempMap.write('\n'.join(markers))
-            self.tempMap.close()
-            self.tempPedName = os.path.join(self.outfpath,'%s.ped' % self.title)
-            try: # will fail on winblows!
-                os.symlink(self.DATA_FILE,self.tempPedName)
-            except:
-                shutil.copy(self.DATA_FILE,self.tempPedName) # wasteful but..
-        self.nchroms = len(chroms) # if > 1 can't really do this safely
-        dfile.close()
-        mfile.close()
-        self.spos = spos
-        self.epos = epos
-        self.chromosome = chromosome
-        if self.nchroms > 1:
-            s = '## warning - multiple chromosomes found in your map file - %s\n' % ','.join(chroms.keys())
-            self.lf.write(s)
-            print >> sys.stdout,s
-            sys.exit(1)
-
-    def run(self,vcl):
-        """
-        """
-        p=subprocess.Popen(vcl,shell=True,cwd=self.outfpath,stderr=self.lf,stdout=self.lf)
-        retval = p.wait()
-        self.lf.write('## executing %s returned %d\n' % (vcl,retval))
-
-    def plotHmPanels(self,ste):
-        """
-        """
-        sp = '%d' % (self.spos/1000.) # hapmap wants kb
-        ep = '%d' % (self.epos/1000.)
-        fnum=0
-        for panel in self.hmpanels:
-            if panel > '' and panel.lower() <> 'none': # in case someone checks that option too :)
-                ptran = panel.strip()
-                ptran = ptran.replace('+','_')
-                fnum += 1 # preserve an order or else we get sorted
-                vcl = [javabin,'-jar',self.hvbin,'-n','-memory','%d' % self.memSize,
-                  '-chromosome',self.chromosome, '-panel',panel.strip(),
-                  '-hapmapDownload','-startpos',sp,'-endpos',ep,
-                  '-ldcolorscheme',self.ldType]
-                if self.minMaf:
-                    vcl += ['-minMaf','%f' % self.minMaf]
-                if self.maxDist:
-                    vcl += ['-maxDistance',self.maxDist]
-                if self.hiRes:
-                    vcl.append('-png')
-                else:
-                    vcl.append('-compressedpng')
-                if self.infotrack:
-                    vcl.append('-infoTrack')
-                p=subprocess.Popen(' '.join(vcl),shell=True,cwd=self.outfpath,stderr=ste,stdout=self.lf)
-                retval = p.wait()
-                inpng = 'Chromosome%s%s.LD.PNG' % (self.chromosome,panel)
-                inpng = inpng.replace(' ','') # mysterious spaces!
-                outpng = '%d_HapMap_%s_%s.png' % (fnum,ptran,self.chromosome)
-                # hack for stupid chb+jpt
-                outpng = outpng.replace(' ','')
-                tmppng = '%s.tmp.png' % self.title
-                tmppng = tmppng.replace(' ','')
-                outpng = os.path.split(outpng)[-1]
-                vcl = [self.convert, '-resize 800x400!', inpng, tmppng]
-                self.run(' '.join(vcl))
-                s = "text 10,300 'HapMap %s'" % ptran.strip()
-                vcl = [self.convert, '-pointsize 25','-fill maroon',
-                      '-draw "%s"' % s, tmppng, outpng]
-                self.run(' '.join(vcl))
-                try:
-                    os.remove(os.path.join(self.outfpath,tmppng))
-                except:
-                    pass
-
-    def doPlots(self):
-        """
-        """    
-        DATA_FILE = self.tempPedName # for haploview
-        INFO_FILE = self.tempMapName
-        fblog,blog = tempfile.mkstemp()
-        ste = open(blog,'w') # to catch the blather
-        # if no need to rewrite - set up names for haploview call
-        vcl = [javabin,'-jar',self.hvbin,'-n','-memory','%d' % self.memSize,'-pairwiseTagging',
-               '-pedfile',DATA_FILE,'-info',INFO_FILE,'-tagrsqcounts',
-               '-tagrsqcutoff',self.tagr2, '-ldcolorscheme',self.ldType]
-        if self.minMaf:
-            vcl += ['-minMaf','%f' % self.minMaf]
-        if self.maxDist:
-            vcl += ['-maxDistance',self.maxDist]
-        if self.hiRes:
-            vcl.append('-png')
-        else:
-            vcl.append('-compressedpng')
-        if self.nchroms == 1:
-            vcl += ['-chromosome',self.chromosome]
-        if self.infotrack:
-            vcl.append('-infoTrack')
-        self.run(' '.join(vcl))
-        vcl = [self.mogrify, '-resize 800x400!', '*.PNG']
-        self.run(' '.join(vcl))
-        inpng = '%s.LD.PNG' % DATA_FILE # stupid but necessary - can't control haploview name mangle
-        inpng = inpng.replace(' ','')
-        inpng = os.path.split(inpng)[-1]
-        tmppng = '%s.tmp.png' % self.title
-        tmppng = tmppng.replace(' ','')
-        outpng = '1_%s.png' % self.title
-        outpng = outpng.replace(' ','')
-        outpng = os.path.split(outpng)[-1]
-        vcl = [self.convert, '-resize 800x400!', inpng, tmppng]
-        self.run(' '.join(vcl))
-        s = "text 10,300 '%s'" % self.title[:40]
-        vcl = [self.convert, '-pointsize 25','-fill maroon',
-              '-draw "%s"' % s, tmppng, outpng]
-        self.run(' '.join(vcl))
-        try:
-            os.remove(os.path.join(self.outfpath,tmppng))
-        except:
-            pass
-        # label all the plots, then delete all the .PNG files before munging
-        fnum=1
-        if self.hmpanels:
-            self.plotHmPanels(ste)
-        nimages = len(glob.glob(os.path.join(self.outfpath,'*.png'))) # rely on HaploView shouting - PNG @!
-        self.lf.write('### nimages=%d\n' % nimages)
-        if nimages > 0: # haploview may fail?
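-            # pipeline: rasterised pngs -> pdf (mogrify), concatenate the pdfs
-            # (pdfjoin), stack all pages onto a single page (pdfnup), then
-            # render a small png thumbnail of that page (convert)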
-            vcl = '%s -format pdf -resize 800x400! *.png' % self.mogrify
-            self.run(vcl)
-            vcl = '%s *.pdf --fitpaper true --outfile alljoin.pdf' % self.pdfjoin
-            self.run(vcl)
-            vcl = '%s alljoin.pdf --nup 1x%d --outfile allnup.pdf' % (self.pdfnup,nimages)
-            self.run(vcl)
-            vcl = '%s -resize x300 allnup.pdf allnup.png' % (self.convert)
-            self.run(vcl)
-        ste.close() # temp file used to catch haploview blather
-        hblather = open(blog,'r').readlines() # to catch the blather    
-        os.unlink(blog)
-        if len(hblather) > 0:
-           self.lf.write('## In addition, Haploview complained:')
-           self.lf.write(''.join(hblather))
-           self.lf.write('\n')
-        self.lf.close()
-        
-    def writeHtml(self):
-        """
-        """
-        flist = glob.glob(os.path.join(self.outfpath, '*'))
-        flist.sort()
-        ts = '!"#$%&\'()*+,-/:;<=>?@[\\]^_`{|}~' + string.whitespace
-        ftran =  string.maketrans(ts,'_'*len(ts))
-        outf = file(self.outfile,'w')
-        outf.write(galhtmlprefix % progname)
-        s = '<h4>rgenetics for Galaxy %s, wrapping HaploView</h4>' % (progname)
-        outf.write(s)
-        mainthumb = 'allnup.png'
-        mainpdf = 'allnup.pdf'
-        if os.path.exists(os.path.join(self.outfpath,mainpdf)):
-            if not os.path.exists(os.path.join(self.outfpath,mainthumb)):
-                outf.write('<table><tr><td colspan="3"><a href="%s">Main combined LD plot</a></td></tr></table>\n' % (mainpdf))
-            else:
-                outf.write('<table><tr><td><a href="%s"><img src="%s" title="Main combined LD image" hspace="10" align="middle">' % (mainpdf,mainthumb))
-                outf.write('</td><td>Click the thumbnail at left to download the main combined LD image <a href=%s>%s</a></td></tr></table>\n' % (mainpdf,mainpdf))
-        else:
-            outf.write('(No main image was generated - this usually means a Haploview error connecting to Hapmap site - please try later)<br/>\n')
-        outf.write('<br><div><hr><ul>\n')
-        for i, data in enumerate( flist ):
-            dn = os.path.split(data)[-1]
-            if dn[:3] <> 'all':
-                continue
-            newdn = dn.translate(ftran)
-            if dn <> newdn:
-                os.rename(os.path.join(self.outfpath,dn),os.path.join(self.outfpath,newdn))
-                dn = newdn
-            dnlabel = dn
-            ext = dn.split('.')[-1]
-            if dn == 'allnup.pdf':
-                dnlabel = 'All pdf plots on a single page'
-            elif dn == 'alljoin.pdf':
-                dnlabel = 'All pdf plots, each on a separate page'
-            outf.write('<li><a href="%s">%s - %s</a></li>\n' % (dn,dn,dnlabel))
-        for i, data in enumerate( flist ):
-            dn = os.path.split(data)[-1]
-            if dn[:3] == 'all':
-                continue
-            newdn = dn.translate(ftran)
-            if dn <> newdn:
-                os.rename(os.path.join(self.outfpath,dn),os.path.join(self.outfpath,newdn))
-                dn = newdn
-            dnlabel = dn
-            ext = dn.split('.')[-1]
-            if dn == 'allnup.pdf':
-                dnlabel = 'All pdf plots on a single page'
-            elif dn == 'alljoin.pdf':
-                dnlabel = 'All pdf plots, each on a separate page'
-            elif ext == 'info':
-                dnlabel = '%s map data for Haploview input' % self.title
-            elif ext == 'ped':
-                dnlabel = '%s genotype data for Haploview input' % self.title
-            elif dn.find('CEU') <> -1 or dn.find('YRI') <> -1 or dn.find('CHB_JPT') <> -1: # is hapmap
-                dnlabel = 'Hapmap data'
-            if ext == 'TAGS' or ext == 'TESTS' or ext == 'CHAPS':
-                dnlabel = dnlabel + ' Tagger output'
-            outf.write('<li><a href="%s">%s - %s</a></li>\n' % (dn,dn,dnlabel))
-        outf.write('</ul><br>')
-        outf.write("</div><div><hr>Job Log follows below (see %s)<pre>" % self.logfn)
-        s = file(self.log_file,'r').readlines()
-        s = ''.join(s)
-        outf.write('%s</pre><hr></div>' % s)
-        outf.write('</body></html>')
-        outf.close()
-        if self.useTemp:
-            try:
-                os.unlink(self.tempMapName)
-                os.unlink(self.tempPedName)
-            except:
-                pass
-        
-if __name__ == "__main__":
-    """  ### Sanity check the arguments
-
-    <command interpreter="python">
-    rgHaploView.py "$ucsc_region" "$rslist" "$title" "$out_file1"
-    "$lhistIn.extra_files_path" "$lhistIn.metadata.base_name"
-    "$minmaf" "$maxdist" "$ldtype" "$hires" "$memsize" "$out_file1.files_path"
-    "$infoTrack" "$tagr2" "$hmpanel" ${GALAXY_DATA_INDEX_DIR}/rg/bin/haploview.jar
-    </command>
-
-    remember to figure out chromosome and complain if > 1?
-    and use the -chromosome <1-22,X,Y> parameter to haploview
-    skipcheck?
-    """
-    progname = os.path.split(sys.argv[0])[-1]
-    if len(sys.argv) < 17:
-        s = '##!%s: Expected 16 params (plus the script name) in sys.argv, got %d (%s)' % (progname,len(sys.argv), sys.argv)
-        print s
-        sys.exit(1)
-    ld = ldPlot(argv = sys.argv)
-    ld.doPlots()
-    ld.writeHtml()
-
-
-
--- a/tools/rgenetics/rgHaploView.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,152 +0,0 @@
-<tool id="rgHaploView1" name="LD plots:" version="0.3">
-
-    <description>and comparisons with HapMap data</description>
-
-    <command interpreter="python">
-    rgHaploView.py "$ucsc_region" "$rslist" "$title" "$out_file1"
-    "$lhistIn.extra_files_path" "$lhistIn.metadata.base_name"
-    "$minmaf" "$maxdist" "$ldtype" "$hires" "$memsize" "$out_file1.files_path"
-    "$infoTrack" "$tagr2" "$hmpanel" ${GALAXY_DATA_INDEX_DIR}/shared/jars/haploview.jar
-    </command>
-
-    <inputs>
-
-       <param name="lhistIn" type="data" format="lped"
-        label="Current history lPed format data"
-        size="80" help="Linkage Ped format data from your current history" />
-
-       <param name="ucsc_region" type="text" label="Optional subset region (blank=ALL. WARNING: doing this will fail if >1 chromosome in input data!)"
-        size="80" optional="true"
-        help="Region eg: chr9:119,506,000-119,518,000 Leave blank for all or to extract the rs list supplied below."/>
-
-       <param name="rslist" type="text" area='true' size='5x20' label="rs list" optional="true"
-       help="List of rs numbers to select - cut and paste or type, use space delimiters. Leave blank to extract region supplied above."  />
-
-       <param name="title" type="text" size="80" label="Title for output files" optional="true"
-        help="Descriptive title for new genotype/map files" value="LD Plots" />
-
-    <param name="ldtype" type="select" label="Type of LD measure to estimate and plot"
-        size="80" help="" >
-        <option value="RSQ" selected="True">rsquared (default)</option>
-        <option value="DEFAULT">D prime</option>
-        <option value="DPALT">D prime alternative</option>
-        <option value="GAB">Gabriel</option>
-        <option value="GAM">4 Gamete test</option>
-    </param>
-
-   <param name="minmaf" type="float" label = "Minimum minor allele frequency to use" value="0.05"
-    help="If &gt; 0.0, markers below this MAF will be ignored for calculations"/>
-
-    <param name="maxdist" type="integer" label = "Maximum distance (kbp) between markers for LD estimate"
-    value="200" help="If &lt; &gt; 0, only marker pairs at or below this distance will have LD calculated"/>
-
-    <param name="hmpanel" type="select" multiple="true" label="Hapmap panels to compare"
-        size="40" help="HapMap data LD plots will also be produced for each selected population panel" >
-        <option value='CEU' selected="True">CEPH (European) (default)</option>
-        <option value='YRI'>Yoruba (African)</option>
-        <option value='CHB+JPT'>Chinese + Japanese</option>
-        <option value="">(None - no comparison)</option>
-    </param>
-    <param name="tagr2" type="float" label = "rsquared threshold for tagging outputs" value="0.8"
-    help="Tagging output will use this value as the minimum rsquared threshold"/>
-
-    <param name="infoTrack" type="select" label="Add Hapmap information track to image"
-    help="Refseq genes and snp density can be added to the plot if desired for orientation" >
-    <option value="info">Add Information track (DISABLED! Awaiting bug fix from Haploview authors since reported in October 2009)</option>
-    <option value="noinfo" selected="True">No Information track</option>
-    </param>
-
-    <param name="hires" type="select" label="High resolution plots"
-    help="A high resolution plot file may be possible but only for small regions - not reliable &gt;100's of snps">
-    <option value="hi">High resolution - only a few (hundreds of) markers</option>
-    <option value="lo" selected="True">Low resolution - large number of markers</option>
-    </param>
-
-    <param name="memsize" type="select" label="System RAM to allocate"
-        size="80" help="Very large files will need extra memory (java is a bit of a pig)" >
-        <option value="1024">1GB</option>
-        <option value="2048" selected="True">2GB (default)</option>
-        <option value="4096">4GB</option>
-        <option value="6144">6GB</option>
-        <option value="8192">8GB</option>
-    </param>
-
-   </inputs>
-
-   <outputs>
-       <data format="html" name="out_file1" label="${title}.html" />
-   </outputs>
-
-<!-- python $TOOLPATH/$TOOL.py "" "rs2283802Xrs2267000Xrs16997606Xrs4820537Xrs3788347Xrs756632Xrs4820539Xrs2283804Xrs2267006Xrs4822363X" \
-"$NPRE" $OUTPATH/${NPRE}.html "test" "" "$INPATH" "tinywga" 0.0 200000 "RSQ" "lo" "2048" "$OUTPATH" "hg18" "noinfo" "0.8" \
-"['CEU','YRI','CHB+JPT']" $BINPATH/haploview.jar -->
-<tests>
- <test>
-  <param name='lhistIn' value='tinywga' ftype='lped' >
-   <metadata name='base_name' value='tinywga' />
-   <composite_data value='tinywga.ped' />
-   <composite_data value='tinywga.map' />
-   <edit_attributes type='name' value='tinywga' /> 
-  </param>
- <param name='ucsc_region' value='' />
- <param name='title' value='rgHaploViewtest1' />
- <param name='rslist' value="rs2283802 rs2267000 rs16997606 rs4820537 rs3788347 rs756632Xrs4820539 rs2283804 rs2267006 rs4822363" />
- <param name='ldtype' value='RSQ' />
- <param name='minmaf' value='0.0' />
- <param name='maxdist' value='200000' />
- <param name='tagr2' value='0.8' />
- <param name='hmpanel' value="YRI" />
- <param name='infoTrack' value='noinfo' />
- <param name='hires' value='lo' />
- <param name='memsize' value='2048' />
- <output name='out_file1' file='rgtestouts/rgHaploView/rgHaploViewtest1.html' ftype='html' lines_diff="60">
-    <extra_files type="file" name='alljoin.pdf' value="rgtestouts/rgHaploView/alljoin.pdf" compare="sim_size" delta="50000"/>
-    <extra_files type="file" name='allnup.pdf' value="rgtestouts/rgHaploView/allnup.pdf" compare="sim_size" delta="50000" />
-    <extra_files type="file" name='Log_rgHaploViewtest1.txt' value="rgtestouts/rgHaploView/Log_rgHaploViewtest1.txt" compare="diff" lines_diff="50"/>
-    <extra_files type="file" name='rgHaploViewtest1.ped.TESTS' value="rgtestouts/rgHaploView/rgHaploViewtest1.ped.TESTS" compare="diff" 
-            lines_diff="20"/>
-    <extra_files type="file" name='rgHaploViewtest1.ped.TAGS' value="rgtestouts/rgHaploView/rgHaploViewtest1.ped.TAGS" compare="diff"
-            lines_diff="20" />
- </output>
- </test>
-</tests>
-
-<help>
-
-.. class:: infomark
-
-**Note**
-
-The input file must be in linkage ped format. A suitable file can be chosen from the system library,
-or from the files already imported into your current history. Use either one of the selection boxes to
-make your choice.
-
------
-
-**Syntax**
-
-- **Library Linkage Ped** is a linkage format pedigree file chosen from the system file Library
-- **History Linkage Ped** is a linkage format pedigree file chosen from your current Galaxy History
-- **Region** is the genomic region cut and paste from a UCSC browser location window
-- **Genome Build** is the version of the genome your markers are from - use hg18 for CAMP illumina data
-
------
-
-**Summary**
-
-This tool is a special purpose tool to estimate and plot linkage disequilibrium estimated
-from genotype data in linkage pedigree format (separate map file). All markers in the input file
-are used as the default. To limit the calculations to a subset of the input data, supply
-a specified genomic region in UCSC browser location format or a list of specific marker IDs.
-
-Note that you can choose either a file of the correct type (linkage pedigree - lped) from
-your current history **or** from the system library.
-
-This tool currently calls Haploview for estimation and plots. For full attribution, source code and documentation, see
-http://www.broad.mit.edu/mpg/haploview/index.php
-
-Copyright, Ross Lazarus, April 2008 for the Rgenetics project
-Released under the LGPL. See http://www.gnu.org/licenses/lgpl.html for license terms.
-
-</help>
-</tool>
--- a/tools/rgenetics/rgLDIndep.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,182 +0,0 @@
-"""
-# oct 2009 - must make a map file in case later usage requires it...
-# galaxy tool xml files can define a galaxy supplied output filename
-# that must be passed to the tool and used to return output
-# here, the plink log file is copied to that file and removed
-# took a while to figure this out!
-# use exec_before_job to give files sensible names
-#
-# ross april 14 2007
-# plink cleanup script
-# ross lazarus March 2007 for camp illumina whole genome data
-# note problems with multiple commands being ignored - eg --freq --missing --mendel 
-# only the first seems to get done...
-#
-##Summary statistics versus inclusion criteria
-##
-##Feature                         As summary statistic    As inclusion criteria
-##Missingness per individual      --missing               --mind N
-##Missingness per marker          --missing               --geno N        
-##Allele frequency                --freq                  --maf N
-##Hardy-Weinberg equilibrium      --hardy                 --hwe N
-##Mendel error rates              --mendel                --me N M
-#
-# this is rgLDIndep.py - main task is to decrease LD by filtering high LD pairs
-# remove that function from rgClean.py as it may not be needed.
-  
-"""
-import sys,shutil,os,subprocess, glob, string, tempfile, time
-from rgutils import plinke, timenow, galhtmlprefix
-
-prog = os.path.split(sys.argv[0])[-1]
-myversion = 'January 4 2010'
-
-
-def pruneld(plinktasks=[] ,cd='./',vclbase = []):
-    """
-    plink blathers when doing pruning - ignore
-    Linkage disequilibrium based SNP pruning
-    if a million snps in 3 billion base pairs, have mean 3k spacing
-    assume 40-60k of ld in ceu, a window of 120k width is about 40 snps
-    so lots more is perhaps less efficient - each window computational cost is
-    ON^2 unless the code is smart enough to avoid unecessary computation where
-    allele frequencies make it impossible to see ld > the r^2 cutoff threshold
-    So, do a window and move forward 20? 
-    from the plink docs at http://pngu.mgh.harvard.edu/~purcell/plink/summary.shtml#prune
-    
-Sometimes it is useful to generate a pruned subset of SNPs that are in approximate linkage
-equilibrium with each other. This can be achieved via two commands: --indep, which prunes based
-on the variance inflation factor (VIF), recursively removing SNPs within a sliding window;
-and --indep-pairwise, which is similar except that it is based only on pairwise genotypic correlation.
-
-Hint The output of either of these commands is two lists of SNPs: those that are pruned out and those that are not. A separate command using the --extract or --exclude option is necessary to actually perform the pruning.
-
-The VIF pruning routine is performed:
-plink --file data --indep 50 5 2
-
-will create files
-
-     plink.prune.in
-     plink.prune.out
-
-Each is a simple list of SNP IDs; both these files can subsequently be specified as the argument for
-a --extract or --exclude command.
-
-The parameters for --indep are: window size in SNPs (e.g. 50), the number of SNPs to shift the
-window at each step (e.g. 5), and the VIF threshold. The VIF is 1/(1-R^2), where R^2 is the multiple
-correlation coefficient for a SNP being regressed on all other SNPs simultaneously. That is, this
-considers the correlations between SNPs but also between linear combinations of SNPs. A VIF of 10
-is often taken to represent near-collinearity problems in standard multiple regression analyses
-(i.e. it implies an R^2 of 0.9). A VIF of 1 would imply that the SNP is completely independent of
-all other SNPs. Practically, values between 1.5 and 2 should probably be used; particularly in small
-samples, if this threshold is too low and/or the window size is too large, too many SNPs may be removed.
-
-The second procedure is performed:
-plink --file data --indep-pairwise 50 5 0.5
-
-This generates the same output files as the first version; the only difference is that a
-simple pairwise threshold is used. The first two parameters (50 and 5) are the same as above
-(window size and step); the third parameter represents the r^2 threshold. Note: this represents
-the pairwise SNP-SNP metric now, not the multiple correlation coefficient; also note, this is
-based on the genotypic correlation, i.e. it does not involve phasing.
-
-To give a concrete example: the command above that specifies 50 5 0.5 would a) consider a
-window of 50 SNPs, b) calculate LD between each pair of SNPs in the window, c) remove one of a
-pair of SNPs if the LD is greater than 0.5, and d) shift the window 5 SNPs forward and repeat
-the procedure.
-
-To make a new, pruned file, then use something like (in this example, we also convert the 
-standard PED fileset to a binary one):
-plink --file data --extract plink.prune.in --make-bed --out pruneddata
-    """
-    logres = ['## Rgenetics %s: http://rgenetics.org Galaxy Tools rgLDIndep.py Plink pruneLD runner\n' % myversion,]
-    for task in plinktasks: # each is a list
-        fplog,plog = tempfile.mkstemp()
-        sto = open(plog,'w') # to catch the blather
-        vcl = vclbase + task
-        s = '## ldindep now executing %s\n' % ' '.join(vcl)
-        print s
-        logres.append(s)
-        x = subprocess.Popen(' '.join(vcl),shell=True,stdout=sto,stderr=sto,cwd=cd)
-        retval = x.wait()
-        sto.close()
-        sto = open(plog,'r') # read
-        try:
-            lplog = sto.readlines()
-            lplog = [x for x in lplog if x.find('Pruning SNP') == -1]
-            logres += lplog
-            logres.append('\n')
-        except:
-            logres.append('### %s Strange - no std out from plink when running command line\n%s' % (timenow(),' '.join(vcl)))
-        sto.close()
-        os.unlink(plog) # no longer needed
-    return logres
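-
-# Minimal usage sketch for pruneld (the file name and thresholds are illustrative
-# assumptions; the two-step prune-then-extract mirrors what clean() builds below):
-#
-#   plinktasks = [['--bfile', 'mydata', '--indep-pairwise 50 5 0.1', '--out', 'pruned'],
-#                 ['--bfile', 'mydata', '--extract pruned.prune.in --make-bed --out pruned']]
-#   log = pruneld(plinktasks=plinktasks, cd='.', vclbase=[plinke, '--noweb'])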
-
-
-
-def clean():
-    """
-    """
-    if len(sys.argv) < 15:
-        print >> sys.stdout, '## %s expected 14 params in sys.argv, got %d - %s' % (prog,len(sys.argv),sys.argv)
-        print >> sys.stdout, """this script will filter a linkage format ped
-        and map file containing genotypes. It takes 14 parameters - the plink --bfile parameter and
-        a new filename root for the output clean data followed by the mind, geno, hwe, maf, mef and mei
-        documented in the plink docs, plus the file to be returned to Galaxy
-        Called as:
-        <command interpreter="python">
-        rgLDIndep.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title' '$mind'
-        '$geno' '$hwe' '$maf' '$mef' '$mei' '$out_file1'
-        '$out_file1.extra_files_path'  '$window' '$step' '$r2'
-        </command>
-        """
-        sys.exit(1)
-    plog = ['## Rgenetics: http://rgenetics.org Galaxy Tools rgLDIndep.py started %s\n' % timenow()]
-    inpath = sys.argv[1]
-    inbase = sys.argv[2]
-    killme = string.punctuation + string.whitespace
-    trantab = string.maketrans(killme,'_'*len(killme))
-    title = sys.argv[3].translate(trantab)
-    mind = sys.argv[4]
-    geno = sys.argv[5]
-    hwe = sys.argv[6]
-    maf = sys.argv[7]
-    me1 = sys.argv[8]
-    me2 = sys.argv[9]
-    outfname = sys.argv[10]
-    outfpath = sys.argv[11]
-    winsize = sys.argv[12]
-    step = sys.argv[13]
-    r2 = sys.argv[14]
-    output = os.path.join(outfpath,outfname)
-    outpath = os.path.join(outfpath,title)
-    outprunepath = os.path.join(outfpath,'ldprune_%s' % title)
-    try:
-      os.makedirs(outfpath)
-    except:
-      pass
-    bfile = os.path.join(inpath,inbase)
-    filterout = os.path.join(outpath,'filtered_%s' % inbase)
-    outf = file(outfname,'w')
-    outf.write(galhtmlprefix % prog)
-    ldin = bfile
-    plinktasks = [['--bfile',ldin,'--indep-pairwise %s %s %s' % (winsize,step,r2),'--out',outpath,
-    '--mind',mind,'--geno',geno,'--maf',maf,'--hwe',hwe,'--me',me1,me2,],
-    ['--bfile',ldin,'--extract %s.prune.in --make-bed --out %s' % (outpath,outpath)],
-    ['--bfile',outpath,'--recode --out',outpath]] # make map file - don't really need ped but...
-    # subset of ld independent markers for eigenstrat and other requirements
-    vclbase = [plinke,'--noweb']
-    prunelog = pruneld(plinktasks=plinktasks,cd=outfpath,vclbase = vclbase)
-    """This generates the same output files as the first version;
-    the only difference is that a simple pairwise threshold is used.
-    The first two parameters (50 and 5) are the same as above (window size and step);
-    the third parameter represents the r^2 threshold.
-    Note: this represents the pairwise SNP-SNP metric now, not the
-    multiple correlation coefficient; also note, this is based on the
-    genotypic correlation, i.e. it does not involve phasing. 
-    """
-    plog += prunelog
-    flog = '%s.log' % outpath
-    flogf = open(flog,'w')
-    flogf.write(''.join(plog))
-    flogf.write('\n')
-    flogf.close()
-    globme = os.path.join(outfpath,'*')
-    flist = glob.glob(globme)
-    flist.sort()
-    for i, data in enumerate( flist ):
-        outf.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1]))
-    outf.write('</ol></div>\n')
-    outf.write("</div></body></html>")
-    outf.close()
-
-
-if __name__ == "__main__":
-    clean()
-
--- a/tools/rgenetics/rgLDIndep.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,158 +0,0 @@
-<tool id="rgLDIndep1" name="LD Independent:">
-    <code file="rgLDIndep_code.py"/>
-
-    <description>filter high LD pairs - decrease redundancy</description>
-
-    <command interpreter="python">
-        rgLDIndep.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title1' '$mind'
-        '$geno' '$hwe' '$maf' '$mef' '$mei' '$out_file1'
-        '$out_file1.files_path'  '$window' '$step' '$r2'
-    </command>
-
-    <inputs>
-       <param name="input_file"  type="data" label="RGenetics genotype data from your current history"
-         size="80" format="pbed" />
-       <param name="title1" type="text" size="80" label="Descriptive title for cleaned genotype file" value="LD_Independent"/>
-       <param name="r2" type="float" value = "0.1"
-       label="r2 threshold: Select only pairs at or below this r^2 threshold (eg 0.1)"
-       help="LD threshold defining LD independent markers" />
-       <param name="window" type="integer" value = "40" label="Window: Window size to limit LD pairwise"
-       help = "Bigger is better but time taken blows up exponentially as the window grows!" />
-       <param name="step" type="integer" value = "30" label="Step: Move window this far and recompute"
-       help = "Smaller is better but of course, time increases..." />
-       <param name="geno"  type="float" label="Maximum Missing Fraction: Markers" value="1.0" />
-       <param name="mind" type="float" value="1.0" label="Maximum Missing Fraction: Subjects"/>
-       <param name="mef"  type="float" label="Maximum Mendel Error Rate: Family" value="1.0"/>
-       <param name="mei"  type="float" label="Maximum Mendel Error Rate: Marker" value="1.0"/>
-       <param name="hwe" type="float" value="0.0" label="Smallest HWE p value (set to 0 for all)" />
-       <param name="maf" type="float" value="0.0"
-       label="Smallest Allowable Minor Allele Frequency (set to 0.0 for all)"/>
-
-   </inputs>
-
-   <outputs>
-       <data format="pbed" name="out_file1" metadata_source="input_file"  />
-   </outputs>
-<tests>
- <test>
-
-    <param name='input_file' value='tinywga' ftype='pbed' >
-    <metadata name='base_name' value='tinywga' />
-    <composite_data value='tinywga.bim' />
-    <composite_data value='tinywga.bed' />
-    <composite_data value='tinywga.fam' />
-    <edit_attributes type='name' value='tinywga' /> 
-    </param>
-    <param name='title1' value='rgLDIndeptest1' />
-    <param name="mind" value="1" />
-    <param name="geno" value="1" />
-    <param name="hwe" value="0" />
-    <param name="maf" value="0" />
-    <param name="mef" value="1" />
-    <param name="mei" value="1" />
-    <param name="window" value="10000" />
-    <param name="step" value="5000" />
-    <param name="r2" value="0.1" />
-    <output name='out_file1' file='rgtestouts/rgLDIndep/rgLDIndeptest1.pbed' ftype='pbed' compare="diff" lines_diff='7'>
-    <extra_files type="file" name='rgLDIndeptest1.bim' value="rgtestouts/rgLDIndep/rgLDIndeptest1.bim" compare="sim_size" delta="1000"/>
-    <extra_files type="file" name='rgLDIndeptest1.fam' value="rgtestouts/rgLDIndep/rgLDIndeptest1.fam" compare="diff" />
-    <extra_files type="file" name='rgLDIndeptest1.bed' value="rgtestouts/rgLDIndep/rgLDIndeptest1.bed" compare="sim_size" delta = "1000" />
-    </output>
- </test>
-</tests>
-<help>
-
-.. class:: infomark
-
-**Attribution**
-
-This tool relies on Plink from Shaun Purcell. For full documentation, please see his web site
-at http://pngu.mgh.harvard.edu/~purcell/plink/ where there is excellent documentation describing
-the parameters you can set here.
-
-Rgenetics merely exposes them, wrapping Plink so you can use it in Galaxy.
-
-**Summary**
-
-In addition to filtering on marker and sample quality measures,
-this tool reduces the amount of overlapping information by removing
-most of the duplicate information carried by markers in linkage disequilibrium. This is
-a lossy process and for some methods, signal may be lost. However, this makes
-the dataset far more compact (eg 10% of the original storage size) while still
-being highly informative and less biased for some (note NOT all!) statistical methods.
-This is the Clean tool with additional data reduction via Plink LD pruning.
-Use the Clean tool if you don't want LD pruning - which you don't for most statistical testing.
-For ancestry and relatedness, you may well want LD pruned data as it has
-some specific desirable properties.
-
-**LD**
-
-Pairwise linkage disequilibrium (LD) measures the extent to which the genotype at one locus
-predicts the state of another locus at the level of an entire population.
-When population LD between a pair of markers is high,
-knowing an individual's genotype at one locus allows confident prediction of the genotype at the other.
-In other words, high LD means information redundancy between markers. For some
-purposes, removing this redundancy can improve the performance of the analysis.
-Executing this tool will create a new genotype dataset in your current history containing
-LD independent markers - most of the genetic information is retained but without as much redundancy.
-
-Set a pairwise LD threshold (eg r^2 = 0.2) and the (smaller) resulting dataset will have no
-pairs of markers with r^2 greater than 0.2. Additional filters are available to remove markers
-below a specific minor allele frequency, or above a specific level of missingness,
-and to remove subjects using similar criteria. Subjects and markers for family data can be
-filtered by proportions of Mendelian errors in observed transmission.
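-
-For orientation, the r^2 used here is the squared Pearson correlation of unphased
-allele dosages (0/1/2) at two markers. A sketch, for illustration only - this is
-not part of the tool's code::
-
-    def genotypic_r2(a, b):
-        # a, b are lists of allele dosage counts (0/1/2) for two SNPs
-        n = float(len(a))
-        ma, mb = sum(a) / n, sum(b) / n
-        cov = sum((x - ma) * (y - mb) for x, y in zip(a, b)) / n
-        va = sum((x - ma) ** 2 for x in a) / n
-        vb = sum((y - mb) ** 2 for y in b) / n
-        return (cov * cov) / (va * vb)  # squared correlation of dosages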
-
------
-
-**Syntax**
-
-- **Genotype data** is the input pedfile chosen from available library files
-- **New name** is the name to use for the filtered output file
-- **Missfrac threshold: subjects** is the threshold for missingness by subject. Subjects with more than this fraction missing will be excluded from the import
-- **Missfrac threshold: markers** is the threshold for missingness by marker. Markers with more than this fraction missing will be excluded from the import
-- **MaxMendel Individuals** Mendel error fraction above which to exclude subjects with more than the specified fraction of mendelian errors in transmission (for family data only)
-- **MaxMendel Families** Mendel error fraction above which to exclude families with more than the specified fraction of mendelian errors in transmission (for family data only)
- **HWE** is the threshold for HWE test p values below which the marker will not be imported. Set this to 0 and all markers will be imported regardless of HWE p value
-- **MAF** is the threshold for minor allele frequency - SNPs with lower MAF will be excluded
-- **r^2** is the pairwise LD threshold as r^2. Lower -> less marker redundancy -> fewer markers
-- **Window** is the window width for LD threshold. Bigger -> slower -> more complete
- **Step** is the distance to move the window along the genome. Should be the window size or less.
-
------
-
-**Disclaimer**
-
-This tool relies on Plink from Shaun Purcell. For full documentation, please see his web site
-at http://pngu.mgh.harvard.edu/~purcell/plink/ where there is excellent documentation describing
-the parameters you can set here. Rgenetics merely exposes them, and wraps Plink so you can use it in Galaxy.
-
-This tool is designed to create genotype data files with more or less LD independent sets of markers. These
-reduced genotype data files are particularly useful for purposes such as evaluating
-ancestry (eg eigenstrat) or relatedness (eg rgGRR).
-
-LD pruning decreases redundancy among the genotype data by removing one of each pair of markers
-in strong LD (above the r^2 threshold) over successive genomic windows (the Window parameter),
-advancing by the Step parameter between windows. The defaults should produce usable outputs.
-
-This might be more efficient for rgGRR and
-eigenstrat... The core quote, from the Plink documentation on pairwise LD pruning, is
-
-    "This generates the same output files as the first version;
-    the only difference is that a simple pairwise threshold is used.
-    The first two parameters (50 and 5) are the same as above (window size and step);
-    the third parameter represents the r^2 threshold.
-    Note: this represents the pairwise SNP-SNP metric now, not the
-    multiple correlation coefficient; also note, this is based on the
-    genotypic correlation, i.e. it does not involve phasing.
-    "
-
------
-
-
-
-This Galaxy tool was written by Ross Lazarus for the Rgenetics project.
-It uses Plink for most calculations, plus some custom python code - for full Plink attribution,
-source code and documentation, please see http://pngu.mgh.harvard.edu/~purcell/plink/
-
-</help>
-</tool>
--- a/tools/rgenetics/rgLDIndep_code.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-from galaxy import app
-import os, string, time
-
-def timenow():
-    """return current time as a string
-    """
-    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
-
-
-
-def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
-    name,data = out_data.items()[0]
-    basename = param_dict['title1']
-    killme = string.punctuation + string.whitespace
-    trantab = string.maketrans(killme,'_'*len(killme))
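-    # every punctuation or whitespace character in the user supplied title is
-    # mapped to '_' so it is safe to reuse as a base_name, eg 'my data!' -> 'my_data_'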
-    title = basename.encode().translate(trantab)
-    info = '%s filtered by rgLDIndep.py at %s' % (title,timenow())
-    # the output file itself is untouched; only metadata, name and info change
-    data.metadata.base_name = title
-    data.name = '%s.pbed' % title
-    data.info = info
-    app.model.context.flush()
-
-
-
--- a/tools/rgenetics/rgManQQ.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,334 +0,0 @@
-#!/usr/local/bin/python
-# This is a truly ghastly hack
-# all of the heavy data cleaning lifting is done in R which is a really dumb place IMHO
-# Making a new file seems a waste but it would be far easier to set everything up in python
-# seems to work so I'm leaving it alone
-# sigh. Should really move this gig to rpy - writing a robust R script is hard.
-# updated to compress pdf using gs since millions of points = horsechoker pdfs and pdfs are good
-# updated july 20 to fix sort order - R unique() sorts into strict collating order
-# so need to sort after unique to revert to lexicographic order for x axis on Manhattan
-# rgmanqq updated july 19 to deal with x,y and mt
-# lots of fixes
-# ross lazarus
-import sys,math,shutil,subprocess,os,time,tempfile,string
-from os.path import abspath
-from rgutils import timenow, RRun, galhtmlprefix, galhtmlpostfix, galhtmlattr
-progname = os.path.split(sys.argv[0])[1]
-myversion = 'V000.1 March 2010'
-verbose = False
-debug = False
-
-rcode="""
-# generalised so 3 core fields passed as parameters ross lazarus March 24 2010 for rgenetics
-# Originally created as qqman with the following 
-# attribution:
-#--------------
-# Stephen Turner
-# http://StephenTurner.us/
-# http://GettingGeneticsDone.blogspot.com/
-
-# Last updated: 19 July 2011 by Ross Lazarus
-# R code for making manhattan plots and QQ plots from plink output files. 
-# With GWAS data this can take a lot of memory. Recommended for use on 
-# 64bit machines only, for now. 
-
-#
-
-library(ggplot2)
-
-coloursTouse = c('firebrick','darkblue','goldenrod','darkgreen')
-# not too ugly but need a colour expert please...
-
-
-DrawManhattan = function(pvals=NULL,chrom=NULL,offset=NULL,title=NULL, max.y="max",suggestiveline=0, genomewide=T, size.x.labels=9, 
-              size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) {
-        if (annotate & is.null(SNPlist)) stop("You requested annotation but provided no SNPlist!")
-        genomewideline=NULL # was genomewideline=-log10(5e-8)
-        n = length(pvals)
-        if (genomewide) { # use bonferroni since might be only a small region?
-            genomewideline = -log10(0.05/n) }
-        offset = as.integer(offset)
-        if (n > 1000000) { offset = offset/10000 }
-        else if (n > 10000) { offset = offset/1000}
-        chro = as.integer(chrom) # already dealt with X and friends?
-        pvals = as.double(pvals)
-        d=data.frame(CHR=chro,BP=offset,P=pvals)
-        if ("CHR" %in% names(d) & "BP" %in% names(d) & "P" %in% names(d) ) {
-                d=d[!is.na(d$P), ]
-                d=d[!is.na(d$BP), ]
-                d=d[!is.na(d$CHR), ]
-                #limit to only chrs 1-22, x=23,y=24,Mt=25?
-                d=d[d$CHR %in% 1:25, ]
-                d=d[d$P>0 & d$P<=1, ]
-                d$logp = as.double(-log10(d$P))
-                dlen = length(d$P)
-                d$pos=NA
-                ticks=NULL
-                lastbase=0
-                chrlist = unique(d$CHR)
-                chrlist = as.integer(chrlist)
-                chrlist = sort(chrlist) # returns lexical ordering 
-                if (max.y=="max") { maxy = ceiling(max(d$logp)) } 
-                   else { maxy = max.y }
-                nchr = length(chrlist) # may be any number?
-                maxy = max(maxy,1.1*genomewideline)
-                if (nchr >= 2) {
-                    for (x in c(1:nchr)) {
-                        i = chrlist[x] # need the chrom number - may not == index
-                        if (x == 1) { # first time
-                            d[d$CHR==i, ]$pos = d[d$CHR==i, ]$BP # initialize to first BP of chr1
-                            dsub = subset(d,CHR==i)
-                            dlen = length(dsub$P)
-                            lastbase = max(dsub$pos) # last one
-                            tks = d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1]
-                            lastchr = i
-                        } else {
-                            d[d$CHR==i, ]$pos = d[d$CHR==i, ]$BP+lastbase # one humongous contig
-                            if (sum(is.na(lastchr),is.na(lastbase),is.na(d[d$CHR==i, ]$pos))) { 
-                                cat(paste('manhattan: For',title,'chrlistx=',i,'lastchr=',lastchr,'lastbase=',lastbase,'pos=',d[d$CHR==i,]$pos))
-                             }   
-                            tks=c(tks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1])
-                            lastchr = i
-                            dsub = subset(d,CHR==i)
-                            lastbase = max(dsub$pos) # last one
-                        }
-                    ticklim=c(min(d$pos),max(d$pos))
-                    xlabs = chrlist
-                    }
-                } else { # nchr is 1
-                   nticks = 10
-                   last = max(d$BP)
-                   first = min(d$BP)
-                   tks = c(first)
-                   t = (last-first)/nticks # units per tick
-                   for (x in c(1:(nticks))) { 
-                        tks = c(tks,round(x*t)+first) }
-                   ticklim = c(first,last)
-                } # else
-                if (grey) {mycols=rep(c("gray10","gray60"),max(d$CHR))
-                           } else {
-                           mycols=rep(coloursTouse,max(d$CHR))
-                           }
-                dlen = length(d$P)
-                d$pranks = rank(d$P)/dlen
-                d$centiles = 100*d$pranks # small are interesting
-                d$sizes = ifelse((d$centiles < 1),2,1) # enlarge the most extreme 1% of points
-                if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ]
-                if (nchr >= 2) {
-                        manplot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR),size=factor(sizes))
-                        manplot=manplot+scale_x_continuous(name="Chromosome", breaks=tks, labels=xlabs,limits=ticklim) 
-                        manplot=manplot+scale_size_manual(values = c(0.5,1.5)) # requires a discrete scale - eg factor
-                        #manplot=manplot+scale_size(values=c(0.5,2)) # requires continuous 
-                        }
-                else {
-                        manplot=qplot(BP,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR))
-                        manplot=manplot+scale_x_continuous(name=paste("Chromosome",chrlist[1]), breaks=tks, labels=tks,limits=ticklim) 
-                     }                 
-                manplot=manplot+scale_y_continuous(limits=c(0,maxy), breaks=1:maxy, labels=1:maxy)
-                manplot=manplot+scale_colour_manual(values=mycols)
-                if (annotate) {  manplot=manplot + geom_point(data=d.annotate, colour=I("green3")) } 
-                manplot=manplot + opts(legend.position = "none") 
-                manplot=manplot + opts(title=title)
-                manplot=manplot+opts(
-                     panel.background=theme_blank(), 
-                     axis.text.x=theme_text(size=size.x.labels, colour="grey50"), 
-                     axis.text.y=theme_text(size=size.y.labels, colour="grey50"), 
-                     axis.ticks=theme_segment(colour=NA)
-                )
-                if (suggestiveline) manplot=manplot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3))
-                if (genomewideline) manplot=manplot+geom_hline(yintercept=genomewideline,colour="red")
-                manplot
-        }       else {
-                stop("Make sure your data frame contains columns CHR, BP, and P")
-        }
-}
-
-
-
-qq = function(pvector, title=NULL, spartan=F) {
-        # Thanks to Daniel Shriner at NHGRI for providing this code for creating expected and observed values
-        o = -log10(sort(pvector,decreasing=F))
-        e = -log10( 1:length(o)/length(o) )
-        # you could use base graphics
-        # plot(e,o,pch=19,cex=0.25, xlab=expression(Expected~~-log[10](italic(p))), 
-        # ylab=expression(Observed~~-log[10](italic(p))), xlim=c(0,max(e)), ylim=c(0,max(e)))
-        # lines(e,e,col="red")
-        #You'll need ggplot2 installed to do the rest
-        qq=qplot(e,o, xlim=c(0,max(e)), ylim=c(0,max(o))) + stat_abline(intercept=0,slope=1, col="red")
-        qq=qq+opts(title=title)
-        qq=qq+scale_x_continuous(name=expression(Expected~~-log[10](italic(p))))
-        qq=qq+scale_y_continuous(name=expression(Observed~~-log[10](italic(p))))
-        if (spartan) qq=qq+opts(panel.background=theme_rect(col="grey50"), panel.grid.minor=theme_blank())
-        qq
-}
-
-"""
-
-# we need another string to avoid confusion over string substitutions with %in%
-# instantiate rcode2 string with infile,chromcol,offsetcol,pvalscols,title before saving and running
-
-rcode2 = """rgqqMan = function(infile="%s",chromcolumn=%d, offsetcolumn=%d, pvalscolumns=c(%s), 
-title="%s",grey=%d) {
-rawd = read.table(infile,head=T,sep='\\t')
-dn = names(rawd)
-cc = dn[chromcolumn]
-oc = dn[offsetcolumn] 
-rawd[,cc] = sub('chr','',rawd[,cc],ignore.case = T) # just in case
-rawd[,cc] = sub(':','',rawd[,cc],ignore.case = T) # ugh
-rawd[,cc] = sub('X',23,rawd[,cc],ignore.case = T)
-rawd[,cc] = sub('Y',24,rawd[,cc],ignore.case = T)
-rawd[,cc] = sub('Mt',25,rawd[,cc], ignore.case = T)
-nams = c(cc,oc) # for sorting
-plen = length(rawd[,1])
-print(paste('###',plen,'values read from',infile,'- now running plots',sep=' '))
-rawd = rawd[do.call(order,rawd[nams]),]
-# mmmf - suggested by http://onertipaday.blogspot.com/2007/08/sortingordering-dataframe-according.html
-# in case not yet ordered
-if (plen > 0) {
-  for (pvalscolumn in pvalscolumns) {
-  if (pvalscolumn > 0) 
-     {
-     cname = names(rawd)[pvalscolumn]
-     mytitle = paste('p=',cname,', ',title,sep='')
-     myfname = chartr(' ','_',cname)
-     myqqplot = qq(rawd[,pvalscolumn],title=mytitle)
-     ggsave(filename=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=8,height=6,dpi=96)
-     ggsave(filename=paste(myfname,"qqplot.pdf",sep='_'),myqqplot,width=8,height=6,dpi=96)
-     print(paste('## qqplot on',cname,'done'))
-     if ((chromcolumn > 0) & (offsetcolumn > 0)) {
-         print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn))
-         mymanplot= DrawManhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey)
-         ggsave(filename=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=8,height=6,dpi=96)
-         ggsave(filename=paste(myfname,"manhattan.pdf",sep='_'),mymanplot,width=8,height=6,dpi=96)
-         print(paste('## manhattan plot on',cname,'done'))
-         }
-         else {
-              print(paste('chrom column =',chromcolumn,'offset column = ',offsetcolumn,
-              'so no Manhattan plot - supply both chromosome and offset as numerics for Manhattan plots if required'))
-              } 
-     } 
-  else {
-        print(paste('pvalue column =',pvalscolumn,'Cannot parse it so no plots possible'))
-      }
-  } # for pvalscolumn
- } else { print('## Problem - no values available to plot - was there really a chromosome and offset column?') }
-}
-
-rgqqMan() 
-# execute with defaults as substituted
-"""
-
-
-def doManQQ(input_fname,chrom_col,offset_col,pval_cols,title,grey,ctitle,outdir,beTidy=False):
-    """ 
-    we may have an interval file or a tabular file - if interval, will have chr1... so need to adjust
-    to chrom numbers
-    draw a qq for pvals and a manhattan plot if chrom/offset <> 0
-    contains some R scripts as text strings - we substitute defaults into the calls
-    to make them do our bidding - and save the resulting code for posterity
-    this can be called externally, I guess...for QC eg?
-    """
-    if debug:
-        print 'doManQQ',input_fname,chrom_col,offset_col,pval_cols,title,grey,ctitle,outdir
-    rcmd = '%s%s' % (rcode,rcode2 % (input_fname,chrom_col,offset_col,pval_cols,title,grey))
-    if debug:
-        print 'running\n%s\n' % rcmd
-    rlog,flist = RRun(rcmd=rcmd,title=ctitle,outdir=outdir)
-    rlog.append('## R script=')
-    rlog.append(rcmd)
-    return rlog,flist
-  
-def compressPDF(inpdf=None):
-    """need absolute path to pdf
-    """
-    assert os.path.isfile(inpdf), "## Input %s supplied to compressPDF not found" % inpdf
-    outpdf = '%s_compressed' % inpdf
-    cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf,inpdf]
-    retval = subprocess.call(cl)
-    if retval == 0:    
-        os.unlink(inpdf) 
-        shutil.move(outpdf,inpdf)
-    return retval
-
-def main():
-    u = """<command interpreter="python">
-        rgManQQ.py '$input_file' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col' '$pval_col' '$grey'
-    </command>
-    """
-    npar = 9 # script name plus the 8 arguments shown above - grey is sys.argv[8]
-    if len(sys.argv) < npar:
-            print >> sys.stdout, '## error - too few command line parameters - wanting %d' % npar
-            print >> sys.stdout, u
-            sys.exit(1)
-    input_fname = sys.argv[1]
-    title = sys.argv[2]
-    killme = string.punctuation + string.whitespace
-    trantab = string.maketrans(killme,'_'*len(killme))
-    ctitle = title.translate(trantab)
-    outhtml = sys.argv[3]
-    outdir = sys.argv[4]
-    try:
-         chrom_col = int(sys.argv[5])
-    except:
-         chrom_col = -1
-    try:
-        offset_col = int(sys.argv[6])
-    except:
-        offset_col = -1
-    p = sys.argv[7].strip().split(',')
-    try:
-        [int(x) for x in p] # check every p value column index parses as an integer
-    except:
-        p = -1
-    if chrom_col == -1 or offset_col == -1: # was passed as zero - do not do manhattan plots
-        chrom_col = -1
-        offset_col = -1
-    grey = 0
-    if (sys.argv[8].lower() in ['1','true']):
-       grey = 1
-    if p == -1:
-        print >> sys.stderr,'## Cannot run rgManQQ - missing pval column'
-        sys.exit(1)
-    p = ['%d' % (int(x) + 1) for x in p]
-    rlog,flist = doManQQ(input_fname,chrom_col+1,offset_col+1,','.join(p),title,grey,ctitle,outdir)
-    flist.sort()
-    html = [galhtmlprefix % progname,]
-    html.append('<h1>%s</h1>' % title)
-    if len(flist) > 0:
-        html.append('<table>\n')
-        for row in flist:
-            fname,expl = row # RRun returns pairs of filenames fiddled for the log and R script
-            n,e = os.path.splitext(fname)
-            if e in ['.png','.jpg']:
-                pdf = '%s.pdf' % n
-                pdff = os.path.join(outdir,pdf)
-                if os.path.exists(pdff):
-                    rval = compressPDF(inpdf=pdff)
-                    if rval != 0:
-                        pdf = '%s(not_compressed)' % pdf
-                else:
-                    pdf = '%s(not_found)' % pdf
-                s= '<tr><td><a href="%s"><img src="%s" title="%s" hspace="10" width="800"></a></td></tr>' \
-                 % (pdf,fname,expl)
-                html.append(s)
-            else:
-               html.append('<tr><td><a href="%s">%s</a></td></tr>' % (fname,expl))
-        html.append('</table>\n')
-    else:
-        html.append('<h2>### Error - R returned no files - please confirm that parameters are sane</h2>')
-    html.append('<h3>R log follows below</h3><hr><pre>\n')
-    html += rlog
-    html.append('</pre>\n')   
-    html.append(galhtmlattr % (progname,timenow()))
-    html.append(galhtmlpostfix)
-    htmlf = file(outhtml,'w')
-    htmlf.write('\n'.join(html))
-    htmlf.write('\n')
-    htmlf.close()
-    
-  
-
-if __name__ == "__main__":
-    main()
-
-
--- a/tools/rgenetics/rgManQQ.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,119 +0,0 @@
-<tool id="rgManQQ1" name="Manhattan/QQ:" version="1.0.3">
-    <code file="rgManQQ_code.py"/>
-
-    <description>Plots for WGA P values</description>
-
-    <command interpreter="python">
-        rgManQQ.py '$i' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col' '$pval_col' '$grey'
-    </command>
-
-    <inputs>
-      <page>
-      <param name="i"  type="data" label="Tabular data from your current history"
-      format="tabular" refresh_on_change="true"/>
-      </page>
-      <page>
-       <param name='name' type='text' size="132" value='Manhattan and QQ plots' label="Title for this job"/>
-       <param name="pval_col" type='select' size="5" label = 'P value (0-1) column in input file'  
-        dynamic_options="get_phecols(i,False,'pval')" refresh_on_change="true" multiple="true" 
-        help="(Select multiple P value columns for multiple plots holding down the [Ctrl] key as you click)" />
-       <param name="chrom_col" type='select' label = 'Chromosome column in input file'
-        help='Select "None" if chromosome not available or no Manhattan plot required'
-        dynamic_options="get_phecols(i,True,'chr')" />
-       <param name="offset_col" type='select' label = 'Base pair offset column in input file'
-        help='Select "None" if offset not available or no Manhattan plot required'
-        dynamic_options="get_phecols(i,True,'offs')" />
-       <param name="grey" type="boolean" checked="false" truevalue="true" falsevalue="false" 
-        label="Grey scale for Manhattan plot (default is colour"/> 
-       </page>
-    </inputs>
-    
-   <outputs>
-       <data format="html" name="out_html" />
-   </outputs>
-   <options refresh="True"/>
-
-<tests>
- <test>
- <param name='i' value='smallwgaP.xls' ftype='tabular' >
- </param>
- <param name='name' value='rgManQQtest1' />
- <param name='pval_col' value='7' />
- <param name='chrom_col' value='1' />
- <param name='offset_col' value='2' />
- <param name='grey' value='0' />
- <output name='out_html' file='rgtestouts/rgManQQ/rgManQQtest1.html' ftype='html' lines_diff='60'>
-   <extra_files type="file" name='Allelep_manhattan.png' value='rgtestouts/rgManQQ/Allelep_manhattan.png' compare="sim_size" 
-     delta = "20000"/>
-   <extra_files type="file" name='Allelep_qqplot.png' value='rgtestouts/rgManQQ/Allelep_qqplot.png' compare="sim_size"
-     delta = "20000" />
-   <extra_files type="file" name='rgManQQtest1.R' value='rgtestouts/rgManQQ/rgManQQtest1.R' compare="diff" lines_diff="160"/>
- </output>
- </test>
-</tests>
-<help>
-
-.. class:: infomark
-
-**Syntax**
-
- **Tabular Data** is a tab delimited file with a header row, containing chromosome, offset and p values to be plotted
-- **Chromosome Column** is the column in that data containing the chromosome as an integer
-- **Offset Column** contains the offset within the chromosome
-- **P Value Column** contains the (untransformed) p values at that locus - choose multiple columns if needed
-
-NOTE - plotting millions of p values may take tens of minutes depending on
-how busy the server is - please be patient.
-
------
-
-.. class:: infomark
-
-**Summary**
-
-This tool will create a qq plot and a Manhattan plot for one or more GWA P value columns from a tabular
-dataset. For Manhattan plots, the data must include the chromosome (eg use 23,24,25 for x,y,mt...) and
-offset. Many analysis files contain the required fields but even without chromosome and offset, a qq plot 
-can be created.
-
------
-
-.. class:: infomark
-
-**Explanation**
-
-A "Manhattan" plot shows -log10 p values ordered by offset and by chromosome. Regions with interestingly
-improbable p values are above the red line which is drawn at the Bonferroni FWER control level (0.05/n 
-where n is the number of tests - this is highly conservative for the correlated SNPs typical of GWA).
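-
-For example, with n = 500,000 tests the red line sits at -log10(0.05/500000) = 7.0, so
-only points above 7 reach Bonferroni significance. The arithmetic, as a sketch::
-
-    import math
-    n = 500000
-    genomewideline = -math.log10(0.05 / n)  # 7.0 - mirrors the tool's R code above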
-
-.. image:: ./static/images/Armitagep_manhattan.png
-
-A quantile-quantile (QQ) plot is a good way to see systematic departures from the null expectation of 
-uniform p-values from a genomic analysis. If the QQ plot shows departure from the null (ie a uniform 0-1 
-distribution), you hope that this will be in the very smallest p-values suggesting that there might be some 
-interesting results to look at. A log scale helps make departures from the null at low p values
-easier to see.
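-
-The expected values are uniform order statistics, mirroring the qq() function in the
-tool's R code. A python sketch, assuming pvals is a list of p values::
-
-    import math
-    obs = sorted(pvals)                                       # ascending p values
-    o = [-math.log10(p) for p in obs]                         # observed -log10(p)
-    n = float(len(obs))
-    e = [-math.log10(i / n) for i in range(1, len(obs) + 1)]  # expected under the null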
-
-.. image:: ./static/images/Armitagep_qqplot.png
-
------
-
-.. class:: infomark
-
-**Attribution**
-
-This is a Galaxy tool written by Ross Lazarus. It relies on
-ggplot2, an R package from Hadley Wickham, and some
-R code for Manhattan and qq plots using ggplot2,
-borrowed from Stephen Turner at http://GettingGeneticsDone.blogspot.com/
-
-copyright Ross Lazarus 2010
-Licensed under the terms of the LGPL as documented at http://www.gnu.org/licenses/lgpl.html
-but is about as useful as a chocolate teapot without R and Galaxy which all have a
-twisty maze of little licenses, all different.
-
-I'm no lawyer, but it looks like at least LGPL if you create derived works from this code. 
-Good luck.
-
-</help>
-</tool>
--- a/tools/rgenetics/rgManQQ_code.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-from galaxy import datatypes,model
-import sys,string,time
-
-
-def timenow():
-    """return current time as a string
-    """
-    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
-
-
-def get_phecols(i,addNone,hint):
-   """ 
-   return a list of phenotype columns for a multi-select list
-   """
-   hint = hint.lower()
-   fname = i.dataset.file_name
-   try:
-        f = open(fname,'r')
-   except:
-        return [('get_phecols unable to open file "%s"' % fname,'None',False),]
-   header = f.next()
-   h = header.strip().split()
-   dat = [(x,'%d' % n,False) for n,x in enumerate(h)] # avoid shadowing the dataset parameter i
-   matches = [n for n,x in enumerate(h) if x.lower().find(hint) != -1]
-   if len(matches) > 0:
-       sel = matches[0]
-       dat[sel] = (dat[sel][0],dat[sel][1],True)
-   if addNone:
-        dat.insert(0,('None - no Manhattan plot','0', False ))
-   return dat
-
-
-def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
-    """Sets the name of the data
-       <outputs>
-       <data format="pdf" name="allqq" />
-       <data format="pdf" name="lowqq" parent="allqq"/>
-    </outputs>
-    """
-    outfile = 'out_html'
-    job_name = param_dict.get( 'name', 'Manhattan QQ plots' )
-    killme = string.punctuation + string.whitespace
-    trantab = string.maketrans(killme,'_'*len(killme))
-    newname = '%s.html' % job_name.translate(trantab)
-    data = out_data[outfile]
-    data.name = newname
-    data.info='%s run at %s' % (job_name,timenow())
-    out_data[outfile] = data
-    app.model.context.flush()
-
--- a/tools/rgenetics/rgPedSub.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,305 +0,0 @@
-"""
-July 1 2009 added relatedness filter - fo/oo or all
-released under the terms of the LGPL
-copyright ross lazarus August 2007 
-for the rgenetics project
-
-Special galaxy tool for the camp2007 data
-Allows grabbing genotypes from an arbitrary region
-
-Needs a mongo results file in the location hardwired below or could be passed in as
-a library parameter - but this file must have a very specific structure
-rs chrom offset float1...floatn
-
-called as
-
-    <command interpreter="python2.4">
-        campRGeno2.py $region "$rslist" "$title" $output1 $log_file $userId "$lpedIn" "$lhistIn"
-    </command>
-
-
-"""
-
-
-import sys, array, os, string
-from rgutils import galhtmlprefix,plinke,readMap
-
-progname = os.path.split(sys.argv[0])[1]
-
-
-atrandic = {'A':'1','C':'2','G':'3','T':'4','N':'0','-':'0','1':'1','2':'2','3':'3','4':'4','0':'0'}
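-# allele recoding used when writing ped files: letter alleles to plink style
-# numeric alleles (A/C/G/T -> 1/2/3/4) with N and '-' treated as missing ('0')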
-
-def doImport(outfile='test',flist=[]):
-    """ import into one of the new html composite data types for Rgenetics
-        Dan Blankenberg with mods by Ross Lazarus 
-        October 2007
-    """
-    out = open(outfile,'w')
-    out.write(galhtmlprefix % progname)
-
-    if len(flist) > 0:
-        out.write('<ol>\n')
-        for i, data in enumerate( flist ):
-           out.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1]))
-        out.write('</ol>\n')
-    else:
-           out.write('No files found')
-    out.write("</div></body></html>")
-    out.close()
-
-def setupPedFilter(relfilter='oo',dfile=None):
-    """ figure out who to drop to satisfy relative filtering
-    note single offspring only from each family id
-    ordering of pdict keys makes this 'random' as the first one only is
-    kept if there are multiple sibs from same familyid.
-    """
-    dropId = {}
-    keepoff = (relfilter == 'oo')
-    keepfounder = (relfilter == 'fo')
-    pdict = {}
-    for row in dfile:
-        rowl = row.strip().split()
-        if len(rowl) > 6:
-            idk = (rowl[0],rowl[1])
-            pa =  (rowl[0],rowl[2]) # key for father
-            ma = (rowl[0],rowl[3]) # and mother
-            pdict[idk] = (pa,ma)
-    dfile.seek(0) # rewind
-    pk = pdict.keys()
-    for p in pk:
-        parents = pdict[p]
-        if pdict.get(parents[0],None) or pdict.get(parents[1],None): # parents are in this file
-            if keepfounder:
-                dropId[p] = 1 # flag for removal
-        elif keepoff:
-            dropId[p] = 1 # flag for removal 
-    if keepoff: # TODO keep only a random offspring if many - rely on pdict keys being randomly ordered...!   
-        famseen = {}
-        for p in pk: # look for multiples from same family - drop all but first
-             famid = p[0]
-             if famseen.get(famid,None):
-                 dropId[p] = 1 # already got one from this family
-             famseen.setdefault(famid,1)
-    return dropId
-   
-def writeFped(rslist=[],outdir=None,title='Title',basename='',dfile=None,wewant=[],dropId={},outfile=None,logfile=None):
-    """ fbat format version
-    """
-    outname = os.path.join(outdir,basename)
-    pedfname = '%s.ped' % outname
-    ofile = file(pedfname, 'w')
-    rsl = ' '.join(rslist) # rslist for fbat
-    ofile.write(rsl)
-    s = 'wrote %d marker header to %s - %s\n' % (len(rslist),pedfname,rsl[:50])
-    lf = open(logfile,'a') # the main log is closed by the caller before we run - append
-    lf.write(s)
-    lf.close()
-    ofile.write('\n')
-    nrows = 0
-    for line in dfile:
-        line = line.strip()
-        if not line:
-            continue
-        line = line.replace('D','N')
-        fields = line.split()
-        preamble = fields[:6]
-        idk = (preamble[0],preamble[1])
-        dropme = dropId.get(idk,None)
-        if not dropme:
-            g = ['%s %s' % (fields[snpcol], fields[snpcol+1]) for snpcol in wewant]
-            g = ' '.join(g)
-            g = g.split() # we'll get there
-            g = [atrandic.get(x,'0') for x in g] # numeric alleles...
-            # hack for framingham ND
-            ofile.write('%s %s\n' % (' '.join(preamble), ' '.join(g)))
-            nrows += 1
-    ofile.close()
-    doImport(outfile,[pedfname,logfile]) # link the ped file and the log in the html
-    return nrows,pedfname
-
-def writePed(markers=[],outdir=None,title='Title',basename='',dfile=None,wewant=[],dropId={},outfile=None,logfile=None):
-    """ split out
-    """
-    outname = os.path.join(outdir,basename)
-    mapfname = '%s.map' % outname
-    pedfname = '%s.ped' % outname
-    ofile = file(pedfname, 'w')
-    # make a map file in the lped library
-    mf = file(mapfname,'w')
-    map = ['%s\t%s\t0\t%d' % (x[0],x[2],x[1]) for x in markers] # chrom,abspos,snp in genomic order
-    mf.write('%s\n' % '\n'.join(map))
-    mf.close()
-    nrows = 0
-    for line in dfile:
-        line = line.strip()
-        if not line:
-            continue
-        #line = line.replace('D','N')
-        fields = line.split()
-        preamble = fields[:6]
-        idk = (preamble[0],preamble[1])
-        dropme = dropId.get(idk,None)
-        if not dropme:
-            g = ['%s %s' % (fields[snpcol], fields[snpcol+1]) for snpcol in wewant]
-            g = ' '.join(g)
-            g = g.split() # we'll get there
-            g = [atrandic.get(x,'0') for x in g] # numeric alleles...
-            # hack for framingham ND
-            ofile.write('%s %s\n' % (' '.join(preamble), ' '.join(g)))
-            nrows += 1
-    ofile.close()
-    doImport(outfile,[mapfname,pedfname,logfile]) # link the map, ped and log files in the html
-    return nrows,pedfname
-    
-def subset():
-    """  ### Sanity check the arguments
-    now passed in as 
-    <command interpreter="python">
-        rgPedSub.py $script_file
-    </command>
-
-    with
-    <configfiles>
-    <configfile name="script_file">
-    title~~~~$title
-    output1~~~~$output1
-    log_file~~~~$log_file
-    userId~~~~$userId
-    outformat~~~~$outformat
-    outdir~~~~$output1.extra_files_path
-    relfilter~~~~$relfilter
-    #if $d.source=='library'
-    inped~~~~$d.lpedIn
-    #else 
-    inped~~~~$d.lhistIn.extra_files_path/$d.lhistIn.metadata.base_name
-    #end if
-    #if $m.mtype=='grslist'
-    rslist~~~~$m.rslist
-    region~~~~	
-    #else 
-    rslist~~~~	
-    region~~~~$m.region
-    #end if
-    </configfile>
-    </configfiles>    
-    """
-    sep = '~~~~' # arbitrary choice
-    conf = {}
-    if len(sys.argv) < 2:
-        print >> sys.stderr, "No configuration file passed as a parameter - cannot run"
-        sys.exit(1)
-    configf = sys.argv[1]
-    config = file(configf,'r').readlines()
-    for row in config:
-        row = row.strip()
-        if len(row) > 0:
-            try:
-                key,valu = row.split(sep)
-                conf[key] = valu
-            except:
-                pass
-    ss = '%s%s' % (string.punctuation,string.whitespace)
-    ptran =  string.maketrans(ss,'_'*len(ss))
-    ### Figure out what genomic region we are interested in
-    region = conf.get('region','')
-    orslist = conf.get('rslist','').replace('X',' ').lower()
-    orslist = orslist.replace(',',' ').lower()
-    # galaxy replaces newlines with XX - go figure
-    title = conf.get('title','').translate(ptran) # for outputs
-    outfile = conf.get('output1','')
-    outdir = conf.get('outdir','')
-    try:
-        os.makedirs(outdir)
-    except:
-        pass
-    outformat = conf.get('outformat','lped')
-    basename = conf.get('basename',title)
-    logfile = os.path.join(outdir,'%s.log' % title) 
-    userId = conf.get('userId','') # for library
-    pedFileBase = conf.get('inped','')
-    relfilter = conf.get('relfilter','')
-    MAP_FILE = '%s.map' % pedFileBase
-    DATA_FILE = '%s.ped' % pedFileBase    
-    title = conf.get('title','lped subset')
-    lf = file(logfile,'w')
-    lf.write('config file %s = \n' % configf)
-    lf.write(''.join(config))
-    c = ''
-    spos = epos = 0
-    rslist = []
-    rsdict = {}
-    if region > '':
-        try: # TODO make a regexp?
-            c,rest = region.split(':')
-            c = c.replace('chr','')
-            rest = rest.replace(',','') # remove commas
-            spos,epos = rest.split('-')
-            spos = int(spos)
-            epos = int(epos)
-            s = '## %s parsing chrom %s from %d to %d\n' % (progname,c,spos,epos)
-            lf.write(s)
-        except:
-            s = '##! %s unable to parse region %s - MUST look like "chr8:10,000-100,000"\n' % (progname,region)
-            lf.write(s)
-            lf.close()
-            sys.exit(1)
-    else:
-        rslist = orslist.split() # galaxy replaces newlines with XX - go figure
-        rsdict = dict(zip(rslist,rslist))
-    allmarkers = False
-    if len(rslist) == 0 and epos == 0: # must be a full extract - presumably remove relateds or something
-        allmarkers = True
-    ### Figure out which markers are in this region
-    markers,snpcols,rslist,rsdict = readMap(mapfile=MAP_FILE,allmarkers=allmarkers,rsdict=rsdict,c=c,spos=spos,epos=epos)
-    if len(rslist) == 0:
-            s = '##! %s found no rs numbers in %s\n' % (progname,sys.argv[1:3])
-            lf.write(s)
-            lf.write('\n')
-            lf.close()
-            sys.exit(1)
-    s = '## %s looking for %d rs (%s....etc)\n' % (progname,len(rslist),rslist[:5])
-    lf.write(s)
-    try:
-        dfile = open(DATA_FILE, 'r')
-    except: # bad input file name?
-        s = '##! rgPedSub unable to open file %s\n' % (DATA_FILE)
-        lf.write(s)
-        lf.write('\n')
-        lf.close()
-        print >> sys.stdout, s
-        sys.exit(1)
-    if relfilter != 'all': # must read pedigree and figure out who to drop
-        dropId = setupPedFilter(relfilter=relfilter,dfile=dfile)
-    else:
-        dropId = {}
-    wewant = [(6+(2*snpcols[x])) for x in rslist]
-    # column indices of the first genotype of each marker pair, in genomic order
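-    # ped layout: 6 preamble columns (famId iId paId maId sex phe) then two
-    # allele columns per marker, so marker k's first allele is column 6 + 2k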
-    ### ... and then parse the rest of the ped file to pull out
-    ### the genotypes for all subjects for those markers
-    # /usr/local/galaxy/data/rg/1/lped/
-    if len(dropId.keys()) > 0:
-        s = '## dropped the following subjects to satisfy requirement that relfilter = %s\n' % relfilter
-        lf.write(s)
-        if relfilter == 'oo':
-            s = '## note that one random offspring from each family was kept if there were multiple offspring\n'
-            lf.write(s)
-        s = 'FamilyId\tSubjectId\n'
-        lf.write(s)
-        dk = dropId.keys()
-        dk.sort()
-        for k in dk:
-            s = '%s\t%s\n' % (k[0],k[1])
-            lf.write(s)
-    lf.write('\n')
-    lf.close()
-    if outformat == 'lped':
-        nrows,pedfname=writePed(markers=markers,outdir=outdir,title=title,basename=basename,dfile=dfile,
-                 wewant=wewant,dropId=dropId,outfile=outfile,logfile=logfile)
-    elif outformat == 'fped':
-        nrows,pedfname=writeFped(rslist=rslist,outdir=outdir,title=title,basename=basename,dfile=dfile,
-                  wewant=wewant,dropId=dropId,outfile=outfile,logfile=logfile)
-    dfile.close()    
-
-if __name__ == "__main__":
-    subset()
--- a/tools/rgenetics/rgPedSub.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,143 +0,0 @@
-<tool id="rgPedSub1" name="Subset markers:">
-
-    <description>region or rs list</description>
-
-    <command interpreter="python">
-        rgPedSub.py $script_file
-    </command>
-
-    <inputs>
-        <page>
-       <param name="title" type="text" size="80" label="Title for output files"
-        help="Descriptive title for new genotype/map files" value="Genotype_Subset" />
-       <param name="input1" type="data" format="lped"
-   		label="Current history lPed format data" optional="false"
-    	size="120" help="Choose a Linkage Ped format data from your current history" />
-       <param name='relfilter' label = "Filter out family relatedness" type="select"
-   	     optional="false" size="132"
-         help="Optionally remove related subjects if pedigree identifies founders and their offspring">
-         <option value="all" selected='true'>No filter on relatedness - all subjects passed through</option>
-         <option value="fo" >Founders only (pedigree mother and father ID = "0")</option>
-         <option value="oo" >Offspring only (one randomly chosen if >1 sibs in family)</option>
-   		</param>
-
-        </page><page>
-       <conditional name="m">
-	        <param name="mtype" type="select"  label="Markers in a genomic interval,or as an rs list?" refresh_on_change='true'
-	        help="Indicate the markers to be saved - as a list or as genomic region coordinates">
-	          <option value="grslist" >Cut and paste a list of marker ids as rs numbers</option>
-	          <option value="gregion" selected='true'>Supply genomic coordinates for a region (as UCSC location)</option>
-	        </param>
-	        <when value="gregion">
-	         <param name="region" type="text" label="Genomic refseq coordinates - chromosome:start-end"
-        		size="120" help="Region to be saved as chr9:119,506,000-119,518,000"/>
-	       	  <param name="rslist" type="hidden" value='' />
-	        </when>
-	        <when value="grslist">
-	          <param name="region" value="" type="hidden"/>
-	       	    <param name="rslist" type="text" area='true' size='15x20' label="marker id (rs) list"
-       			help="Cut and paste, or type a list of marker ids separated by spaces"  />
-	        </when>
-        </conditional>
-        </page>
-   </inputs>
-
-   <outputs>
-       <data format="lped" name="output1" metadata_source="input1" label="${title}.lped"/>
-   </outputs>
-
-<configfiles>
-<configfile name="script_file">
-title~~~~$title
-output1~~~~$output1
-userId~~~~$userId
-outformat~~~~lped
-basename~~~~$input1.metadata.base_name
-inped~~~~$input1.extra_files_path/$input1.metadata.base_name
-outdir~~~~$output1.files_path
-relfilter~~~~$relfilter
-#if $m.mtype=='grslist'
-rslist~~~~$m.rslist
-region~~~~
-#else
-rslist~~~~
-region~~~~$m.region
-#end if
-</configfile>
-</configfiles>
-
-<tests>
- <test>
-    <param name='input1' value='tinywga' ftype='lped' >
-    <metadata name='base_name' value='tinywga' />
-    <composite_data value='tinywga.ped' />
-    <composite_data value='tinywga.map' />
-    <edit_attributes type='name' value='tinywga' /> 
-    </param>
-    <param name='title' value='rgPedSubtest1' />
-    <param name="mtype" value="grslist" />
-    <param name="region" value="" />
-    <param name="rslist" value="rs2283802Xrs2267000Xrs16997606Xrs4820537Xrs3788347Xrs756632Xrs4820539Xrs2283804Xrs2267006Xrs4822363X" />
-    <param name="relfilter" value="all" />
-    <output name='output1' file='rgtestouts/rgPedSub/rgPedSubtest1.lped' ftype='lped' lines_diff='7'/>
- </test>
-</tests>
-
-<help>
-
-.. class:: infomark
-
-**Note**
-
-There are 2 pages of settings to complete before the job is ready to be run
-
-  **Page 1**
-
-     Give the job a mnemonic, descriptive title.
-
-     Choose a file containing genotypes and a pedigree from your current history
-
-     The input file must be in linkage ped format.
-
-     If the data are not yet in your history, import from one of the system libraries or upload from your computer using the get data tool
-
-  **Page 2**
-
-     Define the markers to be used. You can supply a UCSC style location as chr:start_offset-end_offset
-
-     or a list of marker ids - rs numbers. You can flip between marker input styles by changing the select box.
-
-     If you supply a list, the markers must all be from the same chromosome or region for sensible results.
-
-Run the job and the subset file will eventually appear in your history ready to be used with other tools.
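-
-The region string is parsed into chromosome, start and end after stripping commas,
-along these lines (a sketch of the parsing done in rgPedSub.py)::
-
-    region = 'chr9:119,506,000-119,518,000'
-    c, rest = region.split(':')
-    c = c.replace('chr', '')                       # '9'
-    spos, epos = rest.replace(',', '').split('-')
-    spos, epos = int(spos), int(epos)              # 119506000, 119518000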
-
------
-
-**Syntax**
-
-- **Library Linkage Ped** is a linkage format pedigree file chosen from the system file Library
-- **History Linkage Ped** is a linkage format pedigree file chosen from your current Galaxy History
-- **Region** is the genomic region cut and paste from a UCSC browser location window
-- **Genome Build** is the version of the genome your markers are from - use hg18 for CAMP illumina data
-
------
-
-.. class:: infomark
-
-**Summary**
-
-This tool is a special purpose tool to extract genotypes from genotype data in linkage
-pedigree format (separate map file) over a specified genomic region.
-The region to be extracted can be described as a UCSC browser location, or as a list of
-markers.
-
-It is possible to retain ALL markers by leaving the rslist and region empty - if, for example,
-you just want to remove all offspring from a pedigree.
-
-The extracted data will appear in your current history as a new lped data set.
-
-Copyright, Ross Lazarus, March 2008 for the Rgenetics project
-Released under the LGPL. See http://www.gnu.org/licenses/lgpl.html for license terms.
-
-</help>
-</tool>
--- a/tools/rgenetics/rgQC.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1354 +0,0 @@
-# oct 15 rpy replaced - temp fix until we get gnuplot working 
-# rpy deprecated - replace with RRun
-# fixes to run functional test! oct1 2009
-# needed to expand our path with os.path.realpath to get newpath working with
-# all the fancy pdfnup stuff
-# and a fix to pruneld to write output to where it should be
-# smallish data in test-data/smallwga in various forms
-# python ../tools/rgenetics/rgQC.py -i smallwga -o smallwga -s smallwga/smallwga.html -p smallwga
-# child files are deprecated and broken as at july 19 2009
-# need to move them to the html file extrafiles path
-# found lots of corner cases with some illumina data where cnv markers were
-# included
-# and where affection status was all missing !
-# added links to tab files showing worst 1/keepfrac markers and subjects
-# ross lazarus january 2008
-#
-# added named parameters
-# to ensure no silly slippages if non required parameter in the most general case
-# some potentially useful things here reusable in complex scripts
-# with lots'o'html (TM)
-# aug 17 2007 rml
-#
-# added marker and subject and parenting april 14 rml
-# took a while to get the absolute paths right for all the file munging
-# as of april 16 seems to work..
-# getting galaxy to serve images in html reports is a little tricky
-# we don't want QC reports to be dozens of individual files, so need
-# to use the url /static/rg/... since galaxy's web server will happily serve images
-# from there
-# galaxy passes output files as relative paths
-# these have to be munged by rgQC.py before calling this
-# galaxy will pass in 2 file names - one for the log
-# and one for the final html report
-# of the form './database/files/dataset_66.dat'
-# we need to be working in that directory so our plink output files are there
-# so these have to be munged by rgQC.py before calling this
-# note no ped file passed so had to remove the -l option
-# for plinkParse.py that makes a heterozygosity report from the ped
-# file - needs fixing...
-# new: importing manhattan/qqplot plotter
-# def doManQQ(input_fname,chrom_col,offset_col,pval_cols,title,grey,ctitle,outdir):
-#    """ draw a qq for pvals and a manhattan plot if chrom/offset <> 0
-#    contains some R scripts as text strings - we substitute defaults into the calls
-#    to make them do our bidding - and save the resulting code for posterity
-#    this can be called externally, I guess...for QC eg?
-#    """
-#
-#    rcmd = '%s%s' % (rcode,rcode2 % (input_fname,chrom_col,offset_col,pval_cols,title,grey))
-#    rlog,flist = RRun(rcmd=rcmd,title=ctitle,outdir=outdir)
-#    return rlog,flist
-  
-
-from optparse import OptionParser
-
-import sys,os,shutil, glob, math, subprocess, time, operator, random, tempfile, copy, string
-from os.path import abspath
-from rgutils import galhtmlprefix, galhtmlpostfix, RRun, timenow, plinke, rexe, runPlink, pruneLD
-import rgManQQ
-
-prog = os.path.split(sys.argv[0])[1]
-vers = '0.4 april 2009 rml'
-idjoiner = '_~_~_' # need something improbable..
-# many of these may need fixing for a new install
-
-myversion = vers
-keepfrac = 20 # fraction to keep after sorting by each interesting value
-
-missvals = {'0':'0','N':'N','-9':'-9','-':'-'} # fix me if these change!
-
-mogresize = "x300" # this controls the width for jpeg thumbnails
-
-
-
-            
-def makePlots(markers=[],subjects=[],newfpath='.',basename='test',nbreaks='20',nup=3,height=10,width=8,rgbin=''):
-    """
-    marker rhead = ['snp','chrom','maf','a1','a2','missfrac',
-    'p_hwe_all','logp_hwe_all','p_hwe_unaff','logp_hwe_unaff','N_Mendel']
-    subject rhead = ['famId','iId','FracMiss','Mendel_errors','Ped_sex','SNP_sex','Status','Fest']
-    """
-
-        
-    def rHist(plotme=[],outfname='',xlabname='',title='',basename='',nbreaks=50):
-        """   rHist <- function(plotme,froot,plotname,title,mfname,nbreaks=50)
-        # generic histogram and vertical boxplot in a 3:1 layout
-        # returns the graphic file name for inclusion in the web page
-        # broken out here for reuse
-        # ross lazarus march 2007
-        """
-        screenmat = (1,2,1,2) # create a 2x2 canvas
-        widthlist = (80,20) # change to 4:1 ratio for histo and boxplot
-        rpy.r.pdf( outfname, height , width  )
-        #rpy.r.layout(rpy.r.matrix(rpy.r.c(1,1,1,2), 1, 4, byrow = True)) # 3 to 1 vertical plot
-        m = rpy.r.matrix((1,1,1,2),nrow=1,ncol=4,byrow=True)
-        # in R, m = matrix(c(1,2),nrow=1,ncol=2,byrow=T)
-        rpy.r("layout(matrix(c(1,1,1,2),nrow=1,ncol=4,byrow=T))") # 4 to 1 vertical plot
-        maint = 'QC for %s - %s' % (basename,title)
-        rpy.r.hist(plotme,main=maint, xlab=xlabname,breaks=nbreaks,col="maroon",cex=0.8)
-        rpy.r.boxplot(plotme,main='',col="maroon",outline=False)
-        rpy.r.dev_off()
-
-    def rCum(plotme=[],outfname='',xlabname='',title='',basename='',nbreaks=100):
-        """
-        Useful to see what various cutoffs yield - plot percentiles
-        """
-        n = len(plotme)
-        maxveclen = 1000.0 # for reasonable pdf sizes!
-        yvec = copy.copy(plotme)
-        # arrives already in decending order of importance missingness or mendel count by subj or marker
-        xvec = range(n)
-        xvec = [100.0*(n-x)/n for x in xvec] # convert to centile
-        # now have half a million markers eg - too many to plot all for a pdf - sample to get 10k or so points
-        if n > maxveclen: # oversample part of the distribution
-            always = min(1000,n/20) # oversample smaller of lowest few hundred items or 5%
-            skip = int(n/maxveclen) # take 1 in skip to get about maxveclen points
-            samplei = [i for i in range(n) if (i % skip == 0) or (i < always)] # always oversample first sorted values
-            yvec = [yvec[i] for i in samplei] # always get first and last
-            xvec = [xvec[i] for i in samplei] # always get first and last
-        # need to supply the x axis or else rpy prints the freaking vector on the pdf - go figure
-        rpy.r.pdf( outfname, height , width  )
-        maint = 'QC for %s - %s' % (basename,title)
-        rpy.r("par(lab=c(10,10,10))") # so our grid is denser than the default 5
-        rpy.r.plot(xvec,yvec,type='p',main=maint, ylab=xlabname, xlab='Sample Percentile',col="maroon",cex=0.8)
-        rpy.r.grid(nx = None, ny = None, col = "lightgray", lty = "dotted")
-        rpy.r.dev_off()
-
-    def rQQ(plotme=[], outfname='fname',title='title',xlabname='Sample',basename=''):
-        """
-        y is data for a qq plot and ends up on the x axis go figure
-        if sampling, oversample low values - all the top 1% ?
-        this version called with -log10 transformed hwe
-        """
-        nrows = len(plotme)
-        fn = float(nrows)
-        xvec = [-math.log10(x/fn) for x in range(1,(nrows+1))]
-        mx = [0,math.log10(fn)] # if 1000, becomes 3 for the null line
-        maxveclen = 3000
-        yvec = copy.copy(plotme)
-        if nrows > maxveclen:
-            # now have half a million markers eg - too many to plot all for a pdf - sample to get 10k or so points
-            # oversample part of the distribution
-            always = min(1000,nrows/20) # oversample smaller of lowest few hundred items or 5%
-            skip = int(nrows/float(maxveclen)) # take 1 in skip to get about maxveclen points
-            samplei = [i for i in range(nrows) if (i < always) or (i % skip == 0)]
-            # always oversample first sorted (here lowest) values
-            yvec = [yvec[i] for i in samplei] # always get first and last
-            xvec = [xvec[i] for i in samplei] # and sample xvec same way
-            maint='Log QQ Plot (random %d of %d)' % (len(yvec),nrows)
-        else:
-            maint='Log QQ Plot(n=%d)' % (nrows)
-        mx = [0,math.log10(fn)] # if 1000, becomes 3 for the null line
-        ylab = '%s' % xlabname
-        xlab = '-log10(Uniform 0-1)'
-        # need to supply the x axis or else rpy prints the freaking vector on the pdf - go figure
-        rpy.r.pdf( outfname, height , width  )
-        rpy.r("par(lab=c(10,10,10))") # so our grid is denser than the default 5
-        rpy.r.qqplot(xvec,yvec,xlab=xlab,ylab=ylab,main=maint,sub=title,pch=19,col="maroon",cex=0.8)
-        rpy.r.points(mx,mx,type='l')
-        rpy.r.grid(nx = None, ny = None, col = "lightgray", lty = "dotted")
-        rpy.r.dev_off()
-
-    def rMultiQQ(plotme = [],nsplits=5, outfname='fname',title='title',xlabname='Sample',basename=''):
-        """
-        data must contain p,x,y as data for a qq plot, quantiles of x and y axis used to create a
-        grid of qq plots to show departure from null at extremes of data quality
-        Need to plot qqplot(p,unif) where the p's come from one x and one y quantile
-        and ends up on the x axis go figure
-        if sampling, oversample low values - all the top 1% ?
-        """
-        data = copy.copy(plotme)
-        nvals = len(data)
-        stepsize = nvals/nsplits
-        logstep = math.log10(stepsize) # so is 3 for steps of 1000
-        quints = range(0,nvals,stepsize) # quintile cutpoints for each layer
-        data.sort(key=operator.itemgetter(1)) # into x order - itemgetter lives in operator, not itertools
-        rpy.r.pdf( outfname, height , width  )
-        rpy.r("par(mfrow = c(%d,%d))" % (nsplits,nsplits))
-        yvec = [-math.log10(random.random()) for x in range(stepsize)]
-        yvec.sort() # size of each step is expected range for xvec under null?!
-        for rowstart in quints:
-            rowend = rowstart + stepsize
-            if nvals - rowend < stepsize: # finish last split
-                rowend = nvals
-            row = data[rowstart:rowend]
-            row.sort(key=operator.itemgetter(2)) # into y order
-            for colstart in quints:
-                colend = colstart + stepsize
-                if nvals - colend < stepsize: # finish last split
-                    colend = nvals
-                cell = row[colstart:colend]
-                xvec = [-math.log10(x[0]) for x in cell] # all the pvalues for this cell
-                rpy.r.qqplot(xvec,yvec,xlab='-log10(Uniform 0-1)',ylab=xlabname,pch=19,col="maroon",cex=0.8)
-                rpy.r.points([0,logstep],[0,logstep],type='l') # python lists, not R's c()
-        rpy.r.dev_off()
-
-
-    def rQQNorm(plotme=[], outfname='fname',title='title',xlabname='Sample',basename=''):
-        """
-        y is data for a qqnorm plot
-        if sampling, oversample low values - all the top 1% ?
-        """
-        rangeunif = len(plotme)
-        nunif = 1000
-        maxveclen = 3000
-        nrows = len(plotme)
-        data = copy.copy(plotme)
-        if nrows > maxveclen:
-            # now have half a million markers eg - too many to plot all for a pdf - sample to get 10k or so points
-            # oversample part of the distribution
-            always = min(1000,nrows/20) # oversample smaller of lowest few hundred items or 5%
-            skip = int((nrows-always)/float(maxveclen)) # take 1 in skip to get about maxveclen points
-            samplei = [i for i in range(nrows) if (i % skip == 0) or (i < always)]
-            # always oversample first sorted (here lowest) values
-            yvec = [data[i] for i in samplei] # always get first and last
-            maint='QQ Norm Plot (random %d of %d)' % (len(yvec),nrows)
-        else:
-            yvec = data
-            maint='QQ Norm Plot (n=%d)' % (nrows)
-        ylab = '%s' % xlabname
-        xlab = 'Normal'
-        # need to supply the x axis or else rpy prints the freaking vector on the pdf - go figure
-        rpy.r.pdf( outfname, height , width  )
-        rpy.r("par(lab=c(10,10,10))") # so our grid is denser than the default 5
-        rpy.r.qqnorm(yvec,xlab=xlab,ylab=ylab,main=maint,sub=title,pch=19,col="maroon",cex=0.8)
-        rpy.r.grid(nx = None, ny = None, col = "lightgray", lty = "dotted")
-        rpy.r.dev_off()
-
-    def rMAFMissqq(plotme=[], outfname='fname',title='title',xlabname='Sample',basename=''):
-        """
-        layout qq plots for pvalues within rows of increasing MAF and columns of increasing missingness
-        like the GAIN qc tools
-        y is data for a qq plot and ends up on the x axis go figure
-        if sampling, oversample low values - all the top 1% ?
-        """
-        nrows = len(plotme)
-        fn = float(nrows)
-        xvec = [-math.log10(x/fn) for x in range(1,(nrows+1))]
-        mx = [0,math.log10(fn)] # if 1000, becomes 3 for the null line
-        maxveclen = 2000
-        data = copy.copy(plotme)
-        data.sort() # low to high - oversample low values
-        if nrows > maxveclen:
-            # now have half a million markers eg - too many to plot all for a pdf - sample to get 10k or so points
-            # oversample part of the distribution
-            always = min(1000,nrows/20) # oversample smaller of lowest few hundred items or 5%
-            skip = int(nrows/float(maxveclen)) # take 1 in skip to get about maxveclen points
-            samplei = [i for i in range(nrows) if (i % skip == 0) or (i < always)]
-            # always oversample first sorted (here lowest) values
-            yvec = [data[i] for i in samplei] # always get first and last
-            xvec = [xvec[i] for i in samplei] # and sample xvec same way
-            maint='Log QQ Plot (random %d of %d)' % (len(yvec),nrows)
-        else:
-            yvec = data
-            maint='Log QQ Plot (n=%d)' % (nrows)
-        ylab = '%s' % xlabname
-        xlab = '-log10(Uniform 0-1)'
-        # need to supply the x axis or else rpy prints the freaking vector on the pdf - go figure
-        rpy.r.pdf( outfname, height , width  )
-        rpy.r("par(lab=c(10,10,10))") # so our grid is denser than the default 5
-        rpy.r.qqplot(xvec,yvec,xlab=xlab,ylab=ylab,main=maint,sub=title,pch=19,col="maroon",cex=0.8)
-        rpy.r.points(mx,mx,type='l')
-        rpy.r.grid(nx = None, ny = None, col = "lightgray", lty = "dotted")
-        rpy.r.dev_off()
-
-
-    fdsto,stofile = tempfile.mkstemp()
-    sto = open(stofile,'w')
-    import rpy # delay to avoid rpy stdout chatter replacing galaxy file blurb
-    mog = 'mogrify'
-    pdfnup = 'pdfnup'
-    pdfjoin = 'pdfjoin'
-    shead = subjects.pop(0) # get rid of head
-    mhead = markers.pop(0)
-    maf = mhead.index('maf')
-    missfrac = mhead.index('missfrac')
-    logphweall = mhead.index('logp_hwe_all')
-    logphweunaff = mhead.index('logp_hwe_unaff')
-    # check for at least some unaffected rml june 2009
-    m_mendel = mhead.index('N_Mendel')
-    fracmiss = shead.index('FracMiss')
-    s_mendel = shead.index('Mendel_errors')
-    s_het = shead.index('F_Stat')
-    params = {}
-    hweres = [float(x[logphweunaff]) for x in markers if len(x) > logphweunaff
-         and x[logphweunaff].upper() != 'NA']
-    if len(hweres) != 0:
-        xs = [logphweunaff, missfrac, maf, m_mendel, fracmiss, s_mendel, s_het]
-        # plot for each of these cols
-    else: # try hwe all instead - maybe no affection status available
-        xs = [logphweall, missfrac, maf, m_mendel, fracmiss, s_mendel, s_het]
-    ordplotme = [1,1,1,1,1,1,1] # ordered plots for everything!
-    oreverseme = [1,1,0,1,1,1,0] # so larger values are oversampled
-    qqplotme = [1,0,0,0,0,0,0] #
-    qnplotme = [0,0,0,0,0,0,1] #
-    nplots = len(xs)
-    xlabnames = ['log(p) HWE (unaff)', 'Missing Rate: Markers', 'Minor Allele Frequency',
-                 'Marker Mendel Error Count', 'Missing Rate: Subjects',
-                 'Subject Mendel Error Count','Subject Inbreeding (het) F Statistic']
-    plotnames = ['logphweunaff', 'missfrac', 'maf', 'm_mendel', 'fracmiss', 's_mendel','s_het']
-    ploturls = ['%s_%s.pdf' % (basename,x) for x in plotnames] # real plotnames
-    ordplotnames = ['%s_cum' % x for x in plotnames]
-    ordploturls = ['%s_%s.pdf' % (basename,x) for x in ordplotnames] # real plotnames
-    outfnames = [os.path.join(newfpath,ploturls[x]) for x in range(nplots)]
-    ordoutfnames = [os.path.join(newfpath,ordploturls[x]) for x in range(nplots)]
-    datasources = [markers,markers,markers,markers,subjects,subjects,subjects] # use this table
-    titles = ["Marker HWE","Marker Missing Genotype", "Marker MAF","Marker Mendel",
-        "Subject Missing Genotype","Subject Mendel",'Subject F Statistic']
-    html = []
-    pdflist = []
-    for n,column in enumerate(xs):
-        dat = [float(x[column]) for x in datasources[n] if len(x) > column
-               and x[column][:2].upper() != 'NA'] # plink gives both 'NA' and 'NaN'!
-        if sum(dat) != 0: # eg nada for mendel if case control?
-            rHist(plotme=dat,outfname=outfnames[n],xlabname=xlabnames[n],
-              title=titles[n],basename=basename,nbreaks=nbreaks)
-            row = [titles[n],ploturls[n],outfnames[n] ]
-            html.append(row)
-            pdflist.append(outfnames[n])
-            if ordplotme[n]: # for missingness, hwe - plots to see where cutoffs will end up
-                otitle = 'Ranked %s' % titles[n]
-                dat.sort()
-                if oreverseme[n]:
-                    dat.reverse()
-                rCum(plotme=dat,outfname=ordoutfnames[n],xlabname='Ordered %s' % xlabnames[n],
-                  title=otitle,basename=basename,nbreaks=1000)
-                row = [otitle,ordploturls[n],ordoutfnames[n]]
-                html.append(row)
-                pdflist.append(ordoutfnames[n])
-            if qqplotme[n]: #
-                otitle = 'LogQQ plot %s' % titles[n]
-                dat.sort()
-                dat.reverse()
-                ofn = os.path.split(ordoutfnames[n])
-                ofn = os.path.join(ofn[0],'QQ%s' % ofn[1])
-                ofu = os.path.split(ordploturls[n])
-                ofu = os.path.join(ofu[0],'QQ%s' % ofu[1])
-                rQQ(plotme=dat,outfname=ofn,xlabname='QQ %s' % xlabnames[n],
-                  title=otitle,basename=basename)
-                row = [otitle,ofu,ofn]
-                html.append(row)
-                pdflist.append(ofn)
-            elif qnplotme[n]:
-                otitle = 'F Statistic %s' % titles[n]
-                dat.sort()
-                dat.reverse()
-                ofn = os.path.split(ordoutfnames[n])
-                ofn = os.path.join(ofn[0],'FQNorm%s' % ofn[1])
-                ofu = os.path.split(ordploturls[n])
-                ofu = os.path.join(ofu[0],'FQNorm%s' % ofu[1])
-                rQQNorm(plotme=dat,outfname=ofn,xlabname='F QNorm %s' % xlabnames[n],
-                  title=otitle,basename=basename)
-                row = [otitle,ofu,ofn]
-                html.append(row)
-                pdflist.append(ofn)
-        else:
-            print '#$# no data for # %d - %s, data[:10]=%s' % (n,titles[n],dat[:10])
-    if nup>0:
-        # pdfjoin --outfile chr1test.pdf `ls database/files/dataset_396_files/*.pdf`
-        # pdfnup chr1test.pdf --nup 3x3 --frame true --outfile chr1test3.pdf
-        filestojoin = ' '.join(pdflist) # all the file names so far
-        afname = '%s_All_Paged.pdf' % (basename)
-        fullafname = os.path.join(newfpath,afname)
-        expl = 'All %s QC Plots joined into a single pdf' % basename
-        vcl = '%s %s --outfile %s ' % (pdfjoin,filestojoin, fullafname)
-        # make single page pdf
-        x=subprocess.Popen(vcl,shell=True,cwd=newfpath,stderr=sto,stdout=sto)
-        retval = x.wait()
-        row = [expl,afname,fullafname]
-        html.insert(0,row) # joined pdf goes first
-        nfname = '%s_All_%dx%d.pdf' % (basename,nup,nup)
-        fullnfname = os.path.join(newfpath,nfname)
-        expl = 'All %s QC Plots %d by %d to a page' % (basename,nup,nup)
-        vcl = '%s %s --nup %dx%d --frame true --outfile %s' % (pdfnup,afname,nup,nup,fullnfname)
-        # make thumbnail images
-        x=subprocess.Popen(vcl,shell=True,cwd=newfpath,stderr=sto,stdout=sto)
-        retval = x.wait()
-        row = [expl,nfname,fullnfname]
-        html.insert(1,row) # this goes second
-    vcl = '%s -format jpg -resize %s %s' % (mog, mogresize, os.path.join(newfpath,'*.pdf'))
-    # make thumbnail images
-    x=subprocess.Popen(vcl,shell=True,cwd=newfpath,stderr=sto,stdout=sto)
-    retval = x.wait()
-    sto.close()
-    cruft = open(stofile,'r').readlines()
-    return html,cruft # elements for an ordered list of urls or whatever..  
-
-
-def RmakePlots(markers=[],subjects=[],newfpath='.',basename='test',nbreaks='100',nup=3,height=8,width=10,rexe=''):
-    """
-    nice try but the R scripts are huge and take forever to run if there's a lot of data
-    marker rhead = ['snp','chrom','maf','a1','a2','missfrac',
-    'p_hwe_all','logp_hwe_all','p_hwe_unaff','logp_hwe_unaff','N_Mendel']
-    subject rhead = ['famId','iId','FracMiss','Mendel_errors','Ped_sex','SNP_sex','Status','Fest']
-    """
-    colour = "maroon"
-        
-    def rHist(plotme='',outfname='',xlabname='',title='',basename='',nbreaks=nbreaks):
-        """   rHist <- function(plotme,froot,plotname,title,mfname,nbreaks=50)
-        # generic histogram and vertical boxplot in a 3:1 layout
-        # returns the graphic file name for inclusion in the web page
-        # broken out here for reuse
-        # ross lazarus march 2007
-        """
-        R = []
-        maint = 'QC for %s - %s' % (basename,title)
-        R.append('pdf("%s",h=%d,w=%d)' % (outfname,height,width))
-        R.append("layout(matrix(c(1,1,1,2),nrow=1,ncol=4,byrow=T))") # 3:1 histogram to boxplot ratio
-        R.append("plotme = read.table(file='%s',head=F,sep='\t')" % plotme)
-        R.append('hist(plotme$V1, main="%s",xlab="%s",breaks=%d,col="%s")' % (maint,xlabname,nbreaks,colour))
-        R.append('boxplot(plotme$V1,main="",col="%s",outline=F)' % (colour) )
-        R.append('dev.off()')
-        return R
-        
-    def rCum(plotme='',outfname='',xlabname='',title='',basename='',nbreaks=100):
-        """
-        Useful to see what various cutoffs yield - plot percentiles
-        """
-        R = []
-        R.append("plotme = read.table(file='%s',head=T,sep='\t')" % plotme)
-        # arrives already in descending order of importance - missingness or mendel count by subject or marker
-        maint = 'QC for %s - %s' % (basename,title)
-        R.append('pdf("%s",h=%d,w=%d)' % (outfname,height,width))
-        R.append("par(lab=c(10,10,10))")
-        R.append('plot(plotme$xvec,plotme$yvec,type="p",main="%s",ylab="%s",xlab="Sample Percentile",col="%s")' % (maint,xlabname,colour))
-        R.append('dev.off()')
-        return R
-
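-    # The R fragments returned by these helpers are executed later via the
-    # RRun helper (called below, defined elsewhere in this file). A minimal
-    # sketch of the same idea (hypothetical helper, for illustration only),
-    # assuming only that an Rscript binary is on the PATH:
-    def runRLines(rcmd=[], scriptpath='script.R'):
-        """write accumulated R source lines to a file and run them"""
-        import subprocess
-        f = open(scriptpath,'w')
-        f.write('\n'.join(rcmd))
-        f.write('\n')
-        f.close()
-        p = subprocess.Popen('Rscript %s' % scriptpath, shell=True)
-        return p.wait()
-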
-    def rQQ(plotme='', outfname='fname',title='title',xlabname='Sample',basename=''):
-        """
-        y is data for a qq plot and ends up on the x axis go figure
-        if sampling, oversample low values - all the top 1% ?
-        this version called with -log10 transformed hwe
-        """
-        R = []
-        nrows = len(plotme)
-        fn = float(nrows)
-        xvec = [-math.log10(x/fn) for x in range(1,(nrows+1))]
-        #mx = [0,math.log10(fn)] # if 1000, becomes 3 for the null line
-        maxveclen = 3000
-        yvec = copy.copy(plotme)
-        if nrows > maxveclen:
-            # now have half a million markers eg - too many to plot all for a pdf - sample to get 10k or so points
-            # oversample part of the distribution
-            always = min(1000,nrows/20) # oversample smaller of lowest few hundred items or 5%
-            skip = int(nrows/float(maxveclen)) # take 1 in skip to get about maxveclen points
-            if skip < 2:
-                skip = 2
-            samplei = [i for i in range(nrows) if (i < always) or (i % skip == 0)]
-            # always oversample first sorted (here lowest) values
-            yvec = [yvec[i] for i in samplei] # always get first and last
-            xvec = [xvec[i] for i in samplei] # and sample xvec same way
-            maint='Log QQ Plot (random %d of %d)' % (len(yvec),nrows)
-        else:
-            maint='Log QQ Plot (n=%d)' % (nrows)
-        mx = [0,math.log10(fn)] # if 1000, becomes 3 for the null line
-        ylab = '%s' % xlabname
-        xlab = '-log10(Uniform 0-1)'
-        # need to supply the x axis or else rpy prints the freaking vector on the pdf - go figure
-        x = ['%f' % x for x in xvec]
-        R.append('xvec = c(%s)' % ','.join(x))
-        y = ['%f' % x for x in yvec]
-        R.append('yvec = c(%s)' % ','.join(y))
-        R.append('mx = c(0,%f)' % (math.log10(fn)))
-        R.append('pdf("%s",h=%d,w=%d)' % (outfname,height,width))
-        R.append("par(lab=c(10,10,10))")
-        R.append('qqplot(xvec,yvec,xlab="%s",ylab="%s",main="%s",sub="%s",pch=19,col="%s",cex=0.8)' % (xlab,ylab,maint,title,colour))
-        R.append('points(mx,mx,type="l")')
-        R.append('grid(col="lightgray",lty="dotted")')
-        R.append('dev.off()')
-        return R
-
-    def rMultiQQ(plotme = '',nsplits=5, outfname='fname',title='title',xlabname='Sample',basename=''):
-        """
-        data must contain p,x,y as data for a qq plot, quantiles of x and y axis used to create a
-        grid of qq plots to show departure from null at extremes of data quality
-        Need to plot qqplot(p,unif) where the p's come from one x and one y quantile
-        and ends up on the x axis go figure
-        if sampling, oversample low values - all the top 1% ?
-        """
-        R = []
-        data = copy.copy(plotme)
-        nvals = len(data)
-        stepsize = max(nvals/nsplits,1)
-        cellsize = max(stepsize/nsplits,1) # each x band is split into nsplits y cells
-        logstep = math.log10(stepsize) # so is 3 for steps of 1000
-        maint = 'Log QQ Plot %s' % title
-        ylab = '%s' % xlabname
-        xlab = '-log10(Uniform 0-1)'
-        R.append('mx = c(0,%f)' % logstep)
-        quints = range(0,nvals,stepsize) # quantile cutpoints for each x band
-        data.sort(key=operator.itemgetter(1)) # into x order
-        R.append('pdf("%s",h=%d,w=%d)' % (outfname,height,width))
-        R.append('par(mfrow = c(%d,%d))' % (nsplits,nsplits))
-        yvec = [-math.log10(random.random()) for x in range(cellsize)]
-        yvec.sort() # expected range for each cell's xvec under the null
-        y = ['%f' % x for x in yvec]
-        R.append('yvec = c(%s)' % ','.join(y))
-        for rowstart in quints:
-            rowend = rowstart + stepsize
-            if nvals - rowend < stepsize: # finish last split
-                rowend = nvals
-            row = data[rowstart:rowend]
-            row.sort(key=operator.itemgetter(2)) # into y order
-            for colstart in range(0,len(row),cellsize): # y cutpoints within this band
-                cell = row[colstart:colstart + cellsize]
-                xvec = [-math.log10(x[0]) for x in cell] # all the pvalues for this cell
-                x = ['%f' % z for z in xvec]
-                R.append('xvec = c(%s)' % ','.join(x))
-                R.append('qqplot(xvec,yvec,xlab="%s",ylab="%s",main="%s",sub="%s",pch=19,col="%s",cex=0.8)' % (xlab,ylab,maint,title,colour))
-                R.append('points(mx,mx,type="l")')
-                R.append('grid(col="lightgray",lty="dotted")')
-        R.append('dev.off()')
-        return R
-
-
-    def rQQNorm(plotme=[], outfname='fname',title='title',xlabname='Sample',basename=''):
-        """
-        y is data for a qqnorm plot
-        if sampling, oversample low values - all the top 1% ?
-        """
-        R = []
-        maxveclen = 3000
-        nrows = len(plotme)
-        data = copy.copy(plotme)
-        if nrows > maxveclen:
-            # now have half a million markers eg - too many to plot all for a pdf - sample to get 10k or so points
-            # oversample part of the distribution
-            always = min(1000,nrows/20) # oversample smaller of lowest few hundred items or 5%
-            skip = max(int((nrows-always)/float(maxveclen)),2) # take 1 in skip to get about maxveclen points
-            samplei = [i for i in range(nrows) if (i % skip == 0) or (i < always)]
-            # always oversample first sorted (here lowest) values
-            yvec = [data[i] for i in samplei] # always get first and last
-            maint='QQ Norm Plot (random %d of %d)' % (len(yvec),nrows)
-        else:
-            yvec = data
-            maint='QQ Norm Plot (n=%d)' % (nrows)
-        ylab = '%s' % xlabname
-        xlab = 'Normal'
-        # need to supply the x axis or else rpy prints the freaking vector on the pdf - go figure
-        y = ['%f' % x for x in yvec]
-        R.append('yvec = c(%s)' % ','.join(y))
-        R.append('pdf("%s",h=%d,w=%d)' % (outfname,height,width))
-        R.append("par(lab=c(10,10,10))")
-        R.append('qqnorm(yvec,xlab="%s",ylab="%s",main="%s",sub="%s",pch=19,col="%s",cex=0.8)' % (xlab,ylab,maint,title,colour))
-        R.append('grid(col="lightgray",lty="dotted")')
-        R.append('dev.off()')
-        return R
-
-    def rMAFMissqq(plotme=[], outfname='fname',title='title',xlabname='Sample',basename=''):
-        """
-        layout qq plots for pvalues within rows of increasing MAF and columns of increasing missingness
-        like the GAIN qc tools
-        y is data for a qq plot and ends up on the x axis go figure
-        if sampling, oversample low values - all the top 1% ?
-        """
-        R = []
-        nrows = len(plotme)
-        fn = float(nrows)
-        xvec = [-math.log10(x/fn) for x in range(1,(nrows+1))]
-        maxveclen = 2000
-        data = copy.copy(plotme)
-        data.sort() # low to high - oversample low values
-        if nrows > maxveclen:
-            # now have half a million markers eg - too many to plot all for a pdf - sample to get 10k or so points
-            # oversample part of the distribution
-            always = min(1000,nrows/20) # oversample smaller of lowest few hundred items or 5%
-            skip = int(nrows/float(maxveclen)) # take 1 in skip to get about maxveclen points
-            samplei = [i for i in range(nrows) if (i % skip == 0) or (i < always)]
-            # always oversample first sorted (here lowest) values
-            yvec = [data[i] for i in samplei] # always get first and last
-            xvec = [xvec[i] for i in samplei] # and sample xvec same way
-            maint='Log QQ Plot (random %d of %d)' % (len(yvec),nrows)
-        else:
-            yvec = data
-            maint='Log QQ Plot (n=%d)' % (nrows)
-        ylab = '%s' % xlabname
-        xlab = '-log10(Uniform 0-1)'
-        R.append('mx = c(0,%f)' % (math.log10(fn)))
-        x = ['%f' % x for x in xvec]
-        R.append('xvec = c(%s)' % ','.join(x))
-        y = ['%f' % x for x in yvec]
-        R.append('yvec = c(%s)' % ','.join(y))
-        R.append('pdf("%s",h=%d,w=%d)' % (outfname,height,width))
-        R.append("par(lab=c(10,10,10))")
-        R.append('qqplot(xvec,yvec,xlab="%s",ylab="%s",main="%s",sub="%s",pch=19,col="%s",cex=0.8)' % (xlab,ylab,maint,title,colour))
-        R.append('points(mx,mx,type="l")')
-        R.append('grid(col="lightgray",lty="dotted")')
-        R.append('dev.off()')
-        return R
-
-
-    shead = subjects.pop(0) # get rid of head
-    mhead = markers.pop(0)
-    maf = mhead.index('maf')
-    missfrac = mhead.index('missfrac')
-    logphweall = mhead.index('logp_hwe_all')
-    logphweunaff = mhead.index('logp_hwe_unaff')
-    # check for at least some unaffected rml june 2009
-    m_mendel = mhead.index('N_Mendel')
-    fracmiss = shead.index('FracMiss')
-    s_mendel = shead.index('Mendel_errors')
-    s_het = shead.index('F_Stat')
-    params = {}
-    h = [float(x[logphweunaff]) for x in markers if len(x) > logphweunaff
-         and x[logphweunaff].upper() != 'NA']
-    if len(h) != 0:
-        xs = [logphweunaff, missfrac, maf, m_mendel, fracmiss, s_mendel, s_het]
-        # plot for each of these cols
-    else: # try hwe all instead - maybe no affection status available
-        xs = [logphweall, missfrac, maf, m_mendel, fracmiss, s_mendel, s_het]
-    ordplotme = [1,1,1,1,1,1,1] # ordered plots for everything!
-    oreverseme = [1,1,0,1,1,1,0] # so larger values are oversampled
-    qqplotme = [1,0,0,0,0,0,0] #
-    qnplotme = [0,0,0,0,0,0,1] #
-    nplots = len(xs)
-    xlabnames = ['log(p) HWE (unaff)', 'Missing Rate: Markers', 'Minor Allele Frequency',
-                 'Marker Mendel Error Count', 'Missing Rate: Subjects',
-                 'Subject Mendel Error Count','Subject Inbreeding (het) F Statistic']
-    plotnames = ['logphweunaff', 'missfrac', 'maf', 'm_mendel', 'fracmiss', 's_mendel','s_het']
-    ploturls = ['%s_%s.pdf' % (basename,x) for x in plotnames] # real plotnames
-    ordplotnames = ['%s_cum' % x for x in plotnames]
-    ordploturls = ['%s_%s.pdf' % (basename,x) for x in ordplotnames] # real plotnames
-    outfnames = [os.path.join(newfpath,ploturls[x]) for x in range(nplots)]
-    ordoutfnames = [os.path.join(newfpath,ordploturls[x]) for x in range(nplots)]
-    datasources = [markers,markers,markers,markers,subjects,subjects,subjects] # use this table
-    titles = ["Marker HWE","Marker Missing Genotype", "Marker MAF","Marker Mendel",
-        "Subject Missing Genotype","Subject Mendel",'Subject F Statistic']
-    html = []
-    pdflist = []
-    R = []
-    for n,column in enumerate(xs):
-        dfn = '%d_%s.txt' % (n,titles[n])
-        dfilepath = os.path.join(newfpath,dfn)
-        dat = [float(x[column]) for x in datasources[n] if len(x) > column
-               and x[column][:2].upper() != 'NA'] # plink gives both 'NA' and 'NaN'!
-        if sum(dat) != 0: # eg nada for mendel if case control?
-            plotme = file(dfilepath,'w')
-            plotme.write('\n'.join(['%f' % x for x in dat])) # pass as a file - copout!
-            plotme.close()
-            tR = rHist(plotme=dfilepath,outfname=outfnames[n],xlabname=xlabnames[n],
-              title=titles[n],basename=basename,nbreaks=nbreaks)
-            R += tR
-            row = [titles[n],ploturls[n],outfnames[n] ]
-            html.append(row)
-            pdflist.append(outfnames[n])
-            if ordplotme[n]: # for missingness, hwe - plots to see where cutoffs will end up
-                otitle = 'Ranked %s' % titles[n]
-                dat.sort()
-                if oreverseme[n]:
-                    dat.reverse() # so the worst (largest) values come first
-                ndat = len(dat)
-                yvec = dat
-                xvec = [100.0*(ndat - i)/ndat for i in range(ndat)] # convert rank to sample percentile
-                maxveclen = 1000 # too many points make for unreasonable pdf sizes!
-                if ndat > maxveclen: # oversample part of the distribution
-                    always = min(1000,ndat/20) # oversample smaller of lowest few hundred items or 5%
-                    skip = max(int(ndat/float(maxveclen)),1) # take 1 in skip to get about maxveclen points
-                    samplei = [i for i in range(ndat) if (i % skip == 0) or (i < always)] # always oversample first sorted values
-                    yvec = [yvec[i] for i in samplei] # always get first and last
-                    xvec = [xvec[i] for i in samplei] # sample xvec the same way
-                cfilepath = '%s_cum.xls' % dfilepath # separate file - the R script reads both at run time
-                plotme = file(cfilepath,'w')
-                plotme.write('xvec\tyvec\n')
-                plotme.write('\n'.join(['%f\t%f' % (x,y) for (x,y) in zip(xvec,yvec)])) # pass as a file - copout!
-                plotme.close()
-                tR = rCum(plotme=cfilepath,outfname=ordoutfnames[n],xlabname='Ordered %s' % xlabnames[n],
-                  title=otitle,basename=basename,nbreaks=nbreaks)
-                R += tR
-                row = [otitle,ordploturls[n],ordoutfnames[n]]
-                html.append(row)
-                pdflist.append(ordoutfnames[n])
-            if qqplotme[n]: #
-                otitle = 'LogQQ plot %s' % titles[n]
-                dat.sort()
-                dat.reverse()
-                ofn = os.path.split(ordoutfnames[n])
-                ofn = os.path.join(ofn[0],'QQ%s' % ofn[1])
-                ofu = os.path.split(ordploturls[n])
-                ofu = os.path.join(ofu[0],'QQ%s' % ofu[1])
-                tR = rQQ(plotme=dat,outfname=ofn,xlabname='QQ %s' % xlabnames[n],
-                  title=otitle,basename=basename)
-                R += tR
-                row = [otitle,ofu,ofn]
-                html.append(row)
-                pdflist.append(ofn)
-            elif qnplotme[n]:
-                otitle = 'F Statistic %s' % titles[n]
-                dat.sort()
-                dat.reverse()
-                ofn = os.path.split(ordoutfnames[n])
-                ofn = os.path.join(ofn[0],'FQNorm%s' % ofn[1])
-                ofu = os.path.split(ordploturls[n])
-                ofu = os.path.join(ofu[0],'FQNorm%s' % ofu[1])
-                tR = rQQNorm(plotme=dat,outfname=ofn,xlabname='F QNorm %s' % xlabnames[n],
-                  title=otitle,basename=basename)
-                R += tR
-                row = [otitle,ofu,ofn]
-                html.append(row)
-                pdflist.append(ofn)
-        else:
-            print '#$# no data for # %d - %s, data[:10]=%s' % (n,titles[n],dat[:10])
-    rlog,flist = RRun(rcmd=R,title='makeQCplots',outdir=newfpath)
-    if nup>0:
-        # pdfjoin --outfile chr1test.pdf `ls database/files/dataset_396_files/*.pdf`
-        # pdfnup chr1test.pdf --nup 3x3 --frame true --outfile chr1test3.pdf
-        filestojoin = ' '.join(pdflist) # all the file names so far
-        afname = '%s_All_Paged.pdf' % (basename)
-        fullafname = os.path.join(newfpath,afname)
-        expl = 'All %s QC Plots joined into a single pdf' % basename
-        vcl = 'pdfjoin %s --outfile %s ' % (filestojoin, fullafname)
-        # make single page pdf
-        x=subprocess.Popen(vcl,shell=True,cwd=newfpath)
-        retval = x.wait()
-        row = [expl,afname,fullafname]
-        html.insert(0,row) # joined pdf goes first
-        nfname = '%s_All_%dx%d.pdf' % (basename,nup,nup)
-        fullnfname = os.path.join(newfpath,nfname)
-        expl = 'All %s QC Plots %d by %d to a page' % (basename,nup,nup)
-        vcl = 'pdfnup %s --nup %dx%d --frame true --outfile %s' % (afname,nup,nup,fullnfname)
-        # make thumbnail images
-        x=subprocess.Popen(vcl,shell=True,cwd=newfpath)
-        retval = x.wait()
-        row = [expl,nfname,fullnfname]
-        html.insert(1,row) # this goes second
-    vcl = 'mogrify -format jpg -resize %s %s' % (mogresize, os.path.join(newfpath,'*.pdf'))
-    # make thumbnail images
-    x=subprocess.Popen(vcl,shell=True,cwd=newfpath)
-    retval = x.wait()
-    return html # elements for an ordered list of urls or whatever..
-
-def countHet(bedf='fakeped_500000',linkageped=True,froot='fake500k',outfname="ahetf",logf='rgQC.log'):
-    """
-    NO LONGER USED - historical interest
-    count het loci for each subject to look for outliers = ? contamination
-    assume ped file is linkage format
-    need to make a ped file from the bed file we were passed
-    """
-    vcl = [plinke,'--bfile',bedf,'--recode','--out','%s_recode' % froot] # write a recoded ped file from the real .bed file
-    p=subprocess.Popen(' '.join(vcl),shell=True)
-    retval = p.wait()
-    f = open('%s_recode.recode.ped' % froot,'r')
-    if not linkageped:
-        head = f.next() # throw away header
-    hets = [] # simple count of het loci per subject. Expect poisson?
-    n = 1
-    for l in f:
-        n += 1
-        ll = l.strip().split()
-        if len(ll) > 6:
-            iid = idjoiner.join(ll[:2]) # fam_iid
-            gender = ll[4]
-            alleles = ll[6:]
-            nallele = len(alleles)
-            nhet = 0
-            for i in range(nallele/2):
-                a1=alleles[2*i]
-                a2=alleles[2*i+1]
-                if a1 != a2: # must be het
-                    if not missvals.get(a1,None) and not missvals.get(a2,None):
-                        nhet += 1
-            hets.append((nhet,iid,gender)) # for sorting later
-    f.close()
-    hets.sort()
-    hets.reverse() # biggest nhet now on top
-    f = open(outfname ,'w')
-    res = ['%d\t%s\t%s' % (x) for x in hets] # I love list comprehensions
-    res.insert(0,'nhetloci\tfamid_iid\tgender')
-    res.append('')
-    f.write('\n'.join(res))
-    f.close()
-
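-# a toy sketch of the heterozygosity count above (hypothetical helper; '0'
-# stands in for a missing allele call, as in the missvals lookup):
-def nHet(alleles, missing=('0','N')):
-    """count heterozygous genotypes in a flat list of allele pairs"""
-    n = 0
-    for i in range(len(alleles)/2):
-        a1,a2 = alleles[2*i],alleles[2*i+1]
-        if a1 != a2 and a1 not in missing and a2 not in missing:
-            n += 1
-    return n
-
-# nHet(['A','A','A','G','0','G']) == 1 - only the (A,G) pair counts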
-
-
-def subjectRep(froot='cleantest',outfname="srep",newfpath='.',logf = None):
-    """by subject (missingness = .imiss, mendel = .imendel)
-    assume replicates have an underscore in family id for
-    hapmap testing
-    For sorting, we need floats and integers
-    """
-    isexfile = '%s.sexcheck' % froot
-    imissfile = '%s.imiss' % froot
-    imendfile = '%s.imendel' % froot
-    ihetfile = '%s.het' % froot
-    logf.write('## subject reports starting at %s\n' % timenow())
-    outfile = os.path.join(newfpath,outfname)
-    idlist = []
-    imissdict = {}
-    imenddict = {}
-    isexdict = {}
-    ihetdict = {}
-    Tops = {}
-    Tnames = ['Ranked Subject Missing Genotype', 'Ranked Subject Mendel',
-              'Ranked Sex check', 'Ranked Inbreeding (het) F statistic']
-    Tsorts = [2,3,6,8]
-    Treverse = [True,True,True,False] # so the worst values sort first
-    #rhead = ['famId','iId','FracMiss','Mendel_errors','Ped_sex','SNP_sex','Status','Fest']
-    ##              FID            IID MISS_PHENO   N_MISS   N_GENO   F_MISS
-    ##  1552042370_A   1552042370_A          N     5480   549883 0.009966
-    ##  1552042410_A   1552042410_A          N     1638   549883 0.002979
- 
-    # ------------------missing--------------------
-    # imiss has FID  IID MISS_PHENO N_MISS  F_MISS
-    # we want F_MISS
-    try:
-        f = open(imissfile,'r')
-    except:
-        logf.write('# file %s is missing - talk about irony\n' % imissfile)
-        f = None
-    if f:
-        for n,line in enumerate(f):
-            ll = line.strip().split()
-            if n == 0:
-                head = [x.upper() for x in ll] # expect above                
-                fidpos = head.index('FID')
-                iidpos = head.index('IID')
-                fpos = head.index('F_MISS')
-            elif len(ll) > fpos: # full line
-                fid = ll[fidpos]
-                #if fid.find('_') == -1: # not replicate! - icondb ids have these...
-                iid = ll[iidpos]
-                fmiss = ll[fpos]
-                id = '%s%s%s' % (fid,idjoiner,iid)
-                imissdict[id] = fmiss
-                idlist.append(id)
-        f.close()
-    logf.write('## imissfile %s contained %d ids\n' % (imissfile,len(idlist)))
-    # ------------------mend-------------------
-    # *.imendel has FID  IID   N
-    # we want N
-    gotmend = True
-    try:
-        f = open(imendfile,'r')
-    except:
-        gotmend = False
-        for id in idlist:
-            imenddict[id] = '0'
-    if gotmend:
-        for n,line in enumerate(f):
-            ll = line.strip().split()
-            if n == 0:
-                head = [x.upper() for x in ll] # expect above                
-                npos = head.index('N')
-                fidpos = head.index('FID')
-                iidpos = head.index('IID')
-            elif len(ll) > npos: # full line
-                fid = ll[fidpos]
-                iid = ll[iidpos]
-                id = '%s%s%s' % (fid,idjoiner,iid)
-                nmend = ll[npos]
-                imenddict[id] = nmend
-        f.close()
-    else:
-        logf.write('## error No %s file - assuming not family data\n' % imendfile)
-    # ------------------sex check------------------
-    #[rerla@hg fresh]$ head /home/rerla/fresh/database/files/dataset_978_files/CAMP2007Dirty.sexcheck
-    # sexcheck has FID IID PEDSEX SNPSEX STATUS F
-    ##
-    ##     FID     Family ID
-    ##     IID     Individual ID
-    ##     PEDSEX  Sex as determined in pedigree file (1=male, 2=female)
-    ##     SNPSEX  Sex as determined by X chromosome
-    ##     STATUS  Displays "PROBLEM" or "OK" for each individual
-    ##     F       The actual X chromosome inbreeding (homozygosity) estimate
-    ##
-    ##    A PROBLEM arises if the two sexes do not match, or if the SNP data or pedigree data are
-    ##    ambiguous with regard to sex.
-    ##    A male call is made if F is more than 0.8; a female call is made if F is less than 0.2.
-    try:
-        f = open(isexfile,'r')
-        got_sexcheck = 1
-    except:
-        got_sexcheck = 0
-    if got_sexcheck:
-        for n,line in enumerate(f):
-            ll = line.strip().split()
-            if n == 0:
-                head = [x.upper() for x in ll] # expect above                
-                fidpos = head.index('FID')
-                iidpos = head.index('IID')
-                pedsexpos = head.index('PEDSEX')
-                snpsexpos = head.index('SNPSEX')
-                statuspos = head.index('STATUS')
-                fpos = head.index('F')
-            elif len(ll) > fpos: # full line
-                fid = ll[fidpos]
-                iid = ll[iidpos]
-                fest = ll[fpos]
-                pedsex = ll[pedsexpos]
-                snpsex = ll[snpsexpos]
-                stat = ll[statuspos]
-                id = '%s%s%s' % (fid,idjoiner,iid)
-                isexdict[id] = (pedsex,snpsex,stat,fest)
-        f.close()
-    else:
-        # this only happens if there are no subjects!
-        logf.write('No %s file - assuming no sex errors' % isexfile)
-    ##
-    ##    FID  IID       O(HOM)       E(HOM)        N(NM)            F
-    ##    457    2       490665    4.928e+05       722154    -0.009096
-    ##    457    3       464519     4.85e+05       710986      -0.0908
-    ##   1037    2       461632    4.856e+05       712025       -0.106
-    ##   1037    1       491845    4.906e+05       719353     0.005577
-    try:
-        f = open(ihetfile,'r')
-    except:
-        f = None
-        logf.write('## No %s file - did we run --het in plink?\n' % ihetfile)
-    if f:
-        for i,line in enumerate(f):
-            ll = line.strip().split()
-            if i == 0:
-                head = [x.upper() for x in ll] # expect above                
-                fidpos = head.index('FID')
-                iidpos = head.index('IID')
-                fpos = head.index('F')
-                n = 0
-            elif len(ll) > fpos: # full line
-                fid = ll[fidpos]            
-                iid = ll[iidpos]
-                fhet = ll[fpos]
-                id = '%s%s%s' % (fid,idjoiner,iid)
-                ihetdict[id] = fhet
-        f.close()      # now assemble and output result list
-    rhead = ['famId','iId','FracMiss','Mendel_errors','Ped_sex','SNP_sex','Status','XHomEst','F_Stat']
-    res = []
-    fres = [] # floats for sorting
-    for id in idlist: # for each snp in found order
-        fid,iid = id.split(idjoiner) # recover keys
-        f_missing = imissdict.get(id,'0.0')
-        nmend = imenddict.get(id,'0')
-        (pedsex,snpsex,status,fest) = isexdict.get(id,('0','0','0','0.0'))
-        fhet = ihetdict.get(id,'0.0')
-        res.append([fid,iid,f_missing,nmend,pedsex,snpsex,status,fest,fhet])
-        try:
-            ff_missing = float(f_missing)
-        except:
-            ff_missing = 0.0
-        try:
-            inmend = int(nmend)
-        except:
-            inmend = 0
-        try:
-            ffest = float(fest)
-        except:
-            ffest = 0.0
-        try:
-            ffhet = float(fhet)
-        except:
-            ffhet = 0.0
-        fres.append([fid,iid,ff_missing,inmend,pedsex,snpsex,status,ffest,ffhet])
-    ntokeep = max(20,len(res)/keepfrac)
-    for i,col in enumerate(Tsorts):
-        fres.sort(key=operator.itemgetter(col))
-        if Treverse[i]:
-            fres.reverse()
-        repname = Tnames[i]
-        Tops[repname] = fres[0:ntokeep]
-        Tops[repname] = [map(str,x) for x in Tops[repname]]
-        Tops[repname].insert(0,rhead)
-    res.sort()
-    res.insert(0,rhead)
-    logf.write('### writing %s report with header %s\n' % ( outfile,res[0]))
-    f = open(outfile,'w')
-    f.write('\n'.join(['\t'.join(x) for x in res]))
-    f.write('\n')
-    f.close()
-    return res,Tops
-
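-# subjectRep and markerRep below both locate plink output columns by header
-# name rather than by fixed position. A minimal generic sketch of that
-# pattern (hypothetical helper, not part of the original tool):
-def readPlinkTable(fname, wanted=('FID','IID','F_MISS')):
-    """yield tuples of the wanted columns from a whitespace delimited
-    plink output file that has a header row"""
-    f = open(fname,'r')
-    head = [x.upper() for x in f.next().strip().split()]
-    cols = [head.index(w) for w in wanted] # raises ValueError if a column is missing
-    for line in f:
-        ll = line.strip().split()
-        if len(ll) > max(cols): # skip short lines
-            yield tuple([ll[c] for c in cols])
-    f.close()
-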
-def markerRep(froot='cleantest',outfname="mrep",newfpath='.',logf=None,maplist=None ):
-    """by marker (hwe = .hwe, missingness=.lmiss, freq = .frq)
-    keep a list of marker order but keep all stats in dicts
-    write out a fake xls file for R or SAS etc
-    kinda clunky, but..
-    TODO: ensure stable if any file not found?
-    """
-    mapdict = {}
-    if maplist is not None:
-       rslist = [x[1] for x in maplist]
-       offset = [(x[0],x[3]) for x in maplist]
-       mapdict = dict(zip(rslist,offset))
-    hwefile = '%s.hwe' % froot
-    lmissfile = '%s.lmiss' % froot
-    freqfile = '%s.frq' % froot
-    lmendfile = '%s.lmendel' % froot
-    outfile = os.path.join(newfpath,outfname)
-    markerlist = []
-    chromlist = []
-    hwedict = {}
-    lmissdict = {}
-    freqdict = {}
-    lmenddict = {}
-    Tops = {}
-    Tnames = ['Ranked Marker MAF', 'Ranked Marker Missing Genotype', 'Ranked Marker HWE', 'Ranked Marker Mendel']
-    Tsorts = [3,6,10,11]
-    Treverse = [False,True,True,True] # so the worst values sort first
-    #res.append([rs,chrom,offset,maf,a1,a2,f_missing,hwe_all[0],hwe_all[1],hwe_unaff[0],hwe_unaff[1],nmend])
-    #rhead = ['snp','chrom','maf','a1','a2','missfrac','p_hwe_all','logp_hwe_all','p_hwe_unaff','logp_hwe_unaff','N_Mendel']
-    # -------------------hwe--------------------------
-    #    hwe has SNP TEST  GENO   O(HET)   E(HET) P_HWD
-    # we want all hwe where P_HWD <> NA
-    # ah changed in 1.04 to
-    ##  CHR         SNP     TEST   A1   A2                 GENO   O(HET)   E(HET)            P 
-    ##   1   rs6671164      ALL    2    3           34/276/613    0.299   0.3032       0.6644
-    ##   1   rs6671164      AFF    2    3                0/0/0      nan      nan           NA
-    ##   1   rs6671164    UNAFF    2    3           34/276/613    0.299   0.3032       0.6644
-    ##   1   rs4448553      ALL    2    3            8/176/748   0.1888   0.1848       0.5975
-    ##   1   rs4448553      AFF    2    3                0/0/0      nan      nan           NA
-    ##   1   rs4448553    UNAFF    2    3            8/176/748   0.1888   0.1848       0.5975
-    ##   1   rs1990150      ALL    1    3           54/303/569   0.3272   0.3453       0.1067
-    ##   1   rs1990150      AFF    1    3                0/0/0      nan      nan           NA
-    ##   1   rs1990150    UNAFF    1    3           54/303/569   0.3272   0.3453       0.1067
-    logf.write('## marker reports starting at %s\n' % timenow())
-    try:
-        f = open(hwefile,'r')
-    except:
-        f = None
-        logf.write('## error - no hwefile %s found\n' % hwefile)
-    if f:
-        for i,line in enumerate(f):
-            ll = line.strip().split()
-            if i == 0: # head
-                head = [x.upper() for x in ll] # expect above                
-                try:
-                    testpos = head.index('TEST')
-                except:
-                    testpos = 2 # patch for 1.04 which has b0rken headers - otherwise use head.index('TEST')
-                try:
-                    ppos = head.index('P')
-                except:
-                    ppos = 8 # patch - for head.index('P')
-                snppos = head.index('SNP')
-                chrpos = head.index('CHR')
-                logf.write('hwe header testpos=%d,ppos=%d,snppos=%d\n' % (testpos,ppos,snppos))
-            elif len(ll) > ppos: # full line
-                ps = ll[ppos].upper()
-                rs = ll[snppos]
-                chrom = ll[chrpos]
-                test = ll[testpos]
-                if not hwedict.get(rs,None):
-                    hwedict[rs] = {}
-                    markerlist.append(rs)
-                chromlist.append(chrom) # one place to find it?
-                lpvals = '0'
-                if ps != 'NA' and ps != 'NAN': # ps was upper cased above - worth keeping
-                    if ps != '1':
-                        try:
-                            pval = float(ps)
-                            lpvals = '%f' % -math.log10(pval)
-                        except:
-                            pass
-                    hwedict[rs][test] = (ps,lpvals)
-            else:
-                logf.write('short line #%d = %s\n' % (i,ll))
-        f.close()
-    # ------------------missing--------------------
-    """lmiss has  
-    CHR          SNP   N_MISS   N_GENO   F_MISS
-   1   rs12354060        0       73        0
-   1    rs4345758        1       73   0.0137
-   1    rs2691310       73       73        1
-   1    rs2531266       73       73        1
-    # we want F_MISS"""
-    try:
-        f = open(lmissfile,'r')
-    except:
-        f = None
-    if f:
-        for i,line in enumerate(f):
-            ll = line.strip().split()
-            if i == 0:
-                head = [x.upper() for x in ll] # expect above                
-                fracpos = head.index('F_MISS')
-                npos = head.index('N_MISS')
-                snppos = head.index('SNP')
-            elif len(ll) > fracpos: # full line
-                rs = ll[snppos]
-                fracval = ll[fracpos]
-                lmissdict[rs] = fracval # for now, just that?
-            else:
-                logf.write('short line #%d in %s = %s\n' % (i,lmissfile,ll))
-        f.close()
-    # ------------------freq-------------------
-    # frq has CHR          SNP   A1   A2          MAF       NM
-    # we want maf
-    try:
-        f = open(freqfile,'r')
-    except:
-        f = None
-    if f:
-        for i,line in enumerate(f):
-            ll = line.strip().split()
-            if i == 0:
-                head = [x.upper() for x in ll] # expect above                
-                mafpos = head.index('MAF')
-                a1pos = head.index('A1')
-                a2pos = head.index('A2')
-                snppos = head.index('SNP')
-            elif len(ll) > mafpos: # full line
-                rs = ll[snppos]
-                a1 = ll[a1pos]
-                a2 = ll[a2pos]
-                maf = ll[mafpos]
-                freqdict[rs] = (maf,a1,a2)
-        f.close()
-    # ------------------mend-------------------
-    # lmend has CHR SNP   N
-    # we want N
-    gotmend = True
-    try:
-        f = open(lmendfile,'r')
-    except:
-        gotmend = False
-        for rs in markerlist:
-            lmenddict[rs] = '0'
-    if gotmend:
-        for i,line in enumerate(f):
-            ll = line.strip().split()
-            if i == 0:
-                head = [x.upper() for x in ll] # expect above                
-                npos = head.index('N')
-                snppos = head.index('SNP')
-            elif len(ll) > npos: # full line
-                rs = ll[snppos]
-                nmend = ll[npos]
-                lmenddict[rs] = nmend
-        f.close()
-    else:
-        logf.write('No %s file - assuming not family data\n' % lmendfile)
-    # now assemble result list
-    rhead = ['snp','chromosome','offset','maf','a1','a2','missfrac','p_hwe_all','logp_hwe_all','p_hwe_unaff','logp_hwe_unaff','N_Mendel']
-    res = []
-    fres = []
-    for rs in markerlist: # for each snp in found order
-        f_missing = lmissdict.get(rs,'NA')
-        maf,a1,a2 = freqdict.get(rs,('NA','NA','NA'))
-        hwe_all = hwedict[rs].get('ALL',('NA','NA')) # hope this doesn't change...
-        hwe_unaff = hwedict[rs].get('UNAFF',('NA','NA'))
-        nmend = lmenddict.get(rs,'NA')
-        (chrom,offset)=mapdict.get(rs,('?','0'))
-        res.append([rs,chrom,offset,maf,a1,a2,f_missing,hwe_all[0],hwe_all[1],hwe_unaff[0],hwe_unaff[1],nmend])
-    ntokeep = max(10,len(res)/keepfrac)
-
-    def msortk(item=None):
-        """
-        deal with non numeric sorting
-        """
-        try:
-           return float(item)
-        except:
-           return item
-
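-    # usage sketch (toy values, for illustration): numbers sort numerically
-    # and any leftover 'NA' strings sort together after them in python 2, eg
-    #   res.sort(key=lambda x: msortk(x[3]))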
-    for i,col in enumerate(Tsorts):
-        res.sort(key=lambda x: msortk(x[col]))
-        if Treverse[i]:
-            res.reverse()
-        repname = Tnames[i]
-        Tops[repname] = res[0:ntokeep]
-        Tops[repname].insert(0,rhead)
-    res.sort(key=lambda x: '%s_%10d' % (x[1].rjust(4,'0'),int(x[2]))) # in chrom, offset order
-    res.insert(0,rhead)
-    f = open(outfile,'w')
-    f.write('\n'.join(['\t'.join(x) for x in res]))
-    f.close()
-    return res,Tops
-
-
-
-  
-def getfSize(fpath,outpath):
-    """
-    format a nice file size string
-    """
-    size = ''
-    fp = os.path.join(outpath,fpath)
-    if os.path.isfile(fp):
-        n = float(os.path.getsize(fp))
-        if n > 2**20:
-            size = ' (%1.1f MB)' % (n/2**20)
-        elif n > 2**10:
-            size = ' (%1.1f KB)' % (n/2**10)
-        elif n > 0:
-            size = ' (%d B)' % (int(n))
-    return size
-
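-# usage sketch (hypothetical file name): getfSize('tinywga.frq', newfpath)
-# returns something like ' (1.2 KB)', or '' if the file is absent or empty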
-
-if __name__ == "__main__":
-    u = """ called in xml as
-     <command interpreter="python">
-        rgQC.py -i '$input_file.extra_files_path/$input_file.metadata.base_name' -o "$out_prefix" 
-        -s '$html_file' -p '$html_file.files_path' -l '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink' 
-        -r '${GALAXY_DATA_INDEX_DIR}/rg/bin/R' 
-    </command>
-
-        Prepare a qc report - eg:
-    print "%s %s -i birdlped -o birdlped -l qc.log -s htmlf -m marker.xls -s sub.xls -p ./" % (sys.executable,prog)
-
-    """
-    progname = os.path.basename(sys.argv[0])
-    if len(sys.argv) < 9:
-        print '%s requires at least 8 arguments - got %d = %s' % (progname,len(sys.argv)-1,sys.argv)
-        sys.exit(1)
-    parser = OptionParser(usage=u, version="%prog 0.01")
-    a = parser.add_option
-    a("-i","--infile",dest="infile")
-    a("-o","--oprefix",dest="opref")
-    a("-l","--plinkexe",dest="plinke", default=plinke)
-    a("-r","--rexe",dest="rexe", default=rexe)
-    a("-s","--snps",dest="htmlf")
-    #a("-m","--markerRaw",dest="markf")
-    #a("-r","--rawsubject",dest="subjf")
-    a("-p","--patho",dest="newfpath")
-    (options,args) = parser.parse_args()
-    basename = os.path.split(options.infile)[-1] # just want the file prefix to find the .xls files below
-    killme = string.punctuation + string.whitespace
-    trantab = string.maketrans(killme,'_'*len(killme))
-    opref = options.opref.translate(trantab)
-    alogh,alog = tempfile.mkstemp(suffix='.txt')
-    plogh,plog = tempfile.mkstemp(suffix='.txt')
-    alogf = open(alog,'w')
-    plogf = open(plog,'w')
-    ahtmlf = options.htmlf
-    amarkf = 'MarkerDetails_%s.xls' % opref
-    asubjf = 'SubjectDetails_%s.xls' % opref
-    newfpath = options.newfpath
-    newfpath = os.path.realpath(newfpath)
-    try:
-       os.makedirs(newfpath)
-    except:
-       pass
-    ofn = basename
-    bfn = options.infile
-    try:
-       mapf = '%s.bim' % bfn
-       maplist = file(mapf,'r').readlines()
-       maplist = [x.split() for x in maplist]
-    except:
-       maplist = None
-       alogf.write('## error - cannot open %s to read map - no offsets will be available for output files\n' % mapf)
-    #rerla@beast galaxy]$ head test-data/tinywga.bim
-    #22      rs2283802       0       21784722        4       2
-    #22      rs2267000       0       21785366        4       2
-    rgbin = os.path.split(rexe)[0] # get our rg bin path
-    #plinktasks = [' --freq',' --missing',' --mendel',' --hardy',' --check-sex'] # plink v1 fixes that bug!
-    # if we could, do all at once? Nope. Probably never.
-    plinktasks = [['--freq',],['--hwe 0.0', '--missing','--hardy'],
-    ['--mendel',],['--check-sex',]]
-    vclbase = [options.plinke,'--noweb','--out',basename,'--bfile',bfn,'--mind','1.0','--geno','1.0','--maf','0.0']
-    runPlink(logf=plogf,plinktasks=plinktasks,cd=newfpath, vclbase=vclbase)
-    plinktasks = [['--bfile',bfn,'--indep-pairwise 40 20 0.5','--out %s' % basename],
-    ['--bfile',bfn,'--extract %s.prune.in --make-bed --out ldp_%s' % (basename, basename)],
-                  ['--bfile ldp_%s --het --out %s' % (basename,basename)]]
-    # subset of ld independent markers for eigenstrat and other requirements
-    vclbase = [options.plinke,'--noweb']
-    plogout = pruneLD(plinktasks=plinktasks,cd=newfpath,vclbase = vclbase)
-    plogf.write('\n'.join(plogout))
-    plogf.write('\n')
-    repout = os.path.join(newfpath,basename)
-    subjects,subjectTops = subjectRep(froot=repout,outfname=asubjf,newfpath=newfpath,
-                logf=alogf) # writes the subject_froot.xls file
-    markers,markerTops = markerRep(froot=repout,outfname=amarkf,newfpath=newfpath,
-                logf=alogf,maplist=maplist) # marker_froot.xls
-    nbreaks = 100
-    s = '## starting plotpage, newfpath=%s,m=%s,s=%s\n' % (newfpath,markers[:2],subjects[:2])
-    alogf.write(s)
-    print s
-    plotpage,cruft = makePlots(markers=markers,subjects=subjects,newfpath=newfpath,
-                         basename=basename,nbreaks=nbreaks,height=10,width=8,rgbin=rgbin)
-    #plotpage = RmakePlots(markers=markers,subjects=subjects,newfpath=newfpath,basename=basename,nbreaks=nbreaks,rexe=rexe)
-
-    # [titles[n],plotnames[n],outfnames[n] ]
-    html = []
-    html.append('<table cellpadding="5" border="0">')
-    size = getfSize(amarkf,newfpath)
-    html.append('<tr><td colspan="3"><a href="%s" type="application/vnd.ms-excel">%s</a>%s tab delimited</td></tr>' % \
-                (amarkf,'Click here to download the Marker QC Detail report file',size))
-    size = getfSize(asubjf,newfpath)
-    html.append('<tr><td colspan="3"><a href="%s" type="application/vnd.ms-excel">%s</a>%s tab delimited</td></tr>' % \
-                (asubjf,'Click here to download the Subject QC Detail report file',size))
-    for (title,url,ofname) in plotpage:
-        ttitle = 'Ranked %s' % title
-        dat = subjectTops.get(ttitle,None)
-        if not dat:
-            dat = markerTops.get(ttitle,None)
-        imghref = '%s.jpg' % os.path.splitext(url)[0] # removes .pdf
-        thumbnail = os.path.join(newfpath,imghref)
-        if not os.path.exists(thumbnail): # for multipage pdfs, mogrify makes multiple jpgs - fugly hack
-            imghref = '%s-0.jpg' % os.path.splitext(url)[0] # try the first jpg
-            thumbnail = os.path.join(newfpath,imghref)
-        if not os.path.exists(thumbnail):
-            html.append('<tr><td colspan="3"><a href="%s">%s</a></td></tr>' % (url,title))
-        else:
-            html.append('<tr><td><a href="%s"><img src="%s" alt="%s" hspace="10" align="middle">' \
-                    % (url,imghref,title))
-            if dat: # one or the other - write as an extra file and make a link here
-                t = '%s.xls' % (ttitle.replace(' ','_'))
-                fname = os.path.join(newfpath,t)
-                f = file(fname,'w')
-                f.write('\n'.join(['\t'.join(x) for x in dat])) # the report
-                size = getfSize(t,newfpath)
-                html.append('</a></td><td>%s</td><td><a href="%s">Worst data</a>%s</td></tr>' % (title,t,size))
-            else:
-                html.append('</a></td><td>%s</td><td>&nbsp;</td></tr>' % (title))
-    html.append('</table><hr><h3>All output files from the QC run are available below</h3>')
-    html.append('<table cellpadding="5" border="0">\n')
-    flist = os.listdir(newfpath) # we want to catch 'em all
-    flist.sort()
-    for f in flist:
-        fname = os.path.split(f)[-1]
-        size = getfSize(fname,newfpath)
-        html.append('<tr><td><a href="%s">%s</a>%s</td></tr>' % (fname,fname,size))
-    html.append('</table>')
-    alogf.close()
-    plogf.close()
-    llog = open(alog,'r').readlines()
-    plogfile = open(plog,'r').readlines()
-    os.unlink(alog)
-    os.unlink(plog)
-    llog += plogfile # add lines from pruneld log
-    lf = file(ahtmlf,'w') # galaxy will show this as the default view
-    lf.write(galhtmlprefix % progname)
-    s = '\n<div>Output from Rgenetics QC report tool run at %s<br>\n' % (timenow())
-    lf.write('<h4>%s</h4>\n' % s)
-    lf.write('</div><div><h4>(Click any preview image to download a full sized PDF version)</h4><br><ol>\n')
-    lf.write('\n'.join(html))
-    lf.write('<h4>QC run log contents</h4>')
-    lf.write('<pre>%s</pre>' % (''.join(llog))) # plink logs
-    if len(cruft) > 0:
-        lf.write('<h2>Blather from pdfnup follows:</h2><pre>%s</pre>' % (''.join(cruft))) # pdfnup
-    lf.write('%s\n<hr>\n' % galhtmlpostfix)
-    lf.close()
-
--- a/tools/rgenetics/rgQC.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,110 +0,0 @@
-<tool id="rgQC1" name="QC reports:">
-
-    <description>Marker and Subject measures</description>
-
-    <command interpreter="python">
-        rgQC.py -i '$input_file.extra_files_path/$input_file.metadata.base_name' -o "$title"
-        -s '$html_file' -p '$html_file.files_path'
-    </command>
-
-    <inputs>
-          <param name="input_file" type="data" label="RGenetics genotype file in compressed Plink format"
-          size="80" format="pbed" />
-       <param name="title" size="80" type="text" value="RgQC report" label="Descriptive report title"/>
-   </inputs>
-
-   <outputs>
-       <data format="html" name="html_file" metadata_source="input_file" label="${title}.html"/>
-   </outputs>
-
-<tests>
- <test>
-    <param name='input_file' value='tinywga' ftype='pbed' >
-    <metadata name='base_name' value='tinywga' />
-    <composite_data value='tinywga.bim' />
-    <composite_data value='tinywga.bed' />
-    <composite_data value='tinywga.fam' />
-    <edit_attributes type='name' value='tinywga' /> 
-    </param>
-    <param name='title' value='rgQCtest1' />
-    <output name='html_file' file='rgtestouts/rgQC/rgQCtest1.html' ftype='html' lines_diff='300'>
-    <param name="dbkey" value="hg18" />
-    <extra_files type="file" name='tinywga_All_Paged.pdf' value="rgtestouts/rgQC/tinywga_All_Paged.pdf" compare="sim_size" delta = "100000"/>
-    <extra_files type="file" name='tinywga.log' value="rgtestouts/rgQC/tinywga.log" compare="diff" lines_diff="15"/>
-    <extra_files type="file" name='tinywga.frq' value="rgtestouts/rgQC/tinywga.frq" compare="diff" />
-    <extra_files type="file" name='tinywga.het' value="rgtestouts/rgQC/tinywga.het" compare="diff" lines_diff="90"/>
-    <extra_files type="file" name='tinywga.hwe' value="rgtestouts/rgQC/tinywga.hwe" compare="diff" lines_diff="90"/>
-    <extra_files type="file" name='tinywga.imendel' value="rgtestouts/rgQC/tinywga.imendel" compare="diff"/>
-    <extra_files type="file" name='tinywga.imiss' value="rgtestouts/rgQC/tinywga.imiss" compare="diff" />
-    <extra_files type="file" name='tinywga.lmendel' value="rgtestouts/rgQC/tinywga.lmendel" compare="diff" />
-    <extra_files type="file" name='tinywga.lmiss' value="rgtestouts/rgQC/tinywga.lmiss" compare="diff" />
-    <extra_files type="file" name='tinywga_All_3x3.pdf' value="rgtestouts/rgQC/tinywga_All_3x3.pdf" compare="sim_size" delta="100000"/>
-    <extra_files type="file" name='ldp_tinywga.bed' value="rgtestouts/rgQC/ldp_tinywga.bed" compare="diff" lines_diff="10" />
-    <extra_files type="file" name='ldp_tinywga.bim' value="rgtestouts/rgQC/ldp_tinywga.bim" compare="sim_size" delta="1000" />
-    <extra_files type="file" name='ldp_tinywga.fam' value="rgtestouts/rgQC/ldp_tinywga.fam" compare="diff" />
-    <extra_files type="file" name='ldp_tinywga.log' value="rgtestouts/rgQC/ldp_tinywga.log" compare="diff" lines_diff="20"/>
-    <extra_files type="file" name='Ranked_Marker_HWE.xls' value="rgtestouts/rgQC/Ranked_Marker_HWE.xls" compare="diff" />
-    <extra_files type="file" name='Ranked_Marker_MAF.xls' value="rgtestouts/rgQC/Ranked_Marker_MAF.xls" compare="diff" />
-    <extra_files type="file" name='Ranked_Marker_Missing_Genotype.xls' value="rgtestouts/rgQC/Ranked_Marker_Missing_Genotype.xls" compare="diff" lines_diff="5"/>
-    <extra_files type="file" name='Ranked_Subject_Missing_Genotype.xls' value="rgtestouts/rgQC/Ranked_Subject_Missing_Genotype.xls" compare="diff" lines_diff="40"/>
-    <extra_files type="file" name='tinywga_fracmiss_cum.jpg' value="rgtestouts/rgQC/tinywga_fracmiss_cum.jpg" compare="sim_size" delta = "20000"/>     
-    <extra_files type="file" name='tinywga_fracmiss_cum.pdf' value="rgtestouts/rgQC/tinywga_fracmiss_cum.pdf" compare="sim_size" delta = "100000"/>     
- </output>
- </test>
-</tests>
- <help>
-
-.. class:: infomark
-
-**Summary**
-
-This tool prepares an extensive and comprehensive series of reports for quality control checking of SNP genotypes from any arbitrary
-genotyping experiment. Designed for family based data, so includes optional reports on Mendelian errors by
-subject and by marker.
-
-The outputs include histograms and boxplots for missingness, maf, mendel counts and hwe by marker, and the ones that make sense by
-subject. The report is built as a single web page containing links to the summary marker and subject files.
-
-The F (inbreeding) statistic is calculated using a somewhat LD independent group of genotypes
-The Plink used is --indep-pairwise 40 20 0.5 until we make it configurable.
-High heterozygosity might mean contaminated sample - more than one DNA. Low heterozygosity might mean inbreeding as in strains
-of mice.
-
-If the data file you want is missing from the option list above,
-you will first need to "import" it so it will be available here. Files available in the system library
-can be imported by selecting and completing the "Import ped/map" choice from the Get Data tool group at the top of the Galaxy
-menu. Your system administrator will be responsible for adding files to the system library.
-
------
-
-.. class:: infomark
-
-**Syntax**
-
-- **Genotype file** is the input pedfile -
-- **Prefix** is a string used to name all of the outputs
-
------
-
-**Attribution**
-
-This Galaxy tool was written by Ross Lazarus for the Rgenetics project.
-The current version uses Plink for most calculations and R for plotting - for full Plink attribution, source code and documentation,
-please see http://pngu.mgh.harvard.edu/~purcell/plink/ while R attribution and source code can be found at http://r-project.org
-
-Shaun Purcell provides the documentation you need specific to those settings, at
-http://pngu.mgh.harvard.edu/~purcell/plink/anal.shtml#glm
-
-Tool and Galaxy datatypes originally designed and written for the Rgenetics
-series of whole genome scale statistical genetics tools by ross lazarus (ross.lazarus@gmail.com)
-Shaun Purcell created and maintains Plink, while a cast of many maintain R.
-
-Please acknowledge your use of this tool, Galaxy, R and Plink in your publications and let
-us know so we can keep track. These tools all rely on highly competitive grant funding
-so your letting us know about publications is important to our ongoing support.
-
-</help>
-
-
-
-</tool>
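The help text above quotes --indep-pairwise 40 20 0.5 as the Plink pruning step behind the F statistic. A minimal sketch of that call, assuming a plink binary on the PATH and a placeholder fileset name (neither is taken from this changeset):

    # Hedged sketch of the LD-pruning step described in the rgQC help text.
    import subprocess

    def ld_prune(bfile='mydata', out='ldp_mydata'):
        # --indep-pairwise 40 20 0.5: 40-SNP windows sliding by 20 SNPs,
        # dropping one of each pair of SNPs with r^2 above 0.5
        cmd = ['plink', '--noweb', '--bfile', bfile,
               '--indep-pairwise', '40', '20', '0.5', '--out', out]
        subprocess.check_call(cmd)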
--- a/tools/rgenetics/rgQQ.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,365 +0,0 @@
-"""
-oct 2009 - multiple output files 
-Dear Matthias,
-
-Yes, you can define number of outputs dynamically in Galaxy. For doing
-this, you'll have to declare one output dataset in your xml and pass
-its ID ($out_file.id) to your python script. Also, set
-force_history_refresh="True" in your tool tag in xml, like this:
-<tool id="split1" name="Split" force_history_refresh="True">
-In your script, if your outputs are named in the following format,
-primary_associatedWithDatasetID_designation_visibility_extension
-(_DBKEY), all your datasets will show up in the history pane.
-associatedWithDatasetID is the $out_file.ID passed from xml,
-designation will be a unique identifier for each output (set in your
-script),
-visibility can be set to visible if you want the dataset visible in
-your history, or notvisible otherwise
-extension is the required format for your dataset (bed, tabular, fasta
-etc)
-DBKEY is optional, and can be set if required (e.g. hg18, mm9 etc)
-
-One of our tools "MAF to Interval converter" (tools/maf/
-maf_to_interval.xml) already uses this feature. You can use it as a
-reference.
-
-qq.chisq Quantile-quantile plot for chi-squared tests
-Description
-This function plots ranked observed chi-squared test statistics against the corresponding expected
-order statistics. It also estimates an inflation (or deflation) factor, lambda, by the ratio of the trimmed
-means of observed and expected values. This is useful for inspecting the results of whole-genome
-association studies for overdispersion due to population substructure and other sources of bias or
-confounding.
-Usage
-qq.chisq(x, df=1, x.max, main="QQ plot",
-sub=paste("Expected distribution: chi-squared (",df," df)", sep=""),
-xlab="Expected", ylab="Observed",
-conc=c(0.025, 0.975), overdisp=FALSE, trim=0.5,
-slope.one=FALSE, slope.lambda=FALSE,
-thin=c(0.25,50), oor.pch=24, col.shade="gray", ...)
-Arguments
-x A vector of observed chi-squared test values
-df The degrees of freedom for the tests
-x.max If present, truncate the observed value (Y) axis here
-main The main heading
-sub The subheading
-xlab x-axis label (default "Expected")
-ylab y-axis label (default "Observed")
-conc Lower and upper probability bounds for concentration band for the plot. Set this
-to NA to suppress this
-overdisp If TRUE, an overdispersion factor, lambda, will be estimated and used in calculating
-concentration band
-trim Quantile point for trimmed mean calculations for estimation of lambda. Default
-is to trim at the median
-slope.one Is a line of slope one to be superimposed?
-slope.lambda Is a line of slope lambda to be superimposed?
-thin A pair of numbers indicating how points will be thinned before plotting (see
-Details). If NA, no thinning will be carried out
-oor.pch Observed values greater than x.max are plotted at x.max. This argument sets
-the plotting symbol to be used for out-of-range observations
-col.shade The colour with which the concentration band will be filled
-... Further graphical parameter settings to be passed to points()
-
-Details
-To reduce plotting time and the size of plot files, the smallest observed and expected points are
-thinned so that only a reduced number of (approximately equally spaced) points are plotted. The
-precise behaviour is controlled by the parameter thin, whose value should be a pair of numbers.
-The first number must lie between 0 and 1 and sets the proportion of the X axis over which thinning
-is to be applied. The second number should be an integer and sets the maximum number of points
-to be plotted in this section.
-The "concentration band" for the plot is shown in grey. This region is defined by upper and lower
-probability bounds for each order statistic. The default is to use the 2.5 Note that this is not a
-simultaneous confidence region; the probability that the plot will stray outside the band at some
-point exceeds 95
-When required, the dispersion factor is estimated by the ratio of the observed trimmed mean to its
-expected value under the chi-squared assumption.
-Value
-The function returns the number of tests, the number of values omitted from the plot (greater than
-x.max), and the estimated dispersion factor, lambda.
-Note
-All tests must have the same number of degrees of freedom. If this is not the case, I suggest
-transforming to p-values and then plotting -2log(p) as chi-squared on 2 df.
-Author(s)
-David Clayton &lt;david.clayton@cimr.cam.ac.uk&gt;
-References
-Devlin, B. and Roeder, K. (1999) Genomic control for association studies. Biometrics, 55:997-1004
-"""
-
-import sys, random, math, copy,os, subprocess, tempfile
-from rgutils import RRun, rexe
-
-rqq = """
-# modified by ross lazarus for the rgenetics project may 2000
-# makes a pdf for galaxy from an x vector of chisquare values
-# from snpMatrix
-# http://www.bioconductor.org/packages/bioc/html/snpMatrix.html
- qq.chisq <-
-  function(x, df=1, x.max,
-    main="QQ plot",
-    sub=paste("Expected distribution: chi-squared (",df," df)", sep=""),
-    xlab="Expected", ylab="Observed",
-    conc=c(0.025, 0.975), overdisp=FALSE, trim=0.5,
-    slope.one=T, slope.lambda=FALSE,
-    thin=c(0.5,200), oor.pch=24, col.shade="gray", ofname="qqchi.pdf",
-    h=6,w=6,printpdf=F,...) {
-
-    # Function to shade concentration band
-
-    shade <- function(x1, y1, x2, y2, color=col.shade) {
-      n <- length(x2)
-      polygon(c(x1, x2[n:1]), c(y1, y2[n:1]), border=NA, col=color)
-    }
-
-    # Sort values and see how many out of range
-
-    obsvd <- sort(x, na.last=NA)
-    N <- length(obsvd)
-    if (missing(x.max)) {
-      Np <- N
-    }
-    else {
-      Np <- sum(obsvd<=x.max)
-    }
-    if(Np==0)
-      stop("Nothing to plot")
-
-    # Expected values
-
-    if (df==2) {
-      expctd <- 2*cumsum(1/(N:1))
-    }
-    else {
-      expctd <- qchisq(p=(1:N)/(N+1), df=df)
-    }
-
-    # Concentration bands
-
-    if (!is.null(conc)) {
-      if(conc[1]>0) {
-        e.low <- qchisq(p=qbeta(conc[1], 1:N, N:1), df=df)
-      }
-      else {
-        e.low <- rep(0, N)
-      }
-      if (conc[2]<1) {
-        e.high <- qchisq(p=qbeta(conc[2], 1:N, N:1), df=df)
-      }
-      else {
-        e.high <- 1.1*rep(max(x),N)
-      }
-    }
-
-    # Plot outline
-
-    if (Np < N)
-      top <- x.max
-    else
-      top <- obsvd[N]
-    right <- expctd[N]
-    if (printpdf) {pdf(ofname,h,w)}
-    plot(c(0, right), c(0, top), type="n", xlab=xlab, ylab=ylab,
-         main=main, sub=sub)
-
-    # Thinning
-
-    if (is.na(thin[1])) {
-      show <- 1:Np
-    }
-    else if (length(thin)!=2 || thin[1]<0 || thin[1]>1 || thin[2]<1) {
-      warning("invalid thin parameter; no thinning carried out")
-      show <- 1:Np
-    }
-    else {
-      space <- right*thin[1]/floor(thin[2])
-      iat <- round((N+1)*pchisq(q=(1:floor(thin[2]))*space, df=df))
-      if (max(iat)>thin[2])
-        show <- unique(c(iat, (1+max(iat)):Np))
-      else
-        show <- 1:Np
-    }
-    Nu <- floor(trim*N)
-    if (Nu>0)
-      lambda <- mean(obsvd[1:Nu])/mean(expctd[1:Nu])
-    if (!is.null(conc)) {
-      if (Np<N)
-        vert <- c(show, (Np+1):N)
-      else
-        vert <- show
-      if (overdisp)
-        shade(expctd[vert], lambda*e.low[vert],
-              expctd[vert], lambda*e.high[vert])
-      else
-        shade(expctd[vert], e.low[vert], expctd[vert], e.high[vert])
-    }
-    points(expctd[show], obsvd[show], ...)
-    # Overflow
-    if (Np<N) {
-      over <- (Np+1):N
-      points(expctd[over], rep(x.max, N-Np), pch=oor.pch)
-    }
-    # Lines
-    line.types <- c("solid", "dashed", "dotted")
-    key <- NULL
-    txt <- NULL
-    if (slope.one) {
-      key <- c(key, line.types[1])
-      txt <- c(txt, "y = x")
-      abline(a=0, b=1, lty=line.types[1])
-    }
-    if (slope.lambda && Nu>0) {
-      key <- c(key, line.types[2])
-      txt <- c(txt, paste("y = ", format(lambda, digits=4), "x", sep=""))
-      if (!is.null(conc)) {
-        if (Np<N)
-          vert <- c(show, (Np+1):N)
-        else
-          vert <- show
-      }
-      abline(a=0, b=lambda, lty=line.types[2])
-    }
-    if (printpdf) {dev.off()}
-    # Returned value
-
-    if (!is.null(key))
-       legend(0, top, legend=txt, lty=key)
-    c(N=N, omitted=N-Np, lambda=lambda)
-
-  }
-
-"""
-
-
-               
-    
-def makeQQ(dat=[], sample=1.0, maxveclen=4000, fname='fname',title='title',
-           xvar='Sample',h=8,w=8,logscale=True,outdir=None):
-    """
-    y is data for a qq plot and ends up on the x axis go figure
-    if sampling, oversample low values - all the top 1% ?
-    assume we have 0-1 p values
-    """
-    R = []
-    colour="maroon"
-    nrows = len(dat)
-    dat.sort() # small to large
-    fn = float(nrows)
-    unifx = [x/fn for x in range(1,(nrows+1))]
-    if logscale:
-        unifx = [-math.log10(x) for x in unifx] # uniform distribution
-    if sample < 1.0 and len(dat) > maxveclen:
-        # now have half a million markers eg - too many to plot all for a pdf - sample to get 10k or so points
-        # oversample part of the distribution
-        always = min(1000,nrows/20) # oversample smaller of lowest few hundred items or 5%
-        skip = int(nrows/float(maxveclen)) # take 1 in skip to get about maxveclen points
-        if skip <= 1:
-            skip = 2
-        samplei = [i for i in range(nrows) if (i < always) or (i % skip == 0)]
-        # always oversample first sorted (here lowest) values
-        yvec = [dat[i] for i in samplei] # always get first and last
-        xvec = [unifx[i] for i in samplei] # and sample xvec same way
-        maint='QQ %s (random %d of %d)' % (title,len(yvec),nrows)
-    else:
-        yvec = [x for x in dat] 
-        maint='QQ %s (n=%d)' % (title,nrows)
-        xvec = unifx
-    if logscale:
-        maint = 'Log%s' % maint
-        mx = [0,math.log10(nrows)] # if 1000, becomes 3 for the null line
-        ylab = '-log10(%s) Quantiles' % title
-        xlab = '-log10(Uniform 0-1) Quantiles'
-        yvec = [-math.log10(x) for x in yvec if x > 0.0]
-    else:
-        mx = [0,1]
-        ylab = '%s Quantiles' % title
-        xlab = 'Uniform 0-1 Quantiles'
-
-    xv = ['%f' % x for x in xvec]
-    R.append('xvec = c(%s)' % ','.join(xv))
-    yv = ['%f' % x for x in yvec]
-    R.append('yvec = c(%s)' % ','.join(yv))
-    R.append('mx = c(%f,%f)' % (mx[0],mx[1]))
-    R.append('pdf("%s",h=%d,w=%d)' % (fname,h,w))
-    R.append("par(lab=c(10,10,10))")
-    R.append('qqplot(xvec,yvec,xlab="%s",ylab="%s",main="%s",sub="%s",pch=19,col="%s",cex=0.8)' % (xlab,ylab,maint,title,colour))
-    R.append('points(mx,mx,type="l")')
-    R.append('grid(col="lightgray",lty="dotted")')
-    R.append('dev.off()')
-    RRun(rcmd=R,title='makeQQplot',outdir=outdir)
-
-
-
-def main():
-    u = """
-    """
-    u = """<command interpreter="python">
-        rgQQ.py "$input1" "$name" $sample "$cols" $allqq $height $width $logtrans $allqq.id $__new_file_path__ 
-    </command>
-    """
-    print >> sys.stdout,'## rgQQ.py. cl=',sys.argv
-    npar = 11
-    if len(sys.argv) < npar:
-            print >> sys.stdout, '## error - too few command line parameters - wanting %d' % npar
-            print >> sys.stdout, u
-            sys.exit(1)
-    in_fname = sys.argv[1]
-    name = sys.argv[2]
-    sample = float(sys.argv[3])
-    head = None
-    columns = [int(x) for x in sys.argv[4].strip().split(',')] # work with python columns!
-    allout = sys.argv[5]
-    height = int(sys.argv[6])
-    width = int(sys.argv[7])
-    logscale = (sys.argv[8].lower() == 'true')
-    outid = sys.argv[9] # this is used to allow multiple output files 
-    outdir = sys.argv[10]
-    nan_row = False
-    rows = []
-    for i, line in enumerate( file( in_fname ) ):
-        # Skip comments
-        if  line.startswith( '#' ) or ( i == 0 ):
-            if i == 0:
-                 head = line.strip().split("\t")
-            continue
-        if len(line.strip()) == 0:
-            continue
-        # Extract values and convert to floats
-        fields = line.strip().split( "\t" )
-        row = []
-        nan_row = False
-        for column in columns:
-            if len( fields ) <= column:
-                return fail( "No column %d on line %d: %s" % ( column, i, fields ) )
-            val = fields[column]
-            if val.lower() == "na":
-                nan_row = True
-            else:
-                try:
-                    row.append( float( fields[column] ) )
-                except ValueError:
-                    return fail( "Value '%s' in column %d on line %d is not numeric" % ( fields[column], column+1, i ) )
-        if not nan_row:
-           rows.append( row )
-    if i > 1:
-       i = i-1 # remove header row from count
-    if head == None:
-       head = ['Col%d' % (x+1) for x in columns]
-    R = []
-    for c,column in enumerate(columns): # we appended each column in turn
-        outname = allout
-        if c > 0: # after first time
-            outname = 'primary_%s_col%s_visible_pdf' % (outid,column)
-            outname = os.path.join(outdir,outname)
-        dat = []
-        nrows = len(rows) # sometimes lots of NA's!!
-        for arow in rows:
-           dat.append(arow[c]) # remember, we appended each col in turn!
-        cname = head[column]        
-        makeQQ(dat=dat,sample=sample,fname=outname,title='%s_%s' % (name,cname),
-                   xvar=cname,h=height,w=width,logscale=logscale,outdir=outdir)
-
-
-
-if __name__ == "__main__":
-    main()
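The docstring at the top of rgQQ.py spells out Galaxy's naming convention for dynamically numbered outputs, and main() applies it as 'primary_%s_col%s_visible_pdf'. A small sketch of the same pattern in isolation (the id and directory are placeholders):

    import os

    def extra_output_name(out_id, designation, outdir,
                          visibility='visible', extension='pdf'):
        # primary_<associatedWithDatasetID>_<designation>_<visibility>_<extension>
        # is the file name pattern Galaxy scans for when
        # force_history_refresh="True" is set on the tool
        name = 'primary_%s_%s_%s_%s' % (out_id, designation, visibility, extension)
        return os.path.join(outdir, name)

    # extra_output_name('445', 'col3', '/tmp') -> '/tmp/primary_445_col3_visible_pdf'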
--- a/tools/rgenetics/rgQQ.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-<tool id="rgQQ1" name="QQ Plots:">
-    <code file="rgQQ_code.py"/>
-
-    <description>for p values from an analysis </description>
-
-    <command interpreter="python">
-        rgQQ.py "$input1" "$title" "$sample" "$cols" "$allqq" "$height" "$width" "$logtrans" "$allqq.id" "$__new_file_path__"
-    </command>
-
-    <inputs>
-       <page>
-       <param name="input1"  type="data" label="Choose the History dataset containing p values to QQ plot"
-          size="80" format="tabular" help="Dataset missing? See Tip below" />
-       <param name="title" type="text" size="80" label = "Descriptive title for QQ plot" value="QQ" />
-
-       <param name="logtrans" type="boolean" label = "Use a log scale - recommended for p values in range 0-1.0"
-          truevalue="true" falsevalue="false"/>
-       <param name="sample" type="float" label="Random sample fraction - set to 1.0 for all data points" value="0.01"
-        help="If you have a million values, the QQ plots will be huge - a random sample of 1% will be fine" />
-       <param name="height" type="integer" label="PDF image height (inches)" value="6" />
-       <param name="width" type="integer" label="PDF image width (inches)" value="6" />
-       </page>
-       <page>
-       <param name="cols" type="select" display="checkboxes" multiple="True"
-       help="Choose from these numeric columns in the data file to make a quantile-quantile plot against a uniform distribution"
-       label="Columns (p values 0-1 eg) to make QQ plots" dynamic_options="get_columns( input1 )" />
-       </page>
-   </inputs>
-
-   <outputs>
-       <data format="pdf" name="allqq" label="${title}.html"/>
-   </outputs>
-
-<tests>
- <test>
- <param name='input1' value='tinywga.pphe' />
- <param name='title' value="rgQQtest1" />
- <param name='logtrans' value="false" />
- <param name='sample' value='1.0' />
- <param name='height' value='8' />
- <param name='width' value='10' />
- <param name='cols' value='3' />
- <output name='allqq' file='rgQQtest1.pdf' ftype='binary' compare="diff" lines_diff="29"/>
- </test>
-</tests>
-
-<help>
-
-.. class:: infomark
-
-**Explanation**
-
-A quantile-quantile (QQ) plot is a good way to see systematic departures from the null expectation of uniform p-values
-from a genomic analysis. If the QQ plot shows departure from the null (ie a uniform 0-1 distribution), you hope that this will be
-in the very smallest p-values, suggesting that there might be some interesting results to look at. A log scale helps make departures
-from the null at low p values more apparent.
-
------
-
-.. class:: infomark
-
-**Syntax**
-
-This tool has 2 pages. On the first one you choose the data set and output options, then on the second page, the
-column names are shown so you can choose the one containing the p values you wish to plot.
-
-- **History data** is one of your history tabular data sets
-- **Descriptive Title** is the text to appear in the output file names to remind you what the plots are!
-- **Use a Log scale** is recommended for p values in the range 0-1 as it highlights departures from the null at small p values
-- **Random Sample Fraction** is the fraction of points to randomly sample - highly recommended for >5k or so values
-- **Height and Width** will determine the scale of the pdf images
-
-
------
-
-.. class:: infomark
-
-**Summary**
-
-Generate a uniform QQ plot for any large number of p values from an analysis.
-Essentially a plot of n ranked p values against their rank as a centile - ie rank/n
-
-Works well where you have a column containing p values from
-a statistical test of some sort. These will be plotted against the values expected under the null. Departure
-from the diagonal suggests one distribution is more extreme than the other. You hope your p values are
-smaller than expected under the null.
-
-The sampling fraction will help cut down the size of the pdfs. If there are fewer than 5k points on any plot, all will be shown.
-Otherwise the sampling fraction will be used, or 5k points, whichever is larger.
-
-Note that the use of a log scale is ill-advised if you are plotting log transformed p values because the
-uniform distribution chosen for the qq plot is always 0-1 and log transformation is applied if required.
-The most useful plots for p values are log QQ plots of untransformed p values in the range 0-1.
-
-Originally designed and written for family based data from the CAMP Illumina run of 2007 by
-ross lazarus (ross.lazarus@gmail.com)
-
-</help>
-</tool>
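The help above describes the plot as n ranked p values against their rank as a centile, i.e. rank/n. A sketch of the expected null quantiles, including the -log10 transform makeQQ applies when a log scale is requested:

    import math

    def expected_uniform_quantiles(n, logscale=True):
        # rank/n for ranks 1..n; under the null, sorted p values
        # should track these uniform(0,1) quantiles
        q = [i / float(n) for i in range(1, n + 1)]
        if logscale:
            q = [-math.log10(x) for x in q]
        return q

    # expected_uniform_quantiles(4, logscale=False) -> [0.25, 0.5, 0.75, 1.0]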
--- a/tools/rgenetics/rgQQ_code.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-# before running the qc, need to rename various output files
-#       <data format="html" name="html_file" />
-#       <data format="txt" name="log_file" parent="html_file" />
-#       <data format="tabular" name="marker_file" parent="html_file" />
-#       <data format="tabular" name="subject_file" parent="html_file" />
-
-from galaxy import datatypes,model
-import sys,string
-
-def get_columns( input ):
-    columns = []
-    elems = []
-    if input and input.metadata.columns:
-        ncols = input.metadata.columns
-        colnames = ['Col%d' % x for x in range(1,ncols+1)]
-        for i, line in enumerate( file ( input.file_name ) ):
-            valid = True
-            if line and not line.startswith( '#' ):
-                line = line.rstrip('\r\n')
-                elems = line.split( '\t' )
-
-                """
-                Since this tool requires users to select only those columns
-                that contain numerical values, we'll restrict the column select
-                list appropriately.
-                """
-                if len(elems) > 0:
-                    for col in range(len(elems)): # zero offset
-                       if i == 0: # header row gives the column names
-                          colnames[col] = elems[col]
-                       else:
-                          val = elems[col]
-                          try:
-                              val = float(val)
-                              valid = True
-                          except ValueError:
-                              valid = False
-                          if valid:
-                              option = colnames[col]
-                              columns.append((option,str(col),False))
-                if i > 0 and len(columns) > 0:
-                    """
-                    We have our select list built from the first data row, so we can break out of the outer most for loop
-                    """
-                    break
-            if i == 30:
-                break # Hopefully we never get here...
-    else:
-        columns = [('?','?',False),]
-    return columns
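get_columns() above feeds the dynamic_options attribute of the cols parameter in rgQQ.xml, so it must return (label, value, selected) tuples. A hedged stand-in (not a Galaxy API) for exercising the function outside Galaxy, which only needs the two attributes the function reads:

    class FakeMeta(object):
        def __init__(self, columns):
            self.columns = columns

    class FakeDataset(object):
        # mimics the two attributes get_columns() touches:
        # input.metadata.columns and input.file_name
        def __init__(self, path, ncols):
            self.file_name = path
            self.metadata = FakeMeta(ncols)

    # get_columns(FakeDataset('pvals.tabular', 5)) returns tuples like
    # ('P', '4', False) - (display label, zero-based column as string, preselected)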
--- a/tools/rgenetics/rgRegion.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-"""
-released under the terms of the LGPL
-copyright ross lazarus August 2007 
-for the rgenetics project
-
-Special galaxy tool for the camp2007 data
-Allows grabbing arbitrary columns from an arbitrary region
-
-Needs a mongo results file in the location hardwired below or could be passed in as
-a library parameter - but this file must have a very specific structure
-rs chrom offset float1...floatn
-
-called as
-    <command interpreter="python">
-        rgRegion.py $infile '$cols' $r $tag $out_file1
-    </command>
-
-cols is a delimited list of chosen column names for the subset
-r is a ucsc location region pasted into the tool
-
-"""
-
-
-import sys,string       
-
-trantab = string.maketrans(string.punctuation,'_'*len(string.punctuation))
-print >> sys.stdout, '##rgRegion.py started'
-if len(sys.argv) <> 6: 
-  print >> sys.stdout, '##!expected 6 params in sys.argv, got %d - %s' % (len(sys.argv),sys.argv)
-  sys.exit(1)
-print '##got %d - %s' % (len(sys.argv),sys.argv)
-# quick and dirty for galaxy - we always get something for each parameter
-fname = sys.argv[1]
-wewant = sys.argv[2].split(',')
-region = sys.argv[3].lower()
-tag = sys.argv[4].translate(trantab)
-ofname = sys.argv[5] 
-myname = 'rgRegion'
-if len(wewant) == 0: # no columns selected?
-  print >> sys.stdout, '##!%s:  no columns selected - cannot run' % myname
-  sys.exit(1)
-try:
-  f = open(fname,'r')
-except: # bad input file name?
-  print >> sys.stdout, '##!%s unable to open file %s' % (myname, fname)
-  sys.exit(1)
-try: # TODO make a regexp?
-  c,rest = region.split(':')
-  c = c.replace('chr','') # leave although will break strict genome graphs  
-  rest = rest.replace(',','') # remove commas
-  spos,epos = rest.split('-')
-  spos = int(spos)
-  epos = int(epos)
-except:
-  print >> sys.stdout, '##!%s unable to parse region %s - MUST look like "chr8:10,000-100,000"' % (myname,region)
-  sys.exit(1)
-print >> sys.stdout, '##%s parsing chrom %s from %d to %d' % (myname, c,spos,epos)
-res = []
-cnames = f.next().strip().split() # column titles for output
-linelen = len(cnames)
-wewant = [int(x) - 1 for x in wewant] # need col numbers base 0
-for n,l in enumerate(f):
-  ll = l.strip().split()
-  thisc = ll[1]
-  thispos = int(ll[2])
-  if (thisc == c) and (thispos >= spos) and (thispos <= epos):
-     if len(ll) == linelen:
-        res.append([ll[x] for x in wewant]) # subset of columns!
-     else:
-        print >> sys.stdout, '##! looking for %d fields - found %d in ll=%s' % (linelen,len(ll),str(ll))
-o = file(ofname,'w')
-res = ['%s\n' % '\t'.join(x) for x in res] # turn into tab delim string
-print >> sys.stdout, '##%s selected and returning %d data rows' % (myname,len(res))
-head = [cnames[x] for x in wewant] # ah, list comprehensions - list of needed column names
-o.write('%s\n' % '\t'.join(head)) # header row for output
-o.write(''.join(res))
-o.close()
-f.close()    
-
-
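The region parser above splits the UCSC location by hand and carries a "TODO make a regexp?" comment. A hedged regex version of the same "chr8:10,000-100,000" parsing:

    import re

    REGION_RE = re.compile(r'^(?:chr)?(\w+):([\d,]+)-([\d,]+)$', re.IGNORECASE)

    def parse_region(region):
        # returns (chrom, start, end), mirroring the c/spos/epos
        # extraction done inline in rgRegion.py
        m = REGION_RE.match(region.strip())
        if not m:
            raise ValueError('region must look like "chr8:10,000-100,000"')
        chrom = m.group(1)
        spos = int(m.group(2).replace(',', ''))
        epos = int(m.group(3).replace(',', ''))
        return chrom, spos, epos

    # parse_region('chr9:119,506,000-122,518,000') -> ('9', 119506000, 122518000)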
--- a/tools/rgenetics/rgRegion.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,56 +0,0 @@
-<tool id="rgRegion" name="Subset:">
-    <description>genotypes from genomic region</description>
-  
-    <command interpreter="python">
-        rgRegion.py $infile $r $title $out_file1
-    </command>
-    
-    <inputs>    
-       <page>
-       <param name="infile" type="data" format="lped" label="Linkage ped genotype file name from current history" size="80"/>
-       <param name="title" type="text" size="80" label="Title for output files" optional="true"
-        help="Descriptive title for new genotype/map files" value="RGRegion" />
-       <param name="r" type="text" label="Region" help="Cut and paste a UCSC browser region" 
-        size="80" value="chr9:119,506,000-122,518,000"/>
-       <param name="rslist" type="text" area="true" label="List of rs numbers" help="Type (or cut and paste) a space or newline separated list of rs numbers" 
-        size="5x20"/>
-       <param name="outformat" type="select" label="Output file format" dynamic_options="get_rgRegionOutFormats()" size="80"/> 
-
-       </page>
-
- 
-   </inputs>
-
-   <outputs>  
-       <data format="lped" name="out_file1" label="${title}.lped" metadata_source="infile" />
-   </outputs>
-<help>
-
-.. class:: infomark
-
-**Syntax**
-
-- **Source** is the file you want to extract some columns from over a genomic region such as a gene or chromosome
-- **Tag** is the name to give the results file for this run 
-- **Region** is the genomic region cut and paste from a UCSC browser location window
-- **Genome Build** is the version of the genome your markers are from - use hg18 for CAMP illumina data
-
------
-
-**Summary**
-
-This tool is a very general purpose report builder. It can cut specific columns from 
-amalgamated analyses - eg powers and pvalues,
-or regressions over a specified genomic region (given as a UCSC browser location - eg)
-
-It takes a tab delimited file containing rs chrom offset float1..floatn and cuts out a region and
-a subset of the columns into a tabular file. If you make sure that RS is included, the
-result that appears in your history will have a direct link to ucsc genome graphs for viewing
-in full genomic context
-
-ross lazarus (ross.lazarus@gmail.com)
-August 2007
-released under the LGPL. see documentation for license terms.
-
-</help>
-</tool>
--- a/tools/rgenetics/rgTDT.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,264 +0,0 @@
-#!/usr/local/bin/python
-# hack to run and process a plink tdt
-# expects args as
-# bfilepath outname jobname outformat (wig,xls)
-# ross lazarus
-# for wig files, we need annotation so look for map file or complain
-
-"""
-Parameters for wiggle track definition lines
-All options are placed in a single line separated by spaces:
-
-  track type=wiggle_0 name=track_label description=center_label \
-        visibility=display_mode color=r,g,b altColor=r,g,b \
-        priority=priority autoScale=on|off \
-        gridDefault=on|off maxHeightPixels=max:default:min \
-        graphType=bar|points viewLimits=lower:upper \
-        yLineMark=real-value yLineOnOff=on|off \
-        windowingFunction=maximum|mean|minimum smoothingWindow=off|2-16
-"""
-
-import sys,math,shutil,subprocess,os,time,tempfile,shutil,string
-from os.path import abspath
-from optparse import OptionParser
-from rgutils import timenow, plinke
-myversion = 'v0.003 January 2010'
-verbose = False
-
-
-
-def makeGFF(resf='',outfname='',logf=None,twd='.',name='track name',description='track description',topn=1000):
-    """
-    score must be scaled to 0-1000
-
-    Want to make some wig tracks from each analysis
-    Best n -log10(p). Make top hit the window.
-    we use our tab output which has
-    rs	chrom	offset	ADD_stat	ADD_p	ADD_log10p
-    rs3094315	1	792429	1.151	0.2528	0.597223
-
-    """
-
-    def is_number(s):
-        try:
-            float(s)
-            return True
-        except ValueError:
-            return False
-    header = 'track name=%s description="%s" visibility=2 useScore=1 color=0,60,120\n' % (name,description)
-    column_names = [ 'Seqname', 'Source', 'Feature', 'Start', 'End', 'Score', 'Strand', 'Frame', 'Group' ]
-    halfwidth=100
-    resfpath = os.path.join(twd,resf)
-    resf = open(resfpath,'r')
-    resfl = resf.readlines() # dumb but convenient for millions of rows
-    resfl = [x.split() for x in resfl]
-    headl = resfl[0]
-    resfl = resfl[1:]
-    headl = [x.strip().upper() for x in headl]
-    headIndex = dict(zip(headl,range(0,len(headl))))
-    # s = 'rs\tchrom\toffset\ta1\ta2\ttransmitted\tuntransmitted\tTDTChiSq\tTDTp\t-log10TDTp\tAbsTDTOR\tTDTOR'
-    chrpos = headIndex.get('CHROM',None)
-    rspos = headIndex.get('RS',None)
-    offspos = headIndex.get('OFFSET',None)
-    ppos = headIndex.get('-LOG10TDTP',None)
-    wewant = [chrpos,rspos,offspos,ppos]
-    if None in wewant: # missing something
-       logf.write('### Error missing a required header in makeGFF - headIndex=%s\n' % headIndex)
-       return
-    resfl = [x for x in resfl if x[ppos] > '']
-    resfl = [(float(x[ppos]),x) for x in resfl] # decorate
-    resfl.sort()
-    resfl.reverse() # using -log10 so larger is better
-    resfl = resfl[:topn] # truncate
-    pvals = [x[0] for x in resfl] # need to scale
-    resfl = [x[1] for x in resfl] # drop decoration
-    maxp = max(pvals) # need to scale
-    minp = min(pvals)
-    prange = abs(maxp-minp) + 0.5 # fudge
-    scalefact = 1000.0/prange
-    logf.write('###maxp=%f,minp=%f,prange=%f,scalefact=%f\n' % (maxp,minp,prange,scalefact))
-    for i,row in enumerate(resfl):
-        row[ppos] = '%d' % (int(scalefact*pvals[i]))
-        resfl[i] = row # replace
-    outf = file(outfname,'w')
-    outf.write(header)
-    outres = [] # need to resort into chrom offset order
-    for i,lrow in enumerate(resfl):
-        chrom,snp,offset,p, = [lrow[x] for x in wewant]
-        gff = ('chr%s' % chrom,'rgTDT','variation','%d' % (int(offset)-halfwidth),
-               '%d' % (int(offset)+halfwidth),p,'.','.','%s logp=%1.2f' % (snp,pvals[i]))
-        outres.append(gff)
-    outres = [(x[0],int(x[3]),x) for x in outres] # decorate
-    outres.sort() # into chrom offset
-    outres=[x[2] for x in outres] # undecorate
-    outres = ['\t'.join(x) for x in outres]
-    outf.write('\n'.join(outres))
-    outf.write('\n')
-    outf.close()
-
-
-
-def xformTDT(infname='',resf='',outfname='',name='foo',mapf='/usr/local/galaxy/data/rg/lped/x.bim'):
-    """munge a plink .tdt file into either a ucsc track or an xls file
-  CHR         SNP  A1:A2      T:U_TDT       OR_TDT    CHISQ_TDT        P_TDT
-   0   MitoT217C    2:3          0:0           NA           NA           NA
-   0   MitoG228A    1:4          0:0           NA           NA           NA
-   0   MitoT250C    2:3          0:0           NA           NA           NA
-    map file has
-    1       rs4378174       0       003980745
-    1       rs10796404      0       005465256
-    1       rs2697965       0       014023092
-
-   grrr!
-   Changed in 1.01 to
-   [rerla@hg fresh]$ head database/job_working_directory/445/rgTDT.tdt
-     CHR         SNP           BP  A1  A2      T      U           OR        CHISQ            P
-   1  rs12562034       758311   1   3     71     79       0.8987       0.4267       0.5136
-   1   rs3934834       995669   4   2     98    129       0.7597        4.233      0.03963
-
-
-    """
-    if verbose:
-        print 'Rgenetics Galaxy Tools, rgTDT.py.xformTDT got infname=%s, resf=%s, outfname=%s' % (infname,resf,outfname)
-    wewantcols = ['SNP','CHR','BP','A1','A2','T','U','OR','CHISQ','P']
-    res = []
-    s = 'rs\tchrom\toffset\ta1\ta2\ttransmitted\tuntransmitted\tTDTChiSq\tTDTp\t-log10TDTp\tAbsTDTOR\tTDTOR' # header
-    res.append(s)
-    rsdict = {}
-    if not mapf:
-        sys.stderr.write('rgTDT called but no map file provided - cannot determine locations')
-        sys.exit(1)
-    map = file(mapf,'r')
-    for l in map: # plink map
-        ll = l.strip().split()
-        if len(ll) >= 3:
-            rs=ll[1].strip()
-            chrom = ll[0]
-            if chrom.lower() == 'x':
-               chrom = '23'
-            if chrom.lower() == 'y':
-               chrom = '24'
-            if chrom.lower() == 'mito':
-               chrom = '25'
-            offset = ll[3]
-            rsdict[rs] = (chrom,offset)
-    f = open(resf,'r')
-    headl = f.next().strip()
-    headl = headl.split()
-    wewant = [headl.index(x) for x in wewantcols]
-    llen = len(headl)
-    lnum = anum = 0
-    for l in f:
-        lnum += 1
-        ll = l.strip().split()
-        if len(ll) >= llen: # valid line
-            snp,chrom,offset,a1,a2,t,u,orat,chisq,p = [ll[x] for x in wewant]
-            if chisq == 'NA' or p == 'NA' or orat == 'NA':
-                continue # can't use these lines - gg gets unhappy
-            snp = snp.strip()
-            lp = '0.0'
-            fp = '1.0'
-            fakeorat = '1.0'
-            if p.upper().strip() <> 'NA':
-                try:
-                   fp = float(p)
-                   if fp <> 0:
-                       lp = '%6f' % -math.log10(fp)
-                       fp = '%6f' % fp
-                except:
-                  pass
-            else:
-                p = '1.0'
-            if orat.upper().strip() <> 'NA':
-                try:
-                   fakeorat = orat
-                   if float(orat) < 1.0:
-                      fakeorat = '%6f' % (1.0/float(orat)) # invert so large values big
-                except:
-                   pass
-            else:
-                orat = '1.0'
-            outl = '\t'.join([snp,chrom,offset,a1,a2,t,u,chisq,p,lp,fakeorat,orat])
-            res.append(outl)
-    f = file(outfname,'w')
-    res.append('')
-    f.write('\n'.join(res))
-    f.close()
-
-
-if __name__ == "__main__":
-    """ called as
-    <command interpreter="python">
-        rgTDT.py -i '$infile.extra_files_path/$infile.metadata.base_name' -o '$title' -f '$outformat' -r '$out_file1' -l '$logf' -x '${GALAXY_DATA_INDEX_DIR}/rg/bin/pl$
-
-    </command>
-
-    """
-    u = """ called in xml as
-        <command interpreter="python2.4">
-        rgTDT.py -i $i -o $out_prefix -f $outformat -r $out_file1 -l $logf
-    </command>
-    """
-    if len(sys.argv) < 6:
-       s = '## Error rgTDT.py needs 5 command line params - got %s \n' % (sys.argv)
-       if verbose:
-            print >> sys.stdout, s
-       sys.exit(0)
-    parser = OptionParser(usage=u, version="%prog 0.01")
-    a = parser.add_option
-    a("-i","--infile",dest="bfname")
-    a("-o","--oprefix",dest="oprefix")
-    a("-f","--formatOut",dest="outformat")
-    a("-r","--results",dest="outfname")
-    a("-l","--logfile",dest="logf")
-    a("-d","--du",dest="uId")
-    a("-e","--email",dest="uEmail")
-    a("-g","--gff",dest="gffout",default="")
-    (options,args) = parser.parse_args()
-    killme = string.punctuation + string.whitespace
-    trantab = string.maketrans(killme,'_'*len(killme))
-    title = options.oprefix
-    title = title.translate(trantab)
-    map_file = '%s.bim' % (options.bfname) #
-    me = sys.argv[0]
-    alogf = options.logf # absolute paths
-    od = os.path.split(alogf)[0]
-    try:
-      os.makedirs(od)
-    except:
-      pass
-    aoutf = options.outfname # absolute paths
-    od = os.path.split(aoutf)[0]
-    try:
-      os.makedirs(od)
-    except:
-      pass
-    vcl = [plinke,'--noweb', '--bfile',options.bfname,'--out',title,'--mind','0.5','--tdt']
-    logme = []
-    if verbose:
-        s = 'Rgenetics %s http://rgenetics.org Galaxy Tools rgTDT.py started %s\n' % (myversion,timenow())
-        print >> sys.stdout,s
-        logme.append(s)
-        s ='rgTDT.py: bfname=%s, logf=%s, argv = %s\n' % (options.bfname,alogf, sys.argv)
-        print >> sys.stdout,s
-        logme.append(s)
-        s = 'rgTDT.py: vcl=%s\n' % (' '.join(vcl))
-        print >> sys.stdout,s
-        logme.append(s)
-    twd = tempfile.mkdtemp(suffix='rgTDT') # make sure plink doesn't spew log file into the root!
-    tname = os.path.join(twd,title)
-    p=subprocess.Popen(' '.join(vcl),shell=True,cwd=twd)
-    retval = p.wait()
-    shutil.copy('%s.log' % tname,alogf)
-    sto = file(alogf,'a')
-    sto.write('\n'.join(logme))
-    resf = '%s.tdt' % tname # plink output is here we hope
-    xformTDT(options.bfname,resf,aoutf,title,map_file) # leaves the desired summary file
-    gffout = options.gffout
-    if gffout > '':
-        makeGFF(resf=aoutf,outfname=gffout,logf=sto,twd='.',name='rgTDT_Top_Table',description=title,topn=1000)
-    shutil.rmtree(twd)
-    sto.close()
-
-
-
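makeGFF above maps the top -log10(p) values onto the 0-1000 score range UCSC tracks expect. The scaling arithmetic, isolated as a sketch (note that, like makeGFF, it does not shift by the minimum, so it assumes the smallest -log10(p) is near zero):

    def scale_scores(log10ps):
        # pad the range by 0.5 as makeGFF does, then rescale to 0-1000
        maxp, minp = max(log10ps), min(log10ps)
        prange = abs(maxp - minp) + 0.5
        scalefact = 1000.0 / prange
        return [int(scalefact * p) for p in log10ps]

    # scale_scores([0.5, 1.5, 4.5]) -> [111, 333, 999]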
--- a/tools/rgenetics/rgTDT.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,108 +0,0 @@
-<tool id="rgTDT1" name="Transmission Distortion:">
-    <description>for family data</description>
-
-    <command interpreter="python">
-        rgTDT.py -i '$i.extra_files_path/$i.metadata.base_name' -o '$title'
-        -r '$out_file1' -l '$logf'  -g '$gffout'
-    </command>
-
-    <inputs>
-       <param name="i"  type="data" label="Genotypes for analysis from your current history datasets"
-          size="132" format="pbed" />
-       <param name='title' type='text' value='rgTDT'  label="Title for the output to remind you what you did" size="80"/>
-   </inputs>
-
-   <outputs>
-       <data format="tabular" name="out_file1" label="${title}_rgTDT.xls"/>
-       <data format="gff" name="gffout" label="${title}_rgTDT.gff"/>
-       <data format="txt" name="logf" label="${title}_rgTDTlog.txt"/>
-   </outputs>
-
-<tests>
- <test>
- <param name='i' value='tinywga' ftype='pbed' >
-   <metadata name='base_name' value='tinywga' />
-   <composite_data value='tinywga.bim' />
-   <composite_data value='tinywga.bed' />
-   <composite_data value='tinywga.fam' />
-   <edit_attributes type='name' value='tinywga' /> 
- </param>
- <param name='title' value='rgTDTtest1' />
- <output name='out_file1' file='rgTDTtest1_TDT.xls' ftype='tabular' compare="diff"/>
- <output name='gffout' file='rgTDTtest1_TDT_topTable.gff' ftype='gff' compare="diff" />
- <output name='logf' file='rgTDTtest1_TDT_log.txt' ftype='txt' lines_diff='79'/>
- </test>
-</tests>
-
-
-<help>
-
-.. class:: infomark
-
-**Attribution**
-
-This tool relies on the work of many people. It uses Plink http://pngu.mgh.harvard.edu/~purcell/plink/ for
-analysis, and R http://cran.r-project.org/ for graphics.
-
-This implementation is a Galaxy tool wrapper around these third party applications.
-It was originally designed and written for family based data from the CAMP Illumina run of 2007 by
-ross lazarus (ross.lazarus@gmail.com) and incorporated into the rgenetics toolkit.
-
-Rgenetics merely exposes them, wrapping Plink so you can use it in Galaxy.
-
------
-
-.. class:: infomark
-
-**Syntax**
-
-- **Genotype file** is the input family data chosen from available library compressed files
-- **Format** determines how your data will be returned to your Galaxy workspace - the gg format is strongly recommended
-
------
-
-.. class:: infomark
-
-**Summary**
-
-This tool will perform the standard transmission distortion analyses suitable for
-nuclear families and a simple binary "affected" phenotype.
-
-If you don't see the genotype data set you want here, it can be imported using one of the methods available from
-the Galaxy Get Data tool page.
-
-Outputs will include a GFF toptable with a link to view at UCSC if you want to see your
-results as a fully fledged UCSC track.
-
-Finally, if you can't live without
-spreadsheet data, choose the .xls tab delimited format. It's not a stupid binary excel file. Just a plain old tab delimited
-one with a header. Fortunately excel is dumb enough to open these without much protest.
-
-
-----
-
-.. class:: infomark
-
-**Attribution**
-
-This Galaxy tool relies on Plink (see Plinksrc_) to test TDT models. 
-
-So, we rely on the author (Shaun Purcell) for the documentation you need specific to those settings - they are very nicely documented - see
-DOC_
-
-Tool and Galaxy datatypes originally designed and written for the Rgenetics
-series of whole genome scale statistical genetics tools by ross lazarus (ross.lazarus@gmail.com)
-
-Copyright Ross Lazarus March 2007
-This Galaxy wrapper is released under the LGPL_ but is about as useful as a chocolate teapot without Plink, which is GPL.
-
-I'm no lawyer, but it looks like you got GPL if you use this software. Good luck.
-
-.. _Plinksrc: http://pngu.mgh.harvard.edu/~purcell/plink/ 
-
-.. _LGPL: http://www.gnu.org/copyleft/lesser.html
-
-.. _DOC: http://pngu.mgh.harvard.edu/~purcell/plink/anal.shtml#tdt
-
-</help>
-</tool>
--- a/tools/rgenetics/rgWebLogo3.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,157 +0,0 @@
-"""
-# modified june 2 ross lazarus to add units option at Assaf Gordon's suggestion
-# rgWebLogo3.py
-# wrapper to check that all fasta files are same length
-
-"""
-import optparse, os, sys, subprocess, tempfile
-
-WEBLOGO = 'weblogo' # executable name for weblogo3 - confusing isn't it?
-
-class WL3:
-    """
-    simple wrapper class to check fasta sequence lengths are all identical
-    """
-    FASTASTARTSYM = '>'
-    badseq = '## error - sequences in file %s are not all the same length - cannot proceed. Please read the tool documentation carefully'
-
-    def __init__(self,opts=None):
-        assert opts<>None,'WL3 class needs opts passed in - got None'
-        self.opts = opts
-        self.fastaf = file(self.opts.input,'r')
-        self.clparams = {}
-
-    def whereis(self,program):
-        for path in os.environ.get('PATH', '').split(':'):
-            if os.path.exists(os.path.join(path, program)) and not os.path.isdir(os.path.join(path, program)):
-                return os.path.join(path, program)
-        return None
-
-    def runCL(self):
-        """ construct and run a command line
-        """
-        wl = self.whereis(WEBLOGO)
-        if not wl:
-             print >> sys.stderr, '## rgWebLogo3.py error - cannot locate the weblogo binary %s on the current path' % WEBLOGO
-             print >> sys.stderr, '## Please ensure it is installed and working from http://code.google.com/p/weblogo'
-             sys.exit(1)
-        cll = [WEBLOGO,]
-        cll += [' '.join(it) for it in list(self.clparams.items())]
-        cl = ' '.join(cll)
-        assert cl > '', 'runCL needs a command line built from clparams'
-        fd,templog = tempfile.mkstemp(suffix='rgtempRun.txt')
-        tlf = open(templog,'w')
-        process = subprocess.Popen(cl, shell=True, stderr=tlf, stdout=tlf)
-        rval = process.wait()
-        tlf.close()
-        tlogs = ''.join(open(templog,'r').readlines())
-        if len(tlogs) > 1:
-            s = '## executing %s returned status %d and log (stdout/stderr) records: \n%s\n' % (cl,rval,tlogs)
-        else:
-            s = '## executing %s returned status %d. Nothing appeared on stderr/stdout\n' % (cl,rval)
-        os.unlink(templog) # always
-        if rval <> 0:
-             print >> sys.stderr, '## rgWebLogo3.py error - executing %s returned error code %d' % (cl,rval)
-             print >> sys.stderr, '## This may be a data problem or a tool dependency (%s) installation problem' % WEBLOGO
-             print >> sys.stderr, '## Please ensure %s is correctly installed and working on the command line -see http://code.google.com/p/weblogo' % WEBLOGO
-             sys.exit(1)
-        return s
-
-        
-    def iter_fasta(self):
-        """
-        generator for fasta sequences from a file
-        """
-        aseq = []
-        seqname = None
-        for i,row in enumerate(self.fastaf):
-            if row.startswith(self.FASTASTARTSYM):
-                if seqname <> None: # already in a sequence
-                    s = ''.join(aseq)
-                    l = len(s)
-                    yield (seqname,l)
-                    seqname = row[1:].strip()
-                    aseq = []
-                else:
-                    if i > 0:
-                        print >> sys.stderr,'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input,self.FASTASTARTSYM)
-                        sys.exit(1)
-                    else:
-                        seqname = row[1:].strip() 
-            else: # sequence row
-                if seqname == None:
-                    print >> sys.stderr,'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input,self.FASTASTARTSYM)
-                    sys.exit(1) 
-                else:
-                    aseq.append(row.strip())
-                
-        if seqname <> None: # last one
-            l = len(''.join(aseq))
-            yield (seqname,l)
-                
-        
-    def fcheck(self):
-        """ are all fasta sequence same length?
-        might be mongo big
-        """
-        flen = None
-        lasti = None
-        f = self.iter_fasta()
-        for i,(seqname,seqlen) in enumerate(f):
-            lasti = i
-            if i == 0:
-                flen = seqlen
-            else:
-                if seqlen <> flen:
-                    print >> sys.stderr,self.badseq % self.opts.input
-                    sys.exit(1)
-        return '# weblogo input %s has %d sequences all of length %d' % (self.opts.input,lasti+1,flen)
-
-
-    def run(self):
-        check = self.fcheck()
-        self.clparams['-f'] = self.opts.input
-        self.clparams['-o'] = self.opts.output
-        self.clparams['-t'] = '"%s"' % self.opts.logoname # must be wrapped as a string       
-        self.clparams['-F'] = self.opts.outformat       
-        if self.opts.size <> None:
-            self.clparams['-s'] = self.opts.size
-        if self.opts.lower <> None:
-            self.clparams['-l'] = self.opts.lower
-        if self.opts.upper <> None:
-            self.clparams['-u'] = self.opts.upper        
-        if self.opts.colours <> None:
-            self.clparams['-c'] = self.opts.colours
-        if self.opts.units <> None:
-            self.clparams['-U'] = self.opts.units
-        s = self.runCL()
-        return check,s
-
-
-if __name__ == '__main__':
-    '''
-    called as
-<command interpreter="python"> 
-    rgWebLogo3.py --outformat $outformat -s $size -i $input -o $output -t "$logoname" -c "$colours"
-#if $range.mode == 'part'
--l "$range.seqstart" -u "$range.seqend"
-#end if
-    </command>
-
-    '''
-    op = optparse.OptionParser()
-    op.add_option('-i', '--input', default=None)
-    op.add_option('-F', '--outformat', default='png')
-    op.add_option('-s', '--size', default=None) 
-    op.add_option('-o', '--output', default='rgWebLogo3')
-    op.add_option('-t', '--logoname', default='rgWebLogo3')
-    op.add_option('-c', '--colours', default=None)
-    op.add_option('-l', '--lower', default=None)
-    op.add_option('-u', '--upper', default=None)  
-    op.add_option('-U', '--units', default=None)  
-    opts, args = op.parse_args()
-    assert opts.input <> None,'weblogo3 needs a -i parameter with a fasta input file - cannot open'
-    assert os.path.isfile(opts.input),'weblogo3 needs a valid fasta input file - cannot open %s' % opts.input
-    w = WL3(opts)
-    checks,s = w.run()
-    print >> sys.stdout, checks # for info
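WL3.run() above assembles its command line from the clparams dictionary. For orientation, a sketch of the equivalent direct invocation (the flag set mirrors the -f/-o/-t/-F/-U assignments in run(); file names are placeholders):

    import subprocess

    def run_weblogo(fasta, out, title='My logo', fmt='pdf', units='bits'):
        # weblogo 3 must be installed and on the PATH
        cmd = ['weblogo', '-f', fasta, '-o', out,
               '-t', title, '-F', fmt, '-U', units]
        subprocess.check_call(cmd)

    # run_weblogo('aligned.fasta', 'logo.pdf')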
--- a/tools/rgenetics/rgWebLogo3.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,137 +0,0 @@
-<tool id="rgweblogo3" name="Sequence Logo" version="0.4">
-   <description>generator for fasta (eg Clustal alignments)</description>
-   <command interpreter="python"> 
-    rgWebLogo3.py -F $outformat -s $size -i $input -o $output -t "$logoname" -c "$colours" -U "$units"
-#if $range.mode == 'part'
--l "$range.seqstart" -u "$range.seqend"
-#end if
-    </command>
-  <inputs>
-   <page>
-    <param format="fasta" name="input" type="data" label="Fasta File" />
-    <param name="logoname" label="Title for output Sequence Logo" type="text" size="50" value="Galaxy-Rgenetics Sequence Logo" />
-    <param name="outformat" type="select" label="Output format for image (or text report)" >
-      <option value="png">PNG screen quality</option>
-      <option value="png_print">High quality printable PNG</option>
-      <option value="pdf" selected="True">PDF</option>
-      <option value="jpeg">JPG</option>
-      <option value="eps">EPS</option>
-      <option value="txt">Text (shows the detailed calculations for each position - no image)</option>
-    </param>
-    <param name="units" type="select" label="Display Units"
-      help="What the height of each logo element depicts - eg bits of entropy (default)">
-      <option value="bits" selected="True">Entropy (bits)</option>
-      <option value="probability">Probability</option>
-      <option value="nats">Nats</option>
-      <option value="kT">kT</option>
-      <option value="kJ/mol">kJ/mol</option>
-      <option value="kcal/mol">kcal/mol</option>
-    </param>
-    <param name="colours" type="select" label="Colour scheme for output Sequence Logo" 
-      help="Note that some of these only make sense for protein sequences!">
-      <option value="auto" selected="True">Default automatic colour selection</option>
-      <option value="base pairing">Base pairing</option>
-      <option value="charge">Charge colours</option>
-      <option value="chemistry">Chemistry colours</option>
-      <option value="classic">Classical colours</option>
-      <option value="hydrophobicity">Hydrophobicity</option>
-      <option value="monochrome">monochrome</option>
-    </param>
-
-    
-    <conditional name="range">
-        <param name="mode" type="select" label="Include entire sequence (default) or specify a subsequence range to use">
-          <option value="complete" selected="true">complete sequence</option>
-          <option value="part">Only use a part of the sequence</option>
-        </param>
-        <when value="complete">
-        </when>
-        <when value="part">    
-           <param name="seqstart" size="5" type="integer" value="1" help="WARNING: Specifying indexes outside the sequence lengths will cause unpredictable but bad consequences!" 
-             label="Index (eg 1=first letter) of the start of the sequence range to include in the logo">
-           </param>
-           <param name="seqend" size="5" type="integer" value="99999" label="Index (eg 75=75th letter) of the end of the sequence range to include in the logo" >
-           </param> 
-        </when>
-    </conditional>
-    <param name="size" type="select" label="Output weblogo size" >
-      <option value="large" selected="True">Large</option>
-      <option value="medium">Medium</option>
-      <option value="small">Small</option>
-    </param>
-   </page>
-  </inputs>
-  <outputs>
-    <data format="pdf" name="output"  label="${logoname}_output.${outformat}">
-       <change_format>
-           <when input="outformat" value="png_print" format="png" />
-           <when input="outformat" value="png" format="png" />
-           <when input="outformat" value="jpeg" format="jpg" />
-           <when input="outformat" value="eps" format="eps" />
-           <when input="outformat" value="txt" format="txt" />
-       </change_format>
-    </data>
-  </outputs>
-  <tests>
-    <test>  
-      <param name="input" value="rgClustal_testout.fasta" />
-      <param name = "logoname" value="Galaxy/Rgenetics weblogo" />
-      <param name = "outformat" value="jpeg" />
-      <param name = "mode" value="complete" />
-      <param name = "size" value="medium" />      
-      <param name = "colours" value="auto" />
-      <param name = "units" value="bits" /> 
-      <output name="output" file="rgWebLogo3_test.jpg" ftype="jpg" compare="sim_size" delta="10000" />
-    </test>
-    <test>  
-      <param name="input" value="rgClustal_testout.fasta" />
-      <param name = "logoname" value="Galaxy/Rgenetics weblogo" />
-      <param name = "outformat" value="png" />
-      <param name = "mode" value="complete" />
-      <param name = "size" value="medium" />      
-      <param name = "colours" value="auto" />
-      <param name = "units" value="probability" /> 
-      <output name="output" file="rgWebLogo3_test2.png" ftype="png" compare="sim_size" delta="10000" />
-    </test>
-  </tests>
-  <help>
-
-**Note**
-
-This tool uses Weblogo3_ in Galaxy to generate a sequence logo. The input file must be a fasta file in your current history.
-
-It is recommended for (eg) viewing multiple sequence alignments output from the clustalw tool - set the output to fasta and feed
-it in to this tool.
-
-A typical output looks like this
-
-.. image:: ./static/images/rgWebLogo3_test.jpg
-
-----
-
-**Warning about input Fasta format files**
-
-The Weblogo3 program used by this tool will fail if your fasta sequences are not all EXACTLY the same length. The tool will warn you
-and refuse to call the weblogo3 executable if sequences of unequal length are detected.
-
-Fasta alignments from the companion ClustalW Galaxy tool will work, but many other fasta files may cause this tool to fail. Please do not
-file a Galaxy bug report: this is a limitation of the underlying program and a problem with your source data, not a tool error. Please make
-certain that all your fasta sequences are exactly the same length!
-
-----
-
-**Attribution**
-
-Weblogo attribution and associated documentation are available at Weblogo3_.
-
-This Galaxy wrapper was written by Ross Lazarus for the rgenetics project. The source code is licensed under the LGPL_, like other rgenetics artefacts.
-
-.. _Weblogo3: http://weblogo.berkeley.edu/
-
-.. _LGPL: http://www.gnu.org/copyleft/lesser.html
-
-  </help>
-
-</tool>
-
-
--- a/tools/rgenetics/rgfakePed.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,537 +0,0 @@
-# modified may 2011 to name components (map/ped) as RgeneticsData to align with default base_name
-# otherwise downstream tools fail
-# modified march  2011 to remove post execution hook  
-# pedigree data faker
-# specifically designed for scalability testing of
-# Shaun Purcell's PLINK package
-# derived from John Ziniti's original suggestion
-# allele frequency spectrum and random mating added
-# ross lazarus me fecit january 13 2007
-# copyright ross lazarus 2007
-# without psyco
-# generates about 10k snp genotypes in 2k subjects (666 trios) per minute or so.
-# so 500k (a billion genotypes), at about 4 trios/min, will take a couple of hours to generate
-# psyco makes it literally twice as quick!!
-# all rights reserved except as granted under the terms of the LGPL
-# see http://www.gnu.org/licenses/lgpl.html 
-# for a copy of the license you receive with this software
-# and for your rights and obligations
-# especially if you wish to modify or redistribute this code
-# january 19 added random missingness inducer
-# currently about 15M genos/minute without psyco, 30M/minute with
-# so a billion genos should take about 40 minutes with psyco or 80 without...
-# added mendel error generator jan 23 rml
-
-
-import random,sys,time,os,string
-
-from optparse import OptionParser
-
-defbasename="RgeneticsData"    
-width = 500000
-ALLELES = ['1','2','3','4']
-prog = os.path.split(sys.argv[0])[-1]
-debug = 0
-
-"""Natural-order sorting, supporting embedded numbers.
-# found at http://lists.canonical.org/pipermail/kragen-hacks/2005-October/000419.html
-note test code there removed to conserve brain space
-foo9bar2 < foo10bar2 < foo10bar10
-
-"""
-import re # random and sys are already imported above
-
-def natsort_key(item): 
-    chunks = re.split('(\d+(?:\.\d+)?)', item)
-    for ii in range(len(chunks)):
-        if chunks[ii] and chunks[ii][0] in '0123456789':
-            if '.' in chunks[ii]: numtype = float
-            else: numtype = int
-            # wrap in tuple with '0' to explicitly specify numbers come first
-            chunks[ii] = (0, numtype(chunks[ii]))
-        else:
-            chunks[ii] = (1, chunks[ii])
-    return (chunks, item)
-
-def natsort(seq):
-    "Sort a sequence of text strings in a reasonable order."
-    alist = [item for item in seq]
-    alist.sort(key=natsort_key)
-    return alist
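-
-# Illustrative example (not in the original): natural-order sorting compares the
-# embedded numbers numerically rather than lexicographically, so
-#   natsort(['foo10bar10', 'foo9bar2', 'foo10bar2'])
-# returns ['foo9bar2', 'foo10bar2', 'foo10bar10']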
-
-
-def makeUniformMAFdist(low=0.02, high=0.5):
-    """Fake a non-uniform maf distribution to make the data
-    more interesting. Provide uniform 0.02-0.5 distribution"""
-    MAFdistribution = []
-    for i in xrange(int(100*low),int(100*high)+1):
-       freq = i/100.0 # uniform
-       MAFdistribution.append(freq)
-    return MAFdistribution
-
-def makeTriangularMAFdist(low=0.02, high=0.5, beta=5):
-    """Fake a non-uniform maf distribution to make the data
-    more interesting - more rare alleles """
-    MAFdistribution = []
-    for i in xrange(int(100*low),int(100*high)+1):
-       freq = (51 - i)/100.0 # large numbers of small allele freqs
-       for j in range(beta*i): # or i*i for crude exponential distribution 
-            MAFdistribution.append(freq)
-    return MAFdistribution
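-
-# For example, with the defaults the rarest frequency 0.01 (i=50) is appended
-# beta*50 = 250 times while 0.49 (i=2) is appended only 10 times, so random
-# draws from this list are heavily weighted towards rare alleles.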
-
-def makeFbathead(rslist=[], chromlist=[], poslist=[], width=100000):
-    """header row
-    """
-    res = ['%s_%s_%s' % (chromlist[x], poslist[x], rslist[x]) for x in range(len(rslist))]
-    return ' '.join(res)
-
-def makeMap( width=500000, MAFdistribution=[], useGP=False):
-    """make snp allele and frequency tables for consistent generation"""
-    usegp = 1
-    snpdb = 'snp126'
-    hgdb = 'hg18'
-    alleles = []
-    freqs = []
-    rslist = []
-    chromlist = []
-    poslist = []
-    for snp in range(width):
-        random.shuffle(ALLELES)
-        alleles.append(ALLELES[0:2]) # need two DIFFERENT alleles!
-        freqs.append(random.choice(MAFdistribution)) # more rare alleles
-    if useGP:
-        try:
-            import MySQLdb
-            genome = MySQLdb.Connect('localhost', 'hg18', 'G3gn0m3')
-            curs = genome.cursor() # use default cursor
-        except:
-            if debug:
-                print 'cannot connect to local copy of golden path'
-            usegp = 0
-    if usegp and useGP: # urrrgghh getting snps into chrom offset order is complicated....
-        curs.execute('use %s' % hgdb)
-        print 'Collecting %d real rs numbers - this may take a while' % width
-        # get a random draw of enough reasonable (hapmap) snps with frequency data
-        s = '''select distinct chrom,chromEnd, name from %s where avHet > 0 and chrom not like '%%random'
-        group by name order by rand() limit %d''' % (snpdb,width)
-        curs.execute(s)
-        reslist = curs.fetchall()
-        reslist = ['%s\t%09d\t%s' % (x[3:],y,z) for x,y,z in reslist] # get rid of chr
-        reslist = natsort(reslist)
-        for s in reslist:
-            chrom,pos,rs = s.split('\t')
-            rslist.append(rs)
-            chromlist.append(chrom)
-            poslist.append(pos)
-    else:
-        chrom = '1'
-        for snp in range(width):
-            pos = '%d' % (1000*snp)
-            rs = 'rs00%d' % snp
-            rslist.append(rs)
-            chromlist.append(chrom)
-            poslist.append(pos)
-    return alleles,freqs, rslist, chromlist, poslist
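-
-# Illustrative example (not in the original): with useGP=False and width=3 this
-# returns two distinct alleles per snp (eg [['2','4'],['1','3'],['3','1']]),
-# three MAFs drawn from MAFdistribution, rslist ['rs000','rs001','rs002'],
-# chromlist ['1','1','1'] and poslist ['0','1000','2000']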
-
-def writeMap(fprefix = '', fpath='./', rslist=[], chromlist=[], poslist=[], width = 500000):
-    """make a faked plink compatible map file - fbat files
-    have the map written as a header line"""
-    outf = '%s.map'% (fprefix)
-    outf = os.path.join(fpath,outf)
-    amap = open(outf, 'w')
-    res = ['%s\t%s\t0\t%s' % (chromlist[x],rslist[x],poslist[x]) for x in range(len(rslist))]
-    res.append('')
-    amap.write('\n'.join(res))
-    amap.close()
-
-def makeMissing(genos=[], missrate = 0.03, missval = '0'):
-    """impose some random missingness"""
-    nsnps = len(genos)
-    for snp in range(nsnps): # ignore first 6 columns
-        if random.random() <= missrate:
-            genos[snp] = '%s %s' % (missval,missval)
-    return genos
-
-def makeTriomissing(genos=[], missrate = 0.03, missval = '0'):
-    """impose some random missingness on a trio - moth eaten like real data"""
-    for person in (0,1):
-        nsnps = len(genos[person])
-        for snp in range(nsnps):
-            for person in [0,1,2]:
-                if random.random() <= missrate:
-                    genos[person][snp] = '%s %s' % (missval,missval)
-    return genos
-
-
-def makeTriomendel(p1g=(0,0),p2g=(0,0), kiddip = (0,0)):
-    """impose some random mendels on a trio
-    there are 8 of the 9 mating types we can simulate reasonable errors for
-    Note, since random mating dual het parents can produce any genotype we can't generate an interesting
-    error for them, so the overall mendel rate will be lower than mendrate, depending on
-    allele frequency..."""
-    if p1g[0] != p1g[1] and p2g[0] != p2g[1]: # both parents het
-        return kiddip # cannot simulate a mendel error - anything is legal!
-    elif (p1g[0] != p1g[1]): # p1 is het parent so p2 must be hom
-        if p2g[0] == 0: # - make child p2 opposite hom for error
-            kiddip = (1,1)
-        else:
-            kiddip = (0,0)
-    elif (p2g[0] != p2g[1]): # p2 is het parent so p1 must be hom
-        if p1g[0] == 0: # - make child p1 opposite hom for error
-            kiddip = (1,1)
-        else:
-            kiddip = (0,0)
-    elif (p1g[0] == p1g[1]): # p1 is hom parent and if we get here p2 must also be hom
-        if p1g[0] == p2g[0]: # both parents are same hom - make child either het or opposite hom for error
-            if random.random() <= 0.5:
-                kiddip = (0,1)
-            else:
-                if p1g[0] == 0:
-                    kiddip = (1,1)
-                else:
-                    kiddip = (0,0)
-        else: # parents are opposite hom - return any hom as an error
-            if random.random() <= 0.5:
-                kiddip = (0,0)
-            else:
-                kiddip = (1,1)
-    return kiddip
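-
-# Illustrative example (not in the original): with a (0,0) homozygous dad and a
-# (0,1) heterozygous mum, makeTriomendel((0,0), (0,1), kiddip) returns (1,1) -
-# a child carrying two copies of an allele the homozygous parent could not
-# have transmitted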
-            
-            
-
-
-def makeFam(width=100, freqs={}, alleles={}, trio=1, missrate=0.03, missval='0', mendrate=0.0):
-    """this family is a simple trio, constructed by random mating two random genotypes
-    TODO: why not generate from chromosomes - eg hapmap
-    set each haplotype locus according to the conditional
-    probability implied by the surrounding loci - eg use both neighboring pairs, triplets
-    and quads as observed in hapmap ceu"""
-    dadped = '%d 1 0 0 1 1 %s'
-    mumped = '%d 2 0 0 2 1 %s' # a mother is a mum where I come from :)
-    kidped = '%d 3 1 2 %d %d %s'
-    family = [] # result accumulator
-    sex = random.choice((1,2)) # for the kid
-    affected = random.choice((1,2))
-    genos = [[],[],[]] # dad, mum, kid - 0/1 for common,rare initially, then xform to alleles
-    # parent1...kidn lists of 0/1 for common,rare initially, then xformed to alleles
-    for snp in xrange(width):
-        f = freqs[snp]           
-        for i in range(2): # do dad and mum
-            p = random.random()
-            a1 = a2 = 0
-            if p <= f: # a rare allele
-               a1 = 1
-            p = random.random()
-            if p <= f: # a rare allele
-               a2 = 1
-            if a1 > a2:
-                a1,a2 = a2,a1 # so ordering consistent - 00,01,11
-            dip = (a1,a2)
-            genos[i].append(dip) # tuples of 0,1
-        a1 = random.choice(genos[0][snp]) # dad gamete  
-        a2 = random.choice(genos[1][snp]) # mum gamete
-        if a1 > a2:
-            a1,a2 = a2,a1 # so ordering consistent - 00,01,11
-        kiddip = (a1,a2) # NSFW mating!
-        genos[2].append(kiddip)
-        if mendrate > 0:
-            if random.random() <= mendrate:
-                genos[2][snp] = makeTriomendel(genos[0][snp],genos[1][snp], kiddip)
-        achoice = alleles[snp]
-        for g in genos: # now convert to alleles using allele dict
-          a1 = achoice[g[snp][0]] # get allele letter
-          a2 = achoice[g[snp][1]]              
-          g[snp] = '%s %s' % (a1,a2)
-    if missrate > 0:
-        genos = makeTriomissing(genos=genos,missrate=missrate, missval=missval)
-    family.append(dadped % (trio,' '.join(genos[0]))) # create a row for each member of trio
-    family.append(mumped % (trio,' '.join(genos[1])))
-    family.append(kidped % (trio,sex,affected,' '.join(genos[2])))
-    return family
-
-def makePerson(width=100, aff=1, freqs={}, alleles={}, id=1, missrate = 0.03, missval='0'):
-    """make an entire genotype vector for an independent subject"""
-    sex = random.choice((1,2))
-    if not aff:
-        aff = random.choice((1,2))
-    genos = [] #0/1 for common,rare initially, then xform to alleles
-    family = []
-    personped = '%d 1 0 0 %d %d %s'
-    poly = (0,1)
-    for snp in xrange(width):
-        achoice = alleles[snp]
-        f = freqs[snp]
-        p = random.random()
-        a1 = a2 = 0
-        if p <= f: # a rare allele
-           a1 = 1
-        p = random.random()
-        if p <= f: # a rare allele
-           a2 = 1
-        if a1 > a2:
-            a1,a2 = a2,a1 # so ordering consistent - 00,01,11
-        a1 = achoice[a1] # get allele letter
-        a2 = achoice[a2]
-        g = '%s %s' % (a1,a2)
-        genos.append(g)
-    if missrate > 0.0:
-        genos = makeMissing(genos=genos,missrate=missrate, missval=missval)
-    family.append(personped % (id,sex,aff,' '.join(genos)))
-    return family
-
-def makeHapmap(fprefix= 'fakebigped',width=100, aff=[], freqs={},
-               alleles={}, nsubj = 2000, trios = True, mendrate=0.03, missrate = 0.03, missval='0'):
-    """ fake a hapmap file and a pedigree file for eg haploview
-    this is arranged as the transpose of a ped file - cols are subjects, rows are markers
-    so we need to generate differently since we can't do the transpose in ram reliably for
-    a few billion genotypes...
-    """
-    outheadprefix = 'rs# alleles chrom pos strand assembly# center protLSID assayLSID panelLSID QCcode %s'
-    cfake5 = ["illumina","urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1", 
-"urn:LSID:illumina.hapmap.org:Assay:27741:1","urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1","QC+"]
-    yfake5 = ["illumina","urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1", 
-"urn:LSID:illumina.hapmap.org:Assay:27741:1","urn:LSID:dcc.hapmap.org:Panel:Yoruba-30-trios:1","QC+"]
-    sampids = ids
-    if trios:
-        ts = '%d trios' % int(nsubj/3.)
-    else:
-        ts = '%d unrelated subjects' % nsubj
-    res = ['#%s fake hapmap file %d snps and %s, faked by %s' % (timenow(), width, ts, prog),]
-    res.append('# ross lazarus me fecit')
-    res.append(outheadprefix % ' '.join(sampids)) # make a header compatible with hapmap extracts
-    outf = open('%s.hmap' % (fprefix), 'w')
-    started = time.time()
-    if trios:
-        ntrios = int(nsubj/3.)
-        for n in range(ntrios):
-            row = copy.copy(cfake5) # get first fields
-            row = map(str,row)
-            if race == "YRI":
-                row += yfake5
-            elif race == 'CEU':
-                row += cfake5
-            else:
-                row += ['NA' for x in range(5)] # 5 dummy fields = center protLSID assayLSID panelLSID QCcode
-            row += [''.join(sorted(line[x])) for x in sampids] # the genotypes in header (sorted) sample id order
-            res.append(' '.join(row))
-    res.append('')
-    outfname = '%s_%s_%s_%dkb.geno' % (gene,probeid,race,2*flank/1000)
-    f = file(outfname,'w')
-    f.write('\n'.join(res))
-    f.close()
-    print '### %s: Wrote %d lines to %s' % (timenow(), len(res),outfname)
-     
-
-def makePed(fprefix= 'fakebigped', fpath='./',
-            width=500000, nsubj=2000, MAFdistribution=[],alleles={},
-            freqs={}, fbatstyle=True, mendrate = 0.0, missrate = 0.03, missval='0',fbathead=''):
-    """fake trios with mendel consistent random mating genotypes in offspring
-    with consistent alleles and MAFs for the sample"""
-    res = []
-    if fbatstyle: # add a header row with the marker names
-        res.append(fbathead) # header row for fbat
-    outfname = '%s.ped'% (fprefix)
-    outfname = os.path.join(fpath,outfname)
-    outf = open(outfname,'w')
-    ntrios = int(nsubj/3.)
-    started = time.time()
-    for trio in xrange(ntrios):
-        family = makeFam(width=width, freqs=freqs, alleles=alleles, trio=trio,
-                         missrate = missrate, mendrate=mendrate, missval=missval)
-        res += family
-        if (trio + 1) % 10 == 0: # write out to keep ram requirements reasonable
-            if (trio + 1) % 50 == 0: # show progress
-                dur = time.time() - started
-                if dur == 0:
-                    dur = 1.0
-                print 'Trio: %d, %4.1f genos/sec at %6.1f sec' % (trio + 1, width*trio*3/dur, dur)
-            outf.write('\n'.join(res))
-            outf.write('\n')
-            res = []
-    if len(res) > 0: # some left
-        outf.write('\n'.join(res))
-    outf.write('\n')
-    outf.close()
-    if debug:
-        print '##makeped : %6.1f seconds total runtime' % (time.time() - started)
-
-def makeIndep(fprefix = 'fakebigped', fpath='./',
-              width=500000, Nunaff=1000, Naff=1000, MAFdistribution=[],
-              alleles={}, freqs={}, fbatstyle=True, missrate = 0.03, missval='0',fbathead=''):
-    """fake a random sample from a random mating sample
-    with consistent alleles and MAFs"""
-    res = []
-    Ntot = Nunaff + Naff
-    status = [1,]*Nunaff
-    status += [2,]*Naff
-    outf = '%s.ped' % (fprefix)
-    outf = os.path.join(fpath,outf)
-    outf = open(outf, 'w')
-    started = time.time()
-    #sample = personMaker(width=width, affs=status, freqs=freqs, alleles=alleles, Ntomake=Ntot)
-    if fbatstyle: # add a header row with the marker names
-        res.append(fbathead) # header row for fbat
-    for id in xrange(Ntot):
-        if id < Nunaff:
-            aff = 1
-        else:
-            aff = 2
-        family = makePerson(width=width, aff=aff, freqs=freqs, alleles=alleles, id=id+1)
-        res += family
-        if (id % 50 == 0): # write out to keep ram requirements reasonable
-            if (id % 200 == 0): # show progress
-                dur = time.time() - started
-                if dur == 0:
-                    dur = 1.0
-                print 'Id: %d, %4.1f genos/sec at %6.1f sec' % (id, width*id/dur, dur)
-            outf.write('\n'.join(res))
-            outf.write('\n')
-            res = []
-    if len(res) > 0: # some left
-        outf.write('\n'.join(res))
-    outf.write('\n')
-    outf.close()
-    print '## makeindep: %6.1f seconds total runtime' % (time.time() - started)
-
-u = """
-Generate either trios or independent subjects with a prespecified
-number of random alleles and a uniform or triangular MAF distribution for
-stress testing. No LD is simulated - alleles are random. Offspring for
-trios are generated by random mating the random parental alleles so there are
-no Mendelian errors unless the -M option is used. Mendelian errors are generated
-randomly according to the possible errors given the parental mating type, although
-this is fresh code and not guaranteed to work quite right yet - comments welcomed.
-
-Enquiries to ross.lazarus@gmail.com
-
-eg to generate 700 trios with 500k snps, use:
-fakebigped.py -n 2100 -s 500000
-or to generate 500 independent cases and 500 controls with 100k snps and 0.02 missingness (MCAR), use:
-fakebigped.py -c 500 -n 1000 -s 100000 -m 0.02
-
-fakebigped.py -o myfake -m 0.05 -s 100000 -n 2000
-will make fbat compatible myfake.ped with 100k markers in
-666 trios (total close to 2000 subjects), a uniform MAF distribution and about 5% MCAR missing
-
-fakebigped.py -o myfake -m 0.05 -s 100000 -n 2000 -M 0.05
-will make fbat compatible myfake.ped with 100k markers in
-666 trios (total close to 2000 subjects), a uniform MAF distribution,
-about 5% Mendelian errors and about 5% MCAR missing
-
-
-fakebigped.py -o myfakecc -m 0.05 -s 100000 -n 2000 -c 1000 -l L -d T
-will make plink compatible myfakecc.ped and myfakecc.map (that's what the -l L option does),
-with 100k markers in 1000 cases and 1000 controls (affection status 2 and 1 respectively),
-a triangular MAF distribution (more rare alleles) and about 5% MCAR missing
-
-You should see about 1/4 million genotypes/second, so about an hour for
-500k snps in 2k subjects, and about a 4GB ped file - these are BIG!!
-
-"""
-
-import glob # sys and os are already imported above
-
-galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
-<title></title>
-<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
-</head>
-<body>
-<div class="document">
-"""
-
-
-def doImport(outfile=None,outpath=None):
-    """ import into one of the new html composite data types for Rgenetics
-        Dan Blankenberg with mods by Ross Lazarus 
-        October 2007
-    """
-    flist = glob.glob(os.path.join(outpath,'*'))
-    outf = open(outfile,'w')
-    outf.write(galhtmlprefix % prog)
-    for i, data in enumerate( flist ):
-        outf.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1]))
-    outf.write('<br><h3>This is simulated null genotype data generated by Rgenetics!</h3>')
-    outf.write('%s called with command line:<br><pre>' % prog)
-    outf.write(' '.join(sys.argv))
-    outf.write('\n</pre>\n')
-    outf.write("</div></body></html>")
-    outf.close()
-
-
-
-if __name__ == "__main__":
-    """
-    """
-    parser = OptionParser(usage=u, version="%prog 0.01")
-    a = parser.add_option
-    a("-n","--nsubjects",type="int",dest="Ntot",
-      help="nsubj: total number of subjects",default=2000)
-    a("-t","--title",dest="title",
-      help="title: file basename for outputs",default='fakeped')
-    a("-c","--cases",type="int",dest="Naff",
-      help="number of cases: independent subjects with status set to 2 (ie cases). If not set, NTOT/3 trios will be generated", default = 0)
-    a("-s","--snps",dest="width",type="int",
-      help="snps: total number of snps per subject", default=1000)
-    a("-d","--distribution",dest="MAFdist",default="Uniform",
-      help="MAF distribution - default is Uniform, can be Triangular")
-    a("-o","--outf",dest="outf",
-      help="Output file", default = 'fakeped')
-    a("-p","--outpath",dest="outpath",
-      help="Path for output files", default = './')
-    a("-l","--pLink",dest="outstyle", default='L',
-      help="Ped files as for Plink - no header, separate Map file - default is Plink style")
-    a("-w","--loWmaf", type="float", dest="lowmaf", default=0.01, help="Lower limit for SNP MAF (minor allele freq)")
-    a("-m","--missing",dest="missrate",type="float",
-      help="missing: probability of missing MCAR - default 0.0", default=0.0)
-    a("-v","--valmiss",dest="missval",
-      help="missing character: Missing allele code - usually 0 or N - default 0", default="0")
-    a("-M","--Mendelrate",dest="mendrate",type="float",
-      help="Mendelian error rate: probability of a mendel error per trio, default=0.0", default=0.0)   
-    a("-H","--noHGRS",dest="useHG",type="int",
-      help="Use local copy of UCSC snp126 database to generate real rs numbers", default=True)
-    (options,args) = parser.parse_args()
-    low = options.lowmaf
-    try:
-        os.makedirs(options.outpath)
-    except:
-        pass
-    if options.MAFdist.upper() == 'U':
-        mafDist = makeUniformMAFdist(low=low, high=0.5)
-    else:
-        mafDist = makeTriangularMAFdist(low=low, high=0.5, beta=5)
-    alleles,freqs, rslist, chromlist, poslist = makeMap(width=int(options.width),
-                                        MAFdistribution=mafDist, useGP=False)
-    fbathead = []
-    s = string.whitespace+string.punctuation
-    trantab = string.maketrans(s,'_'*len(s))
-    title = string.translate(options.title,trantab)
-    
-    if options.outstyle == 'F':
-        fbatstyle = True
-        fbathead = makeFbathead(rslist=rslist, chromlist=chromlist, poslist=poslist, width=options.width)
-    else:
-        fbatstyle = False
-        writeMap(fprefix=defbasename, rslist=rslist, fpath=options.outpath,
-                 chromlist=chromlist, poslist=poslist, width=options.width)
-    if options.Naff > 0: # make case control data
-        makeIndep(fprefix = defbasename, fpath=options.outpath,
-                  width=options.width, Nunaff=options.Ntot-options.Naff,
-                  Naff=options.Naff, MAFdistribution=mafDist,alleles=alleles, freqs=freqs,
-                  fbatstyle=fbatstyle, missrate=options.missrate, missval=options.missval,
-                  fbathead=fbathead)
-    else:
-        makePed(fprefix=defbasename, fpath=options.outpath,
-            width=options.width, MAFdistribution=mafDist, nsubj=options.Ntot,
-            alleles=alleles, freqs=freqs, fbatstyle=fbatstyle, missrate=options.missrate,
-            mendrate=options.mendrate, missval=options.missval,
-            fbathead=fbathead)
-    doImport(outfile=options.outf,outpath=options.outpath)
-
-
-        
--- a/tools/rgenetics/rgfakePed.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-<tool id="rgfakePed1" name="Null genotypes" version="0.02">
-  <description>for testing</description>
-  <command interpreter="python">rgfakePed.py --title '$title'
-  -o '$out_file1' -p '$out_file1.files_path' -c '$ncases' -n '$ntotal'
-  -s '$nsnp'  -w '$lowmaf' -v '$missingValue' -l '$outFormat'
-  -d '$mafdist' -m '$missingRate' -M '$mendelRate' </command>
-   <inputs>
-
-    <param name="title"
-         type="text" value="Fake_test_geno_data"
-         help="Name for outputs from this job"
-         label="Descriptive short name"/>
-    <param name="ntotal"
-         type="integer" value = "200"
-         help="N total: total number of subjects"
-         label="Create this total N subjects"/>
-    <param name="ncases" type="integer"
-         value="100"
-         help = "N cases: Independent subjects with status set to 2. Set 0 for family data (NSubj/3 trios)"
-         label="Total N Cases (0=generate family data - trios)"/>
-    <param name="nsnp"
-         type="integer" value="1000"
-         help="nsnp: total number of markers"
-         label="Total N SNP"/>
-    <param name="lowmaf" type="float"
-         value="0.01"
-         help = "Lower limit for MAF distribution"
-         label="Lower MAF limit (default=1%)"/>
-    <param name="mafdist"
-         type="select"
-         help="Choose a MAF distribution"
-         label="SNP Minor Allele Frequency distribution">
-           <option value="U" selected="true">Uniform</option>
-           <option value="T">Triangular (more low frequency SNPs)</option>
-    </param>
-    <param name="outFormat"
-         type="select"
-         help="Choose an output format"
-         label="Output format file type - linkage ped or fbat ped">
-           <option value="L" selected="true">Linkage format - separate .map file</option>
-           <option value="F">fbat style - marker names in a header row</option>
-    </param>
-    <param name="missingRate" type="float"
-         value="0.05"
-         help = "Fraction of genotypes to be randomly set missing"
-         label="Missing genotype call fraction"/>
-    <param name="mendelRate"
-         type="float" value = "0.05"
-         help="(family data) Fraction of apparently non-Mendelian transmission patterns"
-         label="Mendel error transmission rate"/>
-
-    <param name="missingValue" type="text" size="1"
-         value='0'
-         help = "Missing allele value"
-         label="Missing value for an allele for the output ped file"/>
-
-</inputs>
-
- <outputs>
-    <data format="lped" name="out_file1" label="${title}.lped"/>
-  </outputs>
-<tests>
- <test>
-    <param name='title' value='rgfakePedtest1' />
-    <param name="ntotal" value="40" />
-    <param name="ncases" value="20" />
-    <param name="nsnp" value="10" />
-    <param name="lowmaf" value="0" />
-    <param name="mafdist" value="T" />
-    <param name="outFormat" value="L" />
-    <param name="missingRate" value="0" />
-    <param name="mendelRate" value="0" />
-    <param name="missingValue" value="0" />
-    <output name='out_file1' file='rgtestouts/rgfakePed/rgfakePedtest1.lped' ftype='lped' compare="diff" lines_diff='5'>
-    <extra_files type="file" name='RgeneticsData.ped' value="rgtestouts/rgfakePed/rgfakePedtest1.ped" compare="diff" lines_diff='80'/>
-    <extra_files type="file" name='RgeneticsData.map' value="rgtestouts/rgfakePed/rgfakePedtest1.map" compare="diff" />
-    </output>
- </test>
-</tests>
-<help>
-.. class:: infomark
-
-This tool allows you to generate an arbitrary (sort of)
-synthetic genotype file (no attempt at LD - the markers are independent)
-with optional missingness, Mendel errors, minor allele frequency settings and family structure.
-These might be used for testing under
-the null hypothesis of no association and are certainly useful for
-scale testing.
-
-Note that although it runs reasonably fast given that it's a script, generating a large data set takes
-a while. An hour or so should get you a reasonably sized (3GB) simulated null data set.
-
-A better simulator can easily be swapped in with this tool interface.
-
------
-
-.. class:: warningmark
-
-This tool is very experimental
-
-.. class:: infomark
-
-**Attribution and Licensing**
-
-Designed and written for the Rgenetics Galaxy tools
-copyright Ross Lazarus 2007 (ross.lazarus@gmail.com)
-Licensed under the terms of the LGPL_
-
-.. _LGPL: http://www.gnu.org/copyleft/lesser.html
-
-</help>
-</tool>
--- a/tools/rgenetics/rgfakePhe.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,469 +0,0 @@
-"""
-fakephe.py
-ross lazarus sept 30 2007
-This is available under the LGPL as defined then.
-
-use the pedigree data for ids
-
-use pythons generators to literally generate a bunch of random phenotype measurements
-
-Plink format at http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#pheno
-is
-
-To specify an alternate phenotype for analysis, i.e. other than the one in the *.ped file (or, if using a binary fileset, the
-*.fam file), use the --pheno option:
-plink --file mydata --pheno pheno.txt
-
-where pheno.txt is a file that contains 3 columns (one row per individual):
-
-     Family ID
-     Individual ID
-     Phenotype
-
-NOTE The original PED file must still contain a phenotype in column 6 (even if this is a dummy phenotype, e.g. all missing),
-unless the --no-pheno flag is given.
-
-If an individual is in the original file but not listed in the alternate phenotype file, that person's phenotype will be set to
-missing. If a person is in the alternate phenotype file but not in the original file, that entry will be ignored. The order of
-the alternate phenotype file need not be the same as for the original file.
-
-If the phenotype file contains more than one phenotype, then use the --mpheno N option to specify the Nth phenotype is the one
-to be used:
-plink --file mydata --pheno pheno2.txt --mpheno 4
-
-where pheno2.txt contains 5 different phenotypes (i.e. 7 columns in total), this command will use the 4th for analysis
-(phenotype D):
-
-     Family ID
-     Individual ID
-     Phenotype A
-     Phenotype B
-     Phenotype C
-     Phenotype D
-     Phenotype E
-
-Alternatively, your alternate phenotype file can have a header row, in which case you can use variable names to specify which
-phenotype to use. If you have a header row, the first two variables must be labelled FID and IID. All subsequent variable names
-cannot have any whitespace in them. For example,
-
-     FID    IID      qt1   bmi    site
-     F1     1110     2.3   22.22  2
-     F2     2202     34.12 18.23  1
-     ...
-
-then
-plink --file mydata --pheno pheno2.txt --pheno-name bmi --assoc
-
-will select the second phenotype labelled "bmi", for analysis
-
-Finally, if there is more than one phenotype, then for basic association tests, it is possible to specify that all phenotypes
-be tested, sequentially, with the output sent to different files: e.g. if bigpheno.raw contains 10,000 phenotypes, then
-plink --bfile mydata --assoc --pheno bigpheno.raw --all-pheno
-
-will loop over all of these, one at a time testing for association with SNP, generating a lot of output. You might want to use
-the --pfilter command in this case, to only report results with a p-value less than a certain value, e.g. --pfilter 1e-3.
-
-WARNING Currently, all phenotypes must be numerically coded, including missing values, in the alternate phenotype file. The
-default missing value is -9, change this with --missing-phenotype, but it must be a numeric value still (in contrast to the
-main phenotype in the PED/FAM file). This issue will be fixed in future releases.
-Covariate files
-
-===========================
-rgfakePhe.xml
-<tool id="fakePhe1" name="Fake phenos">
-  <description>for multiple null fake phenotype</description>
-  <command interpreter="python2.4">rgfakePhe.py $input1 '$title1' $out_file1 $log_file1 $script_file</command>
-   <inputs>
-    <page>
-    <param name="input1"
-         type="library" format="lped"
-         label="Pedigree from Dataset"/>
-    <param name="title1" type="text"
-         value="My fake phenos" size="60"
-         label="Title for outputs"/>
-    </page>
-    <page>
-    <repeat name="fakePhe" title="Phenotypes to Fake">
-        <param name="pName" type="text" label="Phenotype Name">
-        </param>
-      <conditional name="series">
-        <param name="phetype" type="select" label="Phenotype Type">
-          <option value="rnorm" selected="true">Random normal variate</option>
-          <option value="cat">Random categorical</option>
-        </param>
-        <when value="rnorm">
-          <param name="Mean" type="float" value="0.0" label="Mean">
-          </param>
-          <param name="SD" type="float" label="SD" value="1.0">
-          </param>
-        </when>
-        <when value="cat">
-          <param name="values" type="text" value="1,2,3,fiddle" label="comma separated values to choose from">
-          </param>
-        </when>
-      </conditional>
-    </repeat>
-    </page>
-</inputs>
-<configfiles>
-<configfile name="script_file">
-#for $n, $f in enumerate($fakePhe)
-#if $f.series.phetype=='rnorm'
-{'pN':'$f.pName','pT':'$f.series.phetype','pP':"{'Mean':'$f.series.Mean', 'SD':'$f.series.SD'}"}
-#elif $f.series.phetype=='cat'
-{'pN':'$f.pName','pT':'$f.series.phetype','pP':"{'values':'$f.series.values'}"}
-#end if
-#end for
-</configfile>
-</configfiles>
-
- <outputs>
-    <data format="pphe" name="out_file1" />
-    <data format="text" name="log_file1" parent="out_file1"/>
-  </outputs>
-
-<help>
-.. class:: infomark
-
-This tool allows you to generate an arbitrary (sort of)
-synthetic phenotype file with measurements drawn from normal,
-gamma, or categorical distributions. These are for testing under
-the null hypothesis of no association - the values are random but
-from user specified distributions.
-
------
-
-.. class:: warningmark
-
-This tool is very experimental
-
------
-
-- **Pedigree** is a library pedigree file - the id's will be used in the synthetic null phenotypes
-- **Title** is a name to give to the output phenotype file
-
-On the next page, you can add an unlimited number of various kinds of phenotypes including choices for
-categorical ones or distributions with specific parameters
-
-Just keep adding new ones until you're done then use the Execute button to run the generation
-
-
-
-
-**Example from another tool to keep you busy and in awe**
-
-Input file::
-
-    1   68  4.1
-    2   71  4.6
-    3   62  3.8
-    4   75  4.4
-    5   58  3.2
-    6   60  3.1
-    7   67  3.8
-    8   68  4.1
-    9   71  4.3
-    10  69  3.7
-
-Create a two series XY plot on the above data:
-
-- Series 1: Red Dashed-Line plot between columns 1 and 2
-- Series 2: Blue Circular-Point plot between columns 3 and 2
-
-.. image:: ./static/images/xy_example.jpg
-</help>
-</tool>
-
-
-
-"""
-
-import random,copy,sys,os,time,string,math
-
-doFbatphe = False
-
-galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
-<title></title>
-<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
-</head>
-<body>
-<div class="document">
-"""
-
-
-def timenow():
-    """return current time as a string
-    """
-    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
-
-
-def poisson(lamb=2):
-    """
-    algorithm poisson random number (Knuth):
-    init:
-         Let L = e^-lamb, k = 0 and p = 1.
-    do:
-         k = k + 1.
-         Generate uniform random number u in [0,1] and let p = p u.
-    while p >= L.
-    return k - 1.
-    """
-    lamb = float(lamb)
-    l = math.e**(-lamb)
-    while 1:
-        k = 0 # reinitialise state for each yielded sample
-        p = 1.0
-        while p >= l:
-            k += 1
-            u = random.uniform(0.0,1.0)
-            p *= u
-        yield '%e' % (k - 1)
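-
-# Illustrative check (not in the original): the mean of values drawn from
-# poisson(lamb=2) converges on 2; each yielded value is a string such as
-# '1.000000e+00', matching the other phenotype generators below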
-
-def gammaPhe(alpha=1,beta=1):
-    """generator for random values from a gamma
-    """
-    while 1: # an iterator for a random phenotype
-        dat = random.gammavariate(float(alpha),float(beta))
-        yield '%e' % dat
-
-def weibullPhe(alpha=1,beta=1):
-    """generator for random values from a weibull distribution
-    """
-    while 1: # an iterator for a random phenotype
-        dat = random.weibullvariate(float(alpha),float(beta))
-        yield '%e' % dat
-
-def normPhe(mean=0,sd=1):
-    """generator for random values from a normal distribution
-    """
-    while 1:# an iterator for a random phenotype
-        dat = random.normalvariate(float(mean),float(sd))
-        yield '%e' % dat
-
-def expoPhe(mean=1):
-    """generator for random values from an exponential distribution
-    """
-    lamb = 1.0/float(mean)
-    while 1:# an iterator for a random phenotype
-        dat = random.expovariate(lamb)
-        yield '%e' % dat
-
-
-def catPhe(values='1,2,3'):
-    """ Schrodinger's of course.
-    """
-    v = values.split(',')
-    while 1:# an iterator for a random phenotype
-        dat = random.choice(v)
-        yield dat
-
-def uniPhe(low=0.0,hi=1.0):
-    """generator for a uniform distribution
-       works for any low <= hi, including negative ranges such as low=-5, hi=-2
-    """
-    low = float(low)
-    hi = float(hi)
-    while 1: # unif phenotype
-        v = random.uniform(low,hi) # 0-1
-        yield '%e' % v
-
-def getIds(indir='foo'):
-    """read identifiers - first 2 cols from a pedigree file or fam file
-    """
-    inpath = os.path.split(indir)[0] # get root
-    infam = '%s.fam' % indir
-    inped = '%s.ped' % indir # if there's a ped
-    flist = os.listdir(inpath)
-    if len(flist) == 0:
-        print >> sys.stderr, '### Error - input path %s is empty' % indir
-        sys.exit(1)
-    if os.path.exists(infam):
-        pfname = infam
-    elif os.path.exists(inped):
-        pfname = inped
-    else:
-        print >> sys.stderr, '### Error - input path %s contains no ped or fam' % indir
-        sys.exit(1)   
-    f = file(pfname,'r')
-    ids = []
-    for l in f:
-        ll = l.split()
-        if len(ll) > 5: # ok line?
-            ids.append(ll[:2])
-    return ids
-
-def makePhe(phes = [],ids=[]):
-    """Create the null phenotype values and append them to the case id
-    phes is the (generator, headername) for each column
-    for a phe file, ids are the case identifiers for the phenotype file
-    res contains the final strings for the file
-    each column is derived by iterating
-    over the generator actions set up by makePhes
-    """
-    header = ['FID','IID'] # for plink
-    res = copy.copy(ids)
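-    # NB: copy.copy is shallow, so the two-element id lists inside ids are
-    # extended in place by the appends below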
-    for (f,fname) in phes:
-        header.append(fname)
-        for n,subject in enumerate(ids):
-            res[n].append(f.next()) # generate the right value
-    res.insert(0,header)
-    res = [' '.join(x) for x in res] # must be space delim for fbat
-    return res
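-
-# Illustrative example (not in the original): with ids=[['F1','1110']] and a
-# single phenotype named 'bmi', makePhe returns rows like
-# ['FID IID bmi', 'F1 1110 2.345678e+01'] - one space-delimited line per subject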
-
-def makePhes(pheTypes=[], pheNames=[], pheParams=[]):
-    """set up phes for makePhe
-    each has to have an iterator (action) and a name
-    """
-    action = None
-    phes = []
-    for n,pt in enumerate(pheTypes):
-        name = pheNames[n]
-        if pt == 'rnorm':
-            m = pheParams[n].get('Mean',0.0)
-            s = pheParams[n].get('SD',1.0)
-            action = normPhe(mean=m,sd=s) # so we can just iterate over action
-        elif pt == 'rgamma':
-            m = pheParams[n].get('Alpha',0.0)
-            s = pheParams[n].get('Beta',1.0)
-            action = gammaPhe(alpha=m,beta=s)
-        elif pt == 'exponential':
-            m = pheParams[n].get('Mean',1.0)
-            action = expoPhe(mean=m) # so we can just iterate over action
-        elif pt == 'weibull':
-            m = pheParams[n].get('Alpha',0.0)
-            s = pheParams[n].get('Beta',1.0)
-            action = weibullPhe(alpha=m,beta=s)
-        elif pt == 'cat':
-            v = pheParams[n].get('values',['?',])
-            action = catPhe(values=v)
-        elif pt == 'unif':
-            low = pheParams[n].get('low',0.0)
-            hi = pheParams[n].get('hi',1.0)
-            action = uniPhe(low=low,hi=hi)
-        elif pt == 'poisson':
-            lamb = pheParams[n].get('lamb',1.0)
-            action = poisson(lamb=lamb)
-        phes.append((action,name))
-    return phes
-
-def doImport(outfile='test',flist=[],expl='',mylog=[]):
-    """ import into one of the new html composite data types for Rgenetics
-        Dan Blankenberg with mods by Ross Lazarus
-        October 2007
-    """
-    outf = open(outfile,'w')
-    progname = os.path.basename(sys.argv[0])
-    outf.write(galhtmlprefix % progname)
-    if len(flist) > 0:
-        outf.write('<ol>\n')
-        for i, data in enumerate( flist ):
-           outf.write('<li><a href="%s">%s %s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1],expl))
-        outf.write('</ol>\n')
-    else:
-           outf.write('No files found')
-    outf.write('<hr/><h4>Log of run follows:</h4><br/><pre>%s\n</pre><br/><hr/>' % ('\n'.join(mylog)))
-    outf.write("</div></body></html>\n")
-    outf.close()
-
-
-def test():
-    """test case
-    need to get these out of a galaxy form - series of pages - get types
-    on first screen, names on second, params on third?
-    holy shit. this actually works I think
-    """
-    pT = ['rnorm','rnorm','rnorm','rnorm','cat','unif']
-    pN = ['SysBP','DiaBP','HtCM','WtKG','Race','age']
-    pP = [{'Mean':'120','SD':'10'},{'Mean':'90','SD':'15'},{'Mean':'160','SD':'20'},{'Mean':'60','SD':'20'}, \
-          {'values':'Blink,What,Yours,green'},{'low':16,'hi':99}]
-    phes = makePhes(pheTypes=pT, pheNames=pN, pheParams=pP)
-    ids = []
-    for i in range(10):
-        ids.append(['fid%d' % i,'iid%d' % i])
-    pheres = makePhe(phes=phes,ids=ids)
-    print '\n'.join(pheres)
-
-
-
-if __name__ == "__main__":
-    """
-   <command interpreter="python">rgfakePhe.py '$infile1.extra_files_path/$infile1.metadata.base_name'
-   "$title1" '$ppheout' '$ppheout.files_path' '$script_file '
-   </command>
-    The xml file for this tool is complex, and allows any arbitrary number of
-    phenotype columns to be specified from a couple of optional types - rnorm, cat
-    are working now.
-
-    Note that we create new files in their respective library directories and link to them in the output file
-    so they can be displayed and downloaded separately
-
-    """
-    killme = string.punctuation + string.whitespace
-    trantab = string.maketrans(killme,'_'*len(killme))
-    progname = os.path.basename(sys.argv[0])
-    cl = '## at %s, %s got cl= %s' % (timenow(),progname,' '.join(sys.argv))
-    print >> sys.stdout,cl
-    if len(sys.argv) < 6: # need inped, title, ppheout, pphe_path and scriptfile
-        test()
-    else:
-        inped = sys.argv[1]
-        title = sys.argv[2].translate(trantab)
-        ppheout = sys.argv[3]
-        pphe_path = sys.argv[4]
-        scriptfile = sys.argv[5]
-        ind = file(scriptfile,'r').readlines()
-        mylog = []
-        s = '## %s starting at %s<br/>\n' % (progname,timenow())
-        mylog.append(s)
-        mylog.append(cl)
-        s = '## params = %s<br/>\n' % (' '.join(sys.argv[1:]))
-        mylog.append(s)
-        s = '\n'.join(ind)
-        mylog.append('Script file %s contained %s<br/>\n' % (scriptfile,s))
-        pT = []
-        pN = []
-        pP = []
-        for l in ind:
-            l = l.strip()
-            if len(l) > 1:
-                adict = eval(l)
-                pT.append(adict.get('pT',None))
-                pN.append(adict.get('pN',None))
-                pP.append(eval(adict.get('pP',None)))
-        s = '## pt,pn,pp=%s,%s,%s<br/>\n' % (str(pT),str(pN),str(pP))
-        mylog.append(s)
-        phes = makePhes(pheTypes=pT, pheNames=pN, pheParams=pP)
-        ids = getIds(indir=inped) # for labelling rows
-        pheres = makePhe(phes=phes,ids=ids) # random values from given distributions
-        try:
-            os.makedirs(pphe_path)
-        except:
-            pass
-        outname = os.path.join(pphe_path,title)
-        pphefname = '%s.pphe' % outname
-        f = file(pphefname, 'w')
-        f.write('\n'.join(pheres))
-        f.write('\n')
-        f.close()
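-        # NB: the doFbatphe branch below references fphe_path and fpheout,
-        # neither of which is defined in this script; it is dead code while
-        # doFbatphe remains False (set near the top of the module)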
-        if doFbatphe:
-            try:
-                os.makedirs(fphe_path)
-            except:
-                pass
-            outname = os.path.join(fphe_path,title)
-            fphefname = '%s.phe' % outname
-            f = file(fphefname, 'w')
-            header = pheres[0].split()
-            pheres[0] = ' '.join(header[2:])# remove iid fid from header for fbat
-            f.write('\n'.join(pheres))
-            f.close()
-            doImport(outfile=fpheout,flist=[fphefname,],expl='(FBAT phenotype format)',mylog=mylog)
-        #doImport(outfile='test',flist=[],expl='',mylog=[]):
-        doImport(outfile=ppheout,flist=[pphefname,],expl='(Plink phenotype format)',mylog=mylog)
-
--- a/tools/rgenetics/rgfakePhe.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,131 +0,0 @@
-<tool id="fakePhe1" name="Null phenotypes">
-  <description>for testing</description>
-   <command interpreter="python">rgfakePhe.py '$infile1.extra_files_path/$infile1.metadata.base_name'
-   "$title1" '$ppheout' '$ppheout.files_path' '$script_file'
-   </command>
-   <inputs>
-    <page>
-    <param name="infile1"
-         type="data" format="pbed,lped"
-         label="Pedigree from Dataset" />
-        <param name="title1" type="text"
-         value="My null phenos" size="60"
-         label="Title for outputs"/>
-        <param name="dbkey" type="hidden" value='hg18' />
-    </page>
-    <page>
-    <repeat name="fakePhe" title="Phenotypes to simulate under the Null">
-        <param name="pName" type="text" label="Phenotype Name">
-        </param>
-      <conditional name="series">
-        <param name="phetype" type="select" label="Phenotype Distribution">
-          <option value="rnorm" selected="true">Random Normal variate</option>
-          <option value="unif">Random Uniform variate</option>
-          <option value="rgamma">Random Gamma variate</option>
-          <option value="weibull">Random Weibull variate</option>
-          <option value="exponential">Random exponential variate</option>
-          <option value="poisson">Random Poisson variate</option>
-          <option value="cat">Random categorical choice</option>
-        </param>
-        <when value="poisson">
-          <param name="lamb" type="integer" value="2" label="Lambda (mean and variance)" />
-        </when>
-        <when value="rnorm">
-          <param name="Mean" type="float" value="0.0" label="Mean" />
-          <param name="SD" type="float" label="SD" value="1.0"/>
-        </when>
-        <when value="exponential">
-          <param name="Mean" type="float" value="1.0" label="Mean" help="lambda for the exponential will be 1.0/Mean" />=
-        </when>
-        <when value="rgamma">
-          <param name="Alpha" type="float" value="10" label="Alpha">
-          </param>
-          <param name="Beta" type="float" label="Beta" value="1.0">
-          </param>
-        </when>
-        <when value="weibull">
-          <param name="Alpha" type="float" value="10" label="Alpha">
-          </param>
-          <param name="Beta" type="float" label="Beta" value="1.0">
-          </param>
-        </when>
-        <when value="unif">
-          <param name="low" type="float" value="0.0" label="Lowest uniform value">
-          </param>
-          <param name="hi" type="float" label="Highest uniform value" value="1.0"
-           help="A uniform value will be generated from the range specified (low to high) - eg 0.0 to 1.0">
-          </param>
-        </when>
-        <when value="cat">
-          <param name="values" type="text" value="A,B,C" label="Comma separated values to choose from"
-         help = "Each of the comma separated values will have an equal probability of being chosen - eg 'A1,A2,B1,B2'">
-          </param>
-        </when>
-      </conditional>
-    </repeat>
-    </page>
-</inputs>
-<outputs>
-       <data format="pphe" name="ppheout"  metadata_source="infile1" />
-</outputs>
-<configfiles>
-<configfile name="script_file">
-#for $n, $f in enumerate($fakePhe)
-#if $f.series.phetype=='rnorm'
-{'pN':'$f.pName','pT':'rnorm','pP':"{'Mean':'$f.series.Mean', 'SD':'$f.series.SD'}"}
-#elif $f.series.phetype=='rgamma'
-{'pN':'$f.pName','pT':'rgamma','pP':"{'Alpha':'$f.series.Alpha', 'Beta':'$f.series.Beta'}"}
-#elif $f.series.phetype=='poisson'
-{'pN':'$f.pName','pT':'poisson','pP':"{'lamb':'$f.series.lamb',}"}
-#elif $f.series.phetype=='exponential'
-{'pN':'$f.pName','pT':'exponential','pP':"{'Mean':'$f.series.Mean',}"}
-#elif $f.series.phetype=='weibull'
-{'pN':'$f.pName','pT':'weibull','pP':"{'Alpha':'$f.series.Alpha', 'Beta':'$f.series.Beta'}"}
-#elif $f.series.phetype=='cat'
-{'pN':'$f.pName','pT':'$f.series.phetype','pP':"{'values':'$f.series.values'}"}
-#elif $f.series.phetype=='unif'
-{'pN':'$f.pName','pT':'$f.series.phetype','pP':"{'low':'$f.series.low','hi':'$f.series.hi'}"}
-#end if
-#end for
-</configfile>
-</configfiles>
-<help>
-.. class:: infomark
-
-This tool allows you to generate an arbitrary (sort of)
-synthetic phenotype file with measurements drawn from normal,
-gamma, weibull, exponential, uniform or categorical distributions. These are for testing under
-the null hypothesis of no association - the values are random but
-from user specified distributions.
-
-Two output files will appear - one for FBAT and the other for Plink, since unfortunately
-they have slightly differing requirements for the header row.
-
------
-
-.. class:: warningmark
-
-This tool is very experimental
-
------
-
-- **Pedigree** is a library pedigree file - the id's will be used in the synthetic null phenotypes
-- **Title** is a name to give to the output phenotype file
-
-On the next page, you can add an unlimited number of various kinds of phenotypes including choices for
-categorical ones or distributions with specific parameters
-
-Just keep using the "Add new phenotype" button to add new specifications until you're done.
-Use the Execute button to run the program and generate the null phenotype data.
-The new files will be available on the drop down lists for appropriate tools - eg the
-FBAT format one will be available if you run the FBAT modelling tool.
-
-**Attribution**
-
-Originally designed and written for the Rgenetics
-series of Galaxy tools;
-copyright Ross Lazarus 2007 (ross period lazarus at gmail period com).
-Licensed under the terms of the LGPL,
-as documented at http://www.gnu.org/licenses/lgpl.html
-
-</help>
-</tool>
--- a/tools/rgenetics/rgtest.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,201 +0,0 @@
-#!/bin/sh
-# script to generate all functional test outputs for each rgenetics tool
-# could be run at installation to ensure all dependencies are in place?
-if test $# -lt 2
-then
-   echo "We need to agree on 2 parameters - GalaxyRoot and OutRoot - use paths to galaxy and galaxy to re-create all test outputs"
-   echo "or more prudently, galaxy and /tmp/foo for checking without updating all your test-data"
-   echo "Exiting with no changes"
-   exit 1
-fi
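-# example invocation (illustrative paths): sh rgtest.sh /opt/galaxy /tmp/foo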
-if [ $1 ]
-then
-  GALAXYROOT=$1
-else
-  GALAXYROOT=`pwd`
-fi
-if [ $2 ]
-then
-  OUTROOT=$2
-else
-  OUTROOT=`pwd`
-  OUTROOT="$OUTROOT/test-data"
-fi
-echo "using $GALAXYROOT as galaxyroot and $OUTROOT as outroot"
-# change this as needed for your local install
-INPATH="${GALAXYROOT}/test-data"
-JARPATH="${GALAXYROOT}/tool-data/shared/jars"
-TOOLPATH="${GALAXYROOT}/tools/rgenetics"
-OROOT="${OUTROOT}/test-data/rgtestouts"
-NORMALOROOT="${OUTROOT}/test-data"
-mkdir -p $OROOT
-rm -rf $OROOT/*
-# needed for testing - but tool versions should be bumped if this is rerun?
-TOOL="rgManQQ"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-mkdir $OUTPATH
-CL="python $TOOLPATH/$TOOL.py "$INPATH/smallwgaP.xls" $NPRE ${OUTPATH}/${NPRE}.html $OUTPATH 1 2 7 0"
-# rgManQQ.py '$input_file' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col' 
-# '$pval_col'
-#python /opt/galaxy/tools/rgenetics/rgManQQ.py /opt/galaxy/test-data/smallwgaP.xls rgManQQtest1 
-#/opt/galaxy/test-data/rgtestouts/rgManQQ/rgManQQtest1.html /opt/galaxy/test-data/rgtestouts/rgManQQ 1 2 5,7 
-echo "Testing $TOOL using $CL"
-python $TOOLPATH/$TOOL.py "$INPATH/smallwgaP.xls" $NPRE ${OUTPATH}/${NPRE}.html $OUTPATH 1 2 7 0
-TOOL="rgfakePhe"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-mkdir $OUTPATH
-PSSCRIPT="$OUTPATH/script_file"
-echo "{'pN':'normtest','pT':'rnorm','pP':\"{'Mean':'100', 'SD':'10'}\"}" > $PSSCRIPT
-echo "{'pN':'cattest','pT':'cat','pP':\"{'values':'red,green,blue'}\"}" >> $PSSCRIPT
-echo "{'pN':'uniftest','pT':'$f.series.phetype','pP':\"{'low':'1','hi':'100'}\"}" >> $PSSCRIPT
-echo "{'pN':'gammatest','pT':'rgamma','pP':\"{'Alpha':'1', 'Beta':'0.1'}\"}" >> $PSSCRIPT
-echo "{'pN':'poissontest','pT':'poisson','pP':\"{'lamb':'1.0',}\"}" >> $PSSCRIPT
-echo "{'pN':'exptest','pT':'exponential','pP':\"{'Mean':'100.0',}\"}" >> $PSSCRIPT
-echo "{'pN':'weibtest','pT':'weibull','pP':\"{'Alpha':'1.0', 'Beta':'0.1'}\"}" >> $PSSCRIPT
-echo "now doing $TOOL"
-python $TOOLPATH/$TOOL.py ${INPATH}/tinywga $NPRE $NPRE.pphe $OUTPATH $PSSCRIPT
-#   <command interpreter="python">rgfakePhe.py '$infile1.extra_files_path/$infile1.metadata.base_name'
-#   "$title1" '$ppheout' '$ppheout.files_path' '$script_file'
-#
-#
-TOOL="rgQC"
-NPRE=${TOOL}test1
-echo "now doing $TOOL"
-OUTPATH="$OROOT/$TOOL"
-mkdir $OUTPATH
-python $TOOLPATH/$TOOL.py -i "$INPATH/tinywga" -o $NPRE -s ${OUTPATH}/${NPRE}.html -p $OUTPATH
-# rgQC.py -i '$input_file.extra_files_path/$input_file.metadata.base_name' -o "$out_prefix" 
-# -s '$html_file' -p '$html_file.files_path' 
-#
-TOOL="rgGRR"
-NPRE=${TOOL}test1
-echo "now doing $TOOL"
-OUTPATH="$OROOT/$TOOL"
-mkdir $OUTPATH
-CMD="python $TOOLPATH/$TOOL.py "$INPATH/tinywga" "tinywga" $OUTPATH/${NPRE}.html $OUTPATH "$NPRE" 100 6 true" 
-echo "doing $CMD"
-$CMD
-# rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name"
-#'$out_file1' '$out_file1.files_path' "$title"  '$n' '$Z' '$force'
-#
-TOOL="rgLDIndep"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-mkdir $OUTPATH
-python $TOOLPATH/$TOOL.py "$INPATH" "tinywga" "$NPRE" 1 1 0 0 1 1 $OUTPATH/${NPRE}.pbed $OUTPATH 10000 5000 0.1 
-#rgLDIndep.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title' '$mind'
-# '$geno' '$hwe' '$maf' '$mef' '$mei' '$out_file1'
-#'$out_file1.files_path'  '$window' '$step' '$r2' 
-TOOL="rgPedSub"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-mkdir $OUTPATH
-PSSCRIPT="$OUTPATH/pedsub.script"
-echo "title~~~~$NPRE" > $PSSCRIPT
-echo "output1~~~~${OUTPATH}/${NPRE}.lped" >> $PSSCRIPT
-echo "outformat~~~~lped" >> $PSSCRIPT
-echo "basename~~~~tinywga" >> $PSSCRIPT
-echo "inped~~~~$INPATH/tinywga" >> $PSSCRIPT
-echo "outdir~~~~$OUTPATH" >> $PSSCRIPT
-echo "region~~~~" >> $PSSCRIPT
-echo "relfilter~~~~all" >> $PSSCRIPT
-echo "rslist~~~~rs2283802Xrs2267000Xrs16997606Xrs4820537Xrs3788347Xrs756632Xrs4820539Xrs2283804Xrs2267006Xrs4822363X" >> $PSSCRIPT
-echo "now doing $TOOL"
-python $TOOLPATH/$TOOL.py $PSSCRIPT
-rm -rf $PSSCRIPT
-#
-TOOL="rgfakePhe"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-mkdir $OUTPATH
-PSSCRIPT="$OUTPATH/script_file"
-echo "{'pN':'normtest','pT':'rnorm','pP':\"{'Mean':'100', 'SD':'10'}\"}" > $PSSCRIPT
-echo "{'pN':'cattest','pT':'cat','pP':\"{'values':'red,green,blue'}\"}" >> $PSSCRIPT
-echo "{'pN':'uniftest','pT':'$f.series.phetype','pP':\"{'low':'1','hi':'100'}\"}" >> $PSSCRIPT
-echo "{'pN':'gammatest','pT':'rgamma','pP':\"{'Alpha':'1', 'Beta':'0.1'}\"}" >> $PSSCRIPT
-echo "{'pN':'poissontest','pT':'poisson','pP':\"{'lamb':'1.0',}\"}" >> $PSSCRIPT
-echo "{'pN':'exptest','pT':'exponential','pP':\"{'Mean':'100.0',}\"}" >> $PSSCRIPT
-echo "{'pN':'weibtest','pT':'weibull','pP':\"{'Alpha':'1.0', 'Beta':'0.1'}\"}" >> $PSSCRIPT
-echo "now doing $TOOL"
-python $TOOLPATH/$TOOL.py $PSSCRIPT
-#
-echo "Now doing rgclean"
-TOOL="rgClean"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-mkdir $OUTPATH
-python $TOOLPATH/$TOOL.py $INPATH "tinywga" "$NPRE" 1 1 0 0 1 1 $OUTPATH/${NPRE}.pbed $OUTPATH 0 0 0 0
-# rgClean.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title' '$mind'
-#        '$geno' '$hwe' '$maf' '$mef' '$mei' '$out_file1' '$out_file1.files_path'
-#        '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink' '$relfilter' '$afffilter' '$sexfilter' '$fixaff'
-#
-echo "Now doing rgEigPCA"
-TOOL="rgEigPCA"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-mkdir $OUTPATH
-python $TOOLPATH/$TOOL.py "$INPATH/tinywga" "$NPRE" ${OUTPATH}/${NPRE}.html $OUTPATH 4 2 2 2 $OUTPATH/rgEigPCAtest1.txt
-#    rgEigPCA.py "$i.extra_files_path/$i.metadata.base_name" "$title" "$out_file1"
-#    "$out_file1.files_path" "$k" "$m" "$t" "$s" "$pca" 
-#
-TOOL="rgfakePed"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-mkdir $OUTPATH
-echo "now doing $TOOL"
-python $TOOLPATH/$TOOL.py --title "$NPRE" -o $OUTPATH/${NPRE}.lped -p $OUTPATH -c "20" -n "40" -s "10" -w "0" -v "0" -l "pbed" -d "T" -m "0" -M "0"
-#rgfakePed.py --title '$title1' 
-#  -o '$out_file1' -p '$out_file1.extra_files_path' -c '$ncases' -n '$ntotal'
-#  -s '$nsnp'  -w '$lowmaf' -v '$missingValue' -l '$outFormat'
-#  -d '$mafdist' -m '$missingRate' -M '$mendelRate'
-#
-TOOL="rgHaploView"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-mkdir $OUTPATH
-echo "now doing $TOOL"
-python $TOOLPATH/$TOOL.py ""  "rs2283802Xrs2267000Xrs16997606Xrs4820537Xrs3788347Xrs756632Xrs4820539Xrs2283804Xrs2267006Xrs4822363X" \
-"$NPRE" $OUTPATH/${NPRE}.html  "$INPATH" "tinywga" 0.0 200000 "RSQ" "lo" "2048" "$OUTPATH" "noinfo" "0.8" "YRI" $JARPATH/haploview.jar
-#  rgHaploView.py "$ucsc_region" "$rslist" "$title" "$output1"  
-#  "$lhistIn.extra_files_path" "$lhistIn.metadata.base_name"
-#  "$minmaf" "$maxdist" "$ldtype" "$hires" "$memsize" "$output1.files_path" 
-#  "$infoTrack" "$tagr2" "$hmpanel" ${GALAXY_DATA_INDEX_DIR}/rg/bin/haploview.jar
-# note these statistical tools do NOT generate composite outputs
-TOOL="rgGLM"
-NPRE=${TOOL}test1
-OUTPATH=$NORMALOROOT
-echo "now doing $TOOL"
-python $TOOLPATH/$TOOL.py "$INPATH/tinywga" $INPATH/tinywga "$NPRE" "c1" "" $OUTPATH/${NPRE}_GLM.xls \
-$OUTPATH/${NPRE}_GLM_log.txt "tinywga" "" "" "" 1 1 0 0 $OUTPATH/${NPRE}_GLM_topTable.gff 
-##        rgGLM.py '$i.extra_files_path/$i.metadata.base_name' '$phef.extra_files_path/$phef.metadata.base_name'
-##        "$title1" '$predvar' '$covar' '$out_file1' '$logf' '$dbkey' '$i.metadata.base_name'
-##        '$inter' '$cond' '$gender' '$mind' '$geno' '$maf' '$logistic' '$gffout'
-#
-TOOL="rgTDT"
-NPRE=${TOOL}test1
-OUTPATH=$NORMALOROOT
-echo "now doing $TOOL"
-python $TOOLPATH/$TOOL.py -i "$INPATH/tinywga"  -o "$NPRE" -r $OUTPATH/${NPRE}_TDT.xls \
--l $OUTPATH/${NPRE}_TDT_log.txt -g $OUTPATH/${NPRE}_TDT_topTable.gff
-##        rgTDT.py -i '$infile.extra_files_path/$infile.metadata.base_name' -o '$title'
-##        -r '$out_file1' -l '$logf' -x '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink'
-##        -g '$gffout'
-#
-TOOL="rgCaCo"
-NPRE=${TOOL}test1
-OUTPATH=$NORMALOROOT
-echo "now doing $TOOL"
-python $TOOLPATH/rgCaCo.py $INPATH/tinywga "$NPRE" $OUTPATH/${NPRE}_CaCo.xls $OUTPATH/${NPRE}_CaCo_log.txt $OUTPATH $OUTPATH/${NPRE}_CaCo_topTable.gff
-# rgCaCo.py '$i.extra_files_path/$i.metadata.base_name' "$name"  '$out_file1' '$logf' '$logf.files_path' '$gffout'
-#
-TOOL="rgQQ"
-echo "now doing $TOOL"
-NPRE=${TOOL}test1
-OUTPATH=$NORMALOROOT
-CL="python $TOOLPATH/$TOOL.py "$INPATH/tinywga.pphe" "$NPRE" 1 3 $OUTPATH/$NPRE.pdf 8 10 "false" 1 $OUTPATH"
-echo "running $TOOL using $CL"
-python $TOOLPATH/$TOOL.py "$INPATH/tinywga.pphe" "$NPRE" 1 3 $OUTPATH/$NPRE.pdf 8 10 "false" 1 $OUTPATH
-# rgQQ.py "$input1" "$name" $sample "$cols" $allqq $height $width $log $allqq.id $__new_file_path__ 
-#
--- a/tools/rgenetics/rgtest_one_tool.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,224 +0,0 @@
-#!/bin/sh
-# script to generate all functional test outputs for each rgenetics tool
-# could be run at installation to ensure all dependencies are in place?
-case $# in 0) echo "USAGE: ${0##*/} ToolToTest galaxyRoot outRoot"; exit 1;;
-           1|2) echo "Need ToolToTest and paths for galaxyRoot outRoot as parameters"; exit 2;;
-           [4-9]*) echo "Too many arguments - need only ToolToTest and paths for galaxyRoot outRoot"; exit 2;;
-           *)
-esac
-GALAXYROOT=$2
-OUTROOT=$3
-echo "using $GALAXYROOT"
-# change this as needed for your local install
-INPATH="${GALAXYROOT}/test-data"
-JARPATH="${GALAXYROOT}/tool-data/shared/jars"
-TOOLPATH="${GALAXYROOT}/tools/rgenetics"
-OROOT="${OUTROOT}/test-data/rgtestouts"
-NORMALOROOT="${OUTROOT}/test-data"
-case "$1" in
-'rgManQQ')
-
-TOOL="rgManQQ"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-rm -rf $OUTPATH/*
-CL="python $TOOLPATH/$TOOL.py "$INPATH/smallwgaP.xls" $NPRE ${OUTPATH}/${NPRE}.html $OUTPATH 1 2 7 0"
-# rgManQQ.py '$input_file' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col' 
-# '$pval_col'
-#python /opt/galaxy/tools/rgenetics/rgManQQ.py /opt/galaxy/test-data/smallwgaP.xls rgManQQtest1 
-#/opt/galaxy/test-data/rgtestouts/rgManQQ/rgManQQtest1.html /opt/galaxy/test-data/rgtestouts/rgManQQ 1 2 5,7 
-echo "Testing $TOOL using $CL"
-python $TOOLPATH/$TOOL.py "$INPATH/smallwgaP.xls" $NPRE ${OUTPATH}/${NPRE}.html $OUTPATH 1 2 7 0
-;;
-
-'rgfakePhe')
-TOOL="rgfakePhe"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-rm -rf $OUTPATH/*
-PSSCRIPT="$OUTPATH/script_file"
-echo "{'pN':'normtest','pT':'rnorm','pP':\"{'Mean':'100', 'SD':'10'}\"}" > $PSSCRIPT
-echo "{'pN':'cattest','pT':'cat','pP':\"{'values':'red,green,blue'}\"}" >> $PSSCRIPT
-echo "{'pN':'uniftest','pT':'$f.series.phetype','pP':\"{'low':'1','hi':'100'}\"}" >> $PSSCRIPT
-echo "{'pN':'gammatest','pT':'rgamma','pP':\"{'Alpha':'1', 'Beta':'0.1'}\"}" >> $PSSCRIPT
-echo "{'pN':'poissontest','pT':'poisson','pP':\"{'lamb':'1.0',}\"}" >> $PSSCRIPT
-echo "{'pN':'exptest','pT':'exponential','pP':\"{'Mean':'100.0',}\"}" >> $PSSCRIPT
-echo "{'pN':'weibtest','pT':'weibull','pP':\"{'Alpha':'1.0', 'Beta':'0.1'}\"}" >> $PSSCRIPT
-echo "now doing $TOOL"
-python $TOOLPATH/$TOOL.py ${INPATH}/tinywga $NPRE $NPRE.pphe $OUTPATH $PSSCRIPT
-#   <command interpreter="python">rgfakePhe.py '$infile1.extra_files_path/$infile1.metadata.base_name'
-#   "$title1" '$ppheout' '$ppheout.files_path' '$script_file'
-#
-;;
-'rgQC')
-
-TOOL="rgQC"
-NPRE=${TOOL}test1
-echo "now doing $TOOL"
-OUTPATH="$OROOT/$TOOL"
-rm -rf $OUTPATH/*
-CMD="python $TOOLPATH/$TOOL.py -i $INPATH/tinywga -o $NPRE -s ${OUTPATH}/${NPRE}.html -p $OUTPATH"
-echo "doing $CMD"
-$CMD
-# rgQC.py -i '$input_file.extra_files_path/$input_file.metadata.base_name' -o "$out_prefix" 
-# -s '$html_file' -p '$html_file.files_path' 
-#
-;;
-
-'rgGRR')
-TOOL="rgGRR"
-NPRE=${TOOL}test1
-echo "now doing $TOOL"
-OUTPATH="$OROOT/$TOOL"
-rm -rf $OUTPATH/*
-cmd="$TOOLPATH/$TOOL.py "$INPATH/tinywga" "tinywga" $OUTPATH/${NPRE}.html $OUTPATH "$NPRE" '100' '6' 'true'"
-echo "Doing $cmd"
-python $TOOLPATH/$TOOL.py "$INPATH/tinywga" "tinywga" $OUTPATH/${NPRE}.html $OUTPATH "$NPRE" '100' '6' 
-# rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name"
-#'$out_file1' '$out_file1.files_path' "$title"  '$n' '$Z' 
-;;
-'rgLDIndep')
-TOOL="rgLDIndep"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-rm -rf $OUTPATH/*
-python $TOOLPATH/$TOOL.py "$INPATH" "tinywga" "$NPRE" 1 1 0 0 1 1 $OUTPATH/${NPRE}.pbed $OUTPATH 10000 5000 0.1 
-#rgLDIndep.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title' '$mind'
-# '$geno' '$hwe' '$maf' '$mef' '$mei' '$out_file1'
-#'$out_file1.files_path'  '$window' '$step' '$r2' 
-;;
-
-'rgPedSub')
-TOOL="rgPedSub"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-rm -rf $OUTPATH/*
-PSSCRIPT="$OUTPATH/pedsub.script"
-echo "title~~~~$NPRE" > $PSSCRIPT
-echo "output1~~~~${OUTPATH}/${NPRE}.lped" >> $PSSCRIPT
-echo "outformat~~~~lped" >> $PSSCRIPT
-echo "basename~~~~tinywga" >> $PSSCRIPT
-echo "inped~~~~$INPATH/tinywga" >> $PSSCRIPT
-echo "outdir~~~~$OUTPATH" >> $PSSCRIPT
-echo "region~~~~" >> $PSSCRIPT
-echo "relfilter~~~~all" >> $PSSCRIPT
-echo "rslist~~~~rs2283802Xrs2267000Xrs16997606Xrs4820537Xrs3788347Xrs756632Xrs4820539Xrs2283804Xrs2267006Xrs4822363X" >> $PSSCRIPT
-echo "now doing $TOOL"
-python $TOOLPATH/$TOOL.py $PSSCRIPT
-rm -rf $PSSCRIPT
-;;
-
-'rgfakePhe')
-
-TOOL="rgfakePhe"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-rm -rf $OUTPATH/*
-PSSCRIPT="$OUTPATH/script_file"
-echo "{'pN':'normtest','pT':'rnorm','pP':\"{'Mean':'100', 'SD':'10'}\"}" > $PSSCRIPT
-echo "{'pN':'cattest','pT':'cat','pP':\"{'values':'red,green,blue'}\"}" >> $PSSCRIPT
-echo "{'pN':'uniftest','pT':'$f.series.phetype','pP':\"{'low':'1','hi':'100'}\"}" >> $PSSCRIPT
-echo "{'pN':'gammatest','pT':'rgamma','pP':\"{'Alpha':'1', 'Beta':'0.1'}\"}" >> $PSSCRIPT
-echo "{'pN':'poissontest','pT':'poisson','pP':\"{'lamb':'1.0',}\"}" >> $PSSCRIPT
-echo "{'pN':'exptest','pT':'exponential','pP':\"{'Mean':'100.0',}\"}" >> $PSSCRIPT
-echo "{'pN':'weibtest','pT':'weibull','pP':\"{'Alpha':'1.0', 'Beta':'0.1'}\"}" >> $PSSCRIPT
-echo "now doing $TOOL"
-python $TOOLPATH/$TOOL.py $PSSCRIPT
-;;
-
-'rgClean')
-TOOL="rgClean"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-rm -rf $OUTPATH/*
-python $TOOLPATH/$TOOL.py $INPATH "tinywga" "$NPRE" 1 1 0 0 1 1 $OUTPATH/${NPRE}.pbed $OUTPATH 0 0 0 0
-# rgClean.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title' '$mind'
-#        '$geno' '$hwe' '$maf' '$mef' '$mei' '$out_file1' '$out_file1.files_path'
-#        '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink' '$relfilter' '$afffilter' '$sexfilter' '$fixaff'
-#
-;;
-
-'rgEigPCA')
-
-TOOL="rgEigPCA"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-rm -rf $OUTPATH/*
-python $TOOLPATH/$TOOL.py "$INPATH/tinywga" "$NPRE" ${OUTPATH}/${NPRE}.html $OUTPATH 4 2 2 2 $OUTPATH/rgEigPCAtest1.txt
-#    rgEigPCA.py "$i.extra_files_path/$i.metadata.base_name" "$title" "$out_file1"
-#    "$out_file1.files_path" "$k" "$m" "$t" "$s" "$pca" 
-#
-;;
-
-'rgfakePed')
-TOOL="rgfakePed"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-rm -rf $OUTPATH/*
-echo "now doing $TOOL"
-python $TOOLPATH/$TOOL.py --title "$NPRE" -o $OUTPATH/${NPRE}.lped -p $OUTPATH -c "20" -n "40" -s "10" -w "0" -v "0" -l "pbed" -d "T" -m "0" -M "0"
-#rgfakePed.py --title '$title1' 
-#  -o '$out_file1' -p '$out_file1.extra_files_path' -c '$ncases' -n '$ntotal'
-#  -s '$nsnp'  -w '$lowmaf' -v '$missingValue' -l '$outFormat'
-#  -d '$mafdist' -m '$missingRate' -M '$mendelRate'
-;;
-
-'rgHaploView')
-
-TOOL="rgHaploView"
-NPRE=${TOOL}test1
-OUTPATH="$OROOT/$TOOL"
-rm -rf $OUTPATH/*
-CL="python $TOOLPATH/$TOOL.py ''  'rs2283802 rs2267000 rs16997606 rs4820537 rs3788347 rs756632 rs4820539 rs2283804 rs2267006 rs4822363' '$NPRE' $OUTPATH/${NPRE}.html  '$INPATH' 'tinywga' 0.0 200000 'RSQ' 'lo' '2048' '$OUTPATH' 'noinfo' '0.8' 'YRI' $JARPATH/haploview.jar"
-echo "Testing $TOOL using $CL"
-python $TOOLPATH/$TOOL.py ""  "rs2283802 rs2267000 rs16997606 rs4820537 rs3788347 rs756632 rs4820539 rs2283804 rs2267006 rs4822363" \
-"$NPRE" $OUTPATH/${NPRE}.html  "$INPATH" "tinywga" 0.0 200000 "RSQ" "lo" "2048" "$OUTPATH" "noinfo" "0.8" "YRI" $JARPATH/haploview.jar
-#  rgHaploView.py "$ucsc_region" "$rslist" "$title" "$output1"  
-#  "$lhistIn.extra_files_path" "$lhistIn.metadata.base_name"
-#  "$minmaf" "$maxdist" "$ldtype" "$hires" "$memsize" "$output1.files_path" 
-#  "$infoTrack" "$tagr2" "$hmpanel" ${GALAXY_DATA_INDEX_DIR}/rg/bin/haploview.jar
-# note these statistical tools do NOT generate composite outputs
-;;
-
-'rgGLM')
-TOOL="rgGLM"
-NPRE=${TOOL}test1
-OUTPATH=$NORMALOROOT
-python $TOOLPATH/$TOOL.py "$INPATH/tinywga" $INPATH/tinywga "$NPRE" "c1" "" $OUTPATH/${NPRE}_GLM.xls \
-$OUTPATH/${NPRE}_GLM_log.txt "tinywga" "" "" "" 1 1 0 0 $OUTPATH/${NPRE}_GLM_topTable.gff 
-##        rgGLM.py '$i.extra_files_path/$i.metadata.base_name' '$phef.extra_files_path/$phef.metadata.base_name'
-##        "$title1" '$predvar' '$covar' '$out_file1' '$logf' '$dbkey' '$i.metadata.base_name'
-##        '$inter' '$cond' '$gender' '$mind' '$geno' '$maf' '$logistic' '$gffout'
-;;
-
-'rgTDT')
-TOOL="rgTDT"
-NPRE=${TOOL}test1
-OUTPATH=$NORMALOROOT
-python $TOOLPATH/$TOOL.py -i "$INPATH/tinywga"  -o "$NPRE" -r $OUTPATH/${NPRE}_TDT.xls \
--l $OUTPATH/${NPRE}_TDT_log.txt -g $OUTPATH/${NPRE}_TDT_topTable.gff
-##        rgTDT.py -i '$infile.extra_files_path/$infile.metadata.base_name' -o '$title'
-##        -r '$out_file1' -l '$logf' -x '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink'
-##        -g '$gffout'
-;;
-
-'rgCaCo')
-TOOL="rgCaCo"
-NPRE=${TOOL}test1
-OUTPATH=$NORMALOROOT
-echo "now doing $TOOL"
-python $TOOLPATH/rgCaCo.py $INPATH/tinywga "$NPRE" $OUTPATH/${NPRE}_CaCo.xls $OUTPATH/${NPRE}_CaCo_log.txt $OUTPATH $OUTPATH/${NPRE}_CaCo_topTable.gff
-# rgCaCo.py '$i.extra_files_path/$i.metadata.base_name' "$name"  '$out_file1' '$logf' '$logf.files_path' '$gffout'
-;;
-
-'rgQQ')
-TOOL="rgQQ"
-echo "now doing $TOOL"
-NPRE=${TOOL}test1
-OUTPATH=$NORMALOROOT
-CL="python $TOOLPATH/$TOOL.py "$INPATH/tinywga.pphe" "$NPRE" 1 3 $OUTPATH/$NPRE.pdf 8 10 "false" 1 $OUTPATH"
-echo "running $TOOL using $CL"
-python $TOOLPATH/$TOOL.py "$INPATH/tinywga.pphe" "$NPRE" 1 3 $OUTPATH/$NPRE.pdf 8 10 "false" 1 $OUTPATH
-# rgQQ.py "$input1" "$name" $sample "$cols" $allqq $height $width $log $allqq.id $__new_file_path__ 
-;;
-esac
--- a/tools/rgenetics/rgutils.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,618 +0,0 @@
-# utilities for rgenetics
-#
-# copyright 2009 ross lazarus
-# released under the LGPL
-#
-
-import subprocess, os, sys, time, tempfile, string, plinkbinJZ
-import datetime
-
-galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
-<title></title>
-<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
-</head>
-<body>
-<div class="document">
-"""
-galhtmlattr = """<h3><a href="http://rgenetics.org">Rgenetics</a> tool %s run at %s</h3>"""
-galhtmlpostfix = """</div></body></html>\n"""
-
-plinke = 'plink' # changed jan 2010 - all exes must be on path
-rexe = 'R'       # to avoid cluster/platform dependencies
-smartpca = 'smartpca.perl'
-
-def timenow():
-    """return current time as a string
-    """
-    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
-
-def timestamp():
-    return datetime.datetime.now().strftime('%Y%m%d%H%M%S')
-
-def fail( message ):
-    print >> sys.stderr, message
-    return -1
-
-def whereis(program):
-    for path in os.environ.get('PATH', '').split(':'):
-        if os.path.exists(os.path.join(path, program)) and \
-           not os.path.isdir(os.path.join(path, program)):
-            return os.path.join(path, program)
-    return None
-
-
-def bedToPicInterval(infile=None):
-    """
-    Picard tools requiring targets want
-    a sam style header which, incidentally, MUST be sorted in natural order - not lexicographic order:
-
-    @SQ     SN:chrM LN:16571
-    @SQ     SN:chr1 LN:247249719
-    @SQ     SN:chr2 LN:242951149
-    @SQ     SN:chr3 LN:199501827
-    @SQ     SN:chr4 LN:191273063
-    added to the start of what looks like a bed style file
-    chr1    67052400        67052451        -       CCDS635.1_cds_0_0_chr1_67052401_r
-    chr1    67060631        67060788        -       CCDS635.1_cds_1_0_chr1_67060632_r
-    chr1    67065090        67065317        -       CCDS635.1_cds_2_0_chr1_67065091_r
-    chr1    67066082        67066181        -       CCDS635.1_cds_3_0_chr1_67066083_r
-
-
-    see http://genome.ucsc.edu/FAQ/FAQtracks.html#tracks1
-    we need to add 1 to start coordinates on the way through - but length calculations are easier on the original zero-based starts
-    """
-    # bedToPicard.py
-    # ross lazarus October 2010
-    # LGPL
-    # for Rgenetics
-
-    def getFlen(bedfname=None):
-        """
-        find all features in a BED file and sum their lengths
-        """
-        features = {}
-        try:
-            infile = open(bedfname,'r')
-        except:
-            print '###ERROR: getFlen unable to open bedfile %s' % bedfname
-            sys.exit(1)
-        for i,row in enumerate(infile):
-            if row[0] == '@': # shouldn't happen given a bed file!
-                print 'row %d=%s - should NOT start with @!' % (i,row)
-                sys.exit(1)
-            row = row.strip()
-            if len(row) > 0:
-                srow = row.split('\t')
-                f = srow[0]
-                spos = srow[1] # zero based from UCSC so no need to add 1 - eg 0-100 is 100 bases numbered 0-99 (!)
-                epos = srow[2] # see http://genome.ucsc.edu/FAQ/FAQtracks.html#tracks1
-                flen = int(epos) - int(spos)
-                features.setdefault(f,0)
-                features[f] += flen
-        infile.close()
-        return features
-
-    def keynat(string):
-        '''
-        borrowed from http://code.activestate.com/recipes/285264-natural-string-sorting/
-        A natural sort helper function for sort() and sorted()
-        without using regular expressions or exceptions.
-
-        >>> items = ('Z', 'a', '10th', '1st', '9')
-        >>> sorted(items)
-        ['10th', '1st', '9', 'Z', 'a']
-        >>> sorted(items, key=keynat)
-        ['1st', '9', '10th', 'a', 'Z']
-        '''
-        it = type(1)
-        r = []
-        for c in string:
-            if c.isdigit():
-                d = int(c)
-                if r and type( r[-1] ) == it:
-                    r[-1] = r[-1] * 10 + d
-                else:
-                    r.append(d)
-            else:
-                r.append(c.lower())
-        return r
-
-    def writePic(outfname=None,bedfname=None):
-        """
-        collect header info and rewrite bed with header for picard
-        """
-        featlen = getFlen(bedfname=bedfname)
-        try:
-            outf = open(outfname,'w')
-        except:
-            print '###ERROR: writePic unable to open output picard file %s' % outfname
-            sys.exit(1)
-        infile = open(bedfname,'r') # already tested in getFlen
-        k = featlen.keys()
-        fk = sorted(k, key=keynat)
-        header = ['@SQ\tSN:%s\tLN:%d' % (x,featlen[x]) for x in fk]
-        outf.write('\n'.join(header))
-        outf.write('\n')
-        for row in infile:
-            row = row.strip()
-            if len(row) > 0: # convert zero based start coordinate to 1 based
-                srow = row.split('\t')
-                srow[1] = '%d' % (int(srow[1])+1) # see http://genome.ucsc.edu/FAQ/FAQtracks.html#tracks1
-                outf.write('\t'.join(srow))
-                outf.write('\n')
-        outf.close()
-        infile.close()
-
-
-
-    # bedToPicInterval starts here
-    fd,outf = tempfile.mkstemp(prefix='rgPicardHsMetrics')
-    writePic(outfname=outf,bedfname=infile)
-    return outf
-
-
-def getFileString(fpath, outpath):
-    """
-    format a nice file size string
-    """
-    size = ''
-    fp = os.path.join(outpath, fpath)
-    s = '? ?'
-    if os.path.isfile(fp):
-        n = float(os.path.getsize(fp))
-        if n > 2**20:
-            size = ' (%1.1f MB)' % (n/2**20)
-        elif n > 2**10:
-            size = ' (%1.1f KB)' % (n/2**10)
-        elif n > 0:
-            size = ' (%d B)' % (int(n))
-        s = '%s %s' % (fpath, size) 
-    return s
-
-
-def fixPicardOutputs(tempout=None,output_dir=None,log_file=None,html_output=None,progname=None,cl=[],transpose=True):
-    """
-    picard produces long hard to read tab header files
-    make them available but present them transposed for readability
-    """
-    rstyle="""<style type="text/css">
-    tr.d0 td {background-color: oldlace; color: black;}
-    tr.d1 td {background-color: aliceblue; color: black;}
-    </style>"""    
-    cruft = []
-    dat = []    
-    try:
-        r = open(tempout,'r').readlines()
-    except:
-        r = []
-    for row in r:
-        if row.strip() > '':
-            srow = row.split('\t')
-            if row[0] == '#':
-                cruft.append(row.strip()) # want strings
-            else:
-                dat.append(srow) # want lists
-    
-    res = [rstyle,]
-    res.append(galhtmlprefix % progname)   
-    res.append(galhtmlattr % (progname,timenow()))
-    flist = os.listdir(output_dir) 
-    pdflist = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']
-    if len(pdflist) > 0: # assumes all pdfs come with thumbnail .jpgs
-        for p in pdflist:
-            imghref = '%s.jpg' % os.path.splitext(p)[0] # removes .pdf
-            res.append('<table cellpadding="10"><tr><td>\n')
-            res.append('<a href="%s"><img src="%s" alt="Click thumbnail to download %s" hspace="10" align="middle"></a>\n' % (p,imghref,p)) 
-            res.append('</td></tr></table>\n')
-    res.append('<b>Your job produced the following output files.</b><hr/>\n')
-    res.append('<table>\n')
-    for i,f in enumerate(flist):
-         fn = os.path.split(f)[-1]
-         res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fn))
-    res.append('</table><p/>\n') 
-    if len(cruft) + len(dat) > 0:
-        res.append('<b>Picard on line resources</b><ul>\n')
-        res.append('<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li>\n')
-        res.append('<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/>\n')
-        if transpose:
-            res.append('<b>Picard output (transposed for readability)</b><hr/>\n')       
-        else:
-            res.append('<b>Picard output</b><hr/>\n')  
-        res.append('<table cellpadding="3" >\n')
-        if len(cruft) > 0:
-            cres = ['<tr class="d%d"><td>%s</td></tr>' % (i % 2,x) for i,x in enumerate(cruft)]
-            res += cres
-        if len(dat) > 0: 
-            maxrows = 100
-            if transpose:
-                tdat = map(None,*dat) # transpose an arbitrary list of lists
-                missing = len(tdat) - maxrows
-                tdat = ['<tr class="d%d"><td>%s</td><td>%s</td></tr>\n' % ((i+len(cruft)) % 2,x[0],x[1]) for i,x in enumerate(tdat) if i < maxrows] 
-                if missing > 0:
-                    tdat.append('<tr><td colspan="2">...WARNING: %d rows deleted for sanity...see raw files for all rows</td></tr>' % missing)
-            else:
-                tdat = ['<tr class="d%d"><td>%s</td></tr>\n' % ((i+len(cruft)) % 2,x) for i,x in enumerate(dat) if i < maxrows] 
-                if len(dat) > maxrows:
-                    missing = len(dat) - maxrows      
-                    tdat.append('<tr><td>...WARNING: %d rows deleted for sanity...see raw files for all rows</td></tr>' % missing)
-            res += tdat
-        res.append('</table>\n')   
-    else:
-        res.append('<b>No Picard output found - please consult the Picard log above for an explanation</b>')
-    l = open(log_file,'r').readlines()
-    if len(l) > 0: 
-        res.append('<b>Picard log</b><hr/>\n') 
-        rlog = ['<pre>',]
-        rlog += l
-        rlog.append('</pre>')
-        res += rlog
-    else:
-        res.append("Odd, Picard left no log file %s - must have really barfed badly?" % log_file)
-    res.append('<hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n') 
-    res.append( 'generated all outputs reported here, using this command line:<br/>\n<pre>%s</pre>\n' % ''.join(cl))   
-    res.append(galhtmlpostfix) 
-    outf = open(html_output,'w')
-    outf.write(''.join(res))   
-    outf.write('\n')
-    outf.close()
-
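# Note: map(None, *dat) above is the Python 2 idiom for a padding transpose of a
# ragged list of lists. A minimal sketch of the same idea that also runs on
# Python 3 via itertools (illustrative only, not part of rgutils.py):
try:
    from itertools import zip_longest                       # Python 3
except ImportError:
    from itertools import izip_longest as zip_longest       # Python 2
dat = [['METRIC', 'MEAN_COVERAGE', 'PCT_TARGET'],
       ['sample1', '38.2']]                                 # ragged second row
tdat = list(zip_longest(*dat, fillvalue=None))              # pads short rows with None
# tdat == [('METRIC', 'sample1'), ('MEAN_COVERAGE', '38.2'), ('PCT_TARGET', None)]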
-def keynat(string):
-    '''
-    borrowed from http://code.activestate.com/recipes/285264-natural-string-sorting/
-    A natural sort helper function for sort() and sorted()
-    without using regular expressions or exceptions.
-
-    >>> items = ('Z', 'a', '10th', '1st', '9')
-    >>> sorted(items)
-    ['10th', '1st', '9', 'Z', 'a']
-    >>> sorted(items, key=keynat)
-    ['1st', '9', '10th', 'a', 'Z']    
-    '''
-    it = type(1)
-    r = []
-    for c in string:
-        if c.isdigit():
-            d = int(c)
-            if r and type( r[-1] ) == it: 
-                r[-1] = r[-1] * 10 + d
-            else: 
-                r.append(d)
-        else:
-            r.append(c.lower())
-    return r
-
-def getFlen(bedfname=None):
-    """
-    find all features in a BED file and sum their lengths
-    """
-    features = {}
-    otherHeaders = []
-    try:
-        infile = open(bedfname,'r')
-    except:
-        print '###ERROR: getFlen unable to open bedfile %s' % bedfname
-        sys.exit(1)
-    for i,row in enumerate(infile):
-        if row.startswith('@'): # add to headers if not @SQ
-            if not row.startswith('@SQ'):
-                otherHeaders.append(row)
-        else:
-            row = row.strip()
-            if row.startswith('#') or row.lower().startswith('browser') or row.lower().startswith('track'):
-                continue # ignore headers
-            srow = row.split('\t')
-            if len(srow) > 3:
-                srow = row.split('\t')
-                f = srow[0]
-                spos = srow[1] # zero based from UCSC so no need to add 1 - eg 0-100 is 100 bases numbered 0-99 (!)
-                epos = srow[2] # see http://genome.ucsc.edu/FAQ/FAQtracks.html#tracks1
-                flen = int(epos) - int(spos)
-                features.setdefault(f,0)
-                features[f] += flen
-    infile.close()
-    fk = features.keys()
-    fk = sorted(fk, key=keynat)
-    return features,fk,otherHeaders
-
-def bedToPicInterval(infile=None,outfile=None):
-    """
-    Picard tools requiring targets want 
-    a sam style header which, incidentally, MUST be sorted in natural order - not lexicographic order:
-
-    @SQ     SN:chrM LN:16571
-    @SQ     SN:chr1 LN:247249719
-    @SQ     SN:chr2 LN:242951149
-    @SQ     SN:chr3 LN:199501827
-    @SQ     SN:chr4 LN:191273063
-    added to the start of what looks like a bed style file
-    chr1    67052400        67052451        -       CCDS635.1_cds_0_0_chr1_67052401_r
-    chr1    67060631        67060788        -       CCDS635.1_cds_1_0_chr1_67060632_r
-    chr1    67065090        67065317        -       CCDS635.1_cds_2_0_chr1_67065091_r
-    chr1    67066082        67066181        -       CCDS635.1_cds_3_0_chr1_67066083_r
-
-    see http://genome.ucsc.edu/FAQ/FAQtracks.html#tracks1
-    we need to add 1 to start coordinates on the way through - but length calculations are easier on the original zero-based starts
-    """
-    # bedToPicard.py
-    # ross lazarus October 2010
-    # LGPL 
-    # for Rgenetics
-    """
-    collect header info and rewrite bed with header for picard
-    """
-    featlen,fk,otherHeaders = getFlen(bedfname=infile)
-    try:
-        outf = open(outfile,'w')
-    except:
-        print '###ERROR: writePic unable to open output picard file %s' % outfile
-        sys.exit(1)
-    inf = open(infile,'r') # already tested in getFlen
-    header = ['@SQ\tSN:%s\tLN:%d' % (x,featlen[x]) for x in fk]
-    if len(otherHeaders) > 0:
-        header += otherHeaders
-    outf.write('\n'.join(header))
-    outf.write('\n')
-    for row in inf:
-        row = row.strip()
-        if len(row) > 0: # convert zero based start coordinate to 1 based
-            if row.startswith('@'):
-                continue
-            else:
-                srow = row.split('\t')
-                srow[1] = '%d' % (int(srow[1])+1) # see http://genome.ucsc.edu/FAQ/FAQtracks.html#tracks1
-                outf.write('\t'.join(srow))
-                outf.write('\n')
-    outf.close()
-    inf.close()
-
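# Worked example of the zero-based to one-based start shift described in the
# docstring above (a self-contained sketch, not part of rgutils.py):
bed_rows = ['chr1\t67052400\t67052451\t-\tCCDS635.1_cds_0_0_chr1_67052401_r',
            'chr1\t67060631\t67060788\t-\tCCDS635.1_cds_1_0_chr1_67060632_r']
for row in bed_rows:
    srow = row.split('\t')
    srow[1] = '%d' % (int(srow[1]) + 1)  # 67052400 -> 67052401 for Picard
    print('\t'.join(srow))
# BED intervals are half-open, so length = end - start with no off-by-one,
# which is why getFlen sums lengths before the +1 shift is applied.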
-
-def oRRun(rcmd=[],outdir=None,title='myR',rexe='R'):
-    """
-    run an r script, lines in rcmd,
-    in a temporary directory
-    move everything, r script and all back to outdir which will be an html file
-
-
-      # test
-      RRun(rcmd=['print("hello cruel world")','q()'],title='test')
-    """
-    rlog = []
-    print '### rexe = %s' % rexe
-    assert os.path.isfile(rexe)
-    rname = '%s.R' % title
-    stoname = '%s.R.log' % title
-    rfname = rname
-    stofname = stoname
-    if outdir: # want a specific path
-        rfname = os.path.join(outdir,rname)
-        stofname = os.path.join(outdir,stoname)
-        try:
-            os.makedirs(outdir) # might not be there yet...
-        except:
-            pass
-    else:
-        outdir = tempfile.mkdtemp(prefix=title)
-        rfname = os.path.join(outdir,rname)
-        stofname = os.path.join(outdir,stoname)
-        rmoutdir = True
-    f = open(rfname,'w')
-    if type(rcmd) == type([]):
-        f.write('\n'.join(rcmd))
-    else: # string
-        f.write(rcmd)
-    f.write('\n')
-    f.close()
-    sto = file(stofname,'w')
-    vcl = [rexe,"--vanilla --slave", '<', rfname ]
-    x = subprocess.Popen(' '.join(vcl),shell=True,stderr=sto,stdout=sto,cwd=outdir)
-    retval = x.wait()
-    sto.close()
-    rlog = file(stofname,'r').readlines()
-    rlog.insert(0,'## found R at %s' % rexe)
-    if outdir != None:
-        flist = os.listdir(outdir)
-    else:
-        flist = os.listdir('.')
-    flist.sort()
-    flist = [(x,x) for x in flist]
-    for i,x in enumerate(flist):
-        if x == rname:
-            flist[i] = (x,'R script for %s' % title)
-        elif x == stoname:
-            flist[i] = (x,'R log for %s' % title)
-    if False and rmoutdir:
-        os.removedirs(outdir)
-    return rlog,flist # for html layout
-
-
-
-
-def RRun(rcmd=[],outdir=None,title='myR',tidy=True):
-    """
-    run an r script, lines in rcmd,
-    in a temporary directory
-    move everything, r script and all back to outdir which will be an html file
-
-
-      # test
-      RRun(rcmd=['print("hello cruel world")','q()'],title='test')
-    echo "a <- c(5, 5); b <- c(0.5, 0.5)" | cat - RScript.R | R --slave \ --vanilla
-    suggested by http://tolstoy.newcastle.edu.au/R/devel/05/09/2448.html
-    """
-    killme = string.punctuation + string.whitespace
-    trantab = string.maketrans(killme,'_'*len(killme))
-    title = title.translate(trantab)
-    rlog = []
-    tempout=False
-    rname = '%s.R' % title
-    stoname = '%s.R.log' % title
-    cwd = os.getcwd()
-    if outdir: # want a specific path
-        try:
-            os.makedirs(outdir) # might not be there yet...
-        except:
-            pass
-        os.chdir(outdir)
-    if type(rcmd) == type([]):
-        script = '\n'.join(rcmd)
-    else: # string
-        script = rcmd
-    sto = file(stoname,'w')
-    rscript = file(rname,'w')
-    rscript.write(script)
-    rscript.write('\n#R script autogenerated by rgenetics/rgutils.py on %s\n' % timenow())
-    rscript.close()
-    vcl = '%s --slave --vanilla < %s' %  (rexe,rname)
-    if outdir:
-        x = subprocess.Popen(vcl,shell=True,stderr=sto,stdout=sto,cwd=outdir)
-    else:
-        x = subprocess.Popen(vcl,shell=True,stderr=sto,stdout=sto)
-    retval = x.wait()
-    sto.close()
-    rlog = file(stoname,'r').readlines()
-    if retval != 0:
-        rlog.insert(0,'Nonzero exit code = %d' % retval) # indicate failure
-    if outdir:
-        flist = os.listdir(outdir)
-    else:
-        flist = os.listdir(os.getcwd())
-    flist.sort()
-    flist = [(x,x) for x in flist]
-    for i,x in enumerate(flist):
-        if x == rname:
-            flist[i] = (x,'R script for %s' % title)
-        elif x == stoname:
-            flist[i] = (x,'R log for %s' % title)
-    if outdir:
-        os.chdir(cwd)
-    return rlog,flist # for html layout
-
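# RRun's title cleanup above uses Python 2's string.maketrans. A version-portable
# sketch of the same punctuation-and-whitespace to underscore mapping
# (illustrative helper name, not part of rgutils.py):
import string

def clean_title(title):
    killme = string.punctuation + string.whitespace
    try:
        trantab = str.maketrans(killme, '_' * len(killme))       # Python 3
    except AttributeError:
        trantab = string.maketrans(killme, '_' * len(killme))    # Python 2
    return title.translate(trantab)

print(clean_title('my QC run #2 (tinywga)'))  # -> my_QC_run__2__tinywga_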
-def runPlink(bfn='bar',ofn='foo',logf=None,plinktasks=[],cd='./',vclbase = []):
-    """run a series of plink tasks and append log results to stdout
-    vcl has a list of parameters for the spawnv
-    common settings can all go in the vclbase list and are added to each plinktask
-    """
-    # root for all
-    fplog,plog = tempfile.mkstemp()
-    if type(logf) == type('  '): # open otherwise assume is file - ugh I'm in a hurry
-        mylog = file(logf,'a+')
-    else:
-        mylog = logf
-    mylog.write('## Rgenetics: http://rgenetics.org Galaxy Tools rgQC.py Plink runner\n')
-    for task in plinktasks: # each is a list
-        vcl = vclbase + task
-        sto = file(plog,'w')
-        x = subprocess.Popen(' '.join(vcl),shell=True,stdout=sto,stderr=sto,cwd=cd)
-        retval = x.wait()
-        sto.close()
-        try:
-            lplog = file(plog,'r').read()
-            mylog.write(lplog)
-            os.unlink(plog) # no longer needed
-        except:
-            mylog.write('### %s Strange - no std out from plink when running command line\n%s' % (timenow(),' '.join(vcl)))
-
-def pruneLD(plinktasks=[],cd='./',vclbase = []):
-    """
-    plink blathers when doing pruning - ignore
-    Linkage disequilibrium based SNP pruning
-    if a million snps in 3 billion base pairs, have mean 3k spacing
-    assume 40-60k of ld in ceu, a window of 120k width is about 40 snps
-    so lots more is perhaps less efficient - each window computational cost is
-    O(N^2) unless the code is smart enough to avoid unnecessary computation where
-    allele frequencies make it impossible to see ld > the r^2 cutoff threshold
-    So, do a window and move forward 20?
-    The fine Plink docs at http://pngu.mgh.harvard.edu/~purcell/plink/summary.shtml#prune
-    reproduced below
-
-Sometimes it is useful to generate a pruned subset of SNPs that are in approximate linkage equilibrium with each other. This can be achieved via two commands: 
-first, --indep, which prunes based on the variance inflation factor (VIF), recursively removing SNPs within a sliding window; second, --indep-pairwise, which is 
-similar, except it is based only on pairwise genotypic correlation.
-
-Hint The output of either of these commands is two lists of SNPs: those that are pruned out and those that are not. A separate command using the --extract or 
---exclude option is necessary to actually perform the pruning.
-
-The VIF pruning routine is performed:
-plink --file data --indep 50 5 2
-
-will create files
-
-     plink.prune.in
-     plink.prune.out
-
-Each is a simple list of SNP IDs; both these files can subsequently be specified as the argument for
-a --extract or --exclude command.
-
-The parameters for --indep are: window size in SNPs (e.g. 50), the number of SNPs to shift the
-window at each step (e.g. 5), the VIF threshold. The VIF is 1/(1-R^2) where R^2 is the multiple correlation coefficient for a SNP being regressed on all other 
-SNPs simultaneously. That is, this considers the correlations between SNPs but also between linear combinations of SNPs. A VIF of 10 is often taken to represent 
-near collinearity problems in standard multiple regression analyses (i.e. implies R^2 of 0.9). A VIF of 1 would imply that the SNP is completely independent of 
-all other SNPs. Practically, values between 1.5 and 2 should probably be used; particularly in small samples, if this threshold is too low and/or the window 
-size is too large, too many SNPs may be removed.
-
-The second procedure is performed:
-plink --file data --indep-pairwise 50 5 0.5
-
-This generates the same output files as the first version; the only difference is that a
-simple pairwise threshold is used. The first two parameters (50 and 5) are the same as above (window size and step); the third parameter represents the r^2 
-threshold. Note: this represents the pairwise SNP-SNP metric now, not the multiple correlation coefficient; also note, this is based on the genotypic 
-correlation, i.e. it does not involve phasing.
-
-To give a concrete example: the command above that specifies 50 5 0.5 would a) consider a
-window of 50 SNPs, b) calculate LD between each pair of SNPs in the window, c) remove one of a pair of SNPs if the LD is greater than 0.5, d) shift the window 5 
-SNPs forward and repeat the procedure.
-
-To make a new, pruned file, then use something like (in this example, we also convert the
-standard PED fileset to a binary one):
-plink --file data --extract plink.prune.in --make-bed --out pruneddata
-    """
-    fplog,plog = tempfile.mkstemp()
-    alog = []
-    alog.append('## Rgenetics: http://rgenetics.org Galaxy Tools rgQC.py Plink pruneLD runner\n')
-    for task in plinktasks: # each is a list
-        vcl = vclbase + task
-        sto = file(plog,'w')
-        x = subprocess.Popen(' '.join(vcl),shell=True,stdout=sto,stderr=sto,cwd=cd)
-        retval = x.wait()
-        sto.close()
-        try:
-            lplog = file(plog,'r').readlines()
-            lplog = [x for x in lplog if x.find('Pruning SNP') == -1]
-            alog += lplog
-            alog.append('\n')
-            os.unlink(plog) # no longer needed
-        except:
-            alog.append('### %s Strange - no std out from plink when running command line\n%s\n' % (timenow(),' '.join(vcl)))
-    return alog
-
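# A hedged sketch of driving pruneLD with the --indep-pairwise example from the
# docstring above; the fileset names and the vclbase flags are illustrative only:
vclbase = ['plink', '--noweb', '--bfile', 'tinywga']
plinktasks = [['--indep-pairwise', '50', '5', '0.5', '--out', 'tinywga_prune'],
              ['--extract', 'tinywga_prune.prune.in', '--make-bed', '--out', 'pruneddata']]
alog = pruneLD(plinktasks=plinktasks, cd='./', vclbase=vclbase)
print(''.join(alog))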
-def readMap(mapfile=None,allmarkers=False,rsdict={},c=None,spos=None,epos=None):
-    """abstract out - keeps reappearing
-    """
-    mfile = open(mapfile, 'r')
-    markers = []
-    snpcols = {}
-    snpIndex = 0 # in case empty or comment lines
-    for rownum,row in enumerate(mfile):
-        line = row.strip()
-        if not line or line[0]=='#': continue
-        chrom, snp, genpos, abspos = line.split()[:4] # just in case more cols
-        try:
-            abspos = int(abspos)
-        except:
-            abspos = 0 # stupid framingham data grumble grumble
-        if allmarkers or rsdict.get(snp,None) or (chrom == c and (spos <= abspos <= epos)):
-            markers.append((chrom,abspos,snp)) # decorate for sort into genomic
-            snpcols[snp] = snpIndex # so we know which col to find genos for this marker
-            snpIndex += 1
-    markers.sort()
-    rslist = [x[2] for x in markers] # drop decoration
-    rsdict = dict(zip(rslist,rslist))
-    mfile.close()
-    return markers,snpcols,rslist,rsdict
-
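# Usage sketch for readMap: the map file name, chromosome and base positions
# below are illustrative only.
markers, snpcols, rslist, rsdict = readMap(mapfile='tinywga.map', allmarkers=True)
# or restrict to a chromosome 22 region:
markers, snpcols, rslist, rsdict = readMap(mapfile='tinywga.map', allmarkers=False,
                                           c='22', spos=21784722, epos=21995135)
print('%d markers returned' % len(rslist))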
-
Binary file tools/rgenetics/rgutils.pyc has changed
--- a/tools/rgenetics/test.eps	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,851 +0,0 @@
-%!PS-Adobe-3.0 EPSF-3.0
-%%Title:        Sequence Logo: sdfs
-%%Creator:      WebLogo 3.1 (2011-02-16)
-%%CreationDate: 2011-10-15 16:47:38.275112
-%%BoundingBox:  0  0  281  92 
-%%Pages: 0
-%%DocumentFonts: 
-%%EndComments
-
-
-% ---- VARIABLES ----
-
-/True   true def
-/False  false def
-
-/debug              False        def
-
-/logo_height        92  def
-/logo_width         281  def
-/logo_title         (sdfs) def
-/show_title         True def
-
-/logo_margin        2 def
-/xaxis_label_height 6.0 def
-/title_height       12 def
-/stroke_width       0.5 def
-/tic_length         5 def
-
-/lines_per_logo     1 def
-/line_width         277.6 def
-/line_height        70.0 def
-/line_margin_left   30.0 def
-/line_margin_right  10 def
-/line_margin_bottom 12.0 def
-/line_margin_top    4 def
-
-/stack_width         10.8 def
-/stack_height        54.0 def
-/stacks_per_line     22 def
-/stack_margin        0.5 def
-
-/show_yaxis             True def      
-/show_yaxis_label       True def
-/yaxis_label            (bits) def
-/yaxis_scale          2.0 def              % height in units 
-/yaxis_tic_interval     1.0 def           % in units
-/yaxis_minor_tic_interval 0.2 def   % in units
-
-/show_xaxis_label       False def             % True or False
-/show_xaxis             True def                   % True or False
-/xaxis_label            () def
-/xaxis_tic_interval     1 def
-/rotate_numbers         False def               % True or False
-/number_interval        5 def
-/show_ends              False def          
-/end_type               (-) def          % d: DNA, p: PROTEIN, -: none
-
-/show_fineprint         True def
-/fineprint              (WebLogo 3.1) def
-/logo_label             () def
-
-/show_boxes             False def    % True or False
-/shrink                 false def    % True or False
-/shrink_fraction        0.5 def               
-
-/show_errorbars         True def      % True or False
-/errorbar_fraction      0.9 def
-/errorbar_width_fraction  0.25 def
-/errorbar_gray          0.75 def
-
-/fontsize               10 def
-/small_fontsize         6 def
-/title_fontsize         12 def
-/number_fontsize        8 def
-
-
-/UseCIEColor true def       % Fix for issue 4
-/default_color [ 0.0 0.0 0.0 ] def 
-/color_dict << 
-  (T) [ 1.0 0.549019607843 0.0 ]
-  (A) [ 1.0 0.549019607843 0.0 ]
-  (U) [ 1.0 0.549019607843 0.0 ]
-  (G) [ 0.0 0.0 1.0 ]
-  (C) [ 0.0 0.0 1.0 ]
->> def
-
-
-
-% ---- DERIVED PARAMETERS ----
-
-/char_width stack_width 2 stack_margin mul sub def
-/char_width2 char_width 2 div def
-/char_width4 char_width 4 div def
-
-% movements to place 5'/N and 3'/C symbols
-/leftEndDeltaX  fontsize neg         def
-/leftEndDeltaY  fontsize 1.25 mul neg def
-/rightEndDeltaX fontsize 0.25 mul     def
-/rightEndDeltaY leftEndDeltaY        def
-
-
-% ---- PROCEDURES ----
-
-
-/SetTitleFont {/ArialMT findfont title_fontsize scalefont setfont} bind def
-/SetLogoFont  {/Arial-BoldMT findfont char_width  scalefont setfont} bind def
-/SetStringFont{/ArialMT findfont fontsize scalefont setfont} bind def
-/SetPrimeFont {/Symbol findfont fontsize scalefont setfont} bind def
-/SetSmallFont {/ArialMT findfont small_fontsize scalefont setfont} bind def
-/SetNumberFont {/ArialMT findfont number_fontsize scalefont setfont} bind def
-
-/DrawBox { % width height 
-    /hh exch def
-    /ww exch def
-    gsave
-        0.2 setlinewidth
-        %0.5 setgray
-        
-        %0 0 moveto 
-        hh 0 rlineto
-        0 ww rlineto
-        hh neg 0 rlineto
-        0 ww neg rlineto
-        stroke
-    grestore
-} bind def
-
-
-/StartLogo { 
-  %save 
-  gsave 
-
-  
-  debug { 
-    logo_margin logo_margin moveto
-    logo_height logo_margin 2 mul sub
-    logo_width logo_margin 2 mul sub
-    DrawBox } if
-    
-  show_title { DrawTitle } if
-  show_xaxis_label { DrawXaxisLabel } if
-  show_fineprint { DrawFineprint } if
-  DrawLogoLabel
-  
-  
-  MoveToFirstLine
-} bind def
-
-
-/DrawLogoLabel {
-  gsave 
-    SetTitleFont
-    
-    logo_margin
-    logo_height title_fontsize sub logo_margin sub
-    moveto
-    
-    debug { title_fontsize logo_label stringwidth pop DrawBox } if
-    0 title_fontsize 4 div rmoveto % Move up to baseline (approximately)
-    logo_label show
-    
-  grestore
-} bind def
-
-/DrawTitle {
-  gsave 
-    SetTitleFont
-    
-    logo_width 2 div logo_title stringwidth pop 2 div sub 
-    logo_height title_fontsize sub logo_margin sub
-    moveto
-    
-    debug { title_fontsize logo_title stringwidth pop DrawBox } if
-    
-    0 title_fontsize 4 div rmoveto % Move up to baseline (approximately)
-    logo_title show
-    
-  grestore
-} bind def
-
-/DrawXaxisLabel {
-  % Print X-axis label, bottom center
-  gsave
-    SetStringFont
-
-    logo_width 2 div xaxis_label stringwidth pop 2 div sub
-    xaxis_label_height logo_margin add fontsize sub
-    moveto
-    %fontsize 3 div
-    
-    debug { fontsize xaxis_label stringwidth pop DrawBox } if
-
-    xaxis_label show
-
-  grestore
-} bind def
-
-
-/DrawFineprint {
-    gsave
-    
-    SetSmallFont
-
-    logo_width fineprint stringwidth pop sub 
-        logo_margin sub line_margin_right sub
-    logo_margin 
-    moveto
-    
-    debug { small_fontsize fineprint stringwidth pop DrawBox } if
-        
-    fineprint show
-    grestore
-} bind def
-
-/MoveToFirstLine {
-    logo_margin 
-    logo_height logo_margin sub title_height sub line_height sub
-    moveto
-} bind def
-
-/EndLogo { 
-  grestore 
-  %showpage 
-  %restore 
-} bind def
-
-
-/StartLine{ 
-    gsave 
-  
-    % Draw outer box
-    debug { line_height line_width DrawBox } if  
-
-    % Move to lower left corner of content area
-    line_margin_left line_margin_bottom rmoveto
-
-    % Draw inner content box
-    debug { 
-        line_height line_margin_bottom sub line_margin_top sub
-        line_width line_margin_left sub line_margin_right sub
-        DrawBox
-    } if  
-  
-    show_yaxis { DrawYaxis } if
-    show_xaxis { DrawLeftEnd } if
-    
-} bind def
-
-/EndLine{ 
-    show_xaxis { DrawRightEnd } if
-    grestore 
-    0 line_height neg rmoveto 
-} bind def
-
-
-/DrawYaxis {
-  gsave    
-    stack_margin neg 0 translate
-    DrawYaxisBar
-    DrawYaxisLabel
-  grestore
-} bind def
-
-
-/DrawYaxisBar { 
-  gsave  
-    stack_margin neg 0 rmoveto
-    
-    SetNumberFont
-    stroke_width setlinewidth
-    
-    /str 10 string def % string to hold number  
-    /smallgap stack_margin  def
-
-    % Draw first tic and bar
-    gsave    
-      tic_length neg 0 rmoveto 
-      tic_length 0 rlineto 
-      0 stack_height rlineto
-      stroke
-    grestore
-
-    % Draw the tics
-    % initial increment limit proc for
-    0 yaxis_tic_interval yaxis_scale abs 
-    {/loopnumber exch def
-
-      % convert the number coming from the loop to a string
-      % and find its width
-      loopnumber 10 str cvrs
-      /stringnumber exch def % string representing the number
-
-      stringnumber stringwidth pop
-      /numberwidth exch def % width of number to show
-
-      /halfnumberheight
-         stringnumber CharBoxHeight 2 div
-      def
-
-      gsave
-        numberwidth % move back width of number
-        neg loopnumber stack_height yaxis_scale div mul % shift on y axis
-        halfnumberheight sub % down half the digit
-        rmoveto % move back the width of the string
-
-        tic_length neg smallgap sub % Move back a bit more  
-        0 rmoveto % move back the width of the tic  
-
-        stringnumber show
-        smallgap 0 rmoveto % Make a small gap  
-
-        % now show the tic mark
-        0 halfnumberheight rmoveto % shift up again
-        tic_length 0 rlineto
-        stroke
-     grestore
-    } for
-    
-    % Draw the minor tics
-    % initial increment limit proc for
-    0 yaxis_minor_tic_interval yaxis_scale abs 
-    {/loopnumber2 exch def
-      gsave
-        0
-        loopnumber2 stack_height yaxis_scale div mul 
-        rmoveto 
-
-        tic_length 2 div neg 0 rlineto
-        stroke
-     grestore
-    } for    
-    
-  grestore
-} bind def
-
-/DrawYaxisLabel {
-  gsave
-    SetStringFont
-
-    % How far we move left depends on the size of
-    % the tic labels.
-    /str 10 string def % string to hold number  
-    yaxis_scale yaxis_tic_interval div cvi yaxis_tic_interval mul 
-    str cvs stringwidth pop
-    tic_length 1.25 mul  add neg  
-
-    stack_height
-    yaxis_label stringwidth pop
-    sub 2 div
-
-    rmoveto
-    90 rotate
-    
-    yaxis_label show
-  grestore
-} bind def
-
-
-%Take a single character and return the bounding box
-/CharBox { % <char> CharBox <lx> <ly> <ux> <uy>
-  gsave
-    newpath
-    0 0 moveto
-    % take the character off the stack and use it here:
-    true charpath 
-    flattenpath 
-    pathbbox % compute bounding box of 1 pt. char => lx ly ux uy
-    % the path is here, but toss it away ...
-  grestore
-} bind def
-
-
-% The height of a character's bounding box
-/CharBoxHeight { % <char> CharBoxHeight <num>
-  CharBox
-  exch pop sub neg exch pop
-} bind def
-
-
-% The width of a character's bounding box
-/CharBoxWidth { % <char> CharBoxWidth <num>
-  CharBox
-  pop exch pop sub neg 
-} bind def
-
-
-/DrawLeftEnd {
-  gsave
-    SetStringFont
-    leftEndDeltaX leftEndDeltaY rmoveto
-    
-    show_ends {
-        debug { leftEndDeltaY neg leftEndDeltaX neg DrawBox } if
-        end_type (d) eq {(5) show DrawPrime} if
-        end_type (p) eq {(N) show} if
-    } if
-  grestore
-} bind def
-
-/DrawRightEnd { 
-  gsave
-    SetStringFont
-    rightEndDeltaX rightEndDeltaY rmoveto
-    
-    show_ends {
-        debug { rightEndDeltaY neg leftEndDeltaX neg  DrawBox } if
-        end_type (d) eq {(3) show DrawPrime} if
-        end_type (p) eq {(C) show} if
-    } if
-  grestore
-} bind def
-
-/DrawPrime {
-  gsave
-    SetPrimeFont
-    (\242) show 
-  grestore
-} bind def
-
-
-/StartStack {  % <stackNumber> startstack
-  show_xaxis {DrawNumber}{pop} ifelse
-  gsave
-  debug { stack_height stack_width DrawBox } if
-    
-} bind def
-
-/EndStack {
-  grestore
-  stack_width 0 rmoveto
-} bind def
-
-
-/DrawNumber { % number MakeNumber
-    /n exch def
-    
-    
-  gsave
-    %0 stack_margin neg rmoveto
-    stroke_width setlinewidth
-    stack_width  0 rlineto
-    stack_width 2 div neg 0 rmoveto
-    
-    n () eq 
-    {  0 tic_length 4 div neg rlineto  } 
-    { 0 tic_length 2 div neg rlineto } 
-    ifelse
-
-    stroke
-  grestore   
-
-
-
-  gsave
-    n
-    SetNumberFont
-    stack_width 2 div tic_length 2 div neg rmoveto 
-
-    rotate_numbers {
-        90 rotate     
-        dup stringwidth pop neg % find the length of the number
-        stack_margin sub        % Move down a bit
-        (0) CharBoxHeight 2 div neg % left half height of numbers
-        rmoveto 
-        show
-    } {
-        dup stringwidth pop neg 2 div number_fontsize neg  rmoveto
-        show
-    } ifelse
-    
-    
-
-  grestore
-} bind def
-
-
-
-% Draw a character whose height is proportional to symbol bits
-/ShowSymbol{ % interval character ShowSymbol
-    /char exch def
-    /interval exch def
-    /fraction_width exch def
-    
-    /char_height 
-       interval yaxis_scale div stack_height mul
-       stack_margin sub
-       dup 
-       % if char_height is negative or very small replace with zero 
-       % BUG FIX: This used to be '0.0 gt' but it seems that DrawHeight
-       % has a finite, non-zero minimum, which results in a rangecheck error
-       0.001 gt {}{pop 0.0} ifelse 
-    def 
-
-    char_height 0.0 gt {
-        show_boxes {
-            gsave
-                /ww char_height stack_margin add def
-                /hh stack_width def
-                stroke_width setlinewidth
-                hh 0 rlineto
-                0 ww rlineto
-                hh neg 0 rlineto
-                0 ww neg rlineto
-                stroke
-            grestore     
-        } if
-
-        gsave
-            stack_margin stack_margin rmoveto
-            debug { char_height char_width DrawBox } if
-            1 fraction_width sub char_width mul 2 div  0 rmoveto
-            fraction_width char_width mul char_height char DrawChar
-        grestore
-        
-    } if
-    0 interval yaxis_scale div stack_height mul rmoveto
-} bind def
-
-
-/DrawChar { % <width> <height> <char> ShowChar
-    /tc exch def    % The character
-    /ysize exch def % the y size of the character
-    /xsize exch def % the x size of the character
-    /xmulfactor 1 def 
-    /ymulfactor 1 def
-    
-    gsave
-        SetLogoFont    
-        tc SetColor
-
-        % IReplacementHack
-        % Deal with the lack of bars on the letter 'I' in Arial and Helvetica
-        % by replacing with 'I' from Courier.
-        tc (I) eq {
-            /Courier findfont char_width  scalefont setfont    
-        } if
-
-
-        shrink {
-            xsize 1 shrink_fraction sub 2 div mul
-            ysize 1 shrink_fraction sub 2 div mul rmoveto 
-            shrink_fraction shrink_fraction scale
-        } if
-        
-        % Calculate the font scaling factors
-        % Loop twice to catch small correction due to first scaling
-        2 {
-            gsave
-            xmulfactor ymulfactor scale
-      
-            ysize % desired size of character in points
-            tc CharBoxHeight 
-            dup 0.0 ne {
-                div % factor by which to scale up the character
-                /ymulfactor exch def
-            } {pop pop} ifelse
-
-            xsize % desired size of character in points
-            tc CharBoxWidth  
-            dup 0.0 ne {
-                div % factor by which to scale up the character
-                /xmulfactor exch def
-            } {pop pop} ifelse
-            grestore
-        } repeat
-
-
-  
-        % Draw the character
-        xmulfactor ymulfactor scale
-        % Move lower left corner of character to start point
-        tc CharBox pop pop % llx lly : Lower left corner
-        exch neg exch neg
-        rmoveto
-        
-        tc show    
-
-    grestore
-} bind def
-
-/SetColor{ % <char> SetColor
-  dup color_dict exch known {
-    color_dict exch get aload pop setrgbcolor
-  } {
-    pop
-    default_color aload pop setrgbcolor
-  } ifelse 
-} bind def
-
-
-/DrawErrorbar{ % interval_down interval_up DrawErrorbar
-    
-    gsave
-    /points_per_unit stack_height yaxis_scale div def 
-    /height_up   exch points_per_unit mul def 
-    /height_down exch points_per_unit mul def
-    
-    show_errorbars {
-    
-    stroke_width setlinewidth
-    errorbar_gray setgray     
-    stack_width 2 div 0 rmoveto
-    
-    /errorbar_width char_width errorbar_width_fraction mul def
-    /errorbar_width2 errorbar_width 2 div def
-    
-    gsave 
-        0 height_down neg rmoveto
-        errorbar_width2 neg 0 rlineto
-        errorbar_width 0 rlineto
-        errorbar_width2 neg 0 rlineto
-        0 height_down errorbar_fraction mul rlineto 
-        stroke
-    grestore
-
-    gsave 
-        0 height_up  rmoveto
-        errorbar_width2 neg 0 rlineto
-        errorbar_width 0 rlineto
-        errorbar_width2 neg 0 rlineto
-        0 height_up neg errorbar_fraction mul rlineto 
-        stroke
-    grestore
-    } if
-           
-    grestore
-    
-} bind def
-
-/DrawErrorbarFirst{ % interval_down interval_up center DrawErrorbarFirst
-    gsave
-    /points_per_unit stack_height yaxis_scale div def 
-    /center   exch points_per_unit mul def 
-
-    0 center rmoveto
-    DrawErrorbar
-    grestore
-} bind def
-
-%%EndProlog
-
-%%Page: 1 1
-
-% Example Data
-%StartLogo
-%    StartLine
-%        (1) StartStack
-%            1.2 (C) ShowSymbol
-%            2.2 (I) ShowSymbol
-%            0.5 0.5 DrawErrorbar
-%        EndStack
-%        (2) StartStack
-%            0.5 (I) ShowSymbol
-%            0.9 (L) ShowSymbol
-%            1.0 (G) ShowSymbol
-%            
-%            0.5 0.5 DrawErrorbar        
-%        EndStack
-%        (234) StartStack
-%        EndStack
-%        (235) StartStack
-%        EndStack
-%    EndLine
-%EndLogo
-
-StartLogo
-
-StartLine
-() StartStack
- 1.000000 0.010108 (C) ShowSymbol
- 1.000000 0.040431 (G) ShowSymbol
- 1.000000 0.212261 (T) ShowSymbol
- 1.000000 0.232476 (A) ShowSymbol
- 0.232248 0.232248 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.015152 (C) ShowSymbol
- 1.000000 0.045455 (G) ShowSymbol
- 1.000000 0.136365 (T) ShowSymbol
- 1.000000 0.174245 (A) ShowSymbol
- 0.218101 0.218101 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.016616 (C) ShowSymbol
- 1.000000 0.037386 (G) ShowSymbol
- 1.000000 0.074773 (T) ShowSymbol
- 1.000000 0.074773 (A) ShowSymbol
- 0.169220 0.169220 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.000000 (A) ShowSymbol
- 1.000000 0.038953 (G) ShowSymbol
- 1.000000 0.155812 (C) ShowSymbol
- 1.000000 0.759583 (T) ShowSymbol
- 0.326656 0.326656 DrawErrorbar
-EndStack
-
-(5) StartStack
- 1.000000 0.019459 (C) ShowSymbol
- 1.000000 0.038917 (A) ShowSymbol
- 1.000000 0.116752 (T) ShowSymbol
- 1.000000 0.778345 (G) ShowSymbol
- 0.350333 0.350333 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.000000 (G) ShowSymbol
- 1.000000 0.021020 (A) ShowSymbol
- 1.000000 0.168160 (C) ShowSymbol
- 1.000000 0.840802 (T) ShowSymbol
- 0.325915 0.325915 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.000000 (C) ShowSymbol
- 1.000000 0.083359 (T) ShowSymbol
- 1.000000 0.083359 (A) ShowSymbol
- 1.000000 0.854432 (G) ShowSymbol
- 0.347959 0.347959 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.000000 (T) ShowSymbol
- 1.000000 0.070036 (G) ShowSymbol
- 1.000000 0.070036 (C) ShowSymbol
- 1.000000 1.003846 (A) ShowSymbol
- 0.356819 0.356819 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.026714 (A) ShowSymbol
- 1.000000 0.040070 (G) ShowSymbol
- 1.000000 0.044523 (C) ShowSymbol
- 1.000000 0.106855 (T) ShowSymbol
- 0.196056 0.196056 DrawErrorbar
-EndStack
-
-(10) StartStack
- 1.000000 0.014496 (A) ShowSymbol
- 1.000000 0.016107 (G) ShowSymbol
- 1.000000 0.020939 (T) ShowSymbol
- 1.000000 0.027382 (C) ShowSymbol
- 0.078924 0.106593 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.018308 (G) ShowSymbol
- 1.000000 0.032954 (A) ShowSymbol
- 1.000000 0.054923 (C) ShowSymbol
- 1.000000 0.073231 (T) ShowSymbol
- 0.164679 0.164679 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.015253 (C) ShowSymbol
- 1.000000 0.021790 (T) ShowSymbol
- 1.000000 0.032685 (A) ShowSymbol
- 1.000000 0.037043 (G) ShowSymbol
- 0.106770 0.125094 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.022457 (C) ShowSymbol
- 1.000000 0.028072 (T) ShowSymbol
- 1.000000 0.028072 (A) ShowSymbol
- 1.000000 0.058950 (G) ShowSymbol
- 0.137551 0.153378 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.013621 (G) ShowSymbol
- 1.000000 0.021404 (C) ShowSymbol
- 1.000000 0.029188 (T) ShowSymbol
- 1.000000 0.031133 (A) ShowSymbol
- 0.095346 0.115803 DrawErrorbar
-EndStack
-
-(15) StartStack
- 1.000000 0.033669 (C) ShowSymbol
- 1.000000 0.067338 (A) ShowSymbol
- 1.000000 0.078561 (G) ShowSymbol
- 1.000000 0.370360 (T) ShowSymbol
- 0.303054 0.303054 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.000000 (G) ShowSymbol
- 1.000000 0.056955 (T) ShowSymbol
- 1.000000 0.132896 (A) ShowSymbol
- 1.000000 0.740420 (C) ShowSymbol
- 0.331433 0.331433 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.014884 (C) ShowSymbol
- 1.000000 0.044653 (T) ShowSymbol
- 1.000000 0.148844 (G) ShowSymbol
- 1.000000 0.520953 (A) ShowSymbol
- 0.310748 0.310748 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.000000 (G) ShowSymbol
- 1.000000 0.088853 (T) ShowSymbol
- 1.000000 0.126932 (A) ShowSymbol
- 1.000000 0.406183 (C) ShowSymbol
- 0.268423 0.268423 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.043760 (C) ShowSymbol
- 1.000000 0.065640 (T) ShowSymbol
- 1.000000 0.065640 (G) ShowSymbol
- 1.000000 0.361019 (A) ShowSymbol
- 0.304415 0.304415 DrawErrorbar
-EndStack
-
-(20) StartStack
- 1.000000 0.021502 (G) ShowSymbol
- 1.000000 0.027646 (C) ShowSymbol
- 1.000000 0.036861 (A) ShowSymbol
- 1.000000 0.064506 (T) ShowSymbol
- 0.150515 0.158545 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.000000 (G) ShowSymbol
- 1.000000 0.015671 (C) ShowSymbol
- 1.000000 0.282073 (A) ShowSymbol
- 1.000000 0.470122 (T) ShowSymbol
- 0.247172 0.247172 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.022864 (G) ShowSymbol
- 1.000000 0.040011 (C) ShowSymbol
- 1.000000 0.080022 (A) ShowSymbol
- 1.000000 0.137181 (T) ShowSymbol
- 0.209363 0.209363 DrawErrorbar
-EndStack
-
-EndLine
-
-EndLogo
-
-
-%%EOF
-
--- a/tools/rgenetics/test.pdf	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,851 +0,0 @@
-%!PS-Adobe-3.0 EPSF-3.0
-%%Title:        Sequence Logo: rgWebLogo3
-%%Creator:      WebLogo 3.1 (2011-02-16)
-%%CreationDate: 2011-10-15 16:48:55.480094
-%%BoundingBox:  0  0  281  92 
-%%Pages: 0
-%%DocumentFonts: 
-%%EndComments
-
-
-% ---- VARIABLES ----
-
-/True   true def
-/False  false def
-
-/debug              False        def
-
-/logo_height        92  def
-/logo_width         281  def
-/logo_title         (rgWebLogo3) def
-/show_title         True def
-
-/logo_margin        2 def
-/xaxis_label_height 6.0 def
-/title_height       12 def
-/stroke_width       0.5 def
-/tic_length         5 def
-
-/lines_per_logo     1 def
-/line_width         277.6 def
-/line_height        70.0 def
-/line_margin_left   30.0 def
-/line_margin_right  10 def
-/line_margin_bottom 12.0 def
-/line_margin_top    4 def
-
-/stack_width         10.8 def
-/stack_height        54.0 def
-/stacks_per_line     22 def
-/stack_margin        0.5 def
-
-/show_yaxis             True def      
-/show_yaxis_label       True def
-/yaxis_label            (bits) def
-/yaxis_scale          2.0 def              % height in units 
-/yaxis_tic_interval     1.0 def           % in units
-/yaxis_minor_tic_interval 0.2 def   % in units
-
-/show_xaxis_label       False def             % True or False
-/show_xaxis             True def                   % True or False
-/xaxis_label            () def
-/xaxis_tic_interval     1 def
-/rotate_numbers         False def               % True or False
-/number_interval        5 def
-/show_ends              False def          
-/end_type               (-) def          % d: DNA, p: PROTEIN, -: none
-
-/show_fineprint         True def
-/fineprint              (WebLogo 3.1) def
-/logo_label             () def
-
-/show_boxes             False def    % True or False
-/shrink                 false def    % True or False
-/shrink_fraction        0.5 def               
-
-/show_errorbars         True def      % True or False
-/errorbar_fraction      0.9 def
-/errorbar_width_fraction  0.25 def
-/errorbar_gray          0.75 def
-
-/fontsize               10 def
-/small_fontsize         6 def
-/title_fontsize         12 def
-/number_fontsize        8 def
-
-
-/UseCIEColor true def       % Fix for issue 4
-/default_color [ 0.0 0.0 0.0 ] def 
-/color_dict << 
-  (T) [ 1.0 0.549019607843 0.0 ]
-  (A) [ 1.0 0.549019607843 0.0 ]
-  (U) [ 1.0 0.549019607843 0.0 ]
-  (G) [ 0.0 0.0 1.0 ]
-  (C) [ 0.0 0.0 1.0 ]
->> def
-
-
-
-% ---- DERIVED PARAMETERS ----
-
-/char_width stack_width 2 stack_margin mul sub def
-/char_width2 char_width 2 div def
-/char_width4 char_width 4 div def
-
-% movements to place 5'/N and 3'/C symbols
-/leftEndDeltaX  fontsize neg         def
-/leftEndDeltaY  fontsize 1.25 mul neg def
-/rightEndDeltaX fontsize 0.25 mul     def
-/rightEndDeltaY leftEndDeltaY        def
-
-
-% ---- PROCEDURES ----
-
-
-/SetTitleFont {/ArialMT findfont title_fontsize scalefont setfont} bind def
-/SetLogoFont  {/Arial-BoldMT findfont char_width  scalefont setfont} bind def
-/SetStringFont{/ArialMT findfont fontsize scalefont setfont} bind def
-/SetPrimeFont {/Symbol findfont fontsize scalefont setfont} bind def
-/SetSmallFont {/ArialMT findfont small_fontsize scalefont setfont} bind def
-/SetNumberFont {/ArialMT findfont number_fontsize scalefont setfont} bind def
-
-/DrawBox { % width height 
-    /hh exch def
-    /ww exch def
-    gsave
-        0.2 setlinewidth
-        %0.5 setgray
-        
-        %0 0 moveto 
-        hh 0 rlineto
-        0 ww rlineto
-        hh neg 0 rlineto
-        0 ww neg rlineto
-        stroke
-    grestore
-} bind def
-
-
-/StartLogo { 
-  %save 
-  gsave 
-
-  
-  debug { 
-    logo_margin logo_margin moveto
-    logo_height logo_margin 2 mul sub
-    logo_width logo_margin 2 mul sub
-    DrawBox } if
-    
-  show_title { DrawTitle } if
-  show_xaxis_label { DrawXaxisLable } if
-  show_fineprint { DrawFineprint } if
-  DrawLogoLabel
-  
-  
-  MoveToFirstLine
-} bind def
-
-
-/DrawLogoLabel {
-  gsave 
-    SetTitleFont
-    
-    logo_margin
-    logo_height title_fontsize sub logo_margin sub
-    moveto
-    
-    debug { title_fontsize logo_label stringwidth pop DrawBox } if
-    0 title_fontsize 4 div rmoveto % Move up to baseline (approximately)
-    logo_label show
-    
-  grestore
-} bind def
-
-/DrawTitle {
-  gsave 
-    SetTitleFont
-    
-    logo_width 2 div logo_title stringwidth pop 2 div sub 
-    logo_height title_fontsize sub logo_margin sub
-    moveto
-    
-    debug { title_fontsize logo_title stringwidth pop DrawBox } if
-    
-    0 title_fontsize 4 div rmoveto % Move up to baseline (approximately)
-    logo_title show
-    
-  grestore
-} bind def
-
-/DrawXaxisLable {
-  % Print X-axis label, bottom center
-  gsave
-    SetStringFont
-
-    logo_width 2 div xaxis_label stringwidth pop 2 div sub
-    xaxis_label_height logo_margin add fontsize sub
-    moveto
-    %fontsize 3 div
-    
-    debug { fontsize xaxis_label stringwidth pop DrawBox } if
-
-    xaxis_label show
-
-  grestore
-} bind def
-
-
-/DrawFineprint {
-    gsave
-    
-    SetSmallFont
-
-    logo_width fineprint stringwidth pop sub 
-        logo_margin sub line_margin_right sub
-    logo_margin 
-    moveto
-    
-    debug { small_fontsize fineprint stringwidth pop DrawBox } if
-        
-    fineprint show
-    grestore
-} bind def
-
-/MoveToFirstLine {
-    logo_margin 
-    logo_height logo_margin sub title_height sub line_height sub
-    moveto
-} bind def
-
-/EndLogo { 
-  grestore 
-  %showpage 
-  %restore 
-} bind def
-
-
-/StartLine{ 
-    gsave 
-  
-    % Draw outer box
-    debug { line_height line_width DrawBox } if  
-
-    % Move to lower left corner of content area
-    line_margin_left line_margin_bottom rmoveto
-
-    % Draw inner content box
-    debug { 
-        line_height line_margin_bottom sub line_margin_top sub
-        line_width line_margin_left sub line_margin_right sub
-        DrawBox
-    } if  
-  
-    show_yaxis { DrawYaxis } if
-    show_xaxis { DrawLeftEnd } if
-    
-} bind def
-
-/EndLine{ 
-    show_xaxis { DrawRightEnd } if
-    grestore 
-    0 line_height neg rmoveto 
-} bind def
-
-
-/DrawYaxis {
-  gsave    
-    stack_margin neg 0 translate
-    DrawYaxisBar
-    DrawYaxisLabel
-  grestore
-} bind def
-
-
-/DrawYaxisBar { 
-  gsave  
-    stack_margin neg 0 rmoveto
-    
-    SetNumberFont
-    stroke_width setlinewidth
-    
-    /str 10 string def % string to hold number  
-    /smallgap stack_margin  def
-
-    % Draw first tic and bar
-    gsave    
-      tic_length neg 0 rmoveto 
-      tic_length 0 rlineto 
-      0 stack_height rlineto
-      stroke
-    grestore
-
-    % Draw the tics
-    % initial increment limit proc for
-    0 yaxis_tic_interval yaxis_scale abs 
-    {/loopnumber exch def
-
-      % convert the number coming from the loop to a string
-      % and find its width
-      loopnumber 10 str cvrs
-      /stringnumber exch def % string representing the number
-
-      stringnumber stringwidth pop
-      /numberwidth exch def % width of number to show
-
-      /halfnumberheight
-         stringnumber CharBoxHeight 2 div
-      def
-
-      gsave
-        numberwidth % move back width of number
-        neg loopnumber stack_height yaxis_scale div mul % shift on y axis
-        halfnumberheight sub % down half the digit
-        rmoveto % move back the width of the string
-
-        tic_length neg smallgap sub % Move back a bit more  
-        0 rmoveto % move back the width of the tic  
-
-        stringnumber show
-        smallgap 0 rmoveto % Make a small gap  
-
-        % now show the tic mark
-        0 halfnumberheight rmoveto % shift up again
-        tic_length 0 rlineto
-        stroke
-     grestore
-    } for
-    
-    % Draw the minor tics
-    % initial increment limit proc for
-    0 yaxis_minor_tic_interval yaxis_scale abs 
-    {/loopnumber2 exch def
-      gsave
-        0
-        loopnumber2 stack_height yaxis_scale div mul 
-        rmoveto 
-
-        tic_length 2 div neg 0 rlineto
-        stroke
-     grestore
-    } for    
-    
-  grestore
-} bind def
-
-/DrawYaxisLabel {
-  gsave
-    SetStringFont
-
-    % How far we move left depends on the size of
-    % the tic labels.
-    /str 10 string def % string to hold number  
-    yaxis_scale yaxis_tic_interval div cvi yaxis_tic_interval mul 
-    str cvs stringwidth pop
-    tic_length 1.25 mul  add neg  
-
-    stack_height
-    yaxis_label stringwidth pop
-    sub 2 div
-
-    rmoveto
-    90 rotate
-    
-    yaxis_label show
-  grestore
-} bind def
-
-
-%Take a single character and return the bounding box
-/CharBox { % <char> CharBox <lx> <ly> <ux> <uy>
-  gsave
-    newpath
-    0 0 moveto
-    % take the character off the stack and use it here:
-    true charpath 
-    flattenpath 
-    pathbbox % compute bounding box of 1 pt. char => lx ly ux uy
-    % the path is here, but toss it away ...
-  grestore
-} bind def
-
-
-% The height of a character's bounding box
-/CharBoxHeight { % <char> CharBoxHeight <num>
-  CharBox
-  exch pop sub neg exch pop
-} bind def
-
-
-% The width of a character's bounding box
-/CharBoxWidth { % <char> CharBoxWidth <num>
-  CharBox
-  pop exch pop sub neg 
-} bind def
-
-
-/DrawLeftEnd {
-  gsave
-    SetStringFont
-    leftEndDeltaX leftEndDeltaY rmoveto
-    
-    show_ends {
-        debug { leftEndDeltaY neg leftEndDeltaX neg DrawBox } if
-        end_type (d) eq {(5) show DrawPrime} if
-        end_type (p) eq {(N) show} if
-    } if
-  grestore
-} bind def
-
-/DrawRightEnd { 
-  gsave
-    SetStringFont
-    rightEndDeltaX rightEndDeltaY rmoveto
-    
-    show_ends {
-        debug { rightEndDeltaY neg leftEndDeltaX neg  DrawBox } if
-        end_type (d) eq {(3) show DrawPrime} if
-        end_type (p) eq {(C) show} if
-    } if
-  grestore
-} bind def
-
-/DrawPrime {
-  gsave
-    SetPrimeFont
-    (\242) show 
-  grestore
-} bind def
-
-
-/StartStack {  % <stackNumber> startstack
-  show_xaxis {DrawNumber}{pop} ifelse
-  gsave
-  debug { stack_height stack_width DrawBox } if
-    
-} bind def
-
-/EndStack {
-  grestore
-  stack_width 0 rmoveto
-} bind def
-
-
-/DrawNumber { % number DrawNumber
-    /n exch def
-    
-    
-  gsave
-    %0 stack_margin neg rmoveto
-    stroke_width setlinewidth
-    stack_width  0 rlineto
-    stack_width 2 div neg 0 rmoveto
-    
-    n () eq 
-    {  0 tic_length 4 div neg rlineto  } 
-    { 0 tic_length 2 div neg rlineto } 
-    ifelse
-
-    stroke
-  grestore   
-
-
-
-  gsave
-    n
-    SetNumberFont
-    stack_width 2 div tic_length 2 div neg rmoveto 
-
-    rotate_numbers {
-        90 rotate     
-        dup stringwidth pop neg % find the length of the number
-        stack_margin sub        % Move down a bit
-        (0) CharBoxHeight 2 div neg % left half height of numbers
-        rmoveto 
-        show
-    } {
-        dup stringwidth pop neg 2 div number_fontsize neg  rmoveto
-        show
-    } ifelse
-    
-    
-
-  grestore
-} bind def
-
-
-
-% Draw a character whose height is proportional to symbol bits
-/ShowSymbol{ % fraction_width interval character ShowSymbol
-    /char exch def
-    /interval exch def
-    /fraction_width exch def
-    
-    /char_height 
-       interval yaxis_scale div stack_height mul
-       stack_margin sub
-       dup 
-       % if char_height is negative or very small replace with zero 
-       % BUG FIX: This used to be '0.0 gt' but it seems that DrawHeight
-       % has a finite, non-zero minimum, which results in a rangecheck error
-       0.001 gt {}{pop 0.0} ifelse 
-    def 
-
-    char_height 0.0 gt {
-        show_boxes {
-            gsave
-                /ww char_height stack_margin add def
-                /hh stack_width def
-                stroke_width setlinewidth
-                hh 0 rlineto
-                0 ww rlineto
-                hh neg 0 rlineto
-                0 ww neg rlineto
-                stroke
-            grestore     
-        } if
-
-        gsave
-            stack_margin stack_margin rmoveto
-            debug { char_height char_width DrawBox } if
-            1 fraction_width sub char_width mul 2 div  0 rmoveto
-            fraction_width char_width mul char_height char DrawChar
-        grestore
-        
-    } if
-    0 interval yaxis_scale div stack_height mul rmoveto
-} bind def
-
-
-/DrawChar { % <width> <height> <char> DrawChar
-    /tc exch def    % The character
-    /ysize exch def % the y size of the character
-    /xsize exch def % the x size of the character
-    /xmulfactor 1 def 
-    /ymulfactor 1 def
-    
-    gsave
-        SetLogoFont    
-        tc SetColor
-
-        % IReplacementHack
-        % Deal with the lack of bars on the letter 'I' in Arial and Helvetica
-        % by replacing with 'I' from Courier.
-        tc (I) eq {
-            /Courier findfont char_width  scalefont setfont    
-        } if
-
-
-        shrink {
-            xsize 1 shrink_fraction sub 2 div mul
-            ysize 1 shrink_fraction sub 2 div mul rmoveto 
-            shrink_fraction shrink_fraction scale
-        } if
-        
-        % Calculate the font scaling factors
-        % Loop twice to catch small correction due to first scaling
-        2 {
-            gsave
-            xmulfactor ymulfactor scale
-      
-            ysize % desired size of character in points
-            tc CharBoxHeight 
-            dup 0.0 ne {
-                div % factor by which to scale up the character
-                /ymulfactor exch def
-            } {pop pop} ifelse
-
-            xsize % desired size of character in points
-            tc CharBoxWidth  
-            dup 0.0 ne {
-                div % factor by which to scale up the character
-                /xmulfactor exch def
-            } {pop pop} ifelse
-            grestore
-        } repeat
-
-
-  
-        % Draw the character
-        xmulfactor ymulfactor scale
-        % Move lower left corner of character to start point
-        tc CharBox pop pop % llx lly : Lower left corner
-        exch neg exch neg
-        rmoveto
-        
-        tc show    
-
-    grestore
-} bind def
-
-/SetColor{ % <char> SetColor
-  dup color_dict exch known {
-    color_dict exch get aload pop setrgbcolor
-  } {
-    pop
-    default_color aload pop setrgbcolor
-  } ifelse 
-} bind def
-
-
-/DrawErrorbar{ % interval_down interval_up DrawErrorbar
-    
-    gsave
-    /points_per_unit stack_height yaxis_scale div def 
-    /height_up   exch points_per_unit mul def 
-    /height_down exch points_per_unit mul def
-    
-    show_errorbars {
-    
-    stroke_width setlinewidth
-    errorbar_gray setgray     
-    stack_width 2 div 0 rmoveto
-    
-    /errorbar_width char_width errorbar_width_fraction mul def
-    /errorbar_width2 errorbar_width 2 div def
-    
-    gsave 
-        0 height_down neg rmoveto
-        errorbar_width2 neg 0 rlineto
-        errorbar_width 0 rlineto
-        errorbar_width2 neg 0 rlineto
-        0 height_down errorbar_fraction mul rlineto 
-        stroke
-    grestore
-
-    gsave 
-        0 height_up  rmoveto
-        errorbar_width2 neg 0 rlineto
-        errorbar_width 0 rlineto
-        errorbar_width2 neg 0 rlineto
-        0 height_up neg errorbar_fraction mul rlineto 
-        stroke
-    grestore
-    } if
-           
-    grestore
-    
-} bind def
-
-/DrawErrorbarFirst{ % interval_down interval_up center DrawErrorbarFirst
-    gsave
-    /points_per_unit stack_height yaxis_scale div def 
-    /center   exch points_per_unit mul def 
-
-    0 center rmoveto
-    DrawErrorbar
-    grestore
-} bind def
-
-%%EndProlog
-
-%%Page: 1 1
-
-% Example Data
-%StartLogo
-%    StartLine
-%        (1) StartStack
-%            1.2 (C) ShowSymbol
-%            2.2 (I) ShowSymbol
-%            0.5 0.5 DrawErrorbar
-%        EndStack
-%        (2) StartStack
-%            0.5 (I) ShowSymbol
-%            0.9 (L) ShowSymbol
-%            1.0 (G) ShowSymbol
-%            
-%            0.5 0.5 DrawErrorbar        
-%        EndStack
-%        (234) StartStack
-%        EndStack
-%        (235) StartStack
-%        EndStack
-%    EndLine
-%EndLogo
-
-StartLogo
-
-StartLine
-() StartStack
- 1.000000 0.010108 (C) ShowSymbol
- 1.000000 0.040431 (G) ShowSymbol
- 1.000000 0.212261 (T) ShowSymbol
- 1.000000 0.232476 (A) ShowSymbol
- 0.232248 0.232248 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.015152 (C) ShowSymbol
- 1.000000 0.045455 (G) ShowSymbol
- 1.000000 0.136365 (T) ShowSymbol
- 1.000000 0.174245 (A) ShowSymbol
- 0.218101 0.218101 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.016616 (C) ShowSymbol
- 1.000000 0.037386 (G) ShowSymbol
- 1.000000 0.074773 (T) ShowSymbol
- 1.000000 0.074773 (A) ShowSymbol
- 0.169220 0.169220 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.000000 (A) ShowSymbol
- 1.000000 0.038953 (G) ShowSymbol
- 1.000000 0.155812 (C) ShowSymbol
- 1.000000 0.759583 (T) ShowSymbol
- 0.326656 0.326656 DrawErrorbar
-EndStack
-
-(5) StartStack
- 1.000000 0.019459 (C) ShowSymbol
- 1.000000 0.038917 (A) ShowSymbol
- 1.000000 0.116752 (T) ShowSymbol
- 1.000000 0.778345 (G) ShowSymbol
- 0.350333 0.350333 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.000000 (G) ShowSymbol
- 1.000000 0.021020 (A) ShowSymbol
- 1.000000 0.168160 (C) ShowSymbol
- 1.000000 0.840802 (T) ShowSymbol
- 0.325915 0.325915 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.000000 (C) ShowSymbol
- 1.000000 0.083359 (T) ShowSymbol
- 1.000000 0.083359 (A) ShowSymbol
- 1.000000 0.854432 (G) ShowSymbol
- 0.347959 0.347959 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.000000 (T) ShowSymbol
- 1.000000 0.070036 (G) ShowSymbol
- 1.000000 0.070036 (C) ShowSymbol
- 1.000000 1.003846 (A) ShowSymbol
- 0.356819 0.356819 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.026714 (A) ShowSymbol
- 1.000000 0.040070 (G) ShowSymbol
- 1.000000 0.044523 (C) ShowSymbol
- 1.000000 0.106855 (T) ShowSymbol
- 0.196056 0.196056 DrawErrorbar
-EndStack
-
-(10) StartStack
- 1.000000 0.014496 (A) ShowSymbol
- 1.000000 0.016107 (G) ShowSymbol
- 1.000000 0.020939 (T) ShowSymbol
- 1.000000 0.027382 (C) ShowSymbol
- 0.078924 0.106593 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.018308 (G) ShowSymbol
- 1.000000 0.032954 (A) ShowSymbol
- 1.000000 0.054923 (C) ShowSymbol
- 1.000000 0.073231 (T) ShowSymbol
- 0.164679 0.164679 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.015253 (C) ShowSymbol
- 1.000000 0.021790 (T) ShowSymbol
- 1.000000 0.032685 (A) ShowSymbol
- 1.000000 0.037043 (G) ShowSymbol
- 0.106770 0.125094 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.022457 (C) ShowSymbol
- 1.000000 0.028072 (T) ShowSymbol
- 1.000000 0.028072 (A) ShowSymbol
- 1.000000 0.058950 (G) ShowSymbol
- 0.137551 0.153378 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.013621 (G) ShowSymbol
- 1.000000 0.021404 (C) ShowSymbol
- 1.000000 0.029188 (T) ShowSymbol
- 1.000000 0.031133 (A) ShowSymbol
- 0.095346 0.115803 DrawErrorbar
-EndStack
-
-(15) StartStack
- 1.000000 0.033669 (C) ShowSymbol
- 1.000000 0.067338 (A) ShowSymbol
- 1.000000 0.078561 (G) ShowSymbol
- 1.000000 0.370360 (T) ShowSymbol
- 0.303054 0.303054 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.000000 (G) ShowSymbol
- 1.000000 0.056955 (T) ShowSymbol
- 1.000000 0.132896 (A) ShowSymbol
- 1.000000 0.740420 (C) ShowSymbol
- 0.331433 0.331433 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.014884 (C) ShowSymbol
- 1.000000 0.044653 (T) ShowSymbol
- 1.000000 0.148844 (G) ShowSymbol
- 1.000000 0.520953 (A) ShowSymbol
- 0.310748 0.310748 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.000000 (G) ShowSymbol
- 1.000000 0.088853 (T) ShowSymbol
- 1.000000 0.126932 (A) ShowSymbol
- 1.000000 0.406183 (C) ShowSymbol
- 0.268423 0.268423 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.043760 (C) ShowSymbol
- 1.000000 0.065640 (T) ShowSymbol
- 1.000000 0.065640 (G) ShowSymbol
- 1.000000 0.361019 (A) ShowSymbol
- 0.304415 0.304415 DrawErrorbar
-EndStack
-
-(20) StartStack
- 1.000000 0.021502 (G) ShowSymbol
- 1.000000 0.027646 (C) ShowSymbol
- 1.000000 0.036861 (A) ShowSymbol
- 1.000000 0.064506 (T) ShowSymbol
- 0.150515 0.158545 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.000000 (G) ShowSymbol
- 1.000000 0.015671 (C) ShowSymbol
- 1.000000 0.282073 (A) ShowSymbol
- 1.000000 0.470122 (T) ShowSymbol
- 0.247172 0.247172 DrawErrorbar
-EndStack
-
-() StartStack
- 1.000000 0.022864 (G) ShowSymbol
- 1.000000 0.040011 (C) ShowSymbol
- 1.000000 0.080022 (A) ShowSymbol
- 1.000000 0.137181 (T) ShowSymbol
- 0.209363 0.209363 DrawErrorbar
-EndStack
-
-EndLine
-
-EndLogo
-
-
-%%EOF
-
Binary file tools/rgenetics/test.png has changed
--- a/tools/samtools/bam_to_sam.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,129 +0,0 @@
-#!/usr/bin/env python
-"""
-Converts BAM data to sorted SAM data.
-usage: bam_to_sam.py [options]
-   --input1: BAM file to be converted
-   --output1: output dataset in SAM format
-"""
-
-import optparse, os, sys, subprocess, tempfile, shutil
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-#from galaxy import util
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option( '', '--input1', dest='input1', help='The input SAM dataset' )
-    parser.add_option( '', '--output1', dest='output1', help='The output BAM dataset' )
-    parser.add_option( '', '--header', dest='header', action='store_true', default=False, help='Write SAM Header' )
-    ( options, args ) = parser.parse_args()
-
-    # output the version number of samtools being used
-    try:
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'version' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( 'Samtools %s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine Samtools version\n' )
-
-    tmp_dir = tempfile.mkdtemp()
-
-    try:
-        # exit if input file empty
-        if os.path.getsize( options.input1 ) == 0:
-            raise Exception, 'Initial BAM file empty'
-        # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. This command
-        # may also create temporary files <out.prefix>.%d.bam when the whole alignment cannot be fitted
-        # into memory ( controlled by option -m ).
-        tmp_sorted_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir )
-        tmp_sorted_aligns_file_base = tmp_sorted_aligns_file.name
-        tmp_sorted_aligns_file_name = '%s.bam' % tmp_sorted_aligns_file.name
-        tmp_sorted_aligns_file.close()
-        command = 'samtools sort %s %s' % ( options.input1, tmp_sorted_aligns_file_base )
-        tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-        tmp_stderr = open( tmp, 'wb' )
-        proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-        returncode = proc.wait()
-        tmp_stderr.close()
-        # get stderr, allowing for case where it's very large
-        tmp_stderr = open( tmp, 'rb' )
-        stderr = ''
-        buffsize = 1048576
-        try:
-            while True:
-                stderr += tmp_stderr.read( buffsize )
-                if not stderr or len( stderr ) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-        tmp_stderr.close()
-        if returncode != 0:
-            raise Exception, stderr
-        # exit if sorted BAM file empty
-        if os.path.getsize( tmp_sorted_aligns_file_name) == 0:
-            raise Exception, 'Intermediate sorted BAM file empty'
-    except Exception, e:
-        #clean up temp files
-        if os.path.exists( tmp_dir ):
-            shutil.rmtree( tmp_dir )
-        stop_err( 'Error sorting alignments from (%s), %s' % ( options.input1, str( e ) ) )
-
-
-    try:
-        # Extract all alignments from the input BAM file to SAM format ( since no region is specified, all the alignments will be extracted ).
-        if options.header:
-            view_options = "-h"
-        else:
-            view_options = ""
-        command = 'samtools view %s -o %s %s' % ( view_options, options.output1, tmp_sorted_aligns_file_name )
-        tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-        tmp_stderr = open( tmp, 'wb' )
-        proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-        returncode = proc.wait()
-        tmp_stderr.close()
-        # get stderr, allowing for case where it's very large
-        tmp_stderr = open( tmp, 'rb' )
-        stderr = ''
-        buffsize = 1048576
-        try:
-            while True:
-                stderr += tmp_stderr.read( buffsize )
-                if not stderr or len( stderr ) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-        tmp_stderr.close()
-        if returncode != 0:
-            raise Exception, stderr
-    except Exception, e:
-        #clean up temp files
-        if os.path.exists( tmp_dir ):
-            shutil.rmtree( tmp_dir )
-        stop_err( 'Error extracting alignments from (%s), %s' % ( options.input1, str( e ) ) )
-    #clean up temp files
-    if os.path.exists( tmp_dir ):
-        shutil.rmtree( tmp_dir )
-    # check that there are results in the output file
-    if os.path.getsize( options.output1 ) > 0:
-        sys.stdout.write( 'BAM file converted to SAM' )
-    else:
-        stop_err( 'The output file is empty, there may be an error with your input file.' )
-
-if __name__=="__main__": __main__()
--- a/tools/samtools/bam_to_sam.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-<tool id="bam_to_sam" name="BAM-to-SAM" version="1.0.3">
-  <requirements>
-    <requirement type="package">samtools</requirement>
-  </requirements>
-  <description>converts BAM format to SAM format</description>
-  <command interpreter="python">
-    bam_to_sam.py
-      --input1=$input1
-      --output1=$output1
-      $header
-  </command>
-  <inputs>
-    <param name="input1" type="data" format="bam" label="BAM File to Convert" />
-    <param name="header" type="boolean" truevalue="--header" falsevalue="" checked="False" label="Include header in output" />
-  </inputs>
-  <outputs>
-    <data format="sam" name="output1" label="${tool.name} on ${on_string}: converted SAM" />
-  </outputs>
-  <tests>
-    <test>
-      <!--
-      Bam-to-Sam command:
-      samtools view -o bam_to_sam_out1.sam test-data/bam_to_sam_in1.bam
-      bam_to_sam_in1.bam can be created from bam_to_sam_in1.sam
-      -->
-      <param name="input1" value="bam_to_sam_in1.bam" ftype="bam" />
-      <param name="header" value="" />
-      <output name="output1" file="bam_to_sam_out1.sam" sorted="True" />
-    </test>
-    <test>
-      <!--
-      Bam-to-Sam command:
-      samtools view -o bam_to_sam_out2.sam test-data/bam_to_sam_in2.bam
-      bam_to_sam_in2.bam can be created from bam_to_sam_in2.sam
-      -->
-      <param name="input1" value="bam_to_sam_in2.bam" ftype="bam" />
-      <param name="header" value="" />
-      <output name="output1" file="bam_to_sam_out2.sam" sorted="True" />
-    </test>
-    <test>
-      <!--
-      Bam-to-Sam command:
-      samtools view -h -o bam_to_sam_out3.sam test-data/bam_to_sam_in1.bam
-      bam_to_sam_in1.bam can be created from bam_to_sam_in1.sam
-      -->
-      <param name="input1" value="bam_to_sam_in1.bam" ftype="bam" />
-      <param name="header" value="--header" />
-      <output name="output1" file="bam_to_sam_out3.sam" sorted="True" lines_diff="6" /><!-- header param not working in func tests so won't produce correct 6-line header (fine in browser) -->
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool uses the SAMTools_ toolkit to produce a SAM file from a BAM file.
-
-.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
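-
-Under the hood the wrapper script above first coordinate-sorts the BAM and then converts it to SAM. A minimal Python sketch of those two samtools 0.1.x steps (the file names here are hypothetical)::
-
-  import subprocess
-  # Sort alignments by coordinate; samtools 0.1.x appends ".bam" to the output prefix.
-  subprocess.check_call(['samtools', 'sort', 'input.bam', 'sorted_prefix'])
-  # Convert the sorted BAM to SAM; adding -h would include the header.
-  subprocess.check_call(['samtools', 'view', '-o', 'output.sam', 'sorted_prefix.bam'])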
-
-  </help>
-</tool>
--- a/tools/samtools/pileup_interval.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Condenses pileup format into ranges of bases.
-
-usage: %prog [options]
-   -i, --input=i: Input pileup file
-   -o, --output=o: Output interval dataset
-   -c, --coverage=c: Coverage
-   -f, --format=f: Pileup format
-   -b, --base=b: Base to select
-   -s, --seq_column=s: Sequence column
-   -l, --loc_column=l: Base location column
-   -r, --base_column=r: Reference base column
-   -C, --cvrg_column=C: Coverage column
-"""
-
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-import sys
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def __main__():
-    strout = ''
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    coverage = int(options.coverage)
-    fin = file(options.input, 'r')
-    fout = file(options.output, 'w')
-    inLine = fin.readline()
-    if options.format == 'six':
-        seqIndex = 0
-        locIndex = 1
-        baseIndex = 2
-        covIndex = 3
-    elif options.format == 'ten':
-        seqIndex = 0
-        locIndex = 1
-        if options.base == 'first':
-            baseIndex = 2
-        else:
-            baseIndex = 3
-        covIndex = 7
-    else:
-        seqIndex = int(options.seq_column) - 1
-        locIndex = int(options.loc_column) - 1
-        baseIndex = int(options.base_column) - 1
-        covIndex = int(options.cvrg_column) - 1
-    lastSeq = ''
-    lastLoc = -1
-    locs = []
-    startLoc = -1
-    bases = []
-    while inLine.strip() != '':
-        lineParts = inLine.split('\t')
-        try:
-            seq, loc, base, cov = lineParts[seqIndex], int(lineParts[locIndex]), lineParts[baseIndex], int(lineParts[covIndex])
-        except IndexError, ei:
-            if options.format == 'ten':
-                stop_err( 'It appears that you have selected 10 columns while your file has 6. Make sure that the number of columns you specify matches the number in your file.\n' + str( ei ) )
-            else:
-                stop_err( 'There appears to be something wrong with your column index values.\n' + str( ei ) )
-        except ValueError, ev:
-            if options.format == 'six':
-                stop_err( 'It appears that you have selected 6 columns while your file has 10. Make sure that the number of columns you specify matches the number in your file.\n' + str( ev ) )
-            else:
-                stop_err( 'There appears to be something wrong with your column index values.\n' + str( ev ) )
-#        strout += str(startLoc) + '\n'
-#        strout += str(bases) + '\n'
-#        strout += '%s\t%s\t%s\t%s\n' % (seq, loc, base, cov)
-        if loc == lastLoc+1 or lastLoc == -1:
-            if cov >= coverage:
-                if seq == lastSeq or lastSeq == '':
-                    if startLoc == -1:
-                        startLoc = loc
-                    locs.append(loc)
-                    bases.append(base)
-                else:
-                    if len(bases) > 0:
-                        fout.write('%s\t%s\t%s\t%s\n' % (lastSeq, startLoc-1, lastLoc, ''.join(bases)))
-                    startLoc = loc
-                    locs = [loc]
-                    bases = [base]
-            else:
-                if len(bases) > 0:
-                    fout.write('%s\t%s\t%s\t%s\n' % (lastSeq, startLoc-1, lastLoc, ''.join(bases)))
-                startLoc = -1
-                locs = []
-                bases = []
-        else:
-            if len(bases) > 0:
-                fout.write('%s\t%s\t%s\t%s\n' % (lastSeq, startLoc-1, lastLoc, ''.join(bases)))
-            if cov >= coverage:
-                startLoc = loc
-                locs = [loc]
-                bases = [base]
-            else:
-                startLoc = -1
-                locs = []
-                bases = []
-        lastSeq = seq
-        lastLoc = loc
-        inLine = fin.readline()
-    if len(bases) > 0:
-        fout.write('%s\t%s\t%s\t%s\n' % (lastSeq, startLoc-1, lastLoc, ''.join(bases)))
-    fout.close()
-    fin.close()
-    
-#    import sys
-#    strout += file(fout.name,'r').read()
-#    sys.stderr.write(strout)
-
-if __name__ == "__main__" : __main__()
--- a/tools/samtools/pileup_interval.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,189 +0,0 @@
-<tool id="pileup_interval" name="Pileup-to-Interval" version="1.0.0">
-  <description>condenses pileup format into ranges of bases</description>
-  <requirements>
-    <requirement type="package">samtools</requirement>
-  </requirements>
-  <command interpreter="python">
-    pileup_interval.py 
-      --input=$input 
-      --output=$output 
-      --coverage=$coverage
-      --format=$format_type.format
-      #if $format_type.format == "ten":
-       --base=$format_type.which_base
-       --seq_column="None"
-       --loc_column="None"
-       --base_column="None"
-       --cvrg_column="None"
-      #elif $format_type.format == "manual":
-       --base="None"
-       --seq_column=$format_type.seq_column
-       --loc_column=$format_type.loc_column
-       --base_column=$format_type.base_column
-       --cvrg_column=$format_type.cvrg_column
-      #else:
-       --base="None"
-       --seq_column="None"
-       --loc_column="None"
-       --base_column="None"
-       --cvrg_column="None"
-      #end if
-  </command>
-  <inputs>
-    <param name="input" type="data" format="tabular" label="Choose a pileup file to condense:" />
-    <conditional name="format_type">
-      <param name="format" type="select" label="which contains:" help="See &quot;Types of pileup datasets&quot; below for examples">
-        <option value="six" selected="true">Pileup with six columns (simple)</option>
-        <option value="ten">Pileup with ten columns (with consensus)</option>
-        <option value="manual">Set columns manually</option>
-      </param>
-      <when value="six" />
-      <when value="ten">
-        <param name="which_base" type="select" label="Which base do you want to concatenate">
-          <option value="first" selected="true">Reference base (first)</option>
-          <option value="second">Consensus base (second)</option>
-        </param>
-      </when>
-      <when value="manual">
-        <param name="seq_column" label="Select column with sequence name" type="data_column" numerical="false" data_ref="input" />
-        <param name="loc_column" label="Select column with base location" type="data_column" numerical="false" data_ref="input" />
-        <param name="base_column" label="Select column with base to concatenate" type="data_column" numerical="false" data_ref="input" />
-        <param name="cvrg_column" label="Select column with coverage" type="data_column" numerical="true" data_ref="input" />
-      </when>
-    </conditional>
-    <param name="coverage" type="integer" value="3" label="Do not report bases with coverage less than:" />
-  </inputs>
-  <outputs>
-    <data format="tabular" name="output" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="pileup_interval_in1.tabular" />
-      <param name="format" value="six" />
-      <param name="coverage" value="3" />
-      <output name="output" file="pileup_interval_out1.tabular" />
-    </test>
-    <test>
-      <param name="input" value="pileup_interval_in2.tabular" />
-      <param name="format" value="ten" />
-      <param name="which_base" value="first" />
-      <param name="coverage" value="3" />
-      <output name="output" file="pileup_interval_out2.tabular" />
-    </test>
-    <test>
-      <param name="input" value="pileup_interval_in2.tabular" />
-      <param name="format" value="manual" />
-      <param name="seq_column" value="1" />
-      <param name="loc_column" value="2" />
-      <param name="base_column" value="3" />
-      <param name="cvrg_column" value="8" />
-      <param name="coverage" value="3" />
-      <output name="output" file="pileup_interval_out2.tabular" />
-    </test>
-  </tests> 
-  <help>
-    	
-**What it does**
-
-Reduces the size of a result set by taking a pileup file and producing a condensed version that lists consecutive runs of bases meeting the coverage criterion. The tool works on the six- and ten-column pileup formats produced by the *samtools pileup* command; if your dataset deviates from those layouts, you can also assign the columns manually.
-
---------
-
-**Types of pileup datasets**
-
-The descriptions of the 6- and 10-column pileup variants below are largely based on information from the SAMTools_ documentation page.
-
-.. _SAMTools: http://samtools.sourceforge.net/pileup.shtml
-
-**Six column pileup**::
-
-    1    2  3  4        5        6
- ---------------------------------   
- chrM  412  A  2       .,       II
- chrM  413  G  4     ..t,     IIIH
- chrM  414  C  4     ...a     III2
- chrM  415  C  4     TTTt     III7
-   
-where::
-
- Column Definition
- ------ ----------------------------
-      1 Chromosome
-      2 Position (1-based)
-      3 Reference base at that position
-      4 Coverage (# reads aligning over that position)
-      5 Bases within reads (see Galaxy wiki for more info)
-      6 Quality values (phred33 scale, see Galaxy wiki for more)
-       
-**Ten column pileup**
-
-The `ten-column`__ pileup incorporates additional consensus information generated with the *-c* option of the *samtools pileup* command::
-
-
-    1    2  3  4   5   6   7   8       9       10
- ------------------------------------------------
- chrM  412  A  A  75   0  25  2       .,       II
- chrM  413  G  G  72   0  25  4     ..t,     IIIH
- chrM  414  C  C  75   0  25  4     ...a     III2
- chrM  415  C  T  75  75  25  4     TTTt     III7
-
-where::
-
-  Column Definition
- ------- ----------------------------
-       1 Chromosome
-       2 Position (1-based)
-       3 Reference base at that position
-       4 Consensus bases
-       5 Consensus quality
-       6 SNP quality
-       7 Maximum mapping quality
-       8 Coverage (# reads aligning over that position)
-       9 Bases within reads (see Galaxy wiki for more info)
-      10 Quality values (phred33 scale, see Galaxy wiki for more)
-
-
-.. __: http://samtools.sourceforge.net/cns0.shtml
-
-------
-
-**The output format**
-
-The output file condenses the information in the pileup file so that consecutive bases are listed together as sequences. The starting and ending points of the sequence range are listed, with the starting value converted to a 0-based value. 
-
-Given the following input with minimum coverage set to 3::
-
-    1    2  3  4        5        6
- ---------------------------------   
- chr1  112  G  3     ..Ta     III6
- chr1  113  T  2     aT..     III5
- chr1  114  A  5     ,,..     IIH2
- chr1  115  C  4      ,.,      III
- chrM  412  A  2       .,       II
- chrM  413  G  4     ..t,     IIIH
- chrM  414  C  4     ...a     III2
- chrM  415  C  4     TTTt     III7
- chrM  490  T  3        a        I
- 
-the following would be the output::
- 
-    1    2    3  4
- -------------------
- chr1  111  112  G
- chr1  113  115  AC
- chrM  412  415  GCC
- chrM  489  490  T
-
-where::
-
-  Column Definition
- ------- ----------------------------
-       1 Chromosome
-       2 Starting position (0-based)
-       3 Ending position (1-based)
-       4 Sequence of bases
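-
-The condensing rule itself is small; here is an illustrative, simplified Python sketch (fixed columns and a coverage cutoff only; not the actual script shipped with the tool)::
-
-  def condense(rows, min_cov):
-      """rows: (chrom, pos, base, cov) tuples in file order; returns condensed runs."""
-      runs, run = [], None
-      for chrom, pos, base, cov in rows:
-          if cov >= min_cov and run and run[0] == chrom and pos == run[2] + 1:
-              run = (chrom, run[1], pos, run[3] + base)    # extend the current run
-          elif cov >= min_cov:
-              if run: runs.append(run)
-              run = (chrom, pos - 1, pos, base)            # new run: 0-based start, 1-based end
-          else:
-              if run: runs.append(run)
-              run = None                                   # coverage too low: break the run
-      if run: runs.append(run)
-      return runs
-
-Applied to the example input above with a minimum coverage of 3, this reproduces the four output rows shown.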
- 	
-  </help>
-</tool>
-
-
--- a/tools/samtools/pileup_parser.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,121 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-use POSIX;
-
-
-die "Usage: pileup_parser.pl <in_file> <ref_base_column> <read_bases_column> <base_quality_column> <coverage column> <qv cutoff> <coverage cutoff> <SNPs only?> <output bed?> <coord_column> <out_file> <total_diff> <print_qual_bases>\n" unless @ARGV == 13;
-
-my $in_file = $ARGV[0];
-my $ref_base_column = $ARGV[1]-1; # 1 based
-my $read_bases_column = $ARGV[2]-1; # 1 based
-my $base_quality_column = $ARGV[3]-1; # 1 based
-my $cvrg_column = $ARGV[4]-1; # 1 based
-my $quality_cutoff = $ARGV[5]; # phred scale integer
-my $cvrg_cutoff = $ARGV[6]; # unsigned integer
-my $SNPs_only = $ARGV[7]; # set to "Yes" to print only positions with SNPs; set to "No" to print everything
-my $bed = $ARGV[8]; #set to "Yes" to convert coordinates to bed format (0-based start, 1-based end); set to "No" to leave as is
-my $coord_column = $ARGV[9]-1; #1 based 
-my $out_file = $ARGV[10];
-my $total_diff = $ARGV[11]; # set to "Yes" to print total number of deviant bases
-my $print_qual_bases = $ARGV[12]; #set to "Yes" to print quality and read base columns
-
-my $invalid_line_counter = 0;
-my $first_skipped_line = "";
-my %SNPs = ('A',0,'T',0,'C',0,'G',0);
-my $above_qv_bases = 0;
-my $SNPs_exist = 0;
-my $out_string = "";
-my $diff_count = 0;
-
-open (IN, "<$in_file") or die "Cannot open $in_file $!\n";
-open (OUT, ">$out_file") or die "Cannot open $out_file $!\n";
-
-while (<IN>) {
-	chop;
-	next if m/^\#/;
-	my @fields = split /\t/;
-	next if $fields[ $ref_base_column ] eq "*"; # skip indel lines
- 	my $read_bases   = $fields[ $read_bases_column ];
- 	die "Coverage column " . ($cvrg_column+1) . " contains non-numeric values. Check your input parameters as well as the format of the input dataset." if ( not isdigit $fields[ $cvrg_column ] );
-    next if $fields[ $cvrg_column ] < $cvrg_cutoff;
-	my $base_quality = $fields[ $base_quality_column ];
-	if ($read_bases =~ m/[\$\^\+-]/) {
-		$read_bases =~ s/\^.//g; #removing the start of the read segment mark
-		$read_bases =~ s/\$//g; #removing end of the read segment mark
-		while ($read_bases =~ m/[\+-]{1}(\d+)/g) {
-			my $indel_len = $1;
-			$read_bases =~ s/[\+-]{1}$indel_len.{$indel_len}//; # remove indel info from read base field
-		}
-	}
-	if ( length($read_bases) != length($base_quality) ) {
-        $first_skipped_line = $. if $first_skipped_line eq "";
-        ++$invalid_line_counter;
-        next;
-	}
-	# after removing read segment marks and indel data, the length of the read_bases
-	# field should be identical to the length of the base_quality field
-	
-	my @bases = split //, $read_bases;
-	my @qv    = split //, $base_quality;
-	
-	for my $base ( 0 .. @bases - 1 ) {
-		if ( ord( $qv[ $base ] ) - 33 >= $quality_cutoff and $bases[ $base ] ne '*')
-		{
-			++$above_qv_bases;
-			
-			if ( $bases[ $base ] =~ m/[ATGC]/i )
-			{
-				$SNPs_exist = 1;	
-				$SNPs{ uc( $bases[ $base ] ) } += 1;
-				$diff_count += 1;
-			} elsif ( $bases[ $base ] =~ m/[\.,]/ ) {
-			    $SNPs{ uc( $fields[ $ref_base_column ] ) } += 1;
-		    }		 	
-		}
-	} 
-	
-	if ($bed eq "Yes") {
-	       my $start = $fields[ $coord_column ] - 1;
-	       my $end   = $fields[ $coord_column ];
-	       $fields[ $coord_column ] = "$start\t$end";
-	} 
-	
-	if ($print_qual_bases ne "Yes") {
-	       $fields[ $base_quality_column ] = "";
-	       $fields[ $read_bases_column ] = "";
-	}
-	       
-	
-	$out_string = join("\t", @fields); # \t$read_bases\t$base_quality";
-	foreach my $SNP (sort keys %SNPs) {
-			$out_string .= "\t$SNPs{$SNP}";
-	}
-	
-	if ($total_diff eq "Yes") {
-	   $out_string .= "\t$above_qv_bases\t$diff_count\n";
-	} else {
-	   $out_string .= "\t$above_qv_bases\n";
-	}	
-	
-	$out_string =~ s/\t+/\t/g;
-	
-	if ( $SNPs_only eq "Yes" ) {
-		print OUT $out_string if $SNPs_exist == 1;
-	} else {
-		print OUT $out_string;
-	}
-
-		
-	%SNPs = ();
-	%SNPs = ('A',0,'T',0,'C',0,'G',0);
-	$above_qv_bases = 0;
-	$SNPs_exist = 0;
-	$diff_count = 0;
-	
-
-}
-
-print "Skipped $invalid_line_counter invalid line(s) beginning with line $first_skipped_line\n" if $invalid_line_counter > 0;
-close IN;
-close OUT;
--- a/tools/samtools/pileup_parser.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,387 +0,0 @@
-<tool id="pileup_parser" name="Filter pileup" version="1.0.2">>
-  <description>on coverage and SNPs</description>
-  <command interpreter="perl">
-    #if   $pileup_type.type_select == "six"    #pileup_parser.pl $input "3" "5" "6" "4" $qv_cutoff $cvrg_cutoff $snps_only $interval "2" $out_file1 $diff $qc_base
-    #elif $pileup_type.type_select == "ten"    #pileup_parser.pl $input "3" "9" "10" "8" $qv_cutoff $cvrg_cutoff $snps_only $interval "2" $out_file1 $diff $qc_base
-    #elif $pileup_type.type_select == "manual" #pileup_parser.pl $input $pileup_type.ref_base_column $pileup_type.read_bases_column $pileup_type.read_qv_column $pileup_type.cvrg_column $qv_cutoff $cvrg_cutoff $snps_only $interval $pileup_type.coord_column $out_file1 $diff $qc_base
-    #end if#
-  </command>
-  <inputs>
-    <param name="input" type="data" format="tabular" label="Select dataset"/>
-    <conditional name="pileup_type">
-        <param name="type_select" type="select" label="which contains" help="See &quot;Types of pileup datasets&quot; below for examples">
-            <option value="six" selected="true">Pileup with six columns (simple)</option>
-            <option value="ten">Pileup with ten columns (with consensus)</option>
-            <option value="manual">Set columns manually</option>
-        </param>
-        <when value="manual">
-            <param name="ref_base_column" label="Select column with reference base" type="data_column" numerical="false" data_ref="input" />
-            <param name="read_bases_column" label="Select column with read bases" type="data_column" numerical="false" data_ref="input" help="something like this: ..,a.."/>
-            <param name="read_qv_column" label="Select column with base qualities" type="data_column" numerical="false" data_ref="input" help="something like this: IIIGIAI"/>
-            <param name="cvrg_column" label="Select column with coverage" type="data_column" numerical="true" data_ref="input" />
-            <param name="coord_column" label="Select coordinate column" type="data_column" numerical="true" data_ref="input" />
-        </when>
-        <when value="six">
-        </when>
-        <when value="ten">
-        </when>
-    </conditional>
-    <param name="qv_cutoff" label="Do not consider read bases with quality lower than" type="integer" value="20" help="No variants with quality below this value will be reported"/>
-    <param name="cvrg_cutoff" label="Do not report positions with coverage lower than" type="integer" value="3" help="Pileup lines with coverage lower than this value will be skipped"/>
-    <param name="snps_only" label="Only report variants?" type="select" help="See &quot;Examples 1 and 2&quot; below for explanation">
-        <option value="No">No</option>
-        <option value="Yes" selected="true">Yes</option>
-    </param>
-    <param name="interval" label="Convert coordinates to intervals?" type="select" help="See &quot;Output format&quot; below for explanation">
-        <option value="No" selected="true">No</option>
-        <option value="Yes">Yes</option>
-    </param>
-    <param name="diff" label="Print total number of differences?" type="select" help="See &quot;Example 3&quot; below for explanation">
-        <option value="No" selected="true">No</option>
-        <option value="Yes">Yes</option>
-    </param>
-    <param name="qc_base" label="Print quality and base string?" type="select" help="See &quot;Example 4&quot; below for explanation">
-        <option value="No">No</option>
-        <option value="Yes" selected="true">Yes</option>
-    </param>
-        
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1">
-        <change_format>
-            <when input="interval" value="Yes" format="interval" />
-        </change_format>
-   </data>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="pileup_parser.6col.pileup"/>
-      <output name="out_file1" file="pileup_parser.6col.20-3-yes-yes.pileup.out"/>
-      <param name="type_select" value="six"/>
-      <param name="qv_cutoff" value="20" />
-      <param name="cvrg_cutoff" value="3" />
-      <param name="snps_only" value="Yes"/>
-      <param name="interval" value="Yes" />
-      <param name="diff" value="No" />
-      <param name="qc_base" value="Yes" />
-    </test>
-    <test>
-      <param name="input" value="pileup_parser.6col.pileup"/>
-      <output name="out_file1" file="pileup_parser.6col.20-3-yes-no.pileup.out"/>
-      <param name="type_select" value="six"/>
-      <param name="qv_cutoff" value="20" />
-      <param name="cvrg_cutoff" value="3" />
-      <param name="snps_only" value="Yes"/>
-      <param name="interval" value="No" />
-       <param name="diff" value="No" />
-      <param name="qc_base" value="Yes" />
-    </test>
-    <test>
-      <param name="input" value="pileup_parser.6col.pileup"/>
-      <output name="out_file1" file="pileup_parser.6col.20-3-no-no.pileup.out"/>
-      <param name="type_select" value="six"/>
-      <param name="qv_cutoff" value="20" />
-      <param name="cvrg_cutoff" value="3" />
-      <param name="snps_only" value="No"/>
-      <param name="interval" value="No" />
-       <param name="diff" value="No" />
-      <param name="qc_base" value="Yes" />
-    </test>
-    <test>
-      <param name="input" value="pileup_parser.10col.pileup"/>
-      <output name="out_file1" file="pileup_parser.10col.20-3-yes-yes.pileup.out"/>
-      <param name="type_select" value="ten"/>
-      <param name="qv_cutoff" value="20" />
-      <param name="cvrg_cutoff" value="3" />
-      <param name="snps_only" value="Yes"/>q
-      <param name="interval" value="Yes" />
-       <param name="diff" value="No" />
-      <param name="qc_base" value="Yes" />
-    </test>
-    <test>
-      <param name="input" value="pileup_parser.10col.pileup"/>
-      <output name="out_file1" file="pileup_parser.10col.20-3-yes-yes.pileup.out"/>
-      <param name="type_select" value="manual"/>
-      <param name="ref_base_column" value="3"/>
-      <param name="read_bases_column" value="9"/>
-      <param name="read_qv_column" value="10"/>
-      <param name="cvrg_column" value="8"/>
-      <param name="coord_column" value="2"/>
-      <param name="qv_cutoff" value="20" />
-      <param name="cvrg_cutoff" value="3" />
-      <param name="snps_only" value="Yes"/>
-      <param name="interval" value="Yes" />
-       <param name="diff" value="No" />
-      <param name="qc_base" value="Yes" />
-    </test>
-    <test>
-      <param name="input" value="pileup_parser.10col.pileup"/>
-      <output name="out_file1" file="pileup_parser.10col.20-3-yes-yes-yes-yes.pileup.out"/>
-      <param name="type_select" value="manual"/>
-      <param name="ref_base_column" value="3"/>
-      <param name="read_bases_column" value="9"/>
-      <param name="read_qv_column" value="10"/>
-      <param name="cvrg_column" value="8"/>
-      <param name="coord_column" value="2"/>
-      <param name="qv_cutoff" value="20" />
-      <param name="cvrg_cutoff" value="3" />
-      <param name="snps_only" value="Yes"/>
-      <param name="interval" value="Yes" />
-       <param name="diff" value="Yes" />
-      <param name="qc_base" value="Yes" />
-    </test>
-    <test>
-      <param name="input" value="pileup_parser.10col.pileup"/>
-      <output name="out_file1" file="pileup_parser.10col.20-3-yes-yes-yes-no.pileup.out"/>
-      <param name="type_select" value="manual"/>
-      <param name="ref_base_column" value="3"/>
-      <param name="read_bases_column" value="9"/>
-      <param name="read_qv_column" value="10"/>
-      <param name="cvrg_column" value="8"/>
-      <param name="coord_column" value="2"/>
-      <param name="qv_cutoff" value="20" />
-      <param name="cvrg_cutoff" value="3" />
-      <param name="snps_only" value="Yes"/>
-      <param name="interval" value="Yes" />
-       <param name="diff" value="Yes" />
-      <param name="qc_base" value="No" />
-    </test>
-
-
- </tests>
-<help>
-
-**What it does**
-
-Allows one to find sequence variants and/or sites covered by a specified number of reads whose bases exceed a set quality threshold. The tool works on the six- and ten-column pileup formats produced by the *samtools pileup* command, although you can also specify the input columns manually. The tool assumes the following:
-
-- the quality scores follow the phred33 convention, where input qualities are ASCII characters equal to the Phred quality plus 33 (see the quick check below).
-- the pileup dataset was produced by the *samtools pileup* command (although you can override this by setting column assignments manually).
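-
-A quick check of the phred33 convention (illustrative Python, not part of the tool)::
-
-  >>> ord('I') - 33   # quality character 'I'
-  40
-  >>> ord('2') - 33   # quality character '2', which falls below a cutoff of 20
-  17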
-
---------
-
-**Types of pileup datasets**
-
-The descriptions of the 6- and 10-column pileup variants below are largely based on information from the SAMTools_ documentation page.
-
-.. _SAMTools: http://samtools.sourceforge.net/pileup.shtml
-
-**Six column pileup**::
-
-    1    2  3  4        5        6
- ---------------------------------   
- chrM  412  A  2       .,       II
- chrM  413  G  4     ..t,     IIIH
- chrM  414  C  4     ..Ta     III2
- chrM  415  C  4     TTTt     III7
-   
-where::
-
-  Column Definition
- ------- ----------------------------
-       1 Chromosome
-       2 Position (1-based)
-       3 Reference base at that position
-       4 Coverage (# reads aligning over that position)
-       5 Bases within reads
-       6 Quality values (phred33 scale, see Galaxy wiki for more)
-       
-**Ten column pileup**
-
-The `ten-column`__ pileup incorporates additional consensus information generated with the *-c* option of the *samtools pileup* command::
-
-
-    1    2  3  4   5   6   7   8       9       10
- ------------------------------------------------
- chrM  412  A  A  75   0  25  2       .,       II
- chrM  413  G  G  72   0  25  4     ..t,     IIIH
- chrM  414  C  C  75   0  25  4     ..Ta     III2
- chrM  415  C  T  75  75  25  4     TTTt     III7
-
-where::
-
-  Column Definition
- ------- ----------------------------
-       1 Chromosome
-       2 Position (1-based)
-       3 Reference base at that position
-       4 Consensus bases
-       5 Consensus quality
-       6 SNP quality
-       7 Maximum mapping quality
-       8 Coverage (# reads aligning over that position)
-       9 Bases within reads
-      10 Quality values (phred33 scale, see Galaxy wiki for more)
-
-
-.. __: http://samtools.sourceforge.net/cns0.shtml
-
-------
-
-**The output format**
-
-The tool modifies the input dataset in two ways:
-
-1. It appends five columns to the end of every reported line:
-
-- Number of **A** variants
-- Number of **C** variants
-- Number of **G** variants
-- Number of **T** variants
-- Number of read bases covering this position, where quality is equal to or higher than the value set by the **Do not consider read bases with quality lower than** option.
-
-Optionally, if **Print total number of differences?** is set to **Yes**, the tool will append a sixth column containing the total number of deviant reads (see below).
-
-2. If **Convert coordinates to intervals?** is set to **Yes**, the tool replaces the position column (typically the second column) with a pair of tab-delimited start/end values.
-
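-As a rough sketch, the interval conversion replaces the 1-based position with
-a 0-based start and a 1-based end (column positions here assume the standard
-six-column layout; the real tool lets you override them)::
-
-  fields = 'chrM\t413\tG\t4\t..t,\tIII2'.split( '\t' )
-  pos = int( fields[1] )
-  fields[1:2] = [ str( pos - 1 ), str( pos ) ]   # 413 -> 412  413
-  print '\t'.join( fields )
-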
-For example, if you are calling variants with base quality above 20 on this dataset::
-
- chrM  412  A  2       .,       II
- chrM  413  G  4     ..t,     III2
- chrM  414  C  4     ..Ta     III2
- chrM  415  C  4     TTTt     III7
-
-you will get::
-
- chrM  413  G  4  ..t,  III2  0  0  2  1  3
- chrM  414  C  4  ..Ta  III2  0  2  0  1  3
- chrM  415  C  4  TTTt  III7  0  0  0  4  4
- 
-where::
-
-  Column Definition
- ------- ----------------------------
-       1 Chromosome
-       2 Position (1-based)
-       3 Reference base at that position
-       4 Coverage (# reads aligning over that position)
-       5 Bases within reads
-       6 Quality values (phred33 scale, see Galaxy wiki for more)
-       7 Number of A variants
-       8 Number of C variants
-       9 Number of G variants
-      10 Number of T variants
-      11 Quality adjusted coverage: the number of read bases (i.e., # of
-         reads) with quality equal to or above the set threshold
-      12 Total number of deviants (if Print total number of differences?
-         is set to Yes)
-         
-if **Print total number of differences?** is set to **Yes**, you will get::
-
- chrM  413  G  4  ..t,  III2  0  0  2  1  3  1
- chrM  414  C  4  ..Ta  III2  0  2  0  1  3  1
- chrM  415  C  4  TTTt  III7  0  0  0  4  4  0
- 
-Note the additional column 12, which contains the number of deviant reads whose base quality is above the threshold (e.g., one deviant, **t**, at position 413, and one, **T**, at position 414; the low-quality **a** at 414 is not counted).
-
- 
-Finally, if **Convert coordinates to intervals?** is set to **Yes**, you will get one additional column with the end coordinate::
- 
- chrM  412 413  G  4  ..t,  III2  0  0  2  1  3
- chrM  413 414  C  4  ..Ta  III2  0  2  0  1  3
- chrM  414 415  C  4  TTTt  III7  0  0  0  4  4
- 
-where::
-
-  Column Definition
- ------- ----------------------------
-       1 Chromosome
-       2 Start position (0-based)
-       3 End position (1-based)
-       4 Reference base at that position
-       5 Coverage (# reads aligning over that position)
-       6 Bases within reads
-       7 Quality values (phred33 scale, see Galaxy wiki for more)
-       8 Number of A variants
-       9 Number of C variants
-      10 Number of G variants
-      11 Number of T variants
-      12 Quality adjusted coverage
-      13 Total number of deviants (if Print total number of differences? is set to Yes)
-
-
-Note that in this case the coordinates of SNPs were converted to intervals, where the start coordinate is 0-based and the end coordinate is 1-based, following the UCSC Table Browser convention.
- 
-Although three positions have variants in the original file (413, 414, and 415), all three are reported because each has at least one variant with a quality value above the threshold of 20. In the case of 414 the **a** allele has a quality value of 17 ( ord("2")-33 ) and is therefore not counted. Note that five columns have been added to each of the reported lines::
-
-  chrM  413  G  4  ..t,  III2  0  0  2  1  3
-  
-Here, there is one variant, **t**. Because the fourth of the five appended columns holds the **T** count, it is incremented by 1. The last column shows that at this position three reads have bases at or above the quality threshold of 20.
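-
-The counting that produces these five columns can be sketched as follows (a
-simplified illustration that ignores the indel and read start/end markers
-real pileup base strings may contain)::
-
-  ref, bases, quals = 'G', '..t,', 'III2'
-  counts = { 'A': 0, 'C': 0, 'G': 0, 'T': 0 }
-  covered = 0
-  for b, q in zip( bases, quals ):
-      if ord( q ) - 33 < 20:      # skip bases below the quality cutoff
-          continue
-      covered += 1
-      if b in '.,':               # '.' and ',' denote a match to the reference
-          counts[ ref ] += 1
-      else:
-          counts[ b.upper() ] += 1
-  print counts, covered           # A=0, C=0, G=2, T=1; quality adjusted coverage 3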
-
------
-
-**Example 1**: Just variants
-
-In this mode, the tool only outputs lines from the input dataset where at least one read contains a sequence variant with quality above the threshold set by the **Do not consider read bases with quality lower than** option. For example, suppose one has a pileup dataset like the following::
-
- chrM  412  A  2       .,       II
- chrM  413  G  4     ..t,     III2
- chrM  414  C  4     ..Ta     III2
- chrM  415  C  4     TTTt     III7
- 
-To call all variants (with no restriction by coverage) with quality above a phred value of 20, we will need to set the parameters as follows:
-
-.. image:: ./static/images/pileup_parser_help1.png 
-
-Running the tool with these parameters will return::
-
- chrM  413  G  4  ..t,  III2  0  0  2  1  3
- chrM  414  C  4  ..Ta  III2  0  2  0  1  3
- chrM  415  C  4  TTTt  III7  0  0  0  4  4
- 
-**Note** that the *a* variant at position 414 is not counted because it has an associated quality value of 17 (ord('2')-33 = 17), which is below the phred threshold of 20 set by the **Count variants with quality above this value** parameter.
-
------
-
-**Example 2**: Report everything
-
-In addition to calling variants, it is often useful to know the quality adjusted coverage. Running the tool with these parameters:
-
-.. image:: ./static/images/pileup_parser_help2.png 
-
-will report everything from the original file::
-
- chrM  412  A  2  .,    II    2  0  0  0  2
- chrM  413  G  4  ..t,  III2  0  0  2  1  3
- chrM  414  C  4  ..Ta  III2  0  2  0  1  3
- chrM  415  C  4  TTTt  III7  0  0  0  4  4
- 
-Here, you can see that although the total coverage at position 414 is 4 (column 4), the quality adjusted coverage is 3 (last column). This is because only three of the four reads have bases with quality above the set threshold of 20 (the actual qualities are III2 or, after conversion, 40, 40, 40, and 17).
-
-One can use the last column of this dataset to filter out, with Galaxy's **Filter** tool, positions where the quality adjusted coverage is below a set threshold (e.g., using the condition **c11 >= 3**).
-
-------
-
-**Example 3**: Report everything and print total number of differences
-
-If you set **Print total number of differences?** to **Yes**, the tool will print an additional column with the total number of reads in which a deviant base is above the quality threshold. So, setting parameters like this:
-
-.. image:: ./static/images/pileup_parser_help3.png
-
-will produce this::
-
- chrM  412  A  2  .,    II    2  0  0  0  2  0
- chrM  413  G  4  ..t,  III2  0  0  2  1  3  1
- chrM  414  C  4  ..Ta  III2  0  2  0  1  3  1
- chrM  415  C  4  TTTt  III7  0  0  0  4  4  0
- 
- 
------
-
-**Example 4**: Report everything, print total number of differences, and ignore qualities and read bases
-
-Setting **Print quality and base string?** to **No** as shown here:
-
-.. image:: ./static/images/pileup_parser_help4.png
-
-will produce this::
-
- chrM  412  A  2  2  0  0  0  2  0
- chrM  413  G  4  0  0  2  1  3  1
- chrM  414  C  4  0  2  0  1  3  1
- chrM  415  C  4  0  0  0  4  4  0
-
-
-
- 
-</help>
-</tool>
--- a/tools/samtools/sam2interval.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import optparse
-import re
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def main():
-    usage = """%prog [options]
-    
-options (listed below) default to 'None' if omitted
-    """
-    parser = optparse.OptionParser(usage=usage)
-
-    parser.add_option(
-        '-f','--input_sam_file',
-        metavar="INPUT_SAM_FILE",
-        dest='input_sam',
-        default = False,
-        help='Name of the SAM file to be filtered. STDIN is default')
-            
-    parser.add_option(
-        '-c','--flag_column',
-        dest='flag_col',
-        default = '2',
-        help='Column containing SAM bitwise flag. 1-based')
-        
-    parser.add_option(
-        '-s','--start_column',
-        dest='start_col',
-        default = '4',
-        help='Column containing position. 1-based')
-
-    parser.add_option(
-        '-g','--cigar_column',
-        dest='cigar_col',
-        default = '6',
-        help='Column containing CIGAR or extended CIGAR string')
-
-    parser.add_option(
-        '-r','--ref_column',
-        dest='ref_col',
-        default = '3',
-        help='Column containing name of the reference sequence coordinate. 1-based')
-        
-    parser.add_option(
-        '-e','--read_column',
-        dest='read_col',
-        default = '1',
-        help='Column containing read name. 1-based')
-
-    parser.add_option(
-        '-p','--print_all',
-        dest='prt_all',
-        action='store_true',
-        default = False,
-        help='Print coordinates and original SAM?')
-    
-    options, args = parser.parse_args()
-
-    if options.input_sam:
-        infile = open ( options.input_sam, 'r')
-    else:
-        infile = sys.stdin
-
-    cigar = re.compile( r'\d+M|\d+N|\d+D|\d+P' )
-
-    print '#chrom\tstart\tend\tstrand\tread_name' # provide a (partial) header so that strand is automatically set in metadata
-
-    for line in infile:
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ) and not line.startswith( '@' ) :
-            fields = line.split( '\t' )
-            start = int( fields[ int( options.start_col ) - 1 ] ) - 1
-            end = 0
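-            # sum the reference-consuming CIGAR operations (M/N/D/P, per the
-            # regex above) to get the alignment's span on the reference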
-            for op in cigar.findall( fields[ int( options.cigar_col) - 1 ] ):
-                end += int( op[ 0:len( op ) - 1 ] )
-                
-            strand = '+' 
-            if bool( int( fields[ int( options.flag_col ) - 1 ] ) & 0x0010 ):
-                strand = '-'
-            read_name = fields[ int( options.read_col ) - 1 ]
-            ref_name  = fields[ int( options.ref_col ) - 1 ]
-            
-            if ref_name != '*':
-                # Do not print lines with unmapped reads that contain '*' instead of chromosome name        
-                if options.prt_all: 
-                    print '%s\t%s\t%s\t%s\t%s' % (ref_name, str(start), str(end+start), strand, line)
-                else:
-                    print '%s\t%s\t%s\t%s\t%s' % (ref_name, str(start), str(end+start), strand, read_name)
-
-if __name__ == "__main__": main()
-
--- a/tools/samtools/sam2interval.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-<tool id="sam2interval" name="Convert SAM" version="1.0.1">
-  <description>to interval</description>
-  <command interpreter="python">sam2interval.py --input_sam_file=$input1 $print_all > $out_file1
-  </command>
-  <inputs>
-    <param format="sam" name="input1" type="data" label="Select dataset to convert"/>
-    <param name="print_all" type="select" label="Print all?" help="Do you want to retain original SAM fields? See example below.">
-        <option value="-p">Yes</option>
-        <option value="">No</option>
-    </param>
-  </inputs>
- <outputs>
-    <data format="interval" name="out_file1" label="Converted Interval" />
-  </outputs>
-<tests>
-    <test>          
-        <param name="input1" value="sam_bioinf_example.sam" ftype="sam"/>
-        <param name="print_all" value="Yes"/>
-        <output name="out_file1" file="sam2interval_printAll.dat" ftype="interval"/>
-    </test>
-    <test>          
-        <param name="input1" value="sam_bioinf_example.sam" ftype="sam"/>
-        <param name="print_all" value="No"/>
-        <output name="out_file1" file="sam2interval_noprintAll.dat" ftype="interval"/>
-    </test>
-    <test>
-        <param name="input1" value="sam2interval-test3.sam" ftype="sam"/>
-        <param name="print_all" value="No"/>
-        <output name="out_file1" file="sam2interval_with_unmapped_reads_noprintAll.dat" ftype="interval"/>
-    </test>
-
-</tests>
-  <help>
-
-**What it does**
-
-Converts positional information from a SAM dataset into interval format with a 0-based start and a 1-based end. The CIGAR string of the SAM dataset is used to compute the end coordinate.
-
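-For instance, the interval for the first read in the example below can be
-computed like this (mirroring the regular expression used in sam2interval.py
-above, which treats M, N, D, and P operations as consuming the reference)::
-
-  import re
-
-  start = 7 - 1                    # SAM POS is 1-based; interval start is 0-based
-  span = sum( int( op[:-1] ) for op in
-              re.findall( r'\d+M|\d+N|\d+D|\d+P', '8M2I4M1D3M' ) )
-  print start, start + span        # 6 22
-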
------
-
-**Example**
-
-Converting the following dataset::
-
- r001 163 ref  7 30 8M2I4M1D3M = 37  39 TTAGATAAAGGATACTA *
- r002   0 ref  9 30 3S6M1P1I4M *  0   0 AAAAGATAAGGATA    *
- r003   0 ref  9 30       5H6M *  0   0 AGCTAA            * NM:i:1
- r004   0 ref 16 30    6M14N5M *  0   0 ATAGCTTCAGC       *
- r003  16 ref 29 30       6H5M *  0   0 TAGGC             * NM:i:0
- r001  83 ref 37 30         9M =  7 -39 CAGCGCCAT         *
-
-into Interval format will produce the following if *Print all?* is set to **Yes**::
-
- ref  6 22 + r001 163 ref  7 30 8M2I4M1D3M = 37  39 TTAGATAAAGGATACTA *
- ref  8 19 + r002   0 ref  9 30 3S6M1P1I4M *  0   0 AAAAGATAAGGATA    *
- ref  8 14 + r003   0 ref  9 30 5H6M       *  0   0 AGCTAA            * NM:i:1
- ref 15 40 + r004   0 ref 16 30 6M14N5M    *  0   0 ATAGCTTCAGC       *
- ref 28 33 - r003  16 ref 29 30 6H5M       *  0   0 TAGGC             * NM:i:0
- ref 36 45 - r001  83 ref 37 30 9M         =  7 -39 CAGCGCCAT         *
- 
-Setting  *Print all?* to **No** will generate the following::
-
- ref  6 22 + r001
- ref  8 19 + r002
- ref  8 14 + r003
- ref 15 40 + r004
- ref 28 33 - r003
- ref 36 45 - r001
-
-
-  </help>
-</tool>
--- a/tools/samtools/sam_bitwise_flag_filter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,149 +0,0 @@
-#!/usr/bin/env python
-# Refactored on 11/13/2010 by Kanwei Li
-
-import sys
-import optparse
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def main():
-    usage = """%prog [options]
-    
-options (listed below) default to 'None' if omitted
-    """
-    parser = optparse.OptionParser(usage=usage)
-    
-    parser.add_option(
-        '--0x0001','--is_paired',
-        choices = ( '0','1' ),
-        dest='is_paired',
-        metavar="<0|1>",
-        help='The read is paired in sequencing')
-
-    parser.add_option(
-        '--0x0002','--is_proper_pair',
-        choices = ( '0','1' ),
-        metavar="<0|1>",
-        dest='is_proper_pair',
-        help='The read is mapped in a proper pair')
-
-    parser.add_option(
-        '--0x0004','--is_unmapped',
-        choices = ( '0','1' ),
-        metavar="<0|1>",
-        dest='is_unmapped',
-        help='The query sequence itself is unmapped')
-
-    parser.add_option(
-        '--0x0008','--mate_is_unmapped',
-        choices = ( '0','1' ),
-        metavar="<0|1>",
-        dest='mate_is_unmapped',
-        help='The mate is unmapped')
-
-    parser.add_option(
-        '--0x0010','--query_strand',
-        dest='query_strand',
-        metavar="<0|1>",
-        choices = ( '0','1' ),
-        help='Strand of the query: 0 = forward, 1 = reverse.')
-
-    parser.add_option(
-        '--0x0020','--mate_strand',
-        dest='mate_strand',
-        metavar="<0|1>",
-        choices = ('0','1'),
-        help='Strand of the mate: 0 = forward, 1 = reverse.')
-
-    parser.add_option(
-        '--0x0040','--is_first',
-        choices = ( '0','1' ),
-        metavar="<0|1>",
-        dest='is_first',
-        help='The read is the first read in a pair')
-
-    parser.add_option(
-        '--0x0080','--is_second',
-        choices = ( '0','1' ),
-        metavar="<0|1>",
-        dest='is_second',
-        help='The read is the second read in a pair')
-
-    parser.add_option(
-        '--0x0100','--is_not_primary',
-        choices = ( '0','1' ),
-        metavar="<0|1>",
-        dest='is_not_primary',
-        help='The alignment for the given read is not primary')
-
-    parser.add_option(
-        '--0x0200','--is_bad_quality',
-        choices = ( '0','1' ),
-        metavar="<0|1>",
-        dest='is_bad_quality',
-        help='The read fails platform/vendor quality checks')
-
-    parser.add_option(
-        '--0x0400','--is_duplicate',
-        choices = ( '0','1' ),
-        metavar="<0|1>",
-        dest='is_duplicate',
-        help='The read is either a PCR or an optical duplicate')
-        
-    parser.add_option(
-        '-f','--input_sam_file',
-        metavar="INPUT_SAM_FILE",
-        dest='input_sam',
-        default = False,
-        help='Name of the SAM file to be filtered. STDIN is default')
-            
-    parser.add_option(
-        '-c','--flag_column',
-        dest='flag_col',
-        default = '2',
-        help='Column containing SAM bitwise flag. 1-based')
-
-    options, args = parser.parse_args()
-
-    if options.input_sam:
-        infile = open ( options.input_sam, 'r')
-    else:
-        infile = sys.stdin
-        
-    opt_ary = [
-        options.is_paired,
-        options.is_proper_pair,
-        options.is_unmapped,
-        options.mate_is_unmapped,
-        options.query_strand,
-        options.mate_strand,
-        options.is_first,
-        options.is_second,
-        options.is_not_primary,
-        options.is_bad_quality,
-        options.is_duplicate
-    ]
-    
-    opt_map = { '0': False, '1': True }
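-    # opt_ary is ordered to match the SAM flag bits, so index i corresponds
-    # to bit 0x0001 << i; only the options the user actually set are tested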
-    used_indices = [(index, opt_map[opt]) for index, opt in enumerate(opt_ary) if opt is not None]
-    flag_col = int( options.flag_col ) - 1
-    
-    for line in infile:
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ) and not line.startswith( '@' ) :
-            fields = line.split( '\t' )
-            flags = int( fields[flag_col] )
-            
-            valid_line = True
-            for index, opt_bool in used_indices:
-                if bool(flags & 0x0001 << index) != opt_bool:
-                    valid_line = False
-                    break
-                    
-            if valid_line:
-                print line
-
-if __name__ == "__main__": main()
-
--- a/tools/samtools/sam_bitwise_flag_filter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,97 +0,0 @@
-<tool id="sam_bw_filter" name="Filter SAM" version="1.0.0">
-  <description>on bitwise flag values</description>
-  <parallelism method="basic"></parallelism>
-  <command interpreter="python">
-    sam_bitwise_flag_filter.py  
-      --input_sam_file=$input1
-      --flag_column=2
-      #for $bit in $bits
-       '${bit.flags}=${bit.states}'
-      #end for
-      > $out_file1
-  </command>
-  <inputs>
-    <param format="sam" name="input1" type="data" label="Select dataset to filter"/>
-    <repeat name="bits" title="Flag">
-      <param name="flags" type="select" label="Type">
-        <option value="--0x0001">Read is paired</option>
-        <option value="--0x0002">Read is mapped in a proper pair</option>
-        <option value="--0x0004">The read is unmapped</option>
-        <option value="--0x0008">The mate is unmapped</option>
-        <option value="--0x0010">Read strand</option>
-        <option value="--0x0020">Mate strand</option>
-        <option value="--0x0040">Read is the first in a pair</option>
-        <option value="--0x0080">Read is the second in a pair</option>
-        <option value="--0x0100">The alignment or this read is not primary</option>
-        <option value="--0x0200">The read fails platform/vendor quality checks</option>
-        <option value="--0x0400">The read is a PCR or optical duplicate</option>
-      </param>
-      <param name="states" type="select" display="radio" label="Set the states for this flag">
-         <option value="0">No</option>
-         <option value="1">Yes</option>
-       </param>
-    </repeat>
-  </inputs>
-  <outputs>
-    <data format="sam" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="sam_bw_filter.sam" ftype="sam"/>
-      <param name="flags" value="Read is mapped in a proper pair"/>
-      <param name="states" value="1"/>
-      <output name="out_file1" file="sam_bw_filter_0002-yes.sam" ftype="sam"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-Allows parsing of SAM datasets using bitwise flag (the second column). The bits in the flag are defined as follows::
-
-    Bit Info
- ------ --------------------------------------------------------------------------   
- 0x0001 the read is paired in sequencing, no matter whether it is mapped in a pair
- 0x0002 the read is mapped in a proper pair (depends on the protocol, normally
-        inferred during alignment)
- 0x0004 the query sequence itself is unmapped
- 0x0008 the mate is unmapped
- 0x0010 strand of the query (0 for forward; 1 for reverse strand)
- 0x0020 strand of the mate
- 0x0040 the read is the first read in a pair (see below)
- 0x0080 the read is the second read in a pair (see below)
- 0x0100 the alignment is not primary (a read having split hits may
-        have multiple primary alignment records)
- 0x0200 the read fails platform/vendor quality checks
- 0x0400 the read is either a PCR duplicate or an optical duplicate
-
-Note the following:
-
-- Flags 0x02, 0x08, 0x20, 0x40, and 0x80 are only meaningful when flag 0x01 is present.
-- If in a read pair the information on which read is the first in the pair is lost in the upstream analysis, flag 0x01 should be set, while 0x40 and 0x80 should both be zero.
-
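-As a minimal sketch of how such bits are tested (flag 163 belongs to read
-r001 in the example below)::
-
-  flag = 163                     # 0xA3 = 0x0001 + 0x0002 + 0x0020 + 0x0080
-  print bool( flag & 0x0001 )    # True:  the read is paired
-  print bool( flag & 0x0002 )    # True:  mapped in a proper pair
-  print bool( flag & 0x0010 )    # False: the read is on the forward strand
-  print bool( flag & 0x0080 )    # True:  the second read in the pair
-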
------
-
-**Example**
-
-Suppose the following dataset was generated with BWA mapper::
-
- r001 163 ref  7 30 8M2I4M1D3M = 37  39 TTAGATAAAGGATACTA *
- r002   0 ref  9 30 3S6M1P1I4M *  0   0 AAAAGATAAGGATA    *
- r003   0 ref  9 30       5H6M *  0   0 AGCTAA            * NM:i:1
- r004   0 ref 16 30    6M14N5M *  0   0 ATAGCTTCAGC       *
- r003  16 ref 29 30       6H5M *  0   0 TAGGC             * NM:i:0
- r001  83 ref 37 30         9M =  7 -39 CAGCGCCAT         *
-
-To select properly mapped pairs, click the **Add new Flag** button and set *Read is mapped in a proper pair* to **Yes**. The following two reads will be returned::
-
- r001 163 ref  7 30 8M2I4M1D3M = 37  39 TTAGATAAAGGATACTA *
- r001  83 ref 37 30         9M =  7 -39 CAGCGCCAT         *
-
-For more information, please consult the `SAM format description`__.
-
-.. __: http://www.ncbi.nlm.nih.gov/pubmed/19505943
-
-
-  </help>
-</tool>
--- a/tools/samtools/sam_merge.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,74 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Merges any number of BAM files
-usage: %prog [options]
-    input1
-    output1
-    input2
-    [input3[,input4[,input5[,...]]]]
-"""
-
-import os, subprocess, sys, tempfile
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-def __main__():
-    if len( sys.argv ) < 4:
-        stop_err( 'There are not enough files to merge' )
-    infile = sys.argv[1]
-    outfile = sys.argv[2]
-    filenames = sys.argv[3:]
-    # output version # of tool
-    try:
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'version' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( 'Samtools %s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine Samtools version\n' )
-    cmd = 'samtools merge %s %s %s' % ( outfile, infile, ' '.join( filenames ) )
-    tmp = tempfile.NamedTemporaryFile().name
-    try:
-        tmp_stderr = open( tmp, 'wb' )
-        proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
-        returncode = proc.wait()
-        tmp_stderr.close()
-        # get stderr, allowing for case where it's very large
-        tmp_stderr = open( tmp, 'rb' )
-        stderr = ''
-        buffsize = 1048576
-        try:
-            while True:
-                stderr += tmp_stderr.read( buffsize )
-                if not stderr or len( stderr ) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-        tmp_stderr.close()
-        if returncode != 0:
-            raise Exception, stderr
-        if os.path.exists( tmp ):
-            os.unlink( tmp )
-    except Exception, e:
-        if os.path.exists( tmp ):
-            os.unlink( tmp )
-        stop_err( 'Error running SAMtools merge tool\n' + str( e ) )
-    if os.path.getsize( outfile ) > 0:
-        sys.stdout.write( '%s files merged.' % ( len( sys.argv ) - 2 ) )
-    else:
-        stop_err( 'The output file is empty, there may be an error with one of your input files.' )
-
-if __name__ == "__main__" : __main__()
--- a/tools/samtools/sam_merge.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-<tool id="sam_merge2" name="Merge BAM Files" version="1.1.2">
-  <description>merges BAM files together</description>
-  <requirements>
-    <requirement type="package">picard</requirement>
-  </requirements>
-  <command>
-java -Xmx2G -jar ${GALAXY_DATA_INDEX_DIR}/shared/jars/MergeSamFiles.jar MSD=$mergeSD VALIDATION_STRINGENCY=LENIENT O=$output1 I=$input1 I=$input2 
-      #for $i in $inputs
-        I=${i.input}
-      #end for 
-    2&gt; $outlog
-  </command>
-  <inputs>
-    <param name="title" label="Name for the output merged bam file" type="text" default="Merged.bam"
-       help="This name will appear in your history so use it to remember what the new file in your history contains" />
-    <param name="mergeSD" value="true" type="boolean"  label="Merge all component bam file headers into the merged bam file"
-      truevalue="true" falsevalue="false" checked="yes" 
-      help="Control the MERGE_SEQUENCE_DICTIONARIES flag for Picard MergeSamFiles. Default (true) correctly propagates read groups and other important metadata" />
-    <param name="input1" label="First file" type="data" format="bam" />
-    <param name="input2" label="with file" type="data" format="bam" help="Need to add more files? Use controls below." />
-    <repeat name="inputs" title="Input Files">
-      <param name="input" label="Add file" type="data" format="bam" />
-    </repeat>
-  </inputs>
-  <outputs>
-    <data format="bam" name="output1" label="${title}.bam" />
-    <data format="txt" name="outlog" label="${title}_${tool.name}.log" />
-  </outputs>
-  <tests>
-    <!-- TODO: add ability to test framework to test without at least 
-         one repeat element value
-    -->
-    <test>
-      <param name="title" value="test1" />
-      <param name="mergeSD" value="true" />
-      <param name="input1" value="sam_merge_in1.bam" ftype="bam" /> 
-      <param name="input2" value="sam_merge_in2.bam" ftype="bam" />
-      <output name="output1" file="sam_merge_out1.bam" ftype="bam" />
-      <output name="outlog" file="sam_merge_out1.log" ftype="txt" lines_diff="10"/>
-    </test>
-    <test>
-      <param name="title" value="test2" />
-      <param name="mergeSD" value="true" />
-      <param name="input1" value="sam_merge_in1.bam" ftype="bam" /> 
-      <param name="input2" value="sam_merge_in2.bam" ftype="bam" />
-      <param name="input" value="sam_merge_in3.bam" ftype="bam" />
-      <output name="output1" file="sam_merge_out2.bam" ftype="bam" />
-      <output name="outlog" file="sam_merge_out2.log" ftype="txt" lines_diff="10"/>
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool uses the Picard_ merge command to merge any number of BAM files together into one BAM file while preserving BAM metadata such as read groups.
-
-.. _Picard: http://picard.sourceforge.net/command-line-overview.shtml#MergeSamFiles
-
-  </help>
-</tool>
--- a/tools/samtools/sam_merge_code.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,34 +0,0 @@
-from galaxy.tools.parameters import DataToolParameter
-
-def validate_input( trans, error_map, param_values, page_param_map ):
-    dbkeys = set()
-    data_param_names = set()
-    data_params = 0
-    for name, param in page_param_map.iteritems():
-        if isinstance( param, DataToolParameter ):
-            # for each dataset parameter
-            if param_values.get(name, None) != None:
-                dbkeys.add( param_values[name].dbkey )
-                data_params += 1
-                # check meta data
-#                try:
-#                    param = param_values[name]
-#                    startCol = int( param.metadata.startCol )
-#                    endCol = int( param.metadata.endCol )
-#                    chromCol = int( param.metadata.chromCol )
-#                    if param.metadata.strandCol is not None:
-#                        strandCol = int ( param.metadata.strandCol )
-#                    else:
-#                        strandCol = 0
-#                except:
-#                    error_msg = "The attributes of this dataset are not properly set. " + \
-#                    "Click the pencil icon in the history item to set the chrom, start, end and strand columns."
-#                    error_map[name] = error_msg
-            data_param_names.add( name )
-    if len( dbkeys ) > 1:
-        for name in data_param_names:
-            error_map[name] = "All datasets must belong to same genomic build, " \
-                "this dataset is linked to build '%s'" % param_values[name].dbkey
-    if data_params != len(data_param_names):
-        for name in data_param_names:
-            error_map[name] = "A dataset of the appropriate type is required"
--- a/tools/samtools/sam_pileup.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,163 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Creates a pileup file from a bam file and a reference.
-
-usage: %prog [options]
-   -p, --input1=p: bam file
-   -o, --output1=o: Output pileup
-   -R, --ref=R: Reference file type
-   -n, --ownFile=n: User-supplied fasta reference file
-   -d, --dbkey=d: dbkey of user-supplied file
-   -x, --indexDir=x: Index directory
-   -b, --bamIndex=b: BAM index file
-   -s, --lastCol=s: Print the mapping quality as the last column
-   -i, --indels=i: Only output lines containing indels
-   -M, --mapCap=M: Cap mapping quality
-   -c, --consensus=c: Call the consensus sequence using the MAQ consensus model
-   -T, --theta=T: Theta parameter (error dependency coefficient)
-   -N, --hapNum=N: Number of haplotypes in sample
-   -r, --fraction=r: Expected fraction of differences between a pair of haplotypes
-   -I, --phredProb=I: Phred probability of an indel in sequencing/prep
-
-"""
-
-import os, shutil, subprocess, sys, tempfile
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ):
-    seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR
-    seqPath = ''
-    for line in open( seqFile ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ) and line.startswith( 'index' ):
-            fields = line.split( '\t' )
-            if len( fields ) < 3:
-                continue
-            if fields[1] == dbkey:
-                seqPath = fields[2].strip()
-                break
-    return seqPath
-
-def __main__():
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    seqPath = check_seq_file( options.dbkey, options.indexDir )
-    # output version # of tool
-    try:
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'version' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( 'Samtools %s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine Samtools version\n' )
-    #prepare file names 
-    tmpDir = tempfile.mkdtemp()
-    tmpf0 = tempfile.NamedTemporaryFile( dir=tmpDir )
-    tmpf0_name = tmpf0.name
-    tmpf0.close()
-    tmpf0bam_name = '%s.bam' % tmpf0_name
-    tmpf0bambai_name = '%s.bam.bai' % tmpf0_name
-    tmpf1 = tempfile.NamedTemporaryFile( dir=tmpDir )
-    tmpf1_name = tmpf1.name
-    tmpf1.close()
-    tmpf1fai_name = '%s.fai' % tmpf1_name
-    #link bam and bam index to working directory (can't move because need to leave original)
-    os.symlink( options.input1, tmpf0bam_name )
-    os.symlink( options.bamIndex, tmpf0bambai_name )
-    #get parameters for pileup command
-    if options.lastCol == 'yes':
-        lastCol = '-s'
-    else:
-        lastCol = ''
-    if options.indels == 'yes':
-        indels = '-i'
-    else:
-        indels = ''
-    opts = '%s %s -M %s' % ( lastCol, indels, options.mapCap )
-    if options.consensus == 'yes':
-        opts += ' -c -T %s -N %s -r %s -I %s' % ( options.theta, options.hapNum, options.fraction, options.phredProb )
-    #prepare basic pileup command
-    cmd = 'samtools pileup %s -f %s %s > %s'
-    try:
-        # have to nest try-except in try-finally to handle 2.4
-        try:
-            #index reference if necessary and prepare pileup command
-            if options.ref == 'indexed':
-                if not os.path.exists( "%s.fai" % seqPath ):
-                    raise Exception, "No sequences are available for '%s', request them by reporting this error." % options.dbkey
-                cmd = cmd % ( opts, seqPath, tmpf0bam_name, options.output1 )
-            elif options.ref == 'history':
-                os.symlink( options.ownFile, tmpf1_name )
-                cmdIndex = 'samtools faidx %s' % ( tmpf1_name )
-                tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name
-                tmp_stderr = open( tmp, 'wb' )
-                proc = subprocess.Popen( args=cmdIndex, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() )
-                returncode = proc.wait()
-                tmp_stderr.close()
-                # get stderr, allowing for case where it's very large
-                tmp_stderr = open( tmp, 'rb' )
-                stderr = ''
-                buffsize = 1048576
-                try:
-                    while True:
-                        stderr += tmp_stderr.read( buffsize )
-                        if not stderr or len( stderr ) % buffsize != 0:
-                            break
-                except OverflowError:
-                    pass
-                tmp_stderr.close()
-                #did index succeed?
-                if returncode != 0:
-                    raise Exception, 'Error creating index file\n' + stderr
-                cmd = cmd % ( opts, tmpf1_name, tmpf0bam_name, options.output1 )
-            #perform pileup command
-            tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name
-            tmp_stderr = open( tmp, 'wb' )
-            proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() )
-            returncode = proc.wait()
-            tmp_stderr.close()
-            #did it succeed?
-            # get stderr, allowing for case where it's very large
-            tmp_stderr = open( tmp, 'rb' )
-            stderr = ''
-            buffsize = 1048576
-            try:
-                while True:
-                    stderr += tmp_stderr.read( buffsize )
-                    if not stderr or len( stderr ) % buffsize != 0:
-                        break
-            except OverflowError:
-                pass
-            tmp_stderr.close()
-            if returncode != 0:
-                raise Exception, stderr
-        except Exception, e:
-            stop_err( 'Error running Samtools pileup tool\n' + str( e ) )
-    finally:
-        #clean up temp files
-        if os.path.exists( tmpDir ):
-            shutil.rmtree( tmpDir )
-    # check that there are results in the output file
-    if os.path.getsize( options.output1 ) > 0:
-        sys.stdout.write( 'Converted BAM to pileup' )
-    else:
-        stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )
-
-if __name__ == "__main__" : __main__()
--- a/tools/samtools/sam_pileup.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,184 +0,0 @@
-<tool id="sam_pileup" name="Generate pileup" version="1.1.1">
-  <description>from BAM dataset</description>
-  <requirements>
-    <requirement type="package">samtools</requirement>
-  </requirements>
-  <command interpreter="python">
-    sam_pileup.py
-      --input1=$input1
-      --output=$output1
-      --ref=$refOrHistory.reference
-      #if $refOrHistory.reference == "history":
-        --ownFile=$refOrHistory.ownFile
-      #else:
-        --ownFile="None"
-      #end if
-       --dbkey=${input1.metadata.dbkey}
-       --indexDir=${GALAXY_DATA_INDEX_DIR}
-       --bamIndex=${input1.metadata.bam_index}
-       --lastCol=$lastCol
-       --indels=$indels
-       --mapCap=$mapCap
-       --consensus=$c.consensus
-      #if $c.consensus == "yes":
-        --theta=$c.theta
-        --hapNum=$c.hapNum
-        --fraction=$c.fraction
-        --phredProb=$c.phredProb
-       #else:
-        --theta="None"
-        --hapNum="None"
-        --fraction="None"
-        --phredProb="None"
-      #end if
-  </command>
-  <inputs>
-    <conditional name="refOrHistory">
-      <param name="reference" type="select" label="Will you select a reference genome from your history or use a built-in index?">
-        <option value="indexed">Use a built-in index</option>
-        <option value="history">Use one from the history</option>
-      </param>
-      <when value="indexed">
-        <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for">
-           <validator type="unspecified_build" />
-           <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." line_startswith="index" />
-        </param>
-      </when>
-      <when value="history">
-        <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for" />
-        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome" />
-      </when>
-    </conditional>
-    <param name="lastCol" type="select" label="Whether or not to print the mapping quality as the last column" help="Makes the output easier to parse, but is space inefficient">
-      <option value="no">Do not print the mapping quality as the last column</option>
-      <option value="yes">Print the mapping quality as the last column</option>
-    </param>
-    <param name="indels" type="select" label="Whether or not to print only output pileup lines containing indels">
-      <option value="no">Print all lines</option>
-      <option value="yes">Print only lines containing indels</option>
-    </param>
-    <param name="mapCap" type="integer" value="60" label="Where to cap mapping quality" />
-    <conditional name="c">
-      <param name="consensus" type="select" label="Call consensus according to MAQ model?">
-        <option selected="true" value="no">No</option>
-        <option value="yes">Yes</option>
-      </param> 
-      <when value="no" />
-      <when value="yes">
-        <param name="theta" type="float" value="0.85" label="Theta parameter (error dependency coefficient) in the MAQ consensus calling model" />
-        <param name="hapNum" type="integer" value="2" label="Number of haplotypes in the sample" help="Greater than or equal to 2" />
-        <param name="fraction" type="float" value="0.001" label="Expected fraction of differences between a pair of haplotypes" />
-        <param name="phredProb" type="integer" value="40" label="Phred probability of an indel in sequencing/prep" />
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="output1" label="${tool.name} on ${on_string}: converted pileup" />
-  </outputs>
-  <tests>
-    <test>
-      <!--
-      Bam to pileup command:
-      samtools faidx chr_m.fasta
-      samtools pileup -M 60 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out1.pileup
-      chr_m.fasta is the prefix of the index
-      -->
-      <param name="reference" value="history" />
-      <param name="input1" value="sam_pileup_in1.bam" ftype="bam" />
-      <param name="ownFile" value="chr_m.fasta" ftype="fasta" dbkey="equCab2" />
-      <param name="lastCol" value="no" />
-      <param name="indels" value="no" />
-      <param name="mapCap" value="60" />
-      <param name="consensus" value="no" />
-      <output name="output1" file="sam_pileup_out1.pileup" />
-    </test>
-    <test>
-      <!--
-      Bam to pileup command:
-      samtools pileup -M 60 -c -T 0.85 -N 2 -r 0.001 -I 40 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out2.pileup
-      chr_m.fasta is the prefix of the index
-      -->
-      <param name="reference" value="indexed" />
-      <param name="input1" value="sam_pileup_in1.bam" ftype="bam" dbkey="equCab2" />
-      <param name="lastCol" value="no" />
-      <param name="indels" value="no" />
-      <param name="mapCap" value="60" />
-      <param name="consensus" value="yes" />
-      <param name="theta" value="0.85" />
-      <param name="hapNum" value="2" />
-      <param name="fraction" value="0.001" />
-      <param name="phredProb" value="40" />
-      <output name="output1" file="sam_pileup_out2.pileup" />
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-Uses the SAMTools_ *pileup* command to produce a pileup dataset from a provided BAM dataset. It generates two types of pileup datasets depending on the specified options. If the *Call consensus according to MAQ model?* option is set to **No**, the tool produces a simple pileup. If the option is set to **Yes**, a ten-column pileup dataset with consensus information is generated. Both types of datasets are briefly summarized below.
-
-.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
-
-------
-
-**Types of pileup datasets**
-
-The description of the pileup format below is largely based on information that can be found on the SAMTools Pileup_ documentation page. The 6- and 10-column variants are described below.
-
-.. _Pileup: http://samtools.sourceforge.net/pileup.shtml
-
-**Six column pileup**::
-
-    1    2  3  4        5        6
- ---------------------------------
- chrM  412  A  2       .,       II
- chrM  413  G  4     ..t,     IIIH
- chrM  414  C  4     ...a     III2
- chrM  415  C  4     TTTt     III7
-   
-where::
-
-  Column Definition
- ------- ----------------------------
-       1 Chromosome
-       2 Position (1-based)
-       3 Reference base at that position
-       4 Coverage (# reads aligning over that position)
-       5 Bases within reads (see Galaxy wiki for more info)
-       6 Quality values (phred33 scale, see Galaxy wiki for more)
-       
-**Ten column pileup**
-
-The ten-column (consensus_) pileup incorporates additional consensus information generated with the *-c* option of the *samtools pileup* command::
-
-
-    1    2  3  4   5   6   7   8       9       10
- ------------------------------------------------
- chrM  412  A  A  75   0  25  2       .,       II
- chrM  413  G  G  72   0  25  4     ..t,     IIIH
- chrM  414  C  C  75   0  25  4     ...a     III2
- chrM  415  C  T  75  75  25  4     TTTt     III7
-
-where::
-
-  Column Definition
- ------- --------------------------------------------------------
-       1 Chromosome
-       2 Position (1-based)
-       3 Reference base at that position
-       4 Consensus bases
-       5 Consensus quality
-       6 SNP quality
-       7 Maximum mapping quality
-       8 Coverage (# reads aligning over that position)
-       9 Bases within reads (see Galaxy wiki for more info)
-      10 Quality values (phred33 scale, see Galaxy wiki for more)
-
-
-.. _consensus: http://samtools.sourceforge.net/cns0.shtml
-
-
-  </help>
-</tool>
-
-
--- a/tools/samtools/sam_to_bam.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,197 +0,0 @@
-#!/usr/bin/env python
-"""
-Converts SAM data to sorted BAM data.
-usage: sam_to_bam.py [options]
-   --input1: SAM file to be converted
-   --dbkey: dbkey value
-   --ref_file: Reference file if choosing from history
-   --output1: output dataset in bam format
-   --index_dir: GALAXY_DATA_INDEX_DIR
-"""
-
-import optparse, os, sys, subprocess, tempfile, shutil, gzip
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-from galaxy import util
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-def check_seq_file( dbkey, cached_seqs_pointer_file ):
-    seq_path = ''
-    for line in open( cached_seqs_pointer_file ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ) and line.startswith( 'index' ):
-            fields = line.split( '\t' )
-            if len( fields ) < 3:
-                continue
-            if fields[1] == dbkey:
-                seq_path = fields[2].strip()
-                break
-    return seq_path
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option( '', '--input1', dest='input1', help='The input SAM dataset' )
-    parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' )
-    parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
-    parser.add_option( '', '--output1', dest='output1', help='The output BAM dataset' )
-    parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' )
-    ( options, args ) = parser.parse_args()
-
-    # output version # of tool
-    try:
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'version' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( 'Samtools %s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine Samtools version\n' )
-
-    cached_seqs_pointer_file = '%s/sam_fa_indices.loc' % options.index_dir
-    if not os.path.exists( cached_seqs_pointer_file ):
-        stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file )
-    # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa,
-    # and the equCab2.fa file will contain fasta sequences.
-    seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file )
-    tmp_dir = tempfile.mkdtemp()
-    if not options.ref_file or options.ref_file == 'None':
-        # We're using locally cached reference sequences( e.g., /galaxy/data/equCab2/sam_index/equCab2.fa ).
-        # The indexes for /galaxy/data/equCab2/sam_index/equCab2.fa will be contained in
-        # a file named /galaxy/data/equCab2/sam_index/equCab2.fa.fai
-        fai_index_file_base = seq_path
-        fai_index_file_path = '%s.fai' % seq_path 
-        if not os.path.exists( fai_index_file_path ):
-            #clean up temp files
-            if os.path.exists( tmp_dir ):
-                shutil.rmtree( tmp_dir )
-            stop_err( 'No sequences are available for build (%s), request them by reporting this error.' % options.dbkey )
-    else:
-        try:
-            # Create indexes for history reference ( e.g., ~/database/files/000/dataset_1.dat ) using samtools faidx, which will:
-            # - index reference sequence in the FASTA format or extract subsequence from indexed reference sequence
-            # - if no region is specified, faidx will index the file and create <ref.fasta>.fai on the disk
-            # - if regions are specified, the subsequences will be retrieved and printed to stdout in the FASTA format
-            # - the input file can be compressed in the RAZF format.
-            # IMPORTANT NOTE: a real weakness here is that we are creating indexes for the history dataset
-            # every time we run this tool.  It would be nice if we could somehow keep track of user's specific
-            # index files so they could be re-used.
-            fai_index_file_base = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-            # At this point, fai_index_file_path will look something like /tmp/dataset_13.dat
-            os.symlink( options.ref_file, fai_index_file_base )
-            fai_index_file_path = '%s.fai' % fai_index_file_base
-            command = 'samtools faidx %s' % fai_index_file_base
-            tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-            tmp_stderr = open( tmp, 'wb' )
-            proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-            returncode = proc.wait()
-            tmp_stderr.close()
-            # get stderr, allowing for case where it's very large
-            tmp_stderr = open( tmp, 'rb' )
-            stderr = ''
-            buffsize = 1048576
-            try:
-                while True:
-                    stderr += tmp_stderr.read( buffsize )
-                    if not stderr or len( stderr ) % buffsize != 0:
-                        break
-            except OverflowError:
-                pass
-            tmp_stderr.close()
-            if returncode != 0:
-                raise Exception, stderr 
-            if os.path.getsize( fai_index_file_path ) == 0:
-                raise Exception, 'Index file empty, there may be an error with your reference file or settings.'
-        except Exception, e:
-            #clean up temp files
-            if os.path.exists( tmp_dir ):
-                shutil.rmtree( tmp_dir )
-            stop_err( 'Error creating indexes from reference (%s), %s' % ( options.ref_file, str( e ) ) )
-    try:
-        # Extract all alignments from the input SAM file to BAM format ( since no region is specified, all the alignments will be extracted ).
-        tmp_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir )
-        tmp_aligns_file_name = tmp_aligns_file.name
-        tmp_aligns_file.close()
-        command = 'samtools view -bt %s -o %s %s' % ( fai_index_file_path, tmp_aligns_file_name, options.input1 )
-        tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-        tmp_stderr = open( tmp, 'wb' )
-        proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-        returncode = proc.wait()
-        tmp_stderr.close()
-        # get stderr, allowing for case where it's very large
-        tmp_stderr = open( tmp, 'rb' )
-        stderr = ''
-        buffsize = 1048576
-        try:
-            while True:
-                stderr += tmp_stderr.read( buffsize )
-                if not stderr or len( stderr ) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-        tmp_stderr.close()
-        if returncode != 0:
-            raise Exception, stderr
-    except Exception, e:
-        #clean up temp files
-        if os.path.exists( tmp_dir ):
-            shutil.rmtree( tmp_dir )
-        stop_err( 'Error extracting alignments from (%s), %s' % ( options.input1, str( e ) ) )
-    try:
-        # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. This command
-        # may also create temporary files <out.prefix>.%d.bam when the whole alignment cannot be fitted
-        # into memory ( controlled by option -m ).
-        tmp_sorted_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir )
-        tmp_sorted_aligns_file_name = tmp_sorted_aligns_file.name
-        tmp_sorted_aligns_file.close()
-        command = 'samtools sort %s %s' % ( tmp_aligns_file_name, tmp_sorted_aligns_file_name )
-        tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-        tmp_stderr = open( tmp, 'wb' )
-        proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-        returncode = proc.wait()
-        tmp_stderr.close()
-        # get stderr, allowing for case where it's very large
-        tmp_stderr = open( tmp, 'rb' )
-        stderr = ''
-        buffsize = 1048576
-        try:
-            while True:
-                stderr += tmp_stderr.read( buffsize )
-                if not stderr or len( stderr ) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-        tmp_stderr.close()
-        if returncode != 0:
-            raise Exception, stderr
-    except Exception, e:
-        #clean up temp files
-        if os.path.exists( tmp_dir ):
-            shutil.rmtree( tmp_dir )
-        stop_err( 'Error sorting alignments from (%s), %s' % ( tmp_aligns_file_name, str( e ) ) )
-    # Move tmp_aligns_file_name to our output dataset location
-    sorted_bam_file = '%s.bam' % tmp_sorted_aligns_file_name
-    shutil.move( sorted_bam_file, options.output1 )
-    #clean up temp files
-    if os.path.exists( tmp_dir ):
-        shutil.rmtree( tmp_dir )
-    # check that there are results in the output file
-    if os.path.getsize( options.output1 ) > 0:
-        sys.stdout.write( 'SAM file converted to BAM' )
-    else:
-        stop_err( 'Error creating sorted version of BAM file.' )
-
-if __name__=="__main__": __main__()
--- a/tools/samtools/sam_to_bam.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,91 +0,0 @@
-<tool id="sam_to_bam" name="SAM-to-BAM" version="1.1.2">
-  <description>converts SAM format to BAM format</description>
-  <requirements>
-    <requirement type="package">samtools</requirement>
-  </requirements>
-  <command interpreter="python">
-    sam_to_bam.py
-      --input1=$source.input1
-      #if $source.index_source == "history":
-        --dbkey=${source.ref_file.metadata.dbkey}
-        --ref_file=$source.ref_file
-      #else
-        --dbkey=${input1.metadata.dbkey} 
-      #end if
-      --output1=$output1
-      --index_dir=${GALAXY_DATA_INDEX_DIR}
-  </command>
-  <inputs>
-    <conditional name="source">
-      <param name="index_source" type="select" label="Choose the source for the reference list">
-        <option value="cached">Locally cached</option>
-        <option value="history">History</option>
-      </param>
-      <when value="cached">
-        <param name="input1" type="data" format="sam" metadata_name="dbkey" label="SAM File to Convert">
-           <validator type="unspecified_build" />
-           <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." line_startswith="index" />
-        </param>
-      </when>
-      <when value="history">
-        <param name="input1" type="data" format="sam" label="Convert SAM file" />
-        <param name="ref_file" type="data" format="fasta" metadata_name="dbkey" label="Using reference file" />
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="bam" name="output1" label="${tool.name} on ${on_string}: converted BAM">
-      <actions>
-        <conditional name="source.index_source">
-          <when value="cached">
-            <action type="metadata" name="dbkey">
-              <option type="from_param" name="source.input1" param_attribute="dbkey" />
-            </action>
-          </when>
-          <when value="history">
-            <action type="metadata" name="dbkey">
-              <option type="from_param" name="source.ref_file" param_attribute="dbkey" />
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <!--
-      Sam-to-Bam command:
-      cp test-data/chr_m.fasta .
-      samtools faidx chr_m.fasta
-      samtools view -hbt chr_m.fasta.fai -o unsorted.bam test-data/sam_to_bam_in1.sam
-      samtools sort unsorted.bam sam_to_bam_out1
-      chr_m.fasta is the reference file (chrM from equCab2)
-      -->
-      <param name="index_source" value="history" /> 
-      <param name="input1" value="sam_to_bam_in1.sam" ftype="sam" />
-      <param name="ref_file" value="chr_m.fasta" ftype="fasta" dbkey="equCab2" />
-      <output name="output1" file="sam_to_bam_out1.bam" ftype="bam" />
-    </test>
-    <test>
-      <!--
-      Sam-to-Bam command:
-      samtools view -hbt chr_m.fasta.fai -o unsorted.bam test-data/sam_to_bam_in1.sam
-      samtools sort unsorted.bam sam_to_bam_out2
-      chr_m.fasta is the reference file and the index chr_m.fasta.fai 
-      these should be in the same directory, and chrM is from equCab2
-      -->
-      <param name="index_source" value="cached" />
-      <param name="input1" value="sam_to_bam_in1.sam" ftype="sam" dbkey="chrM" />
-      <output name="output1" file="sam_to_bam_out2.bam" ftype="bam" />
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool uses the SAMTools_ toolkit to produce an indexed BAM file based on a sorted input SAM file.
-
-.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
-
-  </help>
-</tool>
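
For reference, the conversion this wrapper drives boils down to three samtools calls. A minimal Python sketch, assuming the samtools 0.1.x sort syntax shown in the test comments above and illustrative file names::

    import subprocess

    def sam_to_sorted_bam(sam_path, ref_fasta, out_prefix):
        # Index the reference so 'samtools view' can build the BAM header.
        subprocess.check_call(['samtools', 'faidx', ref_fasta])
        unsorted_bam = out_prefix + '.unsorted.bam'
        # Convert SAM to BAM against the .fai index produced above.
        subprocess.check_call(['samtools', 'view', '-hbt', ref_fasta + '.fai',
                               '-o', unsorted_bam, sam_path])
        # samtools 0.1.x 'sort' takes an output prefix and appends '.bam' itself.
        subprocess.check_call(['samtools', 'sort', unsorted_bam, out_prefix])
        return out_prefix + '.bam'
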
--- a/tools/samtools/samtools_flagstat.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-<tool id="samtools_flagstat" name="flagstat" version="1.0.0">
-  <requirements>
-    <requirement type="package">samtools</requirement>
-  </requirements>
-  <description>provides simple stats on BAM files</description>
-  <command>samtools flagstat $input1 > $output1
-  </command>
-  <inputs>
-    <param name="input1" type="data" format="bam" label="BAM File to Convert" />
-  </inputs>
-  <outputs>
-    <data name="output1" format="txt" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="3unsorted.bam" ftype="bam" />
-      <output name="output1" file="samtools_flagstat_out1.txt" />
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool uses the SAMTools_ toolkit to produce simple stats on a BAM file.
-
-.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
-
-  </help>
-</tool>
--- a/tools/solid_tools/maq_cs_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,270 +0,0 @@
-#!/usr/bin/env python
-#Guruprasad Ananda
-#MAQ mapper for SOLiD colour-space reads
-
-import sys, os, zipfile, tempfile, subprocess
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
- 
-def __main__():
-
-    out_fname = sys.argv[1].strip()
-    out_f2 = open(sys.argv[2].strip(),'r+')
-    ref_fname = sys.argv[3].strip()
-    f3_read_fname = sys.argv[4].strip()
-    f3_qual_fname = sys.argv[5].strip()
-    paired = sys.argv[6]
-    if paired == 'yes':
-        r3_read_fname = sys.argv[7].strip()
-        r3_qual_fname = sys.argv[8].strip()
-    min_mapqual = int(sys.argv[9].strip())
-    max_mismatch = int(sys.argv[10].strip())
-    out_f3name = sys.argv[11].strip()   
-    subprocess_dict = {}
-
-    ref_csfa = tempfile.NamedTemporaryFile()
-    ref_bfa = tempfile.NamedTemporaryFile()
-    ref_csbfa = tempfile.NamedTemporaryFile()
-    cmd2_1 = 'maq fasta2csfa %s > %s 2>&1' %(ref_fname,ref_csfa.name)
-    cmd2_2 = 'maq fasta2bfa %s %s 2>&1' %(ref_csfa.name,ref_csbfa.name)
-    cmd2_3 = 'maq fasta2bfa %s %s 2>&1' %(ref_fname,ref_bfa.name)
-    try:
-        os.system(cmd2_1)
-        os.system(cmd2_2)
-        os.system(cmd2_3)
-    except Exception, erf:
-        stop_err("Error processing reference sequence: " + str(erf))
-        
-    if paired == 'yes': #paired end reads
-        tmpf = tempfile.NamedTemporaryFile()    #forward reads
-        tmpr = tempfile.NamedTemporaryFile()    #reverse reads
-        tmps = tempfile.NamedTemporaryFile()    #single reads
-        tmpffastq = tempfile.NamedTemporaryFile()
-        tmprfastq = tempfile.NamedTemporaryFile()
-        tmpsfastq = tempfile.NamedTemporaryFile()
-
-        cmd1 = "solid2fastq_modified.pl 'yes' %s %s %s %s %s %s %s 2>&1" %(tmpf.name,tmpr.name,tmps.name,f3_read_fname,f3_qual_fname,r3_read_fname,r3_qual_fname)
-        try:
-            os.system(cmd1)
-            os.system('gunzip -c %s >> %s' %(tmpf.name,tmpffastq.name))
-            os.system('gunzip -c %s >> %s' %(tmpr.name,tmprfastq.name))
-            os.system('gunzip -c %s >> %s' %(tmps.name,tmpsfastq.name))
-
-        except Exception, eq:
-            stop_err("Error converting data to fastq format." + str(eq))
-        
-        #make a temp directory where the split fastq files will be stored
-        try: 
-            split_dir = tempfile.mkdtemp()
-            split_file_prefix_f = tempfile.mktemp(dir=split_dir)
-            split_file_prefix_r = tempfile.mktemp(dir=split_dir)
-            splitcmd_f = 'split -a 2 -l %d %s %s' %(32000000,tmpffastq.name,split_file_prefix_f) #32M lines correspond to 8M reads
-            splitcmd_r = 'split -a 2 -l %d %s %s' %(32000000,tmprfastq.name,split_file_prefix_r) #32M lines correspond to 8M reads
-
-            os.system(splitcmd_f)
-            os.system(splitcmd_r)
-            os.chdir(split_dir)
-            ii = 0
-            for fastq in os.listdir(split_dir):
-                if not fastq.startswith(split_file_prefix_f.split("/")[-1]):
-                    continue
-                fastq_r = split_file_prefix_r + fastq.split(split_file_prefix_f.split("/")[-1])[1] #find the reverse strand fastq corresponding to forward strand fastq
-                tmpbfq_f = tempfile.NamedTemporaryFile()
-                tmpbfq_r = tempfile.NamedTemporaryFile()
-                cmd3 = 'maq fastq2bfq %s %s 2>&1; maq fastq2bfq %s %s 2>&1; maq map -c %s.csmap %s %s %s 1>/dev/null 2>&1; maq mapview %s.csmap > %s.txt' %(fastq,tmpbfq_f.name,fastq_r,tmpbfq_r.name,fastq,ref_csbfa.name,tmpbfq_f.name,tmpbfq_r.name,fastq,fastq)
-                subprocess_dict['sp'+str(ii+1)] = subprocess.Popen([cmd3],shell=True,stdout=subprocess.PIPE)
-                ii += 1
-            while True:
-                all_done = True
-                for j,k in enumerate(subprocess_dict.keys()):
-                    if subprocess_dict['sp'+str(j+1)].wait() != 0:
-                        err = subprocess_dict['sp'+str(j+1)].communicate()[1] 
-                        if err != None:
-                            stop_err("Mapping error: %s" %err)
-                        all_done = False
-                if all_done:
-                    break
-            cmdout = "for map in *.txt; do cat $map >> %s; done" %(out_fname)
-            os.system(cmdout)
-            
-            tmpcsmap = tempfile.NamedTemporaryFile()
-            cmd_cat_csmap = "for csmap in *.csmap; do cat $csmap >> %s; done" %(tmpcsmap.name)
-            os.system(cmd_cat_csmap)
-            
-            tmppileup = tempfile.NamedTemporaryFile()
-            cmdpileup = "maq pileup -m %s -q %s %s %s > %s" %(max_mismatch,min_mapqual,ref_bfa.name,tmpcsmap.name,tmppileup.name)
-            os.system(cmdpileup)
-            tmppileup.seek(0)
-            print >> out_f2, "#chr\tposition\tref_nt\tcoverage\tSNP_count\tA_count\tT_count\tG_count\tC_count"
-            for line in file(tmppileup.name):
-                elems = line.strip().split()
-                ref_nt = elems[2].capitalize()
-                read_nt = elems[4]
-                coverage = int(elems[3])
-                a,t,g,c = 0,0,0,0
-                ref_nt_count = 0
-                for ch in read_nt:
-                    ch = ch.capitalize()
-                    if ch not in ['A','T','G','C',',','.']:
-                        continue
-                    if ch in [',','.']:
-                        ch = ref_nt
-                        ref_nt_count += 1
-                    try:
-                        nt_ind = ['A','T','G','C'].index(ch)
-                        if nt_ind == 0:
-                            a+=1
-                        elif nt_ind == 1:
-                            t+=1
-                        elif nt_ind == 2:
-                            g+=1
-                        else:
-                            c+=1
-                    except ValueError, we:
-                        print >>sys.stderr, we
-                print >> out_f2, "%s\t%s\t%s\t%s\t%s\t%s" %("\t".join(elems[:4]),coverage-ref_nt_count,a,t,g,c)
-        except Exception, er2:
-            stop_err("Encountered error while mapping: %s" %(str(er2)))
-            
-    
-    else:   #single end reads
-        tmpf = tempfile.NamedTemporaryFile()   
-        tmpfastq = tempfile.NamedTemporaryFile()
-        cmd1 = "solid2fastq_modified.pl 'no' %s %s %s %s %s %s %s 2>&1" %(tmpf.name,None,None,f3_read_fname,f3_qual_fname,None,None)
-        try:
-            os.system(cmd1)
-            os.system('gunzip -c %s >> %s' %(tmpf.name,tmpfastq.name))
-            tmpf.close()
-        except:
-            stop_err("Error converting data to fastq format.")
-        
-        #make a temp directory where the split fastq files will be stored
-        try: 
-            split_dir = tempfile.mkdtemp()
-            split_file_prefix = tempfile.mktemp(dir=split_dir)
-            splitcmd = 'split -a 2 -l %d %s %s' %(32000000,tmpfastq.name,split_file_prefix) #32M lines correspond to 8M reads
-            os.system(splitcmd)
-            os.chdir(split_dir)
-            for i,fastq in enumerate(os.listdir(split_dir)):
-                tmpbfq = tempfile.NamedTemporaryFile()
-                cmd3 = 'maq fastq2bfq %s %s 2>&1; maq map -c %s.csmap %s %s  1>/dev/null 2>&1; maq mapview %s.csmap > %s.txt' %(fastq,tmpbfq.name,fastq,ref_csbfa.name,tmpbfq.name,fastq,fastq)
-                subprocess_dict['sp'+str(i+1)] = subprocess.Popen([cmd3],shell=True,stdout=subprocess.PIPE)
-            
-            while True:
-                all_done = True
-                for j,k in enumerate(subprocess_dict.keys()):
-                    if subprocess_dict['sp'+str(j+1)].wait() != 0:
-                        err = subprocess_dict['sp'+str(j+1)].communicate()[1] 
-                        if err != None:
-                            stop_err("Mapping error: %s" %err)
-                        all_done = False
-                if all_done:
-                    break
-
-            cmdout = "for map in *.txt; do cat $map >> %s; done" %(out_fname)
-            os.system(cmdout)
-            
-            tmpcsmap = tempfile.NamedTemporaryFile()
-            cmd_cat_csmap = "for csmap in *.csmap; do cat $csmap >> %s; done" %(tmpcsmap.name)
-            os.system(cmd_cat_csmap)
-            
-            tmppileup = tempfile.NamedTemporaryFile()
-            cmdpileup = "maq pileup -m %s -q %s %s %s > %s" %(max_mismatch,min_mapqual,ref_bfa.name,tmpcsmap.name,tmppileup.name)
-            os.system(cmdpileup)
-            tmppileup.seek(0)
-            print >> out_f2, "#chr\tposition\tref_nt\tcoverage\tSNP_count\tA_count\tT_count\tG_count\tC_count"
-            for line in file(tmppileup.name):
-                elems = line.strip().split()
-                ref_nt = elems[2].capitalize()
-                read_nt = elems[4]
-                coverage = int(elems[3])
-                a,t,g,c = 0,0,0,0
-                ref_nt_count = 0
-                for ch in read_nt:
-                    ch = ch.capitalize()
-                    if ch not in ['A','T','G','C',',','.']:
-                        continue
-                    if ch in [',','.']:
-                        ch = ref_nt
-                        ref_nt_count += 1
-                    try:
-                        nt_ind = ['A','T','G','C'].index(ch)
-                        if nt_ind == 0:
-                            a+=1
-                        elif nt_ind == 1:
-                            t+=1
-                        elif nt_ind == 2:
-                            g+=1
-                        else:
-                            c+=1
-                    except:
-                        pass
-                print >> out_f2, "%s\t%s\t%s\t%s\t%s\t%s" %("\t".join(elems[:4]),coverage-ref_nt_count,a,t,g,c)
-        except Exception, er2:
-            stop_err("Encountered error while mapping: %s" %(str(er2)))
-    
-    #Build custom track from pileup
-    chr_list=[]
-    out_f2.seek(0)
-    fcov = tempfile.NamedTemporaryFile()
-    fout_a = tempfile.NamedTemporaryFile()
-    fout_t = tempfile.NamedTemporaryFile()
-    fout_g = tempfile.NamedTemporaryFile()
-    fout_c = tempfile.NamedTemporaryFile()
-    fcov.write('''track type=wiggle_0 name="Coverage track" description="Coverage track (from Galaxy)" color=0,0,0 visibility=2\n''')
-    fout_a.write('''track type=wiggle_0 name="Track A" description="Track A (from Galaxy)" color=255,0,0 visibility=2\n''')
-    fout_t.write('''track type=wiggle_0 name="Track T" description="Track T (from Galaxy)" color=0,255,0 visibility=2\n''')
-    fout_g.write('''track type=wiggle_0 name="Track G" description="Track G (from Galaxy)" color=0,0,255 visibility=2\n''')
-    fout_c.write('''track type=wiggle_0 name="Track C" description="Track C (from Galaxy)" color=255,0,255 visibility=2\n''')
-    
-    for line in out_f2:
-        if line.startswith("#"):
-            continue
-        elems = line.split()
-        chr = elems[0]
-            
-        if chr not in chr_list:
-            chr_list.append(chr)
-            if not (chr.startswith('chr') or chr.startswith('scaffold')):
-                chr = 'chr' + chr  #prefix bare chromosome names so the UCSC browser accepts the chrom field
-            header = "variableStep chrom=%s" %(chr)
-            fcov.write("%s\n" %(header))
-            fout_a.write("%s\n" %(header))
-            fout_t.write("%s\n" %(header))
-            fout_g.write("%s\n" %(header))
-            fout_c.write("%s\n" %(header))
-        try:
-            pos = int(elems[1])
-            cov = int(elems[3])
-            a = int(elems[5])
-            t = int(elems[6])
-            g = int(elems[7])
-            c = int(elems[8])
-        except:
-            continue
-        fcov.write("%s\t%s\n" %(pos,cov))
-        try:
-            a_freq = a*100./cov
-            t_freq = t*100./cov
-            g_freq = g*100./cov
-            c_freq = c*100./cov
-        except ZeroDivisionError:
-            a_freq=t_freq=g_freq=c_freq=0
-        fout_a.write("%s\t%s\n" %(pos,a_freq))
-        fout_t.write("%s\t%s\n" %(pos,t_freq))
-        fout_g.write("%s\t%s\n" %(pos,g_freq))
-        fout_c.write("%s\t%s\n" %(pos,c_freq))
-
-    fcov.seek(0)
-    fout_a.seek(0)
-    fout_g.seek(0)
-    fout_t.seek(0)
-    fout_c.seek(0)
-    os.system("cat %s %s %s %s %s | cat > %s" %(fcov.name,fout_a.name,fout_t.name,fout_g.name,fout_c.name,out_f3name))
-
-if __name__=="__main__":
-    __main__()
-        
-    
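
The pileup post-processing above reduces to a per-position base count; isolated, the logic is easier to follow. A sketch under the same assumptions the script makes (',' and '.' in the read column denote matches to the reference base)::

    def count_bases(ref_nt, read_nt):
        # Count A/T/G/C calls in one pileup read column.
        counts = {'A': 0, 'T': 0, 'G': 0, 'C': 0}
        ref_matches = 0
        for ch in read_nt.upper():
            if ch in (',', '.'):  # read base matches the reference
                ch = ref_nt.upper()
                ref_matches += 1
            if ch in counts:
                counts[ch] += 1
        return counts, ref_matches

The SNP count the script reports per position is then coverage minus ref_matches.
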
--- a/tools/solid_tools/maq_cs_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,120 +0,0 @@
-<tool id="maq_cs_wrapper" name="MAQ for SOLiD" version="1.0.0">
-    <description> </description>
-    <command interpreter="python">
-    maq_cs_wrapper.py 
-    $output1 
-    $output2 
-    $ref 
-    $library_type.f3_reads 
-    $library_type.f3_qual 
-    $library_type.is_paired
-    #if $library_type.is_paired == "yes":  
-     $library_type.r3_reads 
-     $library_type.r3_qual 
-    #else:
-     "None"
-     "None"
-    #end if
-    $min_mapqual
-    $max_mismatch
-    $output3
-    
-    </command>
-
-    <inputs>
-        <param name="ref" type="data" format="fasta" label="Target Genome"/> 
-        <conditional name="library_type">
-          <param name="is_paired" type="select" label="Is the library mate-paired?" multiple="false">
-             <option value="no">No</option>
-             <option value="yes">Yes</option>
-         </param>
-         <when value="no">
-           <param name="f3_reads" type="data" format="csfasta" label="F3 reads file"/> 
-           <param format="qualsolid" name="f3_qual" type="data" label="F3 quality file" help="If your dataset doesn't show up in the menu, click the pencil icon next to your dataset and set the datatype to 'qualsolid'" /> 
-          </when>
-          <when value="yes">
-           <param name="f3_reads" type="data" format="csfasta" label="F3 reads file"/> 
-           <param format="qualsolid" name="f3_qual" type="data" label="F3 quality file" help="If your dataset doesn't show up in the menu, click the pencil icon next to your dataset and set the datatype to 'qualsolid'" /> 
-           <param name="r3_reads" type="data" format="csfasta" label="R3 reads file"/> 
-           <param format="qualsolid" name="r3_qual" type="data" label="R3 quality file" help="If your dataset doesn't show up in the menu, click the pencil icon next to your dataset and set the datatype to 'qualsolid'" /> 
-          </when>
-      </conditional>
-      <param name="min_mapqual" type="integer" size="3" value="0" label="Minimum mapping quality allowed for a read to be used" help="Reads below the specified mapping quality will not be considered in coverage and SNP analysis."/> 
-      <param name="max_mismatch" type="integer" size="3" value="7" label="Maximum number of mismatches allowed for a read to be used" help="Reads above the specified threshold will not be considered in coverage and SNP analysis."/> 
-    </inputs>
-    <outputs>
-        <data format="tabular" name="output1" metadata_source="ref" />
-        <data format="tabular" name="output2" metadata_source="ref" />
-        <data format="customtrack" name="output3" metadata_source="ref" />
-    </outputs>
-    
-    <!--  "ToolTestCase does not deal with multiple outputs properly yet."
-    <tests>
-        
-        <test>
-            <param name="ref" value="phiX_mod.fasta" />
-            <param name="is_paired" value="no" />
-            <param name="f3_reads" value="phiX_solid.csfasta" />
-            <param name="f3_qual" value="phiX_solid.qualsolid" />
-            <param name="min_mapqual" value="0" />
-            <param name="max_mismatch" value="7" />
-            <output name="output1" file="phiX_solid_maq.map" />
-            <output name="output2" file="phiX_solid_maq.pileup" />
-            <output name="output3" file="phiX_solid_maq.ctrack" />
-            
-        </test>
-    </tests>
-    -->
-<help>
-
-.. class:: infomark
-
-**What it does**
-
-This tool maps SOLiD color-space reads against the target genome using MAQ. It produces three output datasets: 
-
-
-**ALIGNMENT INFO** : contains the read alignment information, 
-
-**PILEUP** : contains the coverage and SNP statistics for every nucleotide of the target genome,
-
-**CUSTOM TRACK** : contains the coverage and SNP statistics as custom tracks displayable in the UCSC browser. 
-
------
-
-**The ALIGNMENT INFO dataset will contain the following fields:**
-
-* column 1  = read name
-* column 2  = chromosome
-* column 3  = position
-* column 4  = strand
-* column 5  = insert size from the outer coordinates of a pair
-* column 6  = paired flag
-* column 7  = mapping quality
-* column 8  = single-end mapping quality
-* column 9  = alternative mapping quality
-* column 10 = number of mismatches of the best hit
-* column 11 = sum of qualities of mismatched bases of the best hit
-* column 12 = number of 0-mismatch hits of the first 24bp
-* column 13 = number of 1-mismatch hits of the first 24bp on the reference
-* column 14 = length of the read
-* column 15 = read sequence
-* column 16 = read quality
-
-
-**The PILEUP dataset will contain the following fields:**
-
-* column 1  = chromosome
-* column 2  = position
-* column 3  = reference nucleotide
-* column 4  = coverage (number of reads that cover this position)
-* column 5  = number of SNPs
-* column 6  = number of As
-* column 7  = number of Ts
-* column 8  = number of Gs
-* column 9  = number of Cs
-
-</help>
-<code file="maq_cs_wrapper_code.py"/>
-
-</tool>
--- a/tools/solid_tools/maq_cs_wrapper_code.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-def exec_before_job(app, inp_data, out_data, param_dict, tool):
-    out_data['output1'].name = out_data['output1'].name + " [ ALIGNMENT INFO ]"
-    out_data['output2'].name = out_data['output2'].name + " [ PILEUP ]"
-    out_data['output3'].name = out_data['output3'].name + " [ CUSTOM TRACK ]"
-    
--- a/tools/solid_tools/qualsolid_boxplot_graph.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-#!/bin/sh
-
-#    Modified fastq_quality_boxplot_graph.sh from FASTX-toolkit - FASTA/FASTQ preprocessing tools.
-#    Copyright (C) 2009  A. Gordon (gordon@cshl.edu)
-#
-#   This program is free software: you can redistribute it and/or modify
-#   it under the terms of the GNU Affero General Public License as
-#   published by the Free Software Foundation, either version 3 of the
-#   License, or (at your option) any later version.
-#
-#   This program is distributed in the hope that it will be useful,
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#   GNU Affero General Public License for more details.
-#
-#    You should have received a copy of the GNU Affero General Public License
-#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-function usage()
-{
-	echo "SOLiD-Quality BoxPlot plotter"
-	echo "Generates a SOLiD quality score box-plot graph "
-	echo
-	echo "Usage: $0 [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]"
-	echo
-	echo "  [-p]           - Generate PostScript (.PS) file. Default is PNG image."
-	echo "  [-i INPUT.TXT] - Input file. Should be the output of \"solid_qual_stats\" program."
-	echo "  [-o OUTPUT]    - Output file name. default is STDOUT."
-	echo "  [-t TITLE]     - Title (usually the solid file name) - will be plotted on the graph."
-	echo
-	exit 
-}
-
-#
-# Input Data columns: #pos	cnt	min	max	sum       	mean	Q1	med	Q3	IQR	lW	rW
-#  As produced by "solid_qual_stats" program
-
-TITLE=""					# default title is empty
-FILENAME=""
-OUTPUTTERM="set term png size 800,600"
-OUTPUTFILE="/dev/stdout"   			# Default output file is simply "stdout"
-while getopts ":t:i:o:ph" Option
-	do
-	case $Option in
-		# w ) CMD=$OPTARG; FILENAME="PIMSLogList.txt"; TARGET="logfiles"; ;;
-		t ) TITLE="for $OPTARG" ;;
-		i ) FILENAME=$OPTARG ;;
-		o ) OUTPUTFILE="$OPTARG" ;;
-		p ) OUTPUTTERM="set term postscript enhanced color \"Helvetica\" 4" ;;
-		h ) usage ;;
-		* ) echo "unrecognized argument. use '-h' for usage information."; exit -1 ;;
-	esac
-done
-shift $(($OPTIND - 1)) 
-
-
-if [ "$FILENAME" == "" ]; then
-	usage
-fi
-
-if [ ! -r "$FILENAME" ]; then
-	echo "Error: can't open input file ($1)." >&2
-	exit 1
-fi
-
-#Read number of cycles from the stats file (each line is a cycle, minus the header line)
-#But for the graph, I want xrange to reach (num_cycles+1), so I don't subtract 1 now.
-NUM_CYCLES=$(cat "$FILENAME" | wc -l) 
-
-GNUPLOTCMD="
-$OUTPUTTERM
-set boxwidth 0.8 
-set size 1,1
-set key Left inside
-set xlabel \"read position\"
-set ylabel \"Quality Score \"
-set title  \"Quality Scores $TITLE\"
-#set auto x
-set bars 4.0
-set xrange [ 0: $NUM_CYCLES ]
-set yrange [-2:45]
-set y2range [-2:45]
-set xtics 1 
-set x2tics 1
-set ytics 2
-set y2tics 2
-set tics out
-set grid ytics
-set style fill empty
-plot '$FILENAME' using 1:7:11:12:9 with candlesticks lt 1  lw 1 title 'Quartiles' whiskerbars, \
-      ''         using 1:8:8:8:8 with candlesticks lt -1 lw 2 title 'Medians'
-"
-
-echo "$GNUPLOTCMD" | gnuplot > "$OUTPUTFILE"
--- a/tools/solid_tools/solid_qual_boxplot.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-<tool id="solid_qual_boxplot" name="Draw quality score boxplot" version="1.0.0">
-	<description>for SOLiD data</description>
-	
-	<command interpreter="bash">qualsolid_boxplot_graph.sh -t '$input.name' -i $input -o $output</command>
-	
-	<inputs>
-		<param format="txt" name="input" type="data" label="Statistics report file (output of 'Quality Statistics for SOLiD data' tool)" />
-	</inputs>
-
-	<outputs>
-		<data format="png" name="output" metadata_source="input" />
-	</outputs>
-<help>
-
-**What it does**
-
-Creates a boxplot graph for the quality scores in the library.
-
-.. class:: infomark
-
-**TIP:** Use the **Quality Statistics for SOLiD data** tool to generate the report file needed for this tool.
-
------
-
-**Output Example**
-
-* Black horizontal lines are medians
-* Rectangular red boxes show the Inter-quartile Range (IQR) (top value is Q3, bottom value is Q1)
-* Whiskers extend to at most 1.5*IQR beyond the quartiles
-
-
-.. image:: ./static/images/solid_qual.png
-
-------
-
-This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
-
- .. __: http://hannonlab.cshl.edu/fastx_toolkit/
-</help>
-</tool>
--- a/tools/solid_tools/solid_qual_stats.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,140 +0,0 @@
-#!/usr/bin/env python
-#Guruprasad Ananda
-
-import sys, os, zipfile, tempfile
-
-QUAL_UPPER_BOUND = 41
-QUAL_LOWER_BOUND = 1
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
- 
-def unzip( filename ):
-    zip_file = zipfile.ZipFile( filename, 'r' )
-    tmpfilename = tempfile.NamedTemporaryFile().name
-    for name in zip_file.namelist():
-        file( tmpfilename, 'a' ).write( zip_file.read( name ) )
-    zip_file.close()
-    return tmpfilename
-   
-def __main__():
-
-    infile_score_name = sys.argv[1].strip()
-    fout = open(sys.argv[2].strip(),'w')
-
-    infile_is_zipped = False
-    if zipfile.is_zipfile( infile_score_name ):
-        infile_is_zipped = True
-        infile_name = unzip( infile_score_name )
-    else:
-        infile_name = infile_score_name
-    
-    readlen = None
-    invalid_lines = 0
-    j = 0
-    for line in file( infile_name ):
-        line = line.strip()
-        if not(line) or line.startswith("#") or line.startswith(">"):
-            continue
-        elems = line.split()
-        try:
-            for item in elems:
-                int(item)
-            if not readlen:
-                readlen = len(elems)
-            if len(elems) != readlen:
-                print "Note: Reads in the input dataset are of variable lengths."
-            j += 1
-        except ValueError:
-            invalid_lines += 1
-        if j > 10:
-            break
-        
-    position_dict = {}
-    print >>fout, "column\tcount\tmin\tmax\tsum\tmean\tQ1\tmed\tQ3\tIQR\tlW\trW"
-    for k,line in enumerate(file( infile_name )):
-        line = line.strip()
-        if not(line) or line.startswith("#") or line.startswith(">"):
-            continue
-        elems = line.split()
-        if position_dict == {}:
-            for pos in range(readlen):
-                position_dict[pos] = [0]*QUAL_UPPER_BOUND
-        if len(elems) != readlen:
-            invalid_lines += 1
-            continue
-        for ind,item in enumerate(elems):
-            try:
-                item = int(item)
-                position_dict[ind][item]+=1
-            except:
-                pass
-    
-    invalid_positions = 0
-    for pos in position_dict:
-        carr = position_dict[pos] #count array for position pos
-        total = sum(carr) #number of bases found in this column.
-        med_elem = int(round(total/2.0))
-        lowest = None   #Lowest quality score value found in this column.
-        highest = None  #Highest quality score value found in this column.
-        median = None   #Median quality score value found in this column.
-        qsum = 0.0      #Sum of quality score values for this column.
-        q1 = None       #1st quartile quality score.
-        q3 = None       #3rd quartile quality score.
-        q1_elem = int(round((total+1)/4.0))
-        q3_elem = int(round((total+1)*3/4.0))
-        
-        try:
-            for ind,cnt in enumerate(carr):
-                qsum += ind*cnt
-                
-                if cnt!=0:
-                    highest = ind
-                
-                if lowest==None and cnt!=0:  #first non-zero count
-                    lowest = ind
-                
-                if q1==None:
-                    if sum(carr[:ind+1]) >= q1_elem:
-                        q1 = ind
-                           
-                if median==None:
-                    if sum(carr[:ind+1]) < med_elem:
-                        continue
-                    median = ind
-                    if total%2 == 0: #even number of elements
-                        median2 = median
-                        if sum(carr[:ind+1]) < med_elem+1:
-                            for ind2,elem in enumerate(carr[ind+1:]):
-                                if elem != 0:
-                                    median2 = ind+ind2+1
-                                    break
-                        median = (median + median2)/2.0
-    
-                
-                if q3==None:
-                    if sum(carr[:ind+1]) >= q3_elem:
-                        q3 = ind
-                 
-                
-            mean = qsum/total    #Mean quality score value for this column.
-            iqr = q3-q1
-            left_whisker = max(q1 - 1.5*iqr,lowest)
-            right_whisker = min(q3 + 1.5*iqr,highest)
-            
-            print >>fout,"%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" %(pos+1,total,lowest,highest,qsum,mean,q1,median,q3,iqr,left_whisker,right_whisker)
-        except:
-            invalid_positions += 1
-            nullvals = ['NA']*11
-            print >>fout,"%s\t%s" %(pos+1,'\t'.join(nullvals))
-
-    if invalid_lines:
-        print "Skipped %d reads as invalid." %invalid_lines
-    if invalid_positions:
-        print "Skipped stats computation for %d read positions." %invalid_positions
-        
-if __name__=="__main__":
-    __main__()
-        
-    
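
Each per-position quartile above is read off a histogram of score counts; the same idea is more direct with a cumulative sum. A simplified sketch that ignores the even-count median interpolation the original performs::

    def percentile_from_hist(counts, frac):
        # Smallest score whose cumulative count reaches frac of the total.
        total = sum(counts)
        target = frac * total
        running = 0
        for score, cnt in enumerate(counts):
            running += cnt
            if running >= target:
                return score
        return None

    # For one read position with count array carr:
    #   q1, med, q3 = [percentile_from_hist(carr, f) for f in (0.25, 0.5, 0.75)]
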
--- a/tools/solid_tools/solid_qual_stats.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-<tool id="solid_qual_stats" name="Compute quality statistics" version="1.0.0">
-    <description>for SOLiD data</description>
-    <command interpreter="python">solid_qual_stats.py $input $output1</command>
-
-    <inputs>
-        <param format="qualsolid" name="input" type="data" label="SOLiD qual file" help="If your dataset doesn't show up in the menu, click the pencil icon next to your dataset and set the datatype to 'qualsolid'" />
-    </inputs>
-    <outputs>
-        <data format="txt" name="output1" metadata_source="input" />
-    </outputs>
-    <tests>
-        <test>
-            <param name="input" value="qualscores.qualsolid" />
-            <output name="output1" file="qualsolid.stats" />
-        </test>
-    </tests>
-
-<help>
-
-**What it does**
-
-Creates quality statistics report for the given SOLiD quality score file.
-
-.. class:: infomark
-
-**TIP:** This statistics report can be used as input for **Quality Boxplot for SOLiD data** tool.
-
------
-
-**The output file will contain the following fields:**
-
-* column    = column number (position on the read)
-* count   = number of bases found in this column.
-* min     = Lowest quality score value found in this column.
-* max     = Highest quality score value found in this column.
-* sum     = Sum of quality score values for this column.
-* mean    = Mean quality score value for this column.
-* Q1    = 1st quartile quality score.
-* med   = Median quality score.
-* Q3    = 3rd quartile quality score.
-* IQR   = Inter-Quartile range (Q3-Q1).
-* lW    = 'Left-Whisker' value (for boxplotting).
-* rW    = 'Right-Whisker' value (for boxplotting).
-
-
-
-
-
-**Output Example**::
-
-    column  count   min max sum mean    Q1  med Q3  IQR lW  rW
-    1   6362991 2  32  250734117   20.41   5  9  28  23   2  31
-    2   6362991 2  32  250531036   21.37  10  26 30  20   5  31
-    3   6362991 2  34  248722469   19.09  10  26 30  20   5  31
-    4   6362991 2  34  247654797   18.92  10  26 30  20   5  31
-    .
-    .    
-    32  6362991 2  31  143436943   16.54   3  10  25  22  2  31
-    33  6362991 2  32  114269843   16.96   3  10  25  22  2  31
-    34  6362991 2  29  140638447   12.10   3  10  25  22  2  29
-    35  6362991 2  29  138910532   11.83   3  10  25  22  2  29
-    
-------
-
-This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
-
- .. __: http://hannonlab.cshl.edu/fastx_toolkit/
-</help>
-</tool>
--- a/tools/sr_assembly/velvetg.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,301 +0,0 @@
-<tool id="velvetg" name="velvetg" version="1.0.0">
-  <description>Velvet sequence assembler for very short reads</description>
-  <command interpreter="python">
-    velvetg_wrapper.py 
-           '$input.extra_files_path'
-           '$contigs' '$stats' '$LastGraph' '$velvet_asm' '$unused_reads_fasta'
-           #if $generate_amos.afg  == "yes":
-               -amos_file $generate_amos.afg
-           #end if
-           #if $unused_reads.generate_unused  == "yes":
-               -unused_reads $unused_reads.generate_unused
-           #end if
-           $read_trkg
-           #if $coverage.cutoff == "auto":
-               -cov_cutoff auto
-           #elif $coverage.cutoff == "value":
-               -cov_cutoff $coverage.cov_cutoff
-           #end if
-           #if $expected.coverage == "auto":
-               -exp_cov auto
-           #elif $expected.coverage == "value":
-               -exp_cov $expected.exp_cov
-           #end if
-           #if $contig_lgth.use_contig_lgth == "yes":
-               -min_contig_lgth $contig_lgth.min_contig_lgth
-           #end if
-           #if $reads.paired == "yes":
-               #if int($reads.ins_length) > 0:
-                   -ins_length $reads.ins_length
-               #end if
-               #if $reads.options.advanced == "yes":
-                   #if int($reads.options.ins_length_sd) > 0:
-                       -ins_length_sd $reads.options.ins_length_sd
-                   #end if
-                   #if int($reads.options.ins_length2) > 0:
-                       -ins_length2 $reads.options.ins_length2
-                   #end if
-                   #if int($reads.options.ins_length2_sd) > 0:
-                       -ins_length2_sd $reads.options.ins_length2_sd
-                   #end if
-                   #if int($reads.options.ins_length_long) > 0:
-                       -ins_length_long $reads.options.ins_length_long
-                   #end if
-                   #if int($reads.options.ins_length_long_sd) > 0:
-                       -ins_length_long_sd $reads.options.ins_length_long_sd
-                   #end if
-                   #if int($reads.options.max_branch_length) > 0:
-                       -max_branch_length $reads.options.max_branch_length
-                   #end if
-                   #if int($reads.options.max_divergence) > 0:
-                       -max_divergence $reads.options.max_divergence
-                   #end if
-                   #if int($reads.options.max_gap_count) > 0:
-                       -max_gap_count $reads.options.max_gap_count
-                   #end if
-                   #if int($reads.options.min_pair_count) > 0:
-                       -min_pair_count $reads.options.min_pair_count
-                   #end if
-                   #if int($reads.options.max_coverage) > 0:
-                       -max_coverage $reads.options.max_coverage
-                   #end if
-                   #if int($reads.options.long_mult_cutoff) > 0:
-                       -long_mult_cutoff $reads.options.long_mult_cutoff
-                   #end if
-                   $reads.options.scaffolding
-               #end if
-           #end if
-  </command>
-  <inputs>
-    <param name="input" type="data" format="velvet" label="Velvet Dataset" help="Prepared by velveth."/>
-    <conditional name="generate_amos">
-      <param name="afg" type="select" label="Generate a AMOS.afg file">
-        <option value="no">No</option>
-        <option value="yes">Yes</option>
-      </param>
-      <when value="no"/>
-      <when value="yes"/>
-    </conditional>
-
-    <conditional name="unused_reads">
-      <param name="generate_unused" type="select" label="Generate a UnusedReads fasta file">
-        <option value="no">No</option>
-        <option value="yes">Yes</option>
-      </param>
-      <when value="no"/>
-      <when value="yes"/>
-    </conditional>
-
-    <conditional name="last_graph">
-      <param name="generate_graph" type="select" label="Generate velvet LastGraph file">
-        <option value="no">No</option>
-        <option value="yes">Yes</option>
-      </param>
-      <when value="no"/>
-      <when value="yes"/>
-    </conditional>
-
-    <param name="read_trkg" type="boolean" checked="false" truevalue="-read_trkg yes" falsevalue="-read_trkg no" label="track short read positions" help=" tracking of short read positions in assembly"/>
-
-    <conditional name="coverage">
-      <param name="cutoff" type="select" label="Coverage cutoff" help="">
-        <option value="none">None</option>
-        <option value="auto">Automatically Determined</option> 
-        <option value="value">Specify Cutoff Value</option>
-      </param>
-      <when value="none"/>
-      <when value="auto"/>
-      <when value="value">
-        <param name="cov_cutoff" value = "10.0" label="Remove nodes with coverage below" type="float" />
-      </when>
-    </conditional>
-
-    <conditional name="expected">
-      <param name="coverage" type="select" label="Expected Coverage of Unique Regions" help="">
-        <option value="none">None</option>
-        <option value="auto">Automatically Determined</option> 
-        <option value="value">Specify Expected Value</option>
-      </param>
-      <when value="none"/>
-      <when value="auto"/>
-      <when value="value">
-        <param name="exp_cov" value = "10.0" label="Remove nodes with coverage below" type="float" />
-      </when>
-    </conditional>
-
-    <conditional name="contig_lgth">
-      <param name="use_contig_lgth" type="select" label=" Set minimum contig length" help="minimum contig length exported to contigs.fa file (default: hash length * 2).">
-        <option value="no">No</option>
-        <option value="yes">Yes</option>
-      </param>
-      <when value="no"/>
-      <when value="yes">
-        <param name="min_contig_lgth" value = "42" label="minimum contig length" type="integer" help="minimum contig length exported to contigs.fa file (default: hash length * 2)"/>
-      </when>
-    </conditional>
-
-    <conditional name="reads">
-      <param name="paired" type="select" label="Using Paired Reads">
-        <option value="no">No</option>
-        <option value="yes" selected="${input.metadata.paired_end_reads}">Yes</option>
-      </param>
-      <when value="no"/>
-      <when value="yes">
-        <param name="ins_length" value = "-1" label="Insert Length in Paired-End Read dataset (ignored when -1)" type="integer" help="Expected distance between two paired end reads"/>
-        <conditional name="options">
-          <param name="advanced" type="select" label="Velvet Advanced Options">
-            <option value="no">Use Defaults</option>
-            <option value="yes">Set Advanced Option Values</option>
-          </param>
-          <when value="no"/>
-          <when value="yes">
-            <param name="ins_length_sd" value = "-1" label="Estimate of Standard Deviation of Paired-End Read dataset(ignored when -1)" type="integer" help="Estimate of standard deviation of Paired-End Read dataset (default: 10% of corresponding length)"/>
-            <param name="ins_length2" value = "-1" label="Insert Length in 2nd Paired-End Short Read dataset (ignored when -1)" type="integer" help="Expected distance between two paired end reads in the second short-read dataset"/>
-            <param name="ins_length2_sd" value = "-1" label="Estimate of Standard Deviation of 2nd Paired-End Read dataset (ignored when -1)" type="integer" help="Estimate of standard deviation of 2nd Paired-End Read dataset (default: 10% of corresponding length)"/>
-            <param name="ins_length_long" value = "-1" label="Insert Length in Long Paired-End Read dataset (ignored when -1)" type="integer" help="Expected distance between two long paired-end reads"/>
-            <param name="ins_length_long_sd" value = "-1" label="Estimate of Standard Deviation of 2nd Paired-End Read dataset (ignored when -1)" type="integer" help="Estimate of standard deviation of Long Paired-End Read dataset (default: 10% of corresponding length)"/>
-            <param name="max_branch_length" value = "-1" label="Maximum branch length (ignored when -1)" type="integer" help="maximum length in base pair of bubble (default: 100)"/>
-            <param name="max_divergence" value = "-1." label="Maximum max_divergence (between .0 and 1., ignored when -1.)" type="float" help="maximum divergence rate between two branches in a bubble (default: .2)"/>
-            <param name="max_gap_count" value = "-1" label="Maximum gap count (ignored when -1)" type="integer" help="maximum number of gaps allowed in the alignment of the two branches of a bubble (default: 3)"/>
-            <param name="min_pair_count" value = "-1" label="Minimum read-pair count (ignored when -1)" type="integer" help="minimum number of paired end connections to justify the scaffolding of two long contigs (default: 10)"/>
-            <param name="max_coverage" value = "-1." label="Maximum coverage exclusion(ignored when -1.)" type="float" help="Exclude data that has coverage more than this maximum coverage value"/>
-            <param name="long_mult_cutoff" value = "-1" label="Minimum number of long reads required to merge contigs (ignored when -1)" type="integer" help="minimum number of long reads required to merge contigs (default: 2)"/>
-            <param name="scaffolding" type="boolean" checked="true" truevalue="-scaffolding yes" falsevalue="-scaffolding no" label="Use Scaffolding" help="Scaffold contigs that it cannot quite be connected (This results in sequences of Ns in the contigs)"/>
-
-          </when>
-        </conditional>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <!--
-    <data format="velvet_graph" name="LastGraph" />
-    -->
-    <data format="txt" name="LastGraph" label="${tool.name} on ${on_string}: LastGraph">
-      <filter>last_graph['generate_graph'] == "yes"</filter>
-    </data>
-    <data format="afg" name="velvet_asm" label="${tool.name} on ${on_string}: AMOS.afg">
-      <filter>generate_amos['afg'] == "yes"</filter>
-    </data>
-    <data format="fasta" name="unused_reads_fasta" label="${tool.name} on ${on_string}: Unused Reads">
-      <filter>unused_reads['generate_unused'] == "yes"</filter>
-    </data>
-    <data format="tabular" name="stats" label="${tool.name} on ${on_string}: Stats"/>
-    <data format="fasta" name="contigs" label="${tool.name} on ${on_string}: Contigs"/>
-  </outputs>
-  <requirements>
-    <requirement type="package">velvet</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="input" value="velveth_test1/output.html" ftype="velvet" >
-        <composite_data value='velveth_test1/Sequences' ftype="Sequences"/>
-        <composite_data value='velveth_test1/Roadmaps' ftype="Roadmaps"/>
-        <composite_data value='velveth_test1/Log'/>
-      </param>
-      <param name="afg" value="yes" />
-      <param name="generate_unused" value="yes" />
-      <param name="generate_graph" value="no" />
-      <param name="read_trkg" value="-read_trkg no" />
-      <param name="cutoff" value="auto" />
-      <param name="coverage" value="auto" />
-      <param name="use_contig_lgth" value="no" />
-      <param name="paired" value="no" />
-      <!--
-      <output name="LastGraph" file="velvetg_test1/lastgraph.txt" compare="diff"/>
-      -->
-      <output name="velvet_asm" file="velvetg_test1/amos.afg" compare="diff"/>
-      <output name="unused_reads_fasta" file="velvetg_test1/unusedreads.fa" compare="diff"/>
-      <output name="stats" file="velvetg_test1/stats.csv" compare="diff"/>
-      <output name="contigs" file="velvetg_test1/contigs.fa" compare="diff"/>
-    </test>
-  </tests>
-  <help>
-**Velvet Overview**
-
-Velvet_ is a de novo genomic assembler specially designed for short read sequencing technologies, such as Solexa or 454, developed by Daniel Zerbino and Ewan Birney at the European Bioinformatics Institute (EMBL-EBI), near Cambridge, in the United Kingdom.
-
-Velvet currently takes in short read sequences, removes errors then produces high quality unique contigs. It then uses paired-end read and long read information, when available, to retrieve the repeated areas between contigs.
-
-Read the Velvet `documentation`__ for details on using the Velvet Assembler. 
-
-.. _Velvet: http://www.ebi.ac.uk/~zerbino/velvet/
-
-.. __: http://www.ebi.ac.uk/~zerbino/velvet/Manual.pdf
-
-------
-
-**Input formats**
-
-Velvet can input sequence files in the following formats: fasta fastq fasta.gz fastq.gz eland gerald
-
-The input files are prepared for the velvet assembler using **velveth**.
-
-------
-
-**Outputs**
-
-**Contigs**
-
-The *contigs.fa* file.  
-This fasta file contains the sequences of the contigs longer than 2k, where k is the word-length used in velveth. If you have specified a min contig length threshold, then the contigs shorter than that value are omitted.
-Note that the length and coverage information provided in the header of each contig should therefore be understood in k-mers and in k-mer coverage (cf. 5.1) respectively.
-The N's in the sequence correspond to gaps between scaffolded contigs. The number of N's corresponds to the estimated length of the gap. For reasons of compatibility with the archives, any gap shorter than 10bp is represented by a sequence of 10 N's.
-
-**Stats**
-
-The *stats.txt* file.  
-This file is a simple tab-delimited description of the nodes. The column names are pretty much self-explanatory. Note however that node lengths are given in k-mers. To obtain the length in nucleotides of each node you simply need to add k - 1, where k is the word-length used in velveth.
-The in and out columns correspond to the number of arcs on the 5' and 3' ends of the contig respectively.
-The coverages in columns short1 cov, short1 Ocov, short2 cov, and short2 Ocov are provided in k-mer coverage (5.1).
-Also, the difference between # cov and # Ocov is the way these values are computed. In the first count, slightly divergent sequences are added to the coverage tally. However, in the second, stricter count, only the sequences which map perfectly onto the consensus sequence are taken into account.
-
-**LastGraph**
-
-The *LastGraph* file.  
-This file describes in its entirety the graph produced by Velvet. 
-
-**AMOS.afg**
-
-The *velvet_asm.afg* file.  
-This file is mainly designed to be read by the open-source AMOS genome assembly package. Nonetheless, a number of programs are available to transform this kind of file into other assembly file formats (namely ACE, TIGR, Arachne and Celera). See http://amos.sourceforge.net/ for more information.
-The file describes all the contigs contained in the contigs.fa file (cf 4.2.1).
-
-------
-
-**Velvet parameter list**
-
-This is a list of implemented Velvetg options::
-
-  Standard options:
-        -cov_cutoff  floating-point|auto : removal of low coverage nodes AFTER tour bus or allow the system to infer it
-                (default: no removal)
-        -ins_length  integer             : expected distance between two paired end reads (default: no read pairing)
-        -read_trkg  yes|no               : tracking of short read positions in assembly (default: no tracking)
-        -min_contig_lgth  integer        : minimum contig length exported to contigs.fa file (default: hash length * 2)
-        -amos_file  yes|no               : export assembly to AMOS file (default: no export)
-        -exp_cov  floating point|auto    : expected coverage of unique regions or allow the system to infer it
-                (default: no long or paired-end read resolution)
-   
-  Advanced options:
-        -ins_length2  integer            : expected distance between two paired-end reads in the second short-read dataset (default: no read pairing)
-        -ins_length_long  integer        : expected distance between two long paired-end reads (default: no read pairing)
-        -ins_length*_sd  integer         : est. standard deviation of respective dataset (default: 10% of corresponding length)
-                [replace '*' by nothing, '2' or '_long' as necessary]
-        -scaffolding  yes|no             : scaffolding of contigs using paired end information (default: on)
-        -max_branch_length  integer      : maximum length in base pair of bubble (default: 100)
-        -max_divergence  floating-point  : maximum divergence rate between two branches in a bubble (default: 0.2)
-        -max_gap_count  integer          : maximum number of gaps allowed in the alignment of the two branches of a bubble (default: 3)
-        -min_pair_count  integer         : minimum number of paired end connections to justify the scaffolding of two long contigs (default: 10)
-        -max_coverage  floating point    : removal of high coverage nodes AFTER tour bus (default: no removal)
-        -long_mult_cutoff  int           : minimum number of long reads required to merge contigs (default: 2)
-        -unused_reads  yes|no            : export unused reads in UnusedReads.fa file (default: no)
-   
-  Output:
-        directory/contigs.fa             : fasta file of contigs longer than twice hash length
-        directory/stats.txt              : stats file (tab-spaced) useful for determining appropriate coverage cutoff
-        directory/LastGraph              : special formatted file with all the information on the final graph
-        directory/velvet_asm.afg         : (if requested) AMOS compatible assembly file
-
-  </help>
-</tool>
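
The k-mer/nucleotide distinction in the stats file above is easy to trip over; the conversion is one line (k is the hash length chosen in velveth)::

    def node_length_nt(length_in_kmers, k):
        # A node spanning n k-mers covers n + k - 1 nucleotides.
        return length_in_kmers + k - 1

    # e.g. a node of length 100 k-mers assembled with k=21 spans 120 nucleotides.
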
--- a/tools/sr_assembly/velvetg_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Classes encapsulating decypher tool.
-James E Johnson - University of Minnesota
-"""
-import pkg_resources
-import logging, os, string, sys, tempfile, glob, shutil, types, urllib
-import shlex, subprocess
-from optparse import OptionParser, OptionGroup
-from stat import *
-
-
-log = logging.getLogger( __name__ )
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-
-def __main__():
-    #Parse Command Line
-    s = 'velvetg_wrapper.py:  argv = %s\n' % (sys.argv)
-    # print >> sys.stderr, s # so will appear as blurb for file
-    argcnt = len(sys.argv)
-    working_dir = sys.argv[1]
-    contigs = sys.argv[2]
-    stats = sys.argv[3]
-    LastGraph = sys.argv[4]
-    afgFile = sys.argv[5]
-    unusedReadsFile = sys.argv[6]
-    inputs = string.join(sys.argv[7:],' ')
-    cmdline = 'velvetg %s %s > /dev/null' % (working_dir, inputs)
-    # print >> sys.stderr, cmdline # so will appear as blurb for file
-    try:
-        proc = subprocess.Popen( args=cmdline, shell=True, stderr=subprocess.PIPE )
-        returncode = proc.wait()
-        # get stderr, allowing for case where it's very large
-        stderr = ''
-        buffsize = 1048576
-        try:
-            while True:
-                stderr += proc.stderr.read( buffsize )
-                if not stderr or len( stderr ) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-        if returncode != 0:
-            raise Exception, stderr
-    except Exception, e:
-        stop_err( 'Error running velvetg ' + str( e ) )
-    out = open(contigs,'w')
-    contigs_path = os.path.join(working_dir,'contigs.fa')
-    for line in open( contigs_path ):
-        out.write( "%s" % (line) )
-    out.close()
-    out = open(stats,'w')
-    stats_path = os.path.join(working_dir,'stats.txt')
-    for line in open( stats_path ):
-        out.write( "%s" % (line) )
-    out.close()
-    if LastGraph != 'None':
-        out = open(LastGraph,'w')
-        LastGraph_path = os.path.join(working_dir,'LastGraph')
-        for line in open( LastGraph_path ):
-            out.write( "%s" % (line) )
-        out.close()
-    if afgFile != 'None':
-        out = open(afgFile,'w')
-        afgFile_path = os.path.join(working_dir,'velvet_asm.afg')
-        try:
-            for line in open( afgFile_path ):
-                out.write( "%s" % (line) )
-        except:
-            logging.warn( 'error reading %s' %(afgFile_path))
-            pass
-        out.close()
-    if unusedReadsFile != 'None':
-        out = open(unusedReadsFile,'w')
-        unusedReadsFile_path = os.path.join(working_dir,'UnusedReads.fa')
-        try:
-            for line in open( unusedReadsFile_path ):
-                out.write( "%s" % (line) )
-        except:
-            logging.info( 'error reading %s' %(unusedReadsFile_path))
-            pass
-        out.close()
-
-if __name__ == "__main__": __main__()
--- a/tools/sr_assembly/velveth.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,128 +0,0 @@
-<tool id="velveth" name="velveth" version="1.0.0">
-  <description>Prepare a dataset for the Velvet velvetg Assembler</description>
-  <command interpreter="python">
-    velveth_wrapper.py 
-           '$out_file1' '$out_file1.extra_files_path'
-           $hash_length
-           $strand_specific
-           #for $i in $inputs
-                ${i.file_format}
-                ${i.read_type}
-                ${i.input}
-           #end for
-  </command>
-  <inputs>
-    <param label="Hash Length" name="hash_length" type="select" help="k-mer length in base pairs of the words being hashed.">
-      <option value="11">11</option>
-      <option value="13">13</option>
-      <option value="15">15</option>
-      <option value="17">17</option>
-      <option value="19">19</option>
-      <option value="21" selected="yes">21</option>
-      <option value="23">23</option>
-      <option value="25">25</option>
-      <option value="27">27</option>
-      <option value="29">29</option>
-    </param>
-    <param name="strand_specific" type="boolean" checked="false" truevalue="-strand_specific" falsevalue="" label="Use strand specific transcriptome sequencing" help="If you are using a strand specific transcriptome sequencing protocol, you may wish to use this option for better results."/>
-    <repeat name="inputs" title="Input Files">
-      <param label="file format" name="file_format" type="select">
-        <option value="-fasta" selected="yes">fasta</option>
-        <option value="-fastq">fastq</option>
-        <option value="-eland">eland</option>
-        <option value="-gerald">gerald</option>
-      </param>
-      <param label="read type" name="read_type" type="select">
-        <option value="-short" selected="yes">short reads</option>
-        <option value="-shortPaired">shortPaired reads</option>
-        <option value="-short2">short2 reads</option>
-        <option value="-shortPaired2">shortPaired2 reads</option>
-        <option value="-long">long reads</option>
-        <option value="-longPaired">longPaired reads</option>
-      </param>
-
-      <param name="input" type="data" format="fasta,fastq,eland,gerald" label="Dataset"/>
-    </repeat>
-  </inputs>
-  <outputs>
-    <data format="velvet" name="out_file1" />
-  </outputs>
-  <requirements>
-    <requirement type="package">velvet</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="hash_length" value="21" />
-      <param name="read_type" value="-shortPaired" />
-      <!-- <repeat name="inputs"> -->
-      <param name="file_format" value="fasta" />
-      <param name="read_type" value="shortPaired reads" />
-      <param name="input" value="velvet_test_reads.fa" ftype="fasta" />
-      <!-- </repeat> -->
-      <param name="strand_specific" value="" />
-      <output name="out_file1" file="velveth_test1/output.html" lines_diff="4">
-        <extra_files type="file" name='Sequences' value="velveth_test1/Sequences" compare="diff" />
-        <extra_files type="file" name='Roadmaps' value="velveth_test1/Roadmaps" compare="diff" />
-      </output>
-    </test>
-  </tests>
-  <help>
-**Velvet Overview**
-
-Velvet_ is a de novo genomic assembler specially designed for short read sequencing technologies, such as Solexa or 454, developed by Daniel Zerbino and Ewan Birney at the European Bioinformatics Institute (EMBL-EBI), near Cambridge, in the United Kingdom.
-
-Velvet currently takes in short read sequences, removes errors then produces high quality unique contigs. It then uses paired-end read and long read information, when available, to retrieve the repeated areas between contigs.
-
-Read the Velvet `documentation`__ for details on using the Velvet Assembler.
-
-.. _Velvet: http://www.ebi.ac.uk/~zerbino/velvet/
-
-.. __: http://www.ebi.ac.uk/~zerbino/velvet/Manual.pdf
-
-------
-
-**Velveth**
-
-Velveth takes in a number of sequence files, produces a hashtable, then outputs two files in an output directory (creating it if necessary), Sequences and Roadmaps, which are necessary to velvetg.
-
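-A typical invocation (directory and file names are illustrative, not fixed by this tool) looks like::
-
-  velveth output_dir/ 21 -fasta -shortPaired interleaved_reads.fa
-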
-------
-
-**Hash Length**
-
-The hash length, also known as k-mer length, corresponds to the length, in base pairs, of the words being hashed. 
-
-The hash length is the length of the k-mers entered in the hash table. Firstly, you must observe three technical constraints::
-
-# it must be an odd number, to avoid palindromes. If you put in an even number, Velvet will just decrement it and proceed.
-# it must be less than or equal to MAXKMERLENGTH (cf. 2.3.3, by default 31bp), because each k-mer is stored in 64 bits.
-# it must be strictly less than the read length, otherwise you simply will not observe any overlaps between reads, for obvious reasons.
-
-Now you still have quite a lot of possibilities. As is often the case, it is a trade-off between specificity and sensitivity. Longer k-mers bring you more specificity (i.e. fewer spurious overlaps) but lower coverage (cf. below), so there is a sweet spot to be found with time and experience.
-We like to think in terms of "k-mer coverage", i.e. how many times a k-mer has been seen among the reads. The relation between k-mer coverage Ck and standard (nucleotide-wise) coverage C is Ck = C * (L - k + 1) / L, where k is your hash length and L your read length.
-Experience shows that this k-mer coverage should be above 10 to start getting decent results. If Ck is above 20, you might be "wasting" coverage. Experience also shows that empirical tests with different values for k are not that costly to run!
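-
-As a back-of-the-envelope check, the relation above is easy to evaluate (a plain Python sketch; the numbers are illustrative, not from this tool)::
-
-  def kmer_coverage(C, L, k):
-      # Ck = C * (L - k + 1) / L
-      return C * (L - k + 1) / float(L)
-
-  kmer_coverage(50, 36, 21)  # 50x coverage, 36bp reads, k=21 -> ~22.2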
-
-**Input Files**
-
-Velvet works mainly with fasta and fastq formats. For paired-end reads, the assumption is that each read is next to its mate
-read. In other words, if the reads are indexed from 0, then reads 0 and 1 are paired, 2 and 3, 4 and 5, etc.
-
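-If your mates arrive in two separate files, a small helper along these lines (hypothetical, not part of this wrapper; it assumes standard 4-line FASTQ records) produces the interleaved ordering Velvet expects::
-
-  def interleave_fastq(path1, path2, out_path):
-      with open(path1) as f1, open(path2) as f2, open(out_path, 'w') as out:
-          while True:
-              rec1 = [f1.readline() for _ in range(4)]
-              rec2 = [f2.readline() for _ in range(4)]
-              if not rec1[0] or not rec2[0]:
-                  break  # one of the files is exhausted
-              out.writelines(rec1)
-              out.writelines(rec2)
-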
-Supported file formats are::
-
-  fasta
-  fastq 
-  fasta.gz 
-  fastq.gz 
-  eland
-  gerald
-
-Read categories are::
-
-  short (default)
-  shortPaired
-  short2 (same as short, but for a separate insert-size library)
-  shortPaired2 (see above)
-  long (for Sanger, 454 or even reference sequences)
-  longPaired
-
-  </help>
-</tool>
--- a/tools/sr_assembly/velveth_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Wrapper for velveth, the hashing stage of the Velvet short-read assembler.
-James E Johnson - University of Minnesota
-"""
-import pkg_resources
-import logging, os, string, sys, tempfile, glob, shutil, types, urllib
-import shlex, subprocess
-from optparse import OptionParser, OptionGroup
-from stat import *
-
-
-log = logging.getLogger( __name__ )
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
-
-def __main__():
-    #Parse Command Line
-    # positional args: html_file working_dir hash_length [input flags/files ...]
-    html_file = sys.argv[1]
-    working_dir = sys.argv[2]
-    try: # ensure the working directory exists (the test framework does not create it)
-        os.makedirs(working_dir)
-    except Exception, e:
-        stop_err( 'Error creating working directory: ' + str( e ) )
-    hash_length = sys.argv[3]
-    inputs = string.join(sys.argv[4:],' ')
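-    # velveth is invoked as: velveth <out_dir> <hash_length> [[-format -read_type] file ...]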
-    cmdline = 'velveth %s %s %s > /dev/null' % (working_dir, hash_length, inputs)
-    try:
-        proc = subprocess.Popen( args=cmdline, shell=True, stderr=subprocess.PIPE )
-        returncode = proc.wait()
-        # get stderr, allowing for case where it's very large
-        stderr = ''
-        buffsize = 1048576
-        try:
-            while True:
-                stderr += proc.stderr.read( buffsize )
-                if not stderr or len( stderr ) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-        if returncode != 0:
-            raise Exception, stderr
-    except Exception, e:
-        stop_err( 'Error running velveth ' + str( e ) )
-    sequences_path = os.path.join(working_dir,'Sequences')
-    roadmaps_path = os.path.join(working_dir,'Roadmaps')
-    rval = ['<html><head><title>Velvet Galaxy Composite Dataset </title></head><p/>']
-    rval.append('<div>%s<p/></div>' % (cmdline) )
-    rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
-    rval.append( '<li><a href="%s" type="text/plain">%s </a>%s</li>' % (sequences_path,'Sequences','Sequences' ) )
-    rval.append( '<li><a href="%s" type="text/plain">%s </a>%s</li>' % (roadmaps_path,'Roadmaps','Roadmaps' ) )
-    rval.append( '</ul></div></html>' )
-    f = file(html_file,'w')
-    f.write("\n".join( rval ))
-    f.write('\n')
-    f.close()
-
-if __name__ == "__main__": __main__()
--- a/tools/sr_mapping/PerM.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,369 +0,0 @@
-<tool id="PerM" name="Map with PerM" version="1.1.2">
-  <description>for SOLiD and Illumina</description>
-  <!-- works with PerM version 0.2.6 -->
-  <requirements>
-      <requirement type="package">perm</requirement>
-  </requirements>
-  <command>
-    echo -n "PerM "; PerM 2>&amp;1 | grep "Version";
-    PerM
-      #if $s.sourceOfRef.refSource == "history"
-        $s.sourceOfRef.ref
-      #else
-        #if $s.space == "color"
-          "${ filter( lambda x: str( x[0] ) == str( $s.sourceOfRef.index ), $__app__.tool_data_tables[ 'perm_color_indexes' ].get_fields() )[0][-1] }"
-        #elif $s.space == "base"
-          "${ filter( lambda x: str( x[0] ) == str( $s.sourceOfRef.index ), $__app__.tool_data_tables[ 'perm_base_indexes' ].get_fields() )[0][-1] }"
-        #end if
-      #end if
-      #if $s.mate.singleOrPairs == "single":
-        $s.mate.reads
-      #else:
-        -1 $s.mate.reads1 -2 $s.mate.reads2
-        -U $s.mate.upperbound
-        -L $s.mate.lowerbound
-        $s.mate.excludeAmbiguousPairs
-      #end if
-      #if $s.space == "color":
-        --readFormat "csfastq"
-      #else:
-        --readFormat "fastq"
-      #end if
-      #if $int($str($valAlign)) &gt;= 0
-        -v $valAlign
-      #end if
-      #if $align.options == "full":
-        --seed $align.seed
-        -$align.alignments
-        #if $str($align.delimiter) != "None"
-          --delimiter $align.delimiter
-        #end if
-        -T $align.sTrimL
-        $align.includeReadsWN
-        $align.statsOnly
-        $align.ignoreQS
-      #end if
-      #if $str($bUnmappedRead) == "true" and $s.space == "color"
-        -u $unmappedReadOutCS
-      #elif $str($bUnmappedRead) == "true" and $s.space == "base"
-        -u $unmappedReadOut
-      #end if
-      -o $output
-      --outputFormat sam
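-      ## the tr/grep/sed chain below converts carriage returns, strips
-      ## non-printable characters, and keeps only PerM's mapping statistics on stdout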
-      --noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
-  </command>
-  <inputs>
-    <conditional name="s">
-      <param name="space" label="Is your data color space (SOLiD) or base space (Illumina)?" type="select">
-        <option value="color">Color space</option>
-        <option value="base">Base space</option>
-      </param>
-      <when value="color">
-        <conditional name="sourceOfRef">
-          <param name="refSource" label="Will you provide your own reference file from the history or use a built-in index?" type="select">
-            <option value="indexed">Built-in index</option>
-            <option value="history">Fasta file from history</option>
-          </param>
-          <when value="indexed">
-            <param name="index" type="select" label="Select a reference genome (with seed and read length)" help="if your genome of interest is not listed - contact Galaxy team">
-              <options from_data_table="perm_color_indexes"/>
-            </param>
-          </when>
-          <when value="history">
-            <param name="ref" format="fasta" type="data" label="Reference" />
-          </when>
-        </conditional>
-        <conditional name="mate">
-          <param name="singleOrPairs" label="Mate-paired?" type="select">
-            <option value="single">Single-end</option>
-            <option value="paired">Mate pairs</option>
-          </param>
-          <when value="single">
-            <param format="fastqcssanger" name="reads" type="data" label="Reads" />
-          </when>
-          <when value="paired">
-            <param name="reads1" format="fastqcssanger" label="Forward FASTQ file" type="data" />
-            <param name="reads2" format="fastqcssanger" label="Reverse FASTQ file" type="data" />
-            <param label="Upperbound of pairs separation (-U)" name="upperbound" type="integer" size="8" value="100000" />
-            <param label="Lowerbound of pairs separation (-L)" name="lowerbound" type="integer" size="8" value="0" />
-            <param label="Exclude ambiguous pairs (-e)" name="excludeAmbiguousPairs" type="boolean" checked="false" truevalue="-e" falsevalue="" />
-          </when>
-        </conditional>
-      </when>
-      <when value="base">
-        <conditional name="sourceOfRef">
-          <param name="refSource" label="Will you provide your own reference file from the history or use a built-in index?" type="select">
-            <option value="indexed">Built-in index</option>
-            <option value="history">Fasta file from history</option>
-          </param>
-          <when value="indexed">
-            <param name="index" type="select" label="Select a reference genome with seed and read length" help="if your genome of interest is not listed - contact Galaxy team">
-              <options from_data_table="perm_base_indexes"/>
-            </param>
-          </when>
-          <when value="history">
-            <param name="ref" format="fasta" type="data" label="Reference" />
-          </when>
-        </conditional>
-        <conditional name="mate">
-          <param name="singleOrPairs" label="Mate-paired?" type="select">
-            <option value="single">Single-end</option>
-            <option value="paired">Mate pairs</option>
-          </param>
-          <when value="single">
-            <param format="fastqsanger" name="reads" type="data" label="Reads" />
-          </when>
-          <when value="paired">
-            <param name="reads1" format="fastqsanger" label="Forward FASTQ file" type="data" />
-            <param name="reads2" format="fastqsanger" label="Reverse FASTQ file" type="data" />
-            <param label="Upperbound of pairs separation (-U)" name="upperbound" type="integer" size="8" value="100000" />
-            <param label="Lowerbound of pairs separation (-L)" name="lowerbound" type="integer" size="8" value="0" />
-            <param label="Exclude ambiguous pairs (-e)" name="excludeAmbiguousPairs" type="boolean" checked="false" truevalue="-e" falsevalue="" />
-          </when>
-        </conditional>
-      </when>
-    </conditional>
-    <param label="Maximum number of mismatches permitted in one end of full read (-v)" name="valAlign" type="integer" size="5" value="2" />
-    <conditional name="align">
-      <param help="Use default setting or specify full parameters list" label="PerM settings to use" name="options" type="select">
-        <option value="preSet">Commonly used</option>
-        <option value="full">Full parameter list</option>
-      </param>
-      <when value="preSet"/>
-      <when value="full">
-        <param label="Whether or not to report all valid alignments per read (-A/-B/-E)" name="alignments" type="select">
-          <option value="A">Report all valid alignments</option>
-          <option value="B">Report the best alignments in terms of number of mismatches</option>
-          <option value="E">Report only uniquely mapped reads</option>
-        </param>
-        <param label="Choose the seed full sensitive to different number of mismatches (--seed)" name="seed" type="select" >
-          <option value="F2">2 mismatches</option>
-          <option value="S11">1 SNP + 1 color error</option>
-          <option value="F3">3 mismatches</option>
-          <option value="F4">4 mismatches</option>
-        </param>
-        <param label="Choose the delimiter to identify read name (--delimiter)" name="delimiter" type="select">
-          <option value="None">Tab/Space/Comma</option>
-          <option value=":">Colon</option>
-          <option value="_">Underscore</option>
-        </param>
-        <param label="Use the first n bases of each read for alignment (-T)" name="sTrimL" type="integer" size="5" value="50" />
-        <param name="includeReadsWN" type="boolean" checked="true" truevalue="--includeReadsWN" falsevalue="" label="Include reads with 'N' or '.' by encoding '.' as 3, 'N' as 'A' (--includeReadsWN)" /> 
-        <param name="statsOnly" type="boolean" checked="false" truevalue="--statsOnly" falsevalue="" label="Output mapping stats only. Don't output alignments (--statsOnly)" />
-        <param name="ignoreQS" type="boolean" checked="false" truevalue="--ignoreQS" falsevalue="" label="Ignore quality scores (--ignoreQS)" />
-      </when>
-    </conditional> <!-- options -->
-    <param name="bUnmappedRead" type="select" label="Output the unmapped reads (-u)">
-      <option value="true">Yes</option>
-      <option value="false">No</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads" />
-    <data format="fastqsanger" name="unmappedReadOut" label="${tool.name} on ${on_string}: unmapped reads">
-      <filter>bUnmappedRead == "true" and s["space"] == "base"</filter>
-    </data>
-    <data format="fastqcssanger" name="unmappedReadOutCS" label="${tool.name} on ${on_string}: unmapped reads">
-      <filter>bUnmappedRead == "true" and s["space"] == "color"</filter>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <!--
-      PerM command:
-      PerM /afs/bx.psu.edu/depot/data/genome/phiX/perm_index/phiX_base_F3_50.index -1 test-data/perm_in1.fastqsanger -2 test-data/perm_in2.fastqsanger -U 100000 -L 0 -e +readFormat fastq -v 0 +seed F3 -A -T 50 +includeReadsWN -o perm_out1.sam +outputFormat sam +noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
-      You need to replace the + with 2 dashes.
-      -->
-      <param name="space" value="base" />
-      <param name="refSource" value="indexed" />
-      <param name="index" value="phiX_F3_50" />
-      <param name="singleOrPairs" value="paired" />
-      <param name="reads1" value="perm_in1.fastqsanger" ftype="fastqsanger" />
-      <param name="reads2" value="perm_in2.fastqsanger" ftype="fastqsanger" />
-      <param name="upperbound" value="100000" />
-      <param name="lowerbound" value="0" />
-      <param name="excludeAmbiguousPairs" value="true" />
-      <param name="valAlign" value="0" />
-      <param name="options" value="full" />
-      <param name="alignments" value="A" />
-      <param name="seed" value="F3" />
-      <param name="delimiter" value="None" />
-      <param name="sTrimL" value="50" />
-      <param name="includeReadsWN" value="true" />
-      <param name="statsOnly" value="false" />
-      <param name="ignoreQS" value="false" />
-      <param name="bUnmappedRead" value="false" />
-      <output name="output" file="perm_out1.sam" ftype="sam" />
-    </test>
-    <test>
-      <!--
-      PerM command:
-      PerM test-data/chr_m.fasta test-data/perm_in3.fastqsanger +readFormat fastq -v 2 -u perm_out3.fastqsanger -o perm_out2.sam +outputFormat sam +noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
-      You need to replace the + with 2 dashes.
-      -->
-      <param name="space" value="base" />
-      <param name="refSource" value="history" />
-      <param name="ref" value="chr_m.fasta" ftype="fasta" />
-      <param name="singleOrPairs" value="single" />
-      <param name="reads" value="perm_in3.fastqsanger" ftype="fastqsanger" />
-      <param name="valAlign" value="2" />
-      <param name="options" value="preSet" />
-      <param name="bUnmappedRead" value="true" />
-      <output name="output" file="perm_out2.sam" ftype="sam" />
-      <output name="unmappedReadOut" file="perm_out3.fastqsanger" ftype="fastqsanger" />
-    </test>
-    <test>
-      <!--
-      PerM command:
-      PerM test-data/phiX.fasta test-data/perm_in4.fastqcssanger +readFormat csfastq -v 1 -o perm_out4.sam +outputFormat sam +noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
-      You need to replace the + with 2 dashes.
-      -->
-      <param name="space" value="color" />
-      <param name="refSource" value="history" />
-      <param name="ref" value="phiX.fasta" ftype="fasta" />
-      <param name="singleOrPairs" value="single" />
-      <param name="reads" value="perm_in4.fastqcssanger" ftype="fastqcssanger" />
-      <param name="valAlign" value="1" />
-      <param name="options" value="preSet" />
-      <param name="bUnmappedRead" value="false" />
-      <output name="output" file="perm_out4.sam" ftype="sam" />
-    </test>
-    <test>
-      <!--
-      PerM command:
-      PerM /afs/bx.psu.edu/depot/data/genome/equCab2/perm_index/equCab2_chrM_color_F2_50.index -1 test-data/perm_in5.fastqcssanger -2 test-data/perm_in6.fastqcssanger -U 90000 -L 10000 +readFormat csfastq -v 3 +seed F2 -o perm_out5.sam +outputFormat sam +noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
-      You need to replace the + with 2 dashes.
-      -->
-      <param name="space" value="color" />
-      <param name="refSource" value="indexed" />
-      <param name="index" value="equCab2_chrM_F2_50" />
-      <param name="singleOrPairs" value="paired" />
-      <param name="reads1" value="perm_in5.fastqcssanger" ftype="fastqcssanger" />
-      <param name="reads2" value="perm_in6.fastqcssanger" ftype="fastqcssanger" />
-      <param name="upperbound" value="90000" />
-      <param name="lowerbound" value="10000" />
-      <param name="excludeAmbiguousPairs" value="false" />
-      <param name="valAlign" value="3" />
-      <param name="options" value="preSet" />
-      <param name="bUnmappedRead" value="false" />
-      <output name="output" file="perm_out5.sam" ftype="sam" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-PerM is a short read aligner designed to map long SOLiD reads to a whole genome or transcriptome at very high speed. It can be fully sensitive to alignments with up to four mismatches and highly sensitive to a higher number of mismatches.
-
-**Development team**
-
-PerM is developed by Ting Chen's group, Center of Excellence in Genomic Sciences at the University of Southern California. If you have any questions, please email yanghoch at usc.edu or check the `project page`__.
-
- .. __: http://code.google.com/p/perm/
-
-**Citation**
-
-PerM: Efficient mapping of short sequencing reads with periodic full sensitive spaced seeds. Bioinformatics, 2009, 25 (19): 2514-2521.
-
-**Input**
-
-The input files are read files and a reference. Users can use the pre-indexed reference in Galaxy or upload their own reference.
-
-The uploaded reference file should be in the fasta format. Multiple sequences, such as transcripts, should be concatenated, each separated by a header line that starts with the ">" character.
-
-Read files must be in either fastqsanger or fastqcssanger format to be used with PerM. However, there are several possible starting formats that can be converted to one of those two: fastq (any type), color-space fastq, fasta, csfasta, or csfasta+qualsolid.
-
-An uploaded base-space fastq file MUST be checked/transformed with the FASTQ Groomer tool in Galaxy to convert it to the fastqsanger format (this is true even if the original file is in Sanger format).
-
-Uploaded fasta and csfasta files without quality scores can be transformed to fastqsanger by the FASTQ Groomer, with pseudo quality scores added.
-
-An uploaded csfasta + qual pair can also be transformed into fastqcssanger by solid2fastq.
-
-**Outputs**
-
-The output mapping result is in SAM format, and has the following columns::
-
-    Column  Description
-  --------  --------------------------------------------------------
-   1 QNAME  Query (pair) NAME
-   2 FLAG   bitwise FLAG
-   3 RNAME  Reference sequence NAME
-   4 POS    1-based leftmost POSition/coordinate of clipped sequence
-   5 MAPQ   MAPping Quality (Phred-scaled)
-   6 CIGAR  extended CIGAR string
-   7 MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
-   8 MPOS   1-based Mate POSition
-   9 ISIZE  Inferred insert SIZE
-  10 SEQ    query SEQuence on the same strand as the reference
-  11 QUAL   query QUALity (ASCII-33 gives the Phred base quality)
-  12 OPT    variable OPTional fields in the format TAG:VTYPE:VALUE
-  12.1 NM   Number of mismatches (SOLiD-specific)
-  12.2 CS   Reads in color space (SOLiD-specific)
-  12.3 CQ   Base qualities in color space (SOLiD-specific)
-
-The flags are as follows::
-
-    Flag  Description
-  ------  -------------------------------------
-  0x0001  the read is paired in sequencing
-  0x0002  the read is mapped in a proper pair
-  0x0004  the query sequence itself is unmapped
-  0x0008  the mate is unmapped
-  0x0010  strand of the query (1 for reverse)
-  0x0020  strand of the mate
-  0x0040  the read is the first read in a pair
-  0x0080  the read is the second read in a pair
-  0x0100  the alignment is not primary
-
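-Since FLAG is a bitfield, individual properties can be tested with a bitwise AND (a plain Python sketch, purely illustrative)::
-
-  flag = 16                          # the FLAG column of a SAM line
-  is_unmapped = bool(flag & 0x0004)  # False
-  on_reverse  = bool(flag & 0x0010)  # True: mapped to the reverse strand
-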
-Here is some sample output::
-
-  Qname	FLAG	Rname	POS	MAPQ	CIGAR	MRNM	MPOS	ISIZE	SEQ	QUAL	NM	CS	CQ
-  491_28_332_F3   16      ref-1   282734  255     35M     *       0       0       AGTCAAACTCCGAATGCCAATGACTTATCCTTAGG    #%%%%%%%!!%%%!!%%%%%%%%!!%%%%%%%%%%      NM:i:3  CS:Z:C0230202330012130103100230121001212        CQ:Z:###################################
-  491_28_332_F3   16      ref-1   269436  255     35M     *       0       0       AGTCAAACTCCGAATGCCAATGACTTATCCTTAGG    #%%%%%%%!!%%%!!%%%%%%%%!!%%%%%%%%%%      NM:i:3  CS:Z:C0230202330012130103100230121001212        CQ:Z:###################################
-
-The user can check a checkbox for optional output containing the unmapped reads in fastqsanger or fastqcssanger. The default is to produce it.
-
-**PerM parameter list**
-
-Below is a list of PerM command line options. Not all of these are relevant to Galaxy's implementation, but they are included for completeness.
-
-The command for single-end::
-
-  PerM [ref_or_index] [read] [options]
-
-The command for paired-end::
-
-  PerM [ref_or_index] -1 [read1] -2 [read2] [options]
-
-The command-line options::
-
-  -A                Output all alignments within the given mismatch threshold, end-to-end.
-  -B                Output best alignments in terms of mismatches in the given mismatch threshold. [Default]
-  -E                Output only the uniquely mapped reads in the given mismatch threshold.
-  -m                Create the reference index, without reusing the saved index.
-  -s PATH           Save the reference index to accelerate the mapping in the future. If PATH is not specified, the default path will be used.
-  -v INT            Where INT is the number of mismatches allowed in one end. [Default=2]
-  -T INT            Where INT is the length to truncate reads to; e.g. 30 means use only the first 30 bases (signals). Leave blank to use the full read.
-  -o PATH           Where PATH is the output path for the mapping of one read set. PerM's output is in .mapping or .sam format, determined by the extension of PATH: -o out.sam outputs SAM, while -o out.mapping outputs the .mapping format.
-  -d PATH           Where PATH is the directory for multiple read sets.
-  -u PATH           Print the fastq file of those unmapped reads to the file in PATH.
-  --noSamHeader     Print no SAM header so it is convenient to concatenate multiple SAM output files.
-  --includeReadsWN  Include reads containing 'N' or '.', encoding 'N' as 'A' and '.' as 3.
-  --statsOnly       Output the mapping statistics in stdout only, without saving alignments to files.
-  --ignoreQS        Ignore the quality scores in fastq or QUAL files.
-  --seed {F2 | S11 | F3 | F4}    Specify the seed pattern, which has a specific full sensitivity. Check the algorithm page (link below) for seed patterns to balance the sensitivity and running time.
-  --readFormat {fasta | fastq | csfasta | csfastq}    Read in reads in the specified format, instead of guessing according to the extension name.
-  --delimiter CHAR  A character used as the delimiter to separate the read id from the additional info in the line with ">" in fasta or csfasta.
-
-Paired reads options::
-
-  -e        Exclude ambiguous pairs.
-  -L INT    Lower bound on mate-pair separation.
-  -U INT    Upper bound on mate-pair separation.
-  -1 PATH   The forward reads file path.
-  -2 PATH   The reverse reads file path.
-
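-Putting these together, a paired-end run allowing two mismatches per end might look like (file names are illustrative)::
-
-  PerM ref.fasta -1 left.fastq -2 right.fastq -L 200 -U 500 -v 2 --seed F2 -o out.sam
-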
-See the PerM `algorithm page`__ for information on algorithms and seeds.
-
- .. __: http://code.google.com/p/perm/wiki/Algorithms
-  </help>
-</tool>
--- a/tools/sr_mapping/bfast_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,344 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Runs BFAST on single-end or paired-end data.
-TODO: more documentation
-
-TODO: 
-    - auto-detect gzip or bz2
-    - split options (?)
-    - queue lengths (?)
-    - assumes reference always has been indexed
-    - main and secondary indexes
-    - scoring matrix file ?
-    - read group file ?
-
-usage: bfast_wrapper.py [options]
-    -r, --ref=r: The reference genome to use or index
-    -f, --fastq=f: The fastq file to use for the mapping
-    -F, --output=u: The file to save the output (SAM format)
-    -s, --fileSource=s: Whether to use a previously indexed reference sequence or one from history (indexed or history)
-    -p, --params=p: Parameter setting to use (pre_set or full)
-    -n, --numThreads=n: The number of threads to use
-    -A, --space=A: The encoding space (0: base 1: color)
-    -o, --offsets=o: The offsets for 'match'
-    -l, --loadAllIndexes=l: Load all indexes into memory
-    -k, --keySize=k: truncate key size in 'match'
-    -K, --maxKeyMatches=K: the maximum number of matches to allow before a key is ignored
-    -M, --maxNumMatches=M: the maximum number of matches to allow before the read is discarded
-    -w, --whichStrand=w: the strands to consider (0: both 1: forward 2: reverse)
-    -t, --timing=t: output timing information to stderr
-    -u, --ungapped=u: perform ungapped local alignment
-    -U, --unconstrained=U: perform local alignment without mask constraints
-    -O, --offset=O: the number of bases before and after each hit to consider in local alignment
-    -q, --avgMismatchQuality=q: average mismatch quality
-    -a, --algorithm=a: post processing algorithm (0: no filtering, 1: all passing filters, 2: unique, 3: best scoring unique, 4: best score all)
-    --unpaired: do not choose alignments based on pairing
-    --reverseStrand: paired end reads are given on reverse strands
-    --pairedEndInfer: break ties for paired end reads by estimating the insert size distribution
-    --randomBest: output a random best scoring alignment
-    -D, --dbkey=D: Dbkey for reference genome
-    -H, --suppressHeader=H: Suppress the sam header
-"""
-
-import optparse, os, shutil, subprocess, sys, tempfile
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to index and use' )
-    parser.add_option( '-f', '--fastq', dest='fastq', help='The fastq file to use for the mapping' )
-    parser.add_option( '-F', '--output', dest='output', help='The file to save the output (SAM format)' )
-    parser.add_option( '-A', '--space', dest='space', type="choice", default='0', choices=('0','1' ), help='The encoding space (0: base 1: color)' )
-    parser.add_option( '-H', '--suppressHeader', action="store_true", dest='suppressHeader', default=False, help='Suppress header' )
-    parser.add_option( '-n', '--numThreads', dest='numThreads', type="int", default="1", help='The number of threads to use' )
-    parser.add_option( '-t', '--timing', action="store_true", default=False, dest='timing', help='output timing information to stderr' )
-    parser.add_option( '-l', '--loadAllIndexes', action="store_true", default=False, dest='loadAllIndexes', help='Load all indexes into memory' )
-    parser.add_option( '-m', '--indexMask', dest='indexMask', help='String containing info on how to build custom indexes' )
-    parser.add_option( "-b", "--buildIndex", action="store_true", dest="buildIndex", default=False, help='String containing info on how to build custom indexes' )
-    parser.add_option( "--indexRepeatMasker", action="store_true", dest="indexRepeatMasker", default=False, help='Do not index lower case sequences. Such as those created by RepeatMasker' )
-    parser.add_option( '--indexContigOptions', dest='indexContigOptions', default="", help='The contig range options to use for the indexing' )
-    parser.add_option( '--indexExonsFileName', dest='indexExonsFileName', default="", help='The exons file to use for the indexing' )
-    
-    parser.add_option( '-o', '--offsets', dest='offsets', default="", help='The offsets for \'match\'' )
-    parser.add_option( '-k', '--keySize', dest='keySize', type="int", default="-1", help='truncate key size in \'match\'' )
-    parser.add_option( '-K', '--maxKeyMatches', dest='maxKeyMatches', type="int", default="-1", help='the maximum number of matches to allow before a key is ignored' )
-    parser.add_option( '-M', '--maxNumMatches', dest='maxNumMatches', type="int", default="-1", help='the maximum number of matches to allow before the read is discarded' )
-    parser.add_option( '-w', '--whichStrand', dest='whichStrand', type="choice", default='0', choices=('0','1','2'), help='the strands to consider (0: both 1: forward 2: reverse)' )
-    
-    parser.add_option( '--scoringMatrixFileName', dest='scoringMatrixFileName', help='Scoring Matrix file used to score the alignments' )
-    parser.add_option( '-u', '--ungapped', dest='ungapped', action="store_true", default=False, help='perform ungapped local alignment' )
-    parser.add_option( '-U', '--unconstrained', dest='unconstrained', action="store_true", default=False, help='perform local alignment without mask constraints' )
-    parser.add_option( '-O', '--offset', dest='offset', type="int", default="0", help='the number of bases before and after each hit to consider in local alignment' )
-    parser.add_option( '-q', '--avgMismatchQuality', type="int", default="-1", dest='avgMismatchQuality', help='average mismatch quality' )
-    
-    parser.add_option( '-a', '--algorithm', dest='algorithm', default='0', type="choice", choices=('0','1','2','3','4' ), help='post processing algorithm (0: no filtering, 1: all passing filters, 2: unique, 3: best scoring unique, 4: best score all)' )
-    parser.add_option( '--unpaired', dest='unpaired', action="store_true", default=False, help='do not choose alignments based on pairing' )
-    parser.add_option( '--reverseStrand', dest='reverseStrand', action="store_true", default=False, help='paired end reads are given on reverse strands' )
-    parser.add_option( '--pairedEndInfer', dest='pairedEndInfer', action="store_true", default=False, help='break ties for paired end reads by estimating the insert size distribution' )
-    parser.add_option( '--randomBest', dest='randomBest', action="store_true", default=False, help='output a random best scoring alignment' )
-    
-    (options, args) = parser.parse_args()
-
-    # output version # of tool
-    try:
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='bfast 2>&1', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'version' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( '%s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine BFAST version\n' )
-
-    buffsize = 1048576
-
-    # make temp directory for bfast, requires trailing slash
-    tmp_dir = '%s/' % tempfile.mkdtemp()
-    
-    #'generic' options used in all bfast commands here
-    if options.timing:
-        all_cmd_options = "-t"
-    else:
-        all_cmd_options = ""
-    
-    try:
-        if options.buildIndex:
-            reference_filepath = tempfile.NamedTemporaryFile( dir=tmp_dir, suffix='.fa' ).name
-            #build bfast indexes
-            os.symlink( options.ref, reference_filepath )
-            
-            #bfast fast2brg
-            try:
-                nuc_space = [ "0" ]
-                if options.space == "1":
-                    #color space localalign appears to require nuc space brg
-                    nuc_space.append( "1" )
-                for space in nuc_space:
-                    cmd = 'bfast fasta2brg -f "%s" -A "%s" %s' % ( reference_filepath, space, all_cmd_options )
-                    tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-                    tmp_stderr = open( tmp, 'wb' )
-                    proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-                    returncode = proc.wait()
-                    tmp_stderr.close()
-                    # get stderr, allowing for case where it's very large
-                    tmp_stderr = open( tmp, 'rb' )
-                    stderr = ''
-                    try:
-                        while True:
-                            stderr += tmp_stderr.read( buffsize )
-                            if not stderr or len( stderr ) % buffsize != 0:
-                                break
-                    except OverflowError:
-                        pass
-                    tmp_stderr.close()
-                    if returncode != 0:
-                        raise Exception, stderr
-            except Exception, e:
-                raise Exception, 'Error in \'bfast fasta2brg\'.\n' + str( e )
-            
-            #bfast index
-            try:
-                all_index_cmds = 'bfast index %s -f "%s" -A "%s" -n "%s"' % ( all_cmd_options, reference_filepath, options.space, options.numThreads )
-                
-                if options.indexRepeatMasker:
-                    all_index_cmds += " -R"
-                
-                if options.indexContigOptions:
-                    index_contig_options = map( int, options.indexContigOptions.split( ',' ) )
-                    if index_contig_options[0] >= 0:
-                        all_index_cmds += ' -s "%s"' % index_contig_options[0]
-                    if index_contig_options[1] >= 0:
-                        all_index_cmds += ' -S "%s"' % index_contig_options[1]
-                    if index_contig_options[2] >= 0:
-                        all_index_cmds += ' -e "%s"' % index_contig_options[2]
-                    if index_contig_options[3] >= 0:
-                        all_index_cmds += ' -E "%s"' % index_contig_options[3]
-                elif options.indexExonsFileName:
-                    all_index_cmds += ' -x "%s"' % options.indexExonsFileName
-                
-                index_count = 1
-                for mask, hash_width in [ mask.split( ':' ) for mask in options.indexMask.split( ',' ) ]:
-                    cmd = '%s -m "%s" -w "%s" -i "%i"' % ( all_index_cmds, mask, hash_width, index_count )
-                    tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-                    tmp_stderr = open( tmp, 'wb' )
-                    proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-                    returncode = proc.wait()
-                    tmp_stderr.close()
-                    # get stderr, allowing for case where it's very large
-                    tmp_stderr = open( tmp, 'rb' )
-                    stderr = ''
-                    try:
-                        while True:
-                            stderr += tmp_stderr.read( buffsize )
-                            if not stderr or len( stderr ) % buffsize != 0:
-                                break
-                    except OverflowError:
-                        pass
-                    tmp_stderr.close()
-                    if returncode != 0:
-                        raise Exception, stderr
-                    index_count += 1
-            except Exception, e:
-                raise Exception, 'Error in \'bfast index\'.\n' + str( e )
-            
-        else:
-            reference_filepath = options.ref
-        assert reference_filepath and os.path.exists( reference_filepath ), 'A valid genome reference was not provided.'
-        
-        # set up aligning and generate aligning command options
-        # set up temp output files
-        tmp_bmf = tempfile.NamedTemporaryFile( dir=tmp_dir )
-        tmp_bmf_name = tmp_bmf.name
-        tmp_bmf.close()
-        tmp_baf = tempfile.NamedTemporaryFile( dir=tmp_dir )
-        tmp_baf_name = tmp_baf.name
-        tmp_baf.close()
-        
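-        # BFAST runs as a three-stage pipeline: 'match' finds candidate
-        # alignment locations, 'localalign' aligns the reads at those
-        # locations, and 'postprocess' filters the alignments and emits SAM.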
-        bfast_match_cmd = 'bfast match -f "%s" -r "%s" -n "%s" -A "%s" -T "%s" -w "%s" %s' % ( reference_filepath, options.fastq, options.numThreads, options.space, tmp_dir, options.whichStrand, all_cmd_options )
-        bfast_localalign_cmd = 'bfast localalign -f "%s" -m "%s" -n "%s" -A "%s" -o "%s" %s' % ( reference_filepath, tmp_bmf_name, options.numThreads, options.space, options.offset, all_cmd_options )
-        bfast_postprocess_cmd = 'bfast postprocess -O 1 -f "%s" -i "%s" -n "%s" -A "%s" -a "%s" %s' % ( reference_filepath, tmp_baf_name, options.numThreads, options.space, options.algorithm, all_cmd_options )
-        
-        if options.offsets:
-            bfast_match_cmd += ' -o "%s"' % options.offsets
-        if options.keySize >= 0:
-            bfast_match_cmd += ' -k "%s"' % options.keySize
-        if options.maxKeyMatches >= 0:
-            bfast_match_cmd += ' -K "%s"' % options.maxKeyMatches
-        if options.maxNumMatches >= 0:
-            bfast_match_cmd += ' -M "%s"' % options.maxNumMatches
-            bfast_localalign_cmd += ' -M "%s"' % options.maxNumMatches
-        if options.scoringMatrixFileName:
-            bfast_localalign_cmd += ' -x "%s"' % options.scoringMatrixFileName
-            bfast_postprocess_cmd += ' -x "%s"' % options.scoringMatrixFileName
-        if options.ungapped:
-            bfast_localalign_cmd += ' -u'
-        if options.unconstrained:
-            bfast_localalign_cmd += ' -U'
-        if options.avgMismatchQuality >= 0:
-            bfast_localalign_cmd += ' -q "%s"' % options.avgMismatchQuality
-            bfast_postprocess_cmd += ' -q "%s"' % options.avgMismatchQuality
-        if options.algorithm == 3:
-            if options.pairedEndInfer:
-                bfast_postprocess_cmd += ' -P'
-            if options.randomBest:
-                bfast_postprocess_cmd += ' -z'
-        if options.unpaired:
-            bfast_postprocess_cmd += ' -U'
-        if options.reverseStrand:
-            bfast_postprocess_cmd += ' -R'
-        
-        #instead of using temp files, should we stream through pipes?
-        bfast_match_cmd += " > %s" % tmp_bmf_name
-        bfast_localalign_cmd += " > %s" % tmp_baf_name
-        bfast_postprocess_cmd += " > %s" % options.output
-        
-        # need to nest try-except in try-finally to handle 2.4
-        try:
-            # bfast 'match'
-            try:
-                tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-                tmp_stderr = open( tmp, 'wb' )
-                proc = subprocess.Popen( args=bfast_match_cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-                returncode = proc.wait()
-                tmp_stderr.close()
-                # get stderr, allowing for case where it's very large
-                tmp_stderr = open( tmp, 'rb' )
-                stderr = ''
-                try:
-                    while True:
-                        stderr += tmp_stderr.read( buffsize )
-                        if not stderr or len( stderr ) % buffsize != 0:
-                            break
-                except OverflowError:
-                    pass
-                tmp_stderr.close()
-                if returncode != 0:
-                    raise Exception, stderr
-            except Exception, e:
-                raise Exception, 'Error in \'bfast match\'. \n' + str( e )
-            # bfast 'localalign'
-            try:
-                tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-                tmp_stderr = open( tmp, 'wb' )
-                proc = subprocess.Popen( args=bfast_localalign_cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-                returncode = proc.wait()
-                tmp_stderr.close()
-                # get stderr, allowing for case where it's very large
-                tmp_stderr = open( tmp, 'rb' )
-                stderr = ''
-                try:
-                    while True:
-                        stderr += tmp_stderr.read( buffsize )
-                        if not stderr or len( stderr ) % buffsize != 0:
-                            break
-                except OverflowError:
-                    pass
-                tmp_stderr.close()
-                if returncode != 0:
-                    raise Exception, stderr
-            except Exception, e:
-                raise Exception, 'Error in \'bfast localalign\'. \n' + str( e )
-            # bfast 'postprocess'
-            try:
-                tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-                tmp_stderr = open( tmp, 'wb' )
-                proc = subprocess.Popen( args=bfast_postprocess_cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-                returncode = proc.wait()
-                tmp_stderr.close()
-                # get stderr, allowing for case where it's very large
-                tmp_stderr = open( tmp, 'rb' )
-                stderr = ''
-                try:
-                    while True:
-                        stderr += tmp_stderr.read( buffsize )
-                        if not stderr or len( stderr ) % buffsize != 0:
-                            break
-                except OverflowError:
-                    pass
-                tmp_stderr.close()
-                if returncode != 0:
-                    raise Exception, stderr
-            except Exception, e:
-                raise Exception, 'Error in \'bfast postprocess\'. \n' + str( e )
-            # remove header if necessary
-            if options.suppressHeader:
-                tmp_out = tempfile.NamedTemporaryFile( dir=tmp_dir)
-                tmp_out_name = tmp_out.name
-                tmp_out.close()
-                try:
-                    shutil.move( options.output, tmp_out_name )
-                except Exception, e:
-                    raise Exception, 'Error moving output file before removing headers. \n' + str( e )
-                fout = file( options.output, 'w' )
-                for line in file( tmp_out.name, 'r' ):
-                    if len( line ) < 3 or line[0:3] not in [ '@HD', '@SQ', '@RG', '@PG', '@CO' ]:
-                        fout.write( line )
-                fout.close()
-            # check that there are results in the output file
-            if os.path.getsize( options.output ) > 0:
-                if "0" == options.space:
-                    sys.stdout.write( 'BFAST run on Base Space data' )
-                else:
-                    sys.stdout.write( 'BFAST run on Color Space data' )
-            else:
-                raise Exception, 'The output file is empty. You may simply have no matches, or there may be an error with your input file or settings.'
-        except Exception, e:
-            stop_err( 'The alignment failed.\n' + str( e ) )
-    finally:
-        # clean up temp dir
-        if os.path.exists( tmp_dir ):
-            shutil.rmtree( tmp_dir )
-
-if __name__=="__main__": __main__()
--- a/tools/sr_mapping/bfast_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,379 +0,0 @@
-<tool id="bfast_wrapper" name="Map with BFAST" version="0.1.3">
-  <description></description>
-  <command interpreter="python">bfast_wrapper.py
-    --numThreads="4" ##HACK: hardcode numThreads for now, should come from a location file
-    --fastq="$input1"
-    #if $input1.extension.startswith( "fastqcs" ):
-        ##if extension starts with fastqcs, then we have a color space file
-        --space="1" ##color space
-    #else
-        --space="0"
-    #end if
-    --output="$output"
-    $suppressHeader
-    
-    #if $refGenomeSource.refGenomeSource_type == "history":
-      ##build indexes on the fly
-      --buildIndex
-      --ref="${refGenomeSource.ownFile}"
-      --indexMask="${",".join( [ "%s:%s" % ( str( custom_index.get( 'mask' ) ).strip(), str( custom_index.get( 'hash_width' ) ).strip() ) for custom_index in $refGenomeSource.custom_index ] )}"
-      ${refGenomeSource.indexing_repeatmasker}
-      #if $refGenomeSource.indexing_option.indexing_option_selector == "contig_offset":
-        --indexContigOptions="${refGenomeSource.indexing_option.start_contig},${refGenomeSource.indexing_option.start_pos},${refGenomeSource.indexing_option.end_contig},${refGenomeSource.indexing_option.end_pos}"
-      #elif $refGenomeSource.indexing_option.indexing_option_selector == "exons_file":
-        --indexExonsFileName="${refGenomeSource.indexing_option.exons_file}"
-      #end if
-    #else:
-      ##use precomputed indexes
-      --ref="${ refGenomeSource.indices.fields.path }"
-    #end if
-    
-    #if $params.source_select == "full":
-      --offsets="$params.offsets"
-      --keySize="$params.keySize"
-      --maxKeyMatches="$params.maxKeyMatches"
-      --maxNumMatches="$params.maxNumMatches"
-      --whichStrand="$params.whichStrand"
-      
-      #if str( $params.scoringMatrixFileName ) != 'None':
-        --scoringMatrixFileName="$params.scoringMatrixFileName"
-      #end if
-      ${params.ungapped}
-      ${params.unconstrained}
-      --offset="${params.offset}"
-      --avgMismatchQuality="${params.avgMismatchQuality}"
-      
-      --algorithm="${params.localalign_params.algorithm}"
-      ${params.unpaired}
-      ${params.reverseStrand}
-      #if $params.localalign_params.algorithm == "3":
-        ${params.localalign_params.pairedEndInfer}
-        ${params.localalign_params.randomBest}
-      #end if
-    #end if
-  </command>
-  <inputs>
-    <param name="input1" type="data" format="fastqsanger,fastqcssanger" label="FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/>
-    <conditional name="refGenomeSource">
-      <param name="refGenomeSource_type" type="select" label="Will you select a reference genome from your history or use a built-in index?">
-        <option value="indexed">Use a built-in index</option>
-        <option value="history">Use one from the history</option>
-      </param>
-      <when value="indexed">
-        <param name="indices" type="select" label="Select a reference genome index set">
-          <options from_data_table="bfast_indexes">
-            <filter type="multiple_splitter" column="2" separator=","/>
-            <filter type="param_value" column="2" ref="input1" ref_attribute="extension"/>
-            <filter type="sort_by" column="3"/>
-            <validator type="no_options" message="No indexes are available for the selected input dataset"/>
-          </options>
-        </param>
-      </when>
-      <when value="history">
-        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
-        <repeat name="custom_index" title="Custom indice" min="1" >
-            <param name="mask" type="text" value="" label="Specify the mask" size="20">
-              <!-- TODO: validate that the mask is a string of 0s and 1s -->
-            </param>
-            <param name="hash_width" type="integer" value="" label="Hash Width" />
-        </repeat>
-        <param name="indexing_repeatmasker" type="boolean" truevalue="--indexRepeatMasker" falsevalue="" checked="False" label="Do not index lower case sequences" help="Such as those created by RepeatMasker"/>
-        <conditional name="indexing_option">
-          <param name="indexing_option_selector" type="select" label="BFAST indexing settings to use" help="For most indexing needs use default settings. If you want full control use the other options.">
-            <option value="default">Default</option>
-            <option value="contig_offset">Contig Offset</option>
-            <option value="exons_file">Exons file</option>
-          </param>
-          <when value="default">
-            <!-- nothing here -->
-          </when>
-          <when value="contig_offset">
-            <param name="start_contig" type="integer" value="-1" label="Start Contig" help="Specifies the first contig to include when building indexes. (advanced users only)" />
-            <param name="start_pos" type="integer" value="-1" label="Start Position" help="Specifies the first position in the first contig to include when building indexes. (advanced users only)" />
-            <param name="end_contig" type="integer" value="-1" label="End Contig" help="Specifies the last contig to include when building indexes. (advanced users only)" />
-            <param name="end_pos" type="integer" value="-1" label="End Position" help="Specifies the last position in the last contig to include when building indexes. (advanced users only)" />
-          </when>
-          <when value="exons_file">
-            <param name="exons_file" type="data" format="tabular" label="Select an exons file from history" help="See BFAST manual for file format requirements. (advanced users only)"/>
-          </when>
-        </conditional>
-      </when>
-    </conditional>
-    <conditional name="params">
-      <param name="source_select" type="select" label="BFAST matching settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
-        <option value="pre_set">Commonly Used</option>
-        <option value="full">Full Parameter List</option>
-      </param>
-      <when value="pre_set">
-        <!-- nothing here -->
-      </when>
-      <when value="full">
-        <param name="offsets" type="text" value="" label="The offsets for 'bfast match'" help="Set if not all offsets from the 5' end of the read are to be examined (advanced users only)" />
-        <param name="keySize" type="integer" value="-1" label="Truncate key size in 'match'" help="Set this to reduce the effective key size of all indexes in 'bfast match' (advanced users only)" />
-        <param name="maxKeyMatches" type="integer" value="8" label="The maximum number of matches to allow before a key is ignored" help="Lower values will result in more unique regions being examined, while larger values will allow include repetitive regions" />
-        <param name="maxNumMatches" type="integer" value="384" label="The maximum number of matches to allow before a read is discarded" help="Larger values will allow more hits to be examined" />
-        <param name="whichStrand" type="select" label="The strands to consider" help="Both strands, forward strand only, or reverse strand only">
-          <option value="0">Both strands</option>
-          <option value="1">Forward strand only</option>
-          <option value="2">Reverse strand only</option>
-        </param>
-        
-        <param name="scoringMatrixFileName" type="data" format="text" optional="True" label="Scoring Matrix file used to score the alignments" help="See BFAST manual for file format requirements. (advanced users only)"/>
-        <param name="ungapped" type="boolean" truevalue="--ungapped" falsevalue="" checked="no" label="Perform ungapped local alignment" help="Performing ungapped local alignment will not consider indels while providing a significant speed increase" />
-        <param name="unconstrained" type="boolean" truevalue="--unconstrained" falsevalue="" checked="no" label="Perform unconstrained local alignment" help="Performing unconstrained local alignment will not use mask constraints at the cost of speed" />
-        <param name="offset" type="integer" value="20" label="The number of bases before and after each hit to consider in local alignment" help="Larger values will allow for larger insertions and deletions to be detected at the cost of speed" />
-        <param name="avgMismatchQuality" type="integer" value="10" label="The average mismatch quality" help="This can be used as a scaling factor for mapping quality (advanced users only)" />
-        
-        <conditional name="localalign_params">
-          <param name="algorithm" type="select" label="The post processing algorithm" help="This determines how reads with multiple candidate alignments are returned.  Unique alignments will return an alignment if the read has only one candidate alignment.  Uniquely best scoring alignments will return one alignment for a read if that alignment has a better alignment score than the rest of the candidate alignments.  All best scoring alignments will return all alignments that have the best alignment score for a read.">
-              <option value="0" selected="True">No filtering</option>
-              <option value="1">All alignments that pass filtering</option>
-              <option value="2">Unique alignments</option>
-              <option value="3">Uniquely best scoring alignments</option>
-              <option value="4">All best scoring alignments</option>
-          </param>
-          <when value="0">
-            <!-- nothing here -->
-          </when>
-          <when value="1">
-            <!-- nothing here -->
-          </when>
-          <when value="2">
-            <!-- nothing here -->
-          </when>
-          <when value="4">
-            <!-- nothing here -->
-          </when>
-          <when value="3">
-            <param name="pairedEndInfer" type="boolean" truevalue="--pairedEndInfer" falsevalue="" checked="no" label="pairedEndInfer" help="break ties when one end of a paired end read by estimating the insert size distribution" />
-            <param name="randomBest" type="boolean" truevalue="--randomBest" falsevalue="" checked="no" label="Random alignments" help="output a random best scoring alignment (advanced users only)" />
-          </when>
-        </conditional>
-        <param name="unpaired" type="boolean" truevalue="--unpaired" falsevalue="" checked="no" label="Disallow pairing" help="do not choose alignments based on pairing" />
-        <param name="reverseStrand" type="boolean" truevalue="--reverseStrand" falsevalue="" checked="no" label="Reverse paired ends" help="paired end reads are given on reverse strands" />
-        
-      </when>
-    </conditional>
-    <param name="suppressHeader" type="boolean" truevalue="--suppressHeader" falsevalue="" checked="False" label="Suppress the header in the output SAM file" help="BFAST produces SAM with several lines of header information" />
-  </inputs>
-  <outputs>
-    <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
-      <actions>
-        <conditional name="refGenomeSource.refGenomeSource_type">
-          <when value="indexed">
-            <action type="metadata" name="dbkey">
-              <option type="from_data_table" column="1" name="bfast_indexes">
-                <filter type="param_value" ref="refGenomeSource.indices" column="0" />
-              </option>
-            </action>
-          </when>
-          <when value="history">
-            <action type="metadata" name="dbkey">
-              <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-  </outputs>
-  <help>
-**What it does**
-
-BFAST facilitates the fast and accurate mapping of short reads to reference sequences. Some advantages of BFAST include:
-
-* Speed: enables billions of short reads to be mapped quickly.
-* Accuracy: a priori probabilities for mapping reads with a defined set of variants.
-* An easy way to measurably tune accuracy at the expense of speed.
-
-Specifically, BFAST was designed to facilitate whole-genome resequencing, where mapping billions of short reads with variants is of utmost importance.
-
-BFAST supports both Illumina and ABI SOLiD data, as well as data from other next-generation sequencing technologies (454, Helicos), with particular emphasis on sensitivity to errors, SNPs, and especially indels. Other algorithms take shortcuts, ignoring errors and certain types of variants (indels) or requiring a further alignment step, all in order to be the "fastest" (but still not complete). BFAST can instead be tuned to find variants regardless of the error rate, polymorphism rate, or other factors.
-
-------
-
-Please cite the website "http://bfast.sourceforge.net" as well as the accompanying 
-papers:
-
-Homer N, Merriman B, Nelson SF.
-BFAST: An alignment tool for large scale genome resequencing.
-PLoS ONE. 2009 4(11): e7767.
-PMID: 19907642
-http://dx.doi.org/10.1371/journal.pone.0007767
-
-Homer N, Merriman B, Nelson SF.
-Local alignment of two-base encoded DNA sequence.
-BMC Bioinformatics. 2009 Jun 9;10(1):175.
-PMID: 19508732 
-http://dx.doi.org/10.1186/1471-2105-10-175
-
-------
-
-**Know what you are doing**
-
-.. class:: warningmark
-
-There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words: running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
-
-.. __: http://bfast.sourceforge.net/
-
-------
-
-**Input formats**
-
-BFAST accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files.
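-
-Sanger FASTQ encodes one Phred quality score per base as the ASCII code of the quality character minus 33. As a quick illustration (a standalone Python sketch, not part of this tool), a fragment of the quality string from the first example read shown below decodes like this::
-
-  # Decode a Sanger (Phred+33) quality string into integer Phred scores.
-  qual = "hhhhhhh;;hh"               # fragment of a Sanger quality string
-  scores = [ord(c) - 33 for c in qual]
-  print(scores)                      # [71, 71, 71, 71, 71, 71, 71, 26, 26, 71, 71]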
-
-------
-
-**Outputs**
-
-The output is in SAM format, and has the following columns::
-
-    Column  Description
-  --------  --------------------------------------------------------
-   1 QNAME  Query (pair) NAME
-   2 FLAG   bitwise FLAG
-   3 RNAME  Reference sequence NAME
-   4 POS    1-based leftmost POSition/coordinate of clipped sequence
-   5 MAPQ   MAPping Quality (Phred-scaled)
-   6 CIGAR  extended CIGAR string
-   7 MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
-   8 MPOS   1-based Mate POSition
-   9 ISIZE  Inferred insert SIZE
-  10 SEQ    query SEQuence on the same strand as the reference
-  11 QUAL   query QUALity (ASCII-33 gives the Phred base quality)
-  12 OPT    variable OPTional fields in the format TAG:VTYPE:VALUE
-
-The flags are as follows::
-
-    Flag  Description
-  ------  -------------------------------------
-  0x0001  the read is paired in sequencing
-  0x0002  the read is mapped in a proper pair
-  0x0004  the query sequence itself is unmapped
-  0x0008  the mate is unmapped
-  0x0010  strand of the query (1 for reverse)
-  0x0020  strand of the mate
-  0x0040  the read is the first read in a pair
-  0x0080  the read is the second read in a pair
-  0x0100  the alignment is not primary
-
-It looks like this (scroll sideways to see the entire example)::
-
-  QNAME  FLAG  RNAME  POS  MAPQ  CIGAR  MRNM  MPOS  ISIZE  SEQ  QUAL  OPT
-  HWI-EAS91_1_30788AAXX:1:1:1761:343  4  *  0  0  *  *  0  0  AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG  hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
-  HWI-EAS91_1_30788AAXX:1:1:1578:331  4  *  0  0  *  *  0  0  GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG  hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
-
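-The FLAG column is a bitmask, so individual flags are tested with a bitwise AND. A minimal Python sketch (illustrative only, not part of this tool) that decodes a FLAG value using the table above::
-
-  # Map each SAM flag bit to its meaning, then report the bits that are set.
-  FLAGS = {
-      0x0001: "paired",          0x0002: "proper pair",
-      0x0004: "unmapped",        0x0008: "mate unmapped",
-      0x0010: "reverse strand",  0x0020: "mate reverse strand",
-      0x0040: "first in pair",   0x0080: "second in pair",
-      0x0100: "not primary",
-  }
-
-  def decode_flag(flag):
-      return [name for bit, name in sorted(FLAGS.items()) if flag & bit]
-
-  print(decode_flag(4))   # ['unmapped'] -- both example reads above
-  print(decode_flag(99))  # ['paired', 'proper pair', 'mate reverse strand', 'first in pair']
-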
--------
-
-**BFAST settings**
-
-All of the options have a default value that you can change. Most of the options in BFAST have been implemented here.
-
-------
-
-**BFAST parameter list**
-
-This is an exhaustive list of BFAST options:
-
-For **match**::
-
-  -o  STRING  Specifies the offset [Use all]
-  -l          Specifies to load all main or secondary indexes into memory
-  -A  INT     0: NT space 1: Color space [0]
-  -k  INT     Specifies to truncate all indexes to have the given key size
-              (must be greater than the hash width) [Not Using]
-  -K  INT     Specifies the maximum number of matches to allow before a key
-              is ignored [8]
-  -M  INT     Specifies the maximum total number of matches to consider
-              before the read is discarded [384]
-  -w  INT     0: consider both strands 1: forward strand only 2: reverse
-              strand only [0]
-  -n  INT     Specifies the number of threads to use [1]
-  -t          Specifies to output timing information
-
-For **localalign**::
-
-  -x  FILE    Specifies the file name storing the scoring matrix
-  -u          Do ungapped local alignment (the default is gapped)
-  -U          Do not use mask constraints from the match step
-  -A  INT     0: NT space 1: Color space [0]
-  -o  INT     Specifies the number of bases before and after the match to
-              include in the reference genome
-  -M  INT     Specifies the maximum total number of matches to consider
-              before the read is discarded [384]
-  -q  INT     Specifies the average mismatch quality
-  -n  INT     Specifies the number of threads to use [1]
-  -t          Specifies to output timing information
-
-For **postprocess**::
-
-  -a  INT     Specifies the algorithm to choose the alignment for each end of the read:
-
-    0: No filtering will occur
-    1: All alignments that pass the filters will be output
-    2: Only consider reads that have been aligned uniquely
-    3: Choose uniquely the alignment with the best score
-    4: Choose all alignments with the best score
-
-  -A  INT     0: NT space 1: Color space [0]
-  -U          Specifies that pairing should not be performed
-  -R          Specifies that paired reads are on opposite strands
-  -q  INT     Specifies the average mismatch quality
-  -x  FILE    Specifies the file name storing the scoring matrix
-  -z          Specifies to output a random best scoring alignment (with -a 3)
-  -r  FILE    Specifies to add the RG in the specified file to the SAM
-              header and updates the RG tag (and LB/PU tags if present) in
-              the reads (SAM only)
-  -n  INT     Specifies the number of threads to use [1]
-  -t          Specifies to output timing information
-
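-A typical run chains the three steps above. The sketch below (Python, illustrative only) shows the general shape; the input-file flags (-f for the reference FASTA, -r for reads, -m for the match file, -i for the alignment file) are assumptions taken from the BFAST manual -- check the usage of ``bfast match``, ``bfast localalign`` and ``bfast postprocess`` on your installation. The -A, -n and -a values are the options documented above::
-
-  # Hypothetical three-stage BFAST pipeline: match -> localalign -> postprocess.
-  import subprocess
-
-  def run(cmd):
-      print(cmd)
-      subprocess.check_call(cmd, shell=True)
-
-  run("bfast match -f ref.fa -A 0 -n 4 -r reads.fastq > reads.bmf")
-  run("bfast localalign -f ref.fa -A 0 -n 4 -m reads.bmf > reads.baf")
-  run("bfast postprocess -f ref.fa -A 0 -a 3 -i reads.baf > reads.sam")
-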
-  </help>
-  <requirements>
-    <requirement type="package">bfast</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="input1" ftype="fastqsanger" value="random_phiX_1.fastqsanger" />
-      <param name="refGenomeSource_type" value="history" />
-      <param name="ownFile" ftype="fasta" value="phiX.fasta" />
-      <param name="mask" value="111111111111111111" />
-      <param name="hash_width" value="14" />
-      <param name="source_select" value="pre_set" />
-      <param name="indexing_repeatmasker" value="False" />
-      <param name="indexing_option_selector" value="default" />
-      <param name="suppressHeader" value="" />
-      <output name="output" ftype="sam" file="bfast_out1.sam" />
-    </test>
-    <test>
-      <param name="input1" ftype="fastqsanger" value="random_phiX_1.fastqsanger"/>
-      <param name="refGenomeSource_type" value="history" />
-      <param name="ownFile" ftype="fasta" value="phiX.fasta" />
-      <param name="mask" value="111111111111111111" />
-      <param name="hash_width" value="14" />
-      <param name="source_select" value="pre_set" />
-      <param name="indexing_repeatmasker" value="False" />
-      <param name="indexing_option_selector" value="default" />
-      <param name="suppressHeader" value="--suppressHeader" />
-      <output name="output" ftype="sam" file="bfast_out1.sam" lines_diff="3" /><!--  3 headers exist in compare file, but headers are suppressed -->
-    </test>
-    <test>
-      <param name="input1" ftype="fastqcssanger" value="random_phiX_1.fastqcssanger" />
-      <param name="refGenomeSource_type" value="history" />
-      <param name="ownFile" ftype="fasta" value="phiX.fasta" />
-      <param name="mask" value="111111111111111111" />
-      <param name="hash_width" value="14" />
-      <param name="source_select" value="pre_set" />
-      <param name="indexing_repeatmasker" value="False" />
-      <param name="indexing_option_selector" value="default" />
-      <param name="suppressHeader" value="" />
-      <output name="output" ftype="sam" file="bfast_out2.sam" />
-    </test>
-    <!-- test of pre-indexed data now -->
-    <test>
-      <param name="input1" ftype="fastqsanger" value="random_phiX_1.fastqsanger" />
-      <param name="refGenomeSource_type" value="indexed" />
-      <param name="indices" value="phiX_nt_50" />
-      <param name="source_select" value="pre_set" />
-      <param name="suppressHeader" value="" />
-      <output name="output" ftype="sam" file="bfast_out3.sam" lines_diff="2" /><!-- MD:Z:11T38 instead of MD:Z:50 on one line-->
-    </test>
-  </tests>
-</tool>
--- a/tools/sr_mapping/bowtie_color_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,714 +0,0 @@
-<tool id="bowtie_color_wrapper" name="Map with Bowtie for SOLiD" version="1.1.2">
-  <requirements><requirement type="package">bowtie</requirement></requirements>
-  <description></description>
-  <command interpreter="python">
-    bowtie_wrapper.py 
-    ## Hackish setting of number of threads
-    --threads="4"
-    ## Outputs
-      --output=$output
-      #if str( $singlePaired.sPaired ) == "single"
-        #if $output_unmapped_reads_l
-          --output_unmapped_reads=$output_unmapped_reads_l
-        #end if
-        #if $output_suppressed_reads_l
-          --output_suppressed_reads=$output_suppressed_reads_l
-        #end if
-      #else
-        #if $output_unmapped_reads_l and $output_unmapped_reads_r
-          --output_unmapped_reads_l=$output_unmapped_reads_l
-          --output_unmapped_reads_r=$output_unmapped_reads_r
-        #end if
-        #if $output_suppressed_reads_l and $output_suppressed_reads_r
-          --output_suppressed_reads_l=$output_suppressed_reads_l
-          --output_suppressed_reads_r=$output_suppressed_reads_r
-        #end if
-      #end if
-    ## Inputs
-    --dataType="solid"
-    --suppressHeader=$suppressHeader 
-    --genomeSource=$refGenomeSource.genomeSource
-    #if $refGenomeSource.genomeSource == "history":
-      ##index already exists
-      #if $refGenomeSource.ownFile.extension.startswith( 'bowtie_' ):
-        ##user previously built
-        --ref="${refGenomeSource.ownFile.extra_files_path}/${refGenomeSource.ownFile.metadata.base_name}"
-        --do_not_build_index
-      #else:
-        ##build index on the fly
-        --ref=$refGenomeSource.ownFile
-        --indexSettings=$refGenomeSource.indexParams.indexSettings
-        #if $refGenomeSource.indexParams.indexSettings == "indexFull":
-          --iautoB=$refGenomeSource.indexParams.autoBehavior.autoB
-          #if $refGenomeSource.indexParams.autoBehavior.autoB == "set":
-            --ipacked=$refGenomeSource.indexParams.autoBehavior.packed
-            --ibmax=$refGenomeSource.indexParams.autoBehavior.bmax
-            --ibmaxdivn=$refGenomeSource.indexParams.autoBehavior.bmaxdivn
-            --idcv=$refGenomeSource.indexParams.autoBehavior.dcv
-          #end if
-          --inodc=$refGenomeSource.indexParams.nodc
-          --inoref=$refGenomeSource.indexParams.noref
-          --ioffrate=$refGenomeSource.indexParams.offrate
-          --iftab=$refGenomeSource.indexParams.ftab
-          --intoa=$refGenomeSource.indexParams.ntoa
-          --iendian=$refGenomeSource.indexParams.endian
-          --iseed=$refGenomeSource.indexParams.seed
-          --icutoff=$refGenomeSource.indexParams.cutoff
-        #end if
-      #end if
-    #else
-      ##use pre-built index
-      --ref="${ filter( lambda x: str( x[0] ) == str( $refGenomeSource.index ), $__app__.tool_data_tables[ 'bowtie_indexes_color' ].get_fields() )[0][-1] }"
-    #end if
-    --paired=$singlePaired.sPaired
-    #if $singlePaired.sPaired == "single":
-      --input1=$singlePaired.sInput1
-      --params=$singlePaired.sParams.sSettingsType
-      #if $singlePaired.sParams.sSettingsType == "full":
-        --skip=$singlePaired.sParams.sSkip
-        --alignLimit=$singlePaired.sParams.sAlignLimit
-        --trimH=$singlePaired.sParams.sTrimH
-        --trimL=$singlePaired.sParams.sTrimL
-        --mismatchSeed=$singlePaired.sParams.sMismatchSeed
-        --mismatchQual=$singlePaired.sParams.sMismatchQual
-        --seedLen=$singlePaired.sParams.sSeedLen
-        --rounding=$singlePaired.sParams.sRounding
-        --maqSoapAlign=$singlePaired.sParams.sMaqSoapAlign
-        --tryHard=$singlePaired.sParams.sTryHard
-        --valAlign=$singlePaired.sParams.sValAlign
-        --allValAligns=$singlePaired.sParams.sAllValAligns
-        --suppressAlign=$singlePaired.sParams.sSuppressAlign
-        --best=$singlePaired.sParams.sBestOption.sBest
-        #if $singlePaired.sParams.sBestOption.sBest == "doBest":
-          --maxBacktracks=$singlePaired.sParams.sBestOption.sdMaxBacktracks
-          --strata=$singlePaired.sParams.sBestOption.sdStrata
-        #else:
-          --maxBacktracks=$singlePaired.sParams.sBestOption.snMaxBacktracks
-        #end if
-        --offrate=$singlePaired.sParams.sOffrate
-        --seed=$singlePaired.sParams.sSeed
-        --snpphred=$singlePaired.sParams.sSnpphred
-        --snpfrac=$singlePaired.sParams.sSnpfrac
-        --keepends=$singlePaired.sParams.sKeepends
-      #end if
-    #else:
-      --input1=$singlePaired.pInput1
-      --input2=$singlePaired.pInput2
-      --maxInsert=$singlePaired.pMaxInsert
-      --mateOrient=$singlePaired.pMateOrient
-      --params=$singlePaired.pParams.pSettingsType
-      #if $singlePaired.pParams.pSettingsType == "full":
-        --skip=$singlePaired.pParams.pSkip
-        --alignLimit=$singlePaired.pParams.pAlignLimit
-        --trimH=$singlePaired.pParams.pTrimH
-        --trimL=$singlePaired.pParams.pTrimL
-        --mismatchSeed=$singlePaired.pParams.pMismatchSeed
-        --mismatchQual=$singlePaired.pParams.pMismatchQual
-        --seedLen=$singlePaired.pParams.pSeedLen
-        --rounding=$singlePaired.pParams.pRounding
-        --maqSoapAlign=$singlePaired.pParams.pMaqSoapAlign
-        --minInsert=$singlePaired.pParams.pMinInsert
-        --maxAlignAttempt=$singlePaired.pParams.pMaxAlignAttempt
-        --forwardAlign=$singlePaired.pParams.pForwardAlign
-        --reverseAlign=$singlePaired.pParams.pReverseAlign
-        --tryHard=$singlePaired.pParams.pTryHard
-        --valAlign=$singlePaired.pParams.pValAlign
-        --allValAligns=$singlePaired.pParams.pAllValAligns
-        --suppressAlign=$singlePaired.pParams.pSuppressAlign
-        --best=$singlePaired.pParams.pBestOption.pBest
-        #if $singlePaired.pParams.pBestOption.pBest == "doBest":
-          --maxBacktracks=$singlePaired.pParams.pBestOption.pdMaxBacktracks
-          --strata=$singlePaired.pParams.pBestOption.pdStrata
-        #else:
-          --maxBacktracks=$singlePaired.pParams.pBestOption.pnMaxBacktracks
-        #end if
-        --offrate=$singlePaired.pParams.pOffrate
-        --seed=$singlePaired.pParams.pSeed
-        --snpphred=$singlePaired.pParams.pSnpphred
-        --snpfrac=$singlePaired.pParams.pSnpfrac
-        --keepends=$singlePaired.pParams.pKeepends
-      #end if
-    #end if
-  </command>
-  <inputs>
-    <conditional name="refGenomeSource">
-      <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-        <option value="indexed">Use a built-in index</option>
-        <option value="history">Use one from the history</option>
-      </param>
-      <when value="indexed">
-        <param name="index" type="select" label="Select the reference genome" help="if your genome of interest is not listed - contact Galaxy team">
-          <options from_data_table="bowtie_indexes_color">
-            <filter type="sort_by" column="2" />
-            <validator type="no_options" message="No indexes are available" />
-          </options>
-        </param>
-      </when>
-      <when value="history">
-        <param name="ownFile" type="data" format="bowtie_color_index,fasta" metadata_name="dbkey" label="Select the reference genome" />
-        <conditional name="indexParams">
-          <param name="indexSettings" type="select" label="Choose whether to use Default options for building indices or to Set your own" help="These settings are ignored when using a prebuilt index">
-            <option value="indexPreSet">Default</option>
-            <option value="indexFull">Set your own</option>
-          </param>
-          <when value="indexPreSet" />
-          <when value="indexFull">
-            <conditional name="autoBehavior">
-              <param name="autoB" type="select" label="Choose to use automatic or specified behavior for some parameters (-a)" help="Allows you to set --packed, --bmax, --bmaxdivn, and --dcv">
-                <option value="auto">Automatic behavior</option>
-                <option value="set">Set values (sets --noauto and allows others to be set)</option>
-              </param>
-              <when value="auto" />
-              <when value="set">
-                <param name="packed" type="select" label="Whether or not to use a packed representation for DNA strings (--packed)">
-                  <option value="unpacked">Use regular representation</option>
-                  <option value="packed">Use packed representation</option>
-                </param>
-                <param name="bmax" type="integer" value="-1" label="Maximum number of suffixes allowed in a block (--bmax)" help="-1 for not specified. Must be at least 1" />
-                <param name="bmaxdivn" type="integer" value="4" label="Maximum number of suffixes allowed in a block as a fraction of the length of the reference (--bmaxdivn)" />
-                <param name="dcv" type="integer" value="1024" label="The period for the difference-cover sample (--dcv)" />
-              </when>
-            </conditional>
-            <param name="nodc" type="select" label="Whether or not to disable the use of the difference-cover sample (--nodc)" help="Suffix sorting becomes quadratic-time in the worst case (with a very repetitive reference)">
-              <option value="dc">Use difference-cover sample</option>
-              <option value="nodc">Disable difference-cover sample</option>
-            </param>
-            <param name="noref" type="select" label="Whether or not to build the part of the reference index used only in paired-end alignment (-r)">
-              <option value="ref">Build all index files</option>
-              <option value="noref">Do not build paired-end alignment index files</option>
-            </param>
-            <param name="offrate" type="integer" value="5" label="How many rows get marked during annotation of some or all of the Burrows-Wheeler rows (-o)" />
-            <param name="ftab" type="integer" value="10" label="The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query (-t)" help="ftab is 4^(n+1) bytes" />
-            <param name="ntoa" type="select" label="Whether or not to convert Ns in the reference sequence to As (--ntoa)">
-              <option value="no">Do not convert Ns</option>
-              <option value="yes">Convert Ns to As</option>
-            </param>
-            <param name="endian" type="select" label="Endianness to use when serializing integers to the index file (--big/--little)" help="Little is most appropriate for Intel- and AMD-based architecture">
-              <option value="little">Little</option>
-              <option value="big">Big</option>
-            </param> 
-            <param name="seed" type="integer" value="-1" label="Seed for the pseudorandom number generator (--seed)" help="Use -1 to use default" />
-            <param name="cutoff" type="integer" value="-1" label="Number of first bases of the reference sequence to index (--cutoff)" help="Use -1 to use default" />
-          </when>  <!-- indexFull -->
-        </conditional>  <!-- indexParams -->
-      </when>  <!-- history -->
-    </conditional>  <!-- refGenomeSource -->
-    <conditional name="singlePaired">
-      <param name="sPaired" type="select" label="Is this library mate-paired?">
-        <option value="single">Single-end</option>
-        <option value="paired">Paired-end</option>
-      </param>
-      <when value="single">
-        <param name="sInput1" type="data" format="fastqcssanger" label="FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/>
-        <conditional name="sParams">
-          <param name="sSettingsType" type="select" label="Bowtie settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
-            <option value="preSet">Commonly used</option>
-            <option value="full">Full parameter list</option>
-          </param>
-          <when value="preSet" />
-          <when value="full">
-            <param name="sSkip" type="integer" value="0" label="Skip the first n reads (-s)" />  
-            <param name="sAlignLimit" type="integer" value="-1" label="Only align the first n reads (-u)" help="-1 for off" />  
-            <param name="sTrimH" type="integer" value="0" label="Trim n bases from high-quality (left) end of each read before alignment (-5)" />
-            <param name="sTrimL" type="integer" value="0" label="Trim n bases from low-quality (right) end of each read before alignment (-3)" />
-            <param name="sMismatchSeed" type="integer" value="2" label="Maximum number of mismatches permitted in the seed (-n)" help="May be 0, 1, 2, or 3" />
-            <param name="sMismatchQual" type="integer" value="70" label="Maximum permitted total of quality values at mismatched read positions (-e)" />
-            <param name="sSeedLen" type="integer" value="28" label="Seed length (-l)" help="Minimum value is 5" />
-            <param name="sRounding" type="select" label="Whether or not to round to the nearest 10 and saturate at 30 (--nomaqround)">
-              <option value="round">Round to nearest 10</option>
-              <option value="noRound">Do not round to nearest 10</option>
-            </param>
-            <param name="sMaqSoapAlign" type="integer" value="-1" label="Number of mismatches for SOAP-like alignment policy (-v)" help="-1 for default MAQ-like alignment policy" />
-            <param name="sTryHard" type="select" label="Whether or not to try as hard as possible to find valid alignments when they exist (-y)" help="Tryhard mode is much slower than regular mode">
-              <option value="noTryHard">Do not try hard</option>
-              <option value="doTryHard">Try hard</option>
-            </param>
-            <param name="sValAlign" type="integer" value="1" label="Report up to n valid alignments per read (-k)" />
-            <param name="sAllValAligns" type="select" label="Whether or not to report all valid alignments per read (-a)">
-              <option value="noAllValAligns">Do not report all valid alignments</option>
-              <option value="doAllValAligns">Report all valid alignments</option>
-            </param>
-            <param name="sSuppressAlign" type="integer" value="-1" label="Suppress all alignments for a read if more than n reportable alignments exist (-m)" help="-1 for no limit" />
-            <param name="sMaxFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads with a number of valid alignments exceeding the limit set with the -m option to a file (--max)" />
-            <param name="sUnmappedFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads that could not be aligned to a file (--un)" />
-            <conditional name="sBestOption">
-              <param name="sBest" type="select" label="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions (--best)" help="Removes all strand bias. Only affects which alignments are reported by Bowtie. Runs slower with best option">
-                <option value="noBest">Do not use best</option>
-                <option value="doBest">Use best</option>
-              </param>
-              <when value="noBest">
-                <param name="snMaxBacktracks" type="integer" value="125" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" />
-              </when>
-              <when value="doBest">
-                <param name="sdMaxBacktracks" type="integer" value="800" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" />
-                <param name="sdStrata" type="select" label="Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable (--strata)">
-                  <option value="noStrata">Do not use strata option</option>
-                  <option value="doStrata">Use strata option</option>
-                </param>
-              </when>
-            </conditional> <!-- sBestOption -->
-            <param name="sOffrate" type="integer" value="-1" label="Override the offrate of the index to n (-o)" help="-1 for default" />
-            <param name="sSeed" type="integer" value="-1" label="Seed for pseudo-random number generator (--seed)" help="-1 for default" />
-            <param name="sSnpphred" type="integer" value="-1" label="SNP penalty (ratio of SNPs per base in the subject genome) (--snpphred)" help="Enter this OR Ratio of SNPs per base" />
-            <param name="sSnpfrac" type="float" value="0.001" label="Ratio of SNPs per base (estimated ratio for colorspace alignments) (--snpfrac)" help="Enter this OR SNP penalty" />
-            <param name="sKeepends" type="select" label="Keep the extreme-ends nucleotides and qualities rather than trimming them (--col-keepends)">
-              <option value="doKeepends">Keep ends</option>
-              <option value="noKeepends">Trim ends</option>
-            </param>
-          </when> <!-- full -->
-        </conditional> <!-- sParams -->
-      </when> <!-- single -->
-      <when value="paired">
-        <param name="pInput1" type="data" format="fastqcssanger" label="FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/>
-        <param name="pInput2" type="data" format="fastqcssanger" label="Reverse FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/>
-        <param name="pMaxInsert" type="integer" value="1000" label="Maximum insert size for valid paired-end alignments (-X)" />
-        <param name="pMateOrient" type="select" label="The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand (--fr/--rf/--ff)">
-          <option value="ff">FF (for SOLiD)</option>
-          <option value="fr">FR (for Illumina)</option>
-          <option value="rf">RF</option>
-        </param>
-        <conditional name="pParams">
-          <param name="pSettingsType" type="select" label="Bowtie settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
-            <option value="preSet">Commonly used</option>
-            <option value="full">Full parameter list</option>
-          </param>
-          <when value="preSet" />
-          <when value="full">
-            <param name="pSkip" type="integer" value="0" label="Skip the first n pairs (-s)" />  
-            <param name="pAlignLimit" type="integer" value="-1" label="Only align the first n pairs (-u)" help="-1 for off" />  
-            <param name="pTrimH" type="integer" value="0" label="Trim n bases from high-quality (left) end of each read before alignment (-5)" />
-            <param name="pTrimL" type="integer" value="0" label="Trim n bases from low-quality (right) end of each read before alignment (-3)" />
-            <param name="pMismatchSeed" type="integer" value="2" label="Maximum number of mismatches permitted in the seed (-n)" help="May be 0, 1, 2, or 3" />
-            <param name="pMismatchQual" type="integer" value="70" label="Maximum permitted total of quality values at mismatched read positions (-e)" />
-            <param name="pSeedLen" type="integer" value="28" label="Seed length (-l)" help="Minimum value is 5" />
-            <param name="pRounding" type="select" label="Whether or not to round to the nearest 10 and saturate at 30 (--nomaqround)">
-              <option value="round">Round to nearest 10</option>
-              <option value="noRound">Do not round to nearest 10</option>
-            </param>
-            <param name="pMaqSoapAlign" type="integer" value="-1" label="Number of mismatches for SOAP-like alignment policy (-v)" help="-1 for default MAQ-like alignment policy" />
-            <param name="pMinInsert" type="integer" value="0" label="Minimum insert size for valid paired-end alignments (-I)" />
-            <param name="pMaxAlignAttempt" type="integer" value="100" label="Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate (--pairtries)" />
-            <param name="pForwardAlign" type="select" label="Choose whether or not to attempt to align the forward reference strand (--nofw)">
-              <option value="forward">Align against the forward reference strand</option>
-              <option value="noForward">Do not align against the forward reference strand</option>
-            </param>
-            <param name="pReverseAlign" type="select" label="Choose whether or not to align against the reverse-complement reference strand (--norc)">
-              <option value="reverse">Align against the reverse-complement reference strand</option>
-              <option value="noReverse">Do not align against the reverse-complement reference strand</option>
-            </param>
-            <param name="pTryHard" type="select" label="Whether or not to try as hard as possible to find valid alignments when they exist (-y)" help="Tryhard mode is much slower than regular mode">
-              <option value="noTryHard">Do not try hard</option>
-              <option value="doTryHard">Try hard</option>
-            </param>
-            <param name="pValAlign" type="integer" value="1" label="Report up to n valid alignments per pair (-k)" />
-            <param name="pAllValAligns" type="select" label="Whether or not to report all valid alignments per pair (-a)">
-              <option value="noAllValAligns">Do not report all valid alignments</option>
-              <option value="doAllValAligns">Report all valid alignments</option>
-            </param>
-            <param name="pSuppressAlign" type="integer" value="-1" label="Suppress all alignments for a pair if more than n reportable alignments exist (-m)" help="-1 for no limit" />
-            <param name="pMaxFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads with a number of valid alignments exceeding the limit set with the -m option to a file (--max)" />
-            <param name="pUnmappedFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads that could not be aligned to a file (--un)" />
-            <conditional name="pBestOption">
-              <param name="pBest" type="select" label="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions (--best)" help="Removes all strand bias. Only affects which alignments are reported by Bowtie. Runs slower with best option">
-                <option value="noBest">Do not use best</option>
-                <option value="doBest">Use best</option>
-              </param>
-              <when value="noBest">
-                <param name="pnMaxBacktracks" type="integer" value="125" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" />
-              </when>
-              <when value="doBest">
-                <param name="pdMaxBacktracks" type="integer" value="800" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" />
-                <param name="pdStrata" type="select" label="Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable (--strata)">
-                  <option value="noStrata">Do not use strata option</option>
-                  <option value="doStrata">Use strata option</option>
-                </param>
-              </when>
-            </conditional>  <!-- pBestOption -->
-            <param name="pOffrate" type="integer" value="-1" label="Override the offrate of the index to n (-o)" help="-1 for default" />
-            <param name="pSeed" type="integer" value="-1" label="Seed for pseudo-random number generator (--seed)" help="-1 for default" />
-            <param name="pSnpphred" type="integer" value="-1" label="SNP penalty (ratio of SNPs per base in the subject genome) (--snpphred)" help="Enter this OR Ratio of SNPs per base" />
-            <param name="pSnpfrac" type="float" value="0.001" label="Ratio of SNPs per base (estimated ratio for colorspace alignments) (--snpfrac)" help="Enter this OR SNP penalty" />
-            <param name="pKeepends" type="select" label="Keep the extreme-ends nucleotides and qualities rather than trimming them (--col-keepends)">
-              <option value="doKeepends">Keep ends</option>
-              <option value="noKeepends">Trim ends</option>
-            </param>
-          </when> <!-- full -->
-        </conditional> <!-- pParams -->
-      </when> <!-- paired -->
-    </conditional> <!-- singlePaired -->
-    <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Suppress the header in the output SAM file" help="Bowtie produces SAM with several lines of header information by default" />
-  </inputs>
-  <outputs>
-    <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
-      <actions>
-        <conditional name="refGenomeSource.genomeSource">
-          <when value="indexed">
-            <action type="metadata" name="dbkey">
-              <option type="from_data_table" name="bowtie_indexes_color" column="1" offset="0">
-                <filter type="param_value" column="0" value="#" filter_by="startswith" keep="False"/>
-                <filter type="param_value" ref="refGenomeSource.index" column="0"/>
-              </option>
-            </action>
-          </when>
-          <when value="history">
-            <action type="metadata" name="dbkey">
-              <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-    <data format="fastqcssanger" name="output_suppressed_reads_l" label="${tool.name} on ${on_string}: suppressed reads (L)">
-      <filter>((
-          singlePaired['sPaired'] == "single" and
-          singlePaired['sParams']['sSettingsType'] == "full" and
-          singlePaired['sParams']['sMaxFile'] is True
-        ) or (
-          singlePaired['sPaired'] == "paired" and
-          singlePaired['pParams']['pSettingsType'] == "full" and
-          singlePaired['pParams']['pMaxFile'] is True
-        ))
-      </filter>
-    </data>
-    <data format="fastqcssanger" name="output_suppressed_reads_r" label="${tool.name} on ${on_string}: suppressed reads (R)">
-      <filter>singlePaired['sPaired'] == "paired"</filter>
-      <filter>singlePaired['pParams']['pSettingsType'] == "full"</filter>
-      <filter>singlePaired['pParams']['pMaxFile'] is True</filter>
-    </data>
-    <data format="fastqcssanger" name="output_unmapped_reads_l" label="${tool.name} on ${on_string}: unmapped reads (L)">
-      <filter>
-        ((
-          singlePaired['sPaired'] == "single" and
-          singlePaired['sParams']['sSettingsType'] == "full" and
-          singlePaired['sParams']['sUnmappedFile'] is True
-        ) or (
-          singlePaired['sPaired'] == "paired" and
-          singlePaired['pParams']['pSettingsType'] == "full" and
-          singlePaired['pParams']['pUnmappedFile'] is True
-        ))
-      </filter>
-    </data>
-    <data format="fastqcssanger" name="output_unmapped_reads_r" label="${tool.name} on ${on_string}: unmapped reads (R)">
-      <filter>singlePaired['sPaired'] == "paired"</filter>
-      <filter>singlePaired['pParams']['pSettingsType'] == "full"</filter>
-      <filter>singlePaired['pParams']['pUnmappedFile'] is True</filter>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <!--
-      Bowtie command:
-      bowtie -q -p 4 -S +sam-nohead -C chrM_color test-data/bowtie_in1.fastqcssanger > bowtie_out1_u.sam
-      sort bowtie_out1_u.sam > bowtie_out1.sam
-      -p is the number of threads, which is hardcoded above. Replace each + with two dashes (XML comments cannot contain a double dash).
-      chrM_color needs to be the base location/name of the index files.
-      -->
-      <param name="genomeSource" value="indexed" />
-      <param name="index" value="equCab2chrM" />
-      <param name="sPaired" value="single" />
-      <param name="sInput1" ftype="fastqcssanger" value="bowtie_in1.fastqcssanger" />
-      <param name="sSettingsType" value="preSet" />
-      <param name="suppressHeader" value="true" />
-      <output name="output" ftype="sam" file="bowtie_out1.sam" sort="True" />
-    </test>
-    <test>
-      <!--
-      Bowtie command:
-      bowtie-build -C -f test-data/chr_m.fasta chrM_color
-      bowtie -q -X 1000 +ff -p 4 -S +sam-nohead -C -n 2 -e 70 -l 28 -X 250 +pairtries 100 +maxbts 125 -k 1 +snpfrac 0.001 +col-keepends +un bowtie_out3_u.fastq chrM_color -1 test-data/bowtie_in3.fastqcssanger -2 test-data/bowtie_in4.fastqcssanger > bowtie_out2_u.sam
-      sort bowtie_out2_u.sam > bowtie_out2.sam
-      sort bowtie_out3_u_1.sam > bowtie_out3_1.sam
-      sort bowtie_out3_u_2.sam > bowtie_out3_2.sam
-      Then also need to modify bowtie_out3_1.sam and bowtie_out3_2.sam so that all @ lines come before sequence lines.
-      The two unmapped output files will be named bowtie_out4_1.fastq and bowtie_out4_2.fastq
-      -p is the number of threads, hardcoded above. Replace each + with two dashes (XML comments cannot contain a double dash).
-      chrM_color is the index files' location/base name.
-      -->
-      <param name="genomeSource" value="history" />
-      <param name="ownFile" value="chr_m.fasta" />
-      <param name="indexSettings" value="indexPreSet" />
-      <param name="sPaired" value="paired" />
-      <param name="pInput1" ftype="fastqcssanger" value="bowtie_in3.fastqcssanger" />
-      <param name="pInput2" ftype="fastqcssanger" value="bowtie_in4.fastqcssanger" />
-      <param name="pMaxInsert" value="1000" />
-      <param name="pMateOrient" value="ff" />
-      <param name="pSettingsType" value="full" />
-      <param name="pSkip" value="0" />
-      <param name="pAlignLimit" value="-1" />
-      <param name="pTrimH" value="0" />
-      <param name="pTrimL" value="0" />
-      <param name="pMismatchSeed" value="2" />
-      <param name="pMismatchQual" value="70" />
-      <param name="pSeedLen" value="28" />
-      <param name="pRounding" value="round" />
-      <param name="pMaqSoapAlign" value="-1" />
-      <param name="pMinInsert" value="0" />
-      <param name="pMaxAlignAttempt" value="100" />
-      <param name="pForwardAlign" value="forward" />
-      <param name="pReverseAlign" value="reverse" />
-      <param name="pTryHard" value="noTryHard" />
-      <param name="pValAlign" value="1" />
-      <param name="pAllValAligns" value="noAllValAligns" />
-      <param name="pSuppressAlign" value="-1" />
-      <param name="pUnmappedFile" value="true" />
-      <param name="pMaxFile" value="false" />
-      <param name="pBest" value="noBest" />
-      <param name="pnMaxBacktracks" value="125" />
-      <param name="pOffrate" value="-1" />
-      <param name="pSeed" value="-1" />
-      <param name="pSnpphred" value="-1" />
-      <param name="pSnpfrac" value="0.001" />
-      <param name="pKeepends" value="doKeepends" />
-      <param name="suppressHeader" value="true" />
-      <output name="output" ftype="sam" file="bowtie_out2.sam" sort="True" />
-      <output name="output_unmapped_reads_l" ftype="fastqcssanger" file="bowtie_out3_1.fastq" sort="True" />
-      <output name="output_unmapped_reads_r" ftype="fastqcssanger" file="bowtie_out3_2.fastq" sort="True" />
-    </test>
-    <test>
-      <!--
-      Bowtie command:
-      bowtie -q -p 4 -S +sam-nohead -C -n 2 -e 70 -l 28 +maxbts 125 -k 1 +snpfrac 0.001 +col-keepends chrM_color test-data/bowtie_in1.fastqcssanger > bowtie_out4_u.sam
-      sort bowtie_out4_u.sam > bowtie_out4.sam
-      -p is the number of threads, hardcoded above. Replace each + with two dashes (XML comments cannot contain a double dash).
-      chrM_color is the index files' location/base name.
-      -->
-      <param name="genomeSource" value="indexed" />
-      <param name="index" value="equCab2chrM" />
-      <param name="sPaired" value="single" />
-      <param name="sInput1" ftype="fastqcssanger" value="bowtie_in1.fastqcssanger" />
-      <param name="sSettingsType" value="full" />
-      <param name="sSkip" value="0" />
-      <param name="sAlignLimit" value="-1" />
-      <param name="sTrimH" value="0" />
-      <param name="sTrimL" value="0" />
-      <param name="sMismatchSeed" value="2" />
-      <param name="sMismatchQual" value="70" />
-      <param name="sSeedLen" value="28" />
-      <param name="sRounding" value="round" />
-      <param name="sMaqSoapAlign" value="-1" />
-      <param name="sTryHard" value="noTryHard" />
-      <param name="sValAlign" value="1" />
-      <param name="sAllValAligns" value="noAllValAligns" />
-      <param name="sSuppressAlign" value="-1" />
-      <param name="sUnmappedFile" value="false" />
-      <param name="sMaxFile" value="false" />
-      <param name="sBest" value="noBest" />
-      <param name="snMaxBacktracks" value="125" />
-      <param name="sOffrate" value="-1" />
-      <param name="sSeed" value="-1" />
-      <param name="sSnpphred" value="-1" />
-      <param name="sSnpfrac" value="0.001" />
-      <param name="sKeepends" value="doKeepends" />
-      <param name="suppressHeader" value="true" />
-      <output name="output" ftype="sam" file="bowtie_out4.sam" sort="True" />
-    </test>
-    <test>
-      <!--
-      Bowtie command:
-      bowtie-build +noauto +bmaxdivn 4 +dcv 1024 +offrate 5 +ftabchars 10 +little -C -f test-data/chr_m.fasta chrM_color
-      bowtie -q -X 1000 +ff -p 4 -S +sam-nohead -C chrM_color -1 test-data/bowtie_in3.fastqcssanger -2 test-data/bowtie_in4.fastqcssanger > bowtie_out5_u.sam
-      sort bowtie_out5_u.sam > bowtie_out5.sam
-      -p is the number of threads, hardcoded above. Replace each + with two dashes (XML comments cannot contain a double dash).
-      chrM_color is the index files' location/base name.
-      -->
-      <param name="genomeSource" value="history" />
-      <param name="ownFile" value="chr_m.fasta" />
-      <param name="indexSettings" value="indexFull" />
-      <param name="autoB" value="set" />
-      <param name="packed" value="unpacked" />
-      <param name="bmax" value="-1" />
-      <param name="bmaxdivn" value="4" />
-      <param name="dcv" value="1024" />
-      <param name="nodc" value="dc" />
-      <param name="noref" value="ref" />
-      <param name="offrate" value="5" />
-      <param name="ftab" value="10" />
-      <param name="ntoa" value="no" />
-      <param name="endian" value="little" />
-      <param name="seed" value="-1" />
-      <param name="cutoff" value="-1" />
-      <param name="sPaired" value="paired" />
-      <param name="pInput1" ftype="fastqcssanger" value="bowtie_in3.fastqcssanger" />
-      <param name="pInput2" ftype="fastqcssanger" value="bowtie_in4.fastqcssanger" />
-      <param name="pMaxInsert" value="1000" />
-      <param name="pMateOrient" value="ff" />
-      <param name="pSettingsType" value="preSet" />
-      <param name="suppressHeader" value="true" />
-      <output name="output" ftype="sam" file="bowtie_out5.sam" sort="True" />
-    </test>
-  </tests> 
-
-  <help>
-
-**What it does**
-
-Bowtie_ is a short read aligner designed to be ultrafast and memory-efficient. It is developed by Ben Langmead and Cole Trapnell. Please cite: Langmead B, Trapnell C, Pop M, Salzberg SL. Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. Genome Biology 10:R25.
-
-.. _Bowtie: http://bowtie-bio.sourceforge.net/index.shtml
-
-------
-
-**Know what you are doing**
-
-.. class:: warningmark
-
-There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words: running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
-
-.. __: http://bowtie-bio.sourceforge.net/index.shtml
-
-------
-
-**Input formats**
-
-Bowtie accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files.
-
-------
-
-**A Note on Built-in Reference Genomes**
-
-Some genomes have multiple variants. If only one "type" of genome is listed, it is the Full version, which includes everything in the original genome data download (possibly with mitochondrial and plasmid DNA added if it wasn't already included). The Full version is available for every genome. Some genomes also come in a Canonical variant, which contains only the "canonical" (well-defined) chromosomes or segments, such as chr1-chr22, chrX, chrY, and chrM for human. Other variants reflect sex. These come in the canonical form only, so the general Canonical variant is actually Canonical Female, and the other is Canonical Male (identical to Female except that it also includes chrY).
-
-------
-
-**Outputs**
-
-The output is in SAM format, and has the following columns::
-
-    Column  Description
-  --------  --------------------------------------------------------
-   1 QNAME  Query (pair) NAME
-   2 FLAG   bitwise FLAG
-   3 RNAME  Reference sequence NAME
-   4 POS    1-based leftmost POSition/coordinate of clipped sequence
-   5 MAPQ   MAPping Quality (Phred-scaled)
-   6 CIGAR  extended CIGAR string
-   7 MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
-   8 MPOS   1-based Mate POSition
-   9 ISIZE  Inferred insert SIZE
-  10 SEQ    query SEQuence on the same strand as the reference
-  11 QUAL   query QUALity (ASCII-33 gives the Phred base quality)
-  12 OPT    variable OPTional fields in the format TAG:VTYPE:VALUE
-
-The flags are as follows::
-
-    Flag  Description
-  ------  -------------------------------------
-  0x0001  the read is paired in sequencing
-  0x0002  the read is mapped in a proper pair
-  0x0004  the query sequence itself is unmapped
-  0x0008  the mate is unmapped
-  0x0010  strand of the query (1 for reverse)
-  0x0020  strand of the mate
-  0x0040  the read is the first read in a pair
-  0x0080  the read is the second read in a pair
-  0x0100  the alignment is not primary
-
-It looks like this (scroll sideways to see the entire example)::
-
-  QNAME	FLAG	RNAME	POS	MAPQ	CIGAR	MRNM	MPOS	ISIZE	SEQ	QUAL	OPT
-  HWI-EAS91_1_30788AAXX:1:1:1761:343	4	*	0	0	*	*	0	0	AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG	hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
-  HWI-EAS91_1_30788AAXX:1:1:1578:331	4	*	0	0	*	*	0	0	GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG	hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
-
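-Columns 12 and beyond hold the optional TAG:VTYPE:VALUE fields. A small Python sketch (illustrative only, not part of this tool) that collects them into a dictionary::
-
-  # Split a SAM line on tabs; fields beyond the 11 mandatory columns are
-  # optional TAG:VTYPE:VALUE triples.
-  def opt_fields(sam_line):
-      fields = sam_line.rstrip("\n").split("\t")
-      return {tag: (vtype, value)
-              for tag, vtype, value in (f.split(":", 2) for f in fields[11:])}
-
-  print(opt_fields("read1\t0\tchrM\t100\t255\t36M\t*\t0\t0\tACGT\thhhh\tNM:i:0"))
-  # {'NM': ('i', '0')}
-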
--------
-
-**Bowtie settings**
-
-All of the options have a default value that you can change. Most of the options in Bowtie have been implemented here.
-
-------
-
-**Bowtie parameter list**
-
-This is an exhaustive list of Bowtie options:
-
-For indexing (bowtie-build)::
-
-  -a               No auto behavior. Disable the default behavior where bowtie automatically 
-                   selects values for --bmax/--bmaxdivn/--dcv/--packed parameters according 
-                   to the memory available. [off]
-  --packed         Packing. Use a packed representation for DNA strings. [auto] 
-  --bmax INT       Suffix maximum. The maximum number of suffixes allowed in a block. [auto]
-  --bmaxdivn INT   Suffix maximum fraction. The maximum number of suffixes allowed in a block 
-                   expressed as a fraction of the length of the reference. [4]
-  --dcv INT        Difference-cover sample. Use INT as the period for the difference-cover 
-                   sample. [1024]
-  --nodc           No difference-cover sample. Disable the difference-cover sample. [off]
-  -r               No reference indexes. Do not build the NAME.3.ebwt and NAME.4.ebwt portions 
-                   of the index. Used only for paired-end alignment. [off]
-  -o INT           Offrate. How many Burrows-Wheeler rows get marked by the indexer. The 
-                   indexer will mark every 2^INT rows. The marked rows correspond to rows on 
-                   the genome. [5]
-  -t INT           Ftab. The lookup table used to calculate an initial Burrows-Wheeler range 
-                   with respect to the first INT characters of the query. Ftab is 4^(INT+1) 
-                   bytes. [10]
-  --ntoa           N conversion. Convert Ns to As before building the index. Otherwise, Ns are 
-                   simply excluded from the index and Bowtie will not find alignments that 
-                   overlap them. [off]
-  --big            Endianness. Endianness to use when serializing integers to the index file. [off]
-  --little         Endianness. [--little]
-  --seed INT       Random seed. Use INT as the seed for the pseudo-random number generator. [off]
-  --cutoff INT     Cutoff. Index only the first INT bases of the reference sequences (cumulative 
-                   across sequences) and ignore the rest. [off]
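-
-To make the two size formulas above concrete, a quick back-of-the-envelope calculation (plain Python, nothing bowtie-specific)::
-
-  # -o 5 marks every 2^5 = 32nd Burrows-Wheeler row; -t 10 builds a
-  # 4^(10+1)-byte lookup table.
-  offrate, ftab_chars = 5, 10
-  print(2 ** offrate)           # 32
-  print(4 ** (ftab_chars + 1))  # 4194304 bytes, i.e. a 4 MB ftab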
-
-For aligning (bowtie)::
-
-  -s INT           Skip. Do not align the first INT reads or pairs in the input. [off]
-  -u INT           Align limit. Only align the first INT reads/pairs from the input. [no limit]
-  -5 INT           High-quality trim. Trim INT bases from the high-quality (left) end of each 
-                   read before alignment. [0]
-  -3 INT           Low-quality trim. Trim INT bases from the low-quality (right) end of each 
-                   read before alignment. [0]
-  -n INT           Mismatch seed. Maximum number of mismatches permitted in the seed (defined 
-                   with seed length option). Can be 0, 1, 2, or 3. [2]
-  -e INT           Mismatch quality. Maximum permitted total of quality values at mismatched 
-                   read positions. Bowtie rounds quality values to the nearest 10 and saturates 
-                   at 30. [70]
-  -l INT           Seed length. The number of bases on the high-quality end of the read to 
-                   which the -n ceiling applies. Must be at least 5. [28]
-  --nomaqround     Suppress MAQ rounding. Values are internally rounded to the nearest 10 and 
-                   saturate at 30. This option turns off that rounding. [off] 
-  -v INT           MAQ- or SOAP-like alignment policy. This option turns off the default 
-                   MAQ-like alignment policy in favor of a SOAP-like one. End-to-end alignments 
-                   with at most INT mismatches. [off]
-  -I INT           Minimum insert. The minimum insert size for valid paired-end alignments. 
-                   Does checking on untrimmed reads if -5 or -3 is used. [0]
-  -X INT           Maximum insert. The maximum insert size for valid paired-end alignments. 
-                   Does checking on untrimmed reads if -5 or -3 is used. [250]
-  --fr             Mate orientation. The upstream/downstream mate orientations for a valid 
-                   paired-end alignment against the forward reference strand. [--fr]
-  --rf             Mate orientation. [off]
-  --ff             Mate orientation. [off]
-  --pairtries INT  Maximum alignment attempts for paired-end data. [100] 
-  --nofw           No forward aligning. Choosing this option means that Bowtie will not attempt 
-                   to align against the forward reference strand. [off]
-  --norc           No reverse-complement aligning. Setting this will mean that Bowtie will not 
-                   attempt to align against the reverse-complement reference strand. [off]
-  --maxbts INT     Maximum backtracks. The maximum number of backtracks permitted when aligning 
-                   a read in -n 2 or -n 3 mode. [125 without --best] [800 with --best]
-  -y               Try hard. Try as hard as possible to find valid alignments when they exist, 
-                   including paired-end alignments. [off]
-  --chunkmbs INT   Thread memory. The number of megabytes of memory a given thread is given to 
-                   store path descriptors in --best mode. [32]
-  -k INT           Valid alignments. The number of valid alignments per read or pair. [off] 
-  -a               All valid alignments. Choosing this means that all valid alignments per read 
-                   or pair will be reported. [off]
-  -m INT           Suppress alignments. Suppress all alignments for a particular read or pair 
-                   if more than INT reportable alignments exist for it. [no limit]
-  --best           Best mode. Make Bowtie guarantee that reported singleton alignments are 
-                   "best" in terms of stratum (the number of mismatches) and quality values at 
-                   mismatched position. [off]
-  --strata         Best strata. When running in best mode, report alignments that fall into the 
-                   best stratum if there are ones falling into more than one. [off]
-  -o INT           Offrate override. Override the offrate of the index with INT. Some row 
-                   markings are discarded when the index is read into memory. INT must be greater than 
-                   the value used to build the index (default: 5). [off]
-  --seed INT       Random seed. Use INT as the seed for the pseudo-random number generator. [off]
-  --snpphred INT   Use INT as the SNP penalty for decoding colorspace alignments. True ratio of 
-                   SNPs per base in the subject genome. [see --snpfrac]
-  --snpfrac DEC    Use DEC as the estimated ratio of SNPs per base when decoding colorspace 
-                   alignments. [0.001]
-  --col-keepends   Keep the extreme-end nucleotides and qualities when decoding colorspace 
-                   alignments. [off]
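-
-The two colorspace SNP options express the same quantity on different scales. Assuming the standard Phred relationship (an assumption -- confirm against the Bowtie manual), the default --snpfrac of 0.001 corresponds to --snpphred 30::
-
-  # Phred-scale a SNP probability: phred = -10 * log10(p).
-  import math
-  print(-10 * math.log10(0.001))  # 30.0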
-
-  </help>
-</tool>
--- a/tools/sr_mapping/bowtie_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,469 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Runs Bowtie on single-end or paired-end data.
-For use with Bowtie v. 0.12.7
-
-usage: bowtie_wrapper.py [options]
-    -t, --threads=t: The number of threads to run
-    -o, --output=o: The output file
-    --output_unmapped_reads=: File name for unmapped reads (single-end)
-    --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
-    --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
-    --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
-    --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
-    --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
-    -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
-    -I, --input2=I: The reverse reads file in Sanger FASTQ format
-    -4, --dataType=4: The type of data (SOLiD or Solexa)
-    -2, --paired=2: Whether the data is single- or paired-end
-    -g, --genomeSource=g: The type of reference provided
-    -r, --ref=r: The reference genome to use or index
-    -s, --skip=s: Skip the first n reads
-    -a, --alignLimit=a: Only align the first n reads
-    -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
-    -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
-    -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
-    -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
-    -l, --seedLen=l: Seed length
-    -n, --rounding=n: Whether or not to round quality values to the nearest 10 and saturate at 30
-    -P, --maqSoapAlign=P: Choose MAQ- or SOAP-like alignment policy
-    -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist
-    -v, --valAlign=v: Report up to n valid alignments per read
-    -V, --allValAligns=V: Whether or not to report all valid alignments per read
-    -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
-    -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
-    -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
-    -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
-    -j, --minInsert=j: Minimum insert size for valid paired-end alignments
-    -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
-    -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
-    -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
-    -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
-    -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
-    -F, --offrate=F: Override the offrate of the index to n
-    -8, --snpphred=8: SNP penalty on Phred scale
-    -6, --snpfrac=6: Fraction of sites expected to be SNP sites
-    -7, --keepends=7: Keep extreme-end nucleotides and qualities
-    -S, --seed=S: Seed for pseudo-random number generator
-    -C, --params=C: Whether to use default or specified parameters
-    -u, --iautoB=u: Automatic or specified behavior
-    -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
-    -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
-    -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
-    -D, --idcv=D: The period for the difference-cover sample
-    -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
-    -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
-    -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
-    -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
-    -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
-    -N, --iendian=N: Endianness to use when serializing integers to the index file
-    -Z, --iseed=Z: Seed for the pseudorandom number generator
-    -c, --icutoff=c: Number of first bases of the reference sequence to index
-    -x, --indexSettings=x: Whether or not indexing options are to be set
-    -H, --suppressHeader=H: Suppress header
-    --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
-"""
-
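-# A minimal single-end invocation using a prebuilt index might look like this
-# (placeholder paths; an illustrative sketch, not a recorded command):
-#   bowtie_wrapper.py -t 4 -o out.sam -i reads.fastq -2 single -g indexed \
-#       -r /path/to/index_base -C preSet --galaxy_input_format fastqsanger
-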
-import optparse, os, shutil, subprocess, sys, tempfile
-
-#Allow quality score encodings other than Sanger
-DEFAULT_ASCII_ENCODING = '--phred33-quals'
-GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = { 'fastqsanger':'--phred33-quals', 'fastqillumina':'--phred64-quals', 'fastqsolexa':'--solexa-quals' }
-#FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this cannot currently be set in the tool/wrapper/config
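-#For example, a 'fastqillumina' dataset maps to '--phred64-quals', and any format
-#missing from the table falls back to DEFAULT_ASCII_ENCODING:
-#    GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( 'fastqillumina', DEFAULT_ASCII_ENCODING )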
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
-    parser.add_option( '-o', '--output', dest='output', help='The output file' )
-    parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
-    parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
-    parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' )
-    parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' )
-    parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' )
-    parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' )
-    parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
-    parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
-    parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
-    parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' )
-    parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
-    parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
-    parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
-    parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
-    parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
-    parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
-    parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
-    parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
-    parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
-    parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round quality values to the nearest 10 and saturate at 30' )
-    parser.add_option( '-P', '--maqSoapAlign', dest='maqSoapAlign', help='Choose MAQ- or SOAP-like alignment policy' )
-    parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
-    parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read' )
-    parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read' )
-    parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
-    parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
-    parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
-    parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
-    parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' )
-    parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' )
-    parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' )
-    parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' )
-    parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' )
-    parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' )
-    parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
-    parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' )
-    parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' )
-    parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' )
-    parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' )
-    parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' )
-    parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
-    parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
-    parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
-    parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' )
-    parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' )
-    parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' )
-    parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' )
-    parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' )
-    parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' )
-    parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
-    parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
-    parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
-    parser.add_option( '-c', '--icutoff', dest='icutoff', help='Number of first bases of the reference sequence to index' )
-    parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
-    parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
-    parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='Galaxy input format' )
-    parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
-    (options, args) = parser.parse_args()
-    stdout = ''
-
-    # make temp directory for placement of indices and copy reference file there if necessary
-    tmp_index_dir = tempfile.mkdtemp()
-    # get type of data (solid or solexa)
-    if options.dataType == 'solid':
-        colorspace = '-C'
-    else:
-        colorspace = ''
-    # index if necessary
-    if options.genomeSource == 'history' and not options.do_not_build_index:
-        # set up commands
-        if options.index_settings =='indexPreSet':
-            indexing_cmds = '%s' % colorspace
-        else:
-            try:
-                if options.iautoB and options.iautoB == 'set':
-                    iautoB = '--noauto'
-                else:
-                    iautoB = ''
-                if options.ipacked and options.ipacked == 'packed':
-                    ipacked = '--packed'
-                else:
-                    ipacked = ''
-                if options.ibmax and int( options.ibmax ) >= 1:
-                    ibmax = '--bmax %s' % options.ibmax 
-                else:
-                    ibmax = ''
-                if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0:
-                    ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn
-                else:
-                    ibmaxdivn = ''
-                if options.idcv and int( options.idcv ) > 0:
-                    idcv = '--dcv %s' % options.idcv
-                else:
-                    idcv = ''
-                if options.inodc and options.inodc == 'nodc':
-                    inodc = '--nodc'
-                else:
-                    inodc = ''
-                if options.inoref and options.inoref == 'noref':
-                    inoref = '--noref'
-                else:
-                    inoref = ''
-                if options.iftab and int( options.iftab ) >= 0:
-                    iftab = '--ftabchars %s' % options.iftab
-                else:
-                    iftab = ''
-                if options.intoa and options.intoa == 'yes':
-                    intoa = '--ntoa'
-                else:
-                    intoa = ''
-                if options.iendian and options.iendian == 'big':
-                    iendian = '--big'
-                else:
-                    iendian = '--little'
-                if options.iseed and int( options.iseed ) > 0:
-                    iseed = '--seed %s' % options.iseed
-                else:
-                    iseed = ''
-                if options.icutoff and int( options.icutoff ) > 0:
-                    icutoff = '--cutoff %s' % options.icutoff
-                else:
-                    icutoff = ''
-                indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s %s' % \
-                                ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc, 
-                                  inoref, options.ioffrate, iftab, intoa, iendian, 
-                                  iseed, icutoff, colorspace )
-            except ValueError, e:
-                # clean up temp dir
-                if os.path.exists( tmp_index_dir ):
-                    shutil.rmtree( tmp_index_dir )
-                stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
-        ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
-        ref_file_name = ref_file.name
-        ref_file.close()
-        os.symlink( options.ref, ref_file_name )
-        cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
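-        # With the preset index settings, indexing_cmds is empty ('-C' for
-        # colorspace data), so cmd1 reduces to roughly (illustrative; the
-        # temp file name is generated at runtime):
-        #   bowtie-build -f <tmp_ref> <tmp_ref>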
-        try:
-            tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
-            tmp_stderr = open( tmp, 'wb' )
-            proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
-            returncode = proc.wait()
-            tmp_stderr.close()
-            # get stderr, allowing for case where it's very large
-            tmp_stderr = open( tmp, 'rb' )
-            stderr = ''
-            buffsize = 1048576
-            try:
-                while True:
-                    stderr += tmp_stderr.read( buffsize )
-                    if not stderr or len( stderr ) % buffsize != 0:
-                        break
-            except OverflowError:
-                pass
-            tmp_stderr.close()
-            if returncode != 0:
-                raise Exception, stderr
-        except Exception, e:
-            # clean up temp dir
-            if os.path.exists( tmp_index_dir ):
-                shutil.rmtree( tmp_index_dir )
-            stop_err( 'Error indexing reference sequence\n' + str( e ) )
-        stdout += 'File indexed. '
-    else:
-        ref_file_name = options.ref
-    # set up aligning and generate aligning command options
-    # automatically set threads in both cases
-    tmp_suppressed_file_name = None
-    tmp_unmapped_file_name = None
-    if options.suppressHeader == 'true':
-        suppressHeader = '--sam-nohead'
-    else:
-        suppressHeader = ''
-    if options.maxInsert and int( options.maxInsert ) > 0:
-        maxInsert = '-X %s' % options.maxInsert
-    else:
-        maxInsert = ''
-    if options.mateOrient:
-        mateOrient = '--%s' % options.mateOrient
-    else:
-        mateOrient = ''
-    quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
-    if options.params == 'preSet':
-        aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \
-                ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
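-        # For a single-end fastqsanger run, maxInsert and mateOrient stay empty,
-        # so the preset string resolves to roughly (illustrative):
-        #   -q -p <threads> -S --phred33-quals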
-    else:
-        try:
-            if options.skip and int( options.skip ) > 0:
-                skip = '-s %s' % options.skip
-            else:
-                skip = ''
-            if options.alignLimit and int( options.alignLimit ) >= 0:
-                alignLimit = '-u %s' % options.alignLimit
-            else:
-                alignLimit = ''
-            if options.trimH and int( options.trimH ) > 0:
-                trimH = '-5 %s' % options.trimH
-            else:
-                trimH = ''
-            if options.trimL and int( options.trimL ) > 0:
-                trimL = '-3 %s' % options.trimL
-            else:
-                trimL = ''
-            if options.maqSoapAlign != '-1' and int( options.maqSoapAlign ) >= 0:
-                maqSoapAlign = '-v %s' % options.maqSoapAlign
-            else:
-                maqSoapAlign = ''
-            if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \
-                        or options.mismatchSeed == '2' or options.mismatchSeed == '3'):
-                mismatchSeed = '-n %s' % options.mismatchSeed
-            else:
-                mismatchSeed = ''
-            if options.mismatchQual and int( options.mismatchQual ) >= 0:
-                mismatchQual = '-e %s' % options.mismatchQual
-            else:
-                mismatchQual = ''
-            if options.seedLen and int( options.seedLen ) >= 5:
-                seedLen = '-l %s' % options.seedLen
-            else:
-                seedLen = ''
-            if options.rounding == 'noRound':
-                rounding = '--nomaqround'
-            else:
-                rounding = ''
-            if options.minInsert and int( options.minInsert ) > 0:
-                minInsert = '-I %s' % options.minInsert
-            else:
-                minInsert = ''
-            if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0:
-                maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt
-            else:
-                maxAlignAttempt = ''
-            if options.forwardAlign == 'noForward':
-                forwardAlign = '--nofw'
-            else:
-                forwardAlign = ''
-            if options.reverseAlign == 'noReverse':
-                reverseAlign = '--norc'
-            else:
-                reverseAlign = ''
-            if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
-                    ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
-                maxBacktracks = '--maxbts %s' % options.maxBacktracks
-            else:
-                maxBacktracks = ''
-            if options.tryHard == 'doTryHard':
-                tryHard = '-y'
-            else:
-                tryHard = ''
-            if options.valAlign and int( options.valAlign ) >= 0:
-                valAlign = '-k %s' % options.valAlign
-            else:
-                valAlign = ''
-            if options.allValAligns == 'doAllValAligns':
-                allValAligns = '-a'
-            else:
-                allValAligns = ''
-            if options.suppressAlign and int( options.suppressAlign ) >= 0:
-                suppressAlign = '-m %s' % options.suppressAlign
-            else:
-                suppressAlign = ''
-            if options.best == 'doBest':
-                best = '--best'
-            else:
-                best = ''
-            if options.strata == 'doStrata':
-                strata = '--strata'
-            else:
-                strata = ''
-            if options.offrate and int( options.offrate ) >= 0:
-                offrate = '-o %s' % options.offrate
-            else:
-                offrate = ''
-            if options.seed and int( options.seed ) >= 0:
-                seed = '--seed %s' % options.seed
-            else:
-                seed = ''
-            if options.paired == 'paired':
-                if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
-                    tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
-                    tmp_unmapped_file_name = tmp_unmapped_file.name
-                    tmp_unmapped_file.close()
-                    output_unmapped_reads = '--un %s' % tmp_unmapped_file_name
-                else:
-                    output_unmapped_reads = ''
-                if options.output_suppressed_reads:
-                    tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
-                    tmp_suppressed_file_name = tmp_suppressed_file.name
-                    tmp_suppressed_file.close()
-                    output_suppressed_reads = '--max %s' % tmp_suppressed_file_name
-                else:
-                    output_suppressed_reads = ''
-            else:
-                if options.output_unmapped_reads:
-                    output_unmapped_reads = '--un %s' % options.output_unmapped_reads
-                else:
-                    output_unmapped_reads = ''
-                if options.output_suppressed_reads:
-                    output_suppressed_reads = '--max %s' % options.output_suppressed_reads
-                else:
-                    output_suppressed_reads = ''
-            snpfrac = ''
-            if options.snpphred and int( options.snpphred ) >= 0:
-                snpphred = '--snpphred %s' % options.snpphred
-            else:
-                snpphred = ''
-                if options.snpfrac and float( options.snpfrac ) >= 0:
-                    snpfrac = '--snpfrac %s' % options.snpfrac
-            if options.keepends and options.keepends == 'doKeepends':
-                keepends = '--col-keepends'
-            else:
-                keepends = ''
-            aligning_cmds = '-q %s %s -p %s -S %s %s %s %s %s %s %s %s %s %s %s %s ' \
-                            '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \
-                            ( maxInsert, mateOrient, options.threads, suppressHeader,
-                              colorspace, skip, alignLimit, trimH, trimL, maqSoapAlign,
-                              mismatchSeed, mismatchQual, seedLen, rounding, minInsert, 
-                              maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
-                              tryHard, valAlign, allValAligns, suppressAlign, best,
-                              strata, offrate, seed, snpphred, snpfrac, keepends,
-                              output_unmapped_reads, output_suppressed_reads,
-                              quality_score_encoding )
-        except ValueError, e:
-            # clean up temp dir
-            if os.path.exists( tmp_index_dir ):
-                shutil.rmtree( tmp_index_dir )
-            stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
-    try:
-        # have to nest try-except in try-finally to handle 2.4
-        try:
-            # prepare actual mapping commands
-            if options.paired == 'paired':
-                cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )
-            else:
-                cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output )
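-            # cmd2 now resembles (placeholder names):
-            #   bowtie <aligning_cmds> <index_base> -1 left.fastq -2 right.fastq > out.sam  (paired)
-            #   bowtie <aligning_cmds> <index_base> reads.fastq > out.sam                   (single)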
-            # align
-            tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
-            tmp_stderr = open( tmp, 'wb' )
-            proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
-            returncode = proc.wait()
-            tmp_stderr.close()
-            # get stderr, allowing for case where it's very large
-            tmp_stderr = open( tmp, 'rb' )
-            stderr = ''
-            buffsize = 1048576
-            try:
-                while True:
-                    stderr += tmp_stderr.read( buffsize )
-                    if not stderr or len( stderr ) % buffsize != 0:
-                        break
-            except OverflowError:
-                pass
-            tmp_stderr.close()
-            if returncode != 0:
-                raise Exception, stderr
-            # get suppressed and unmapped reads output files in place if appropriate
-            if options.paired == 'paired' and tmp_suppressed_file_name and \
-                               options.output_suppressed_reads_l and options.output_suppressed_reads_r:
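-                # bowtie appends _1/_2 to the --max output base name for the two
-                # mates; move each half to its Galaxy-assigned destination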
-                try:
-                    left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
-                    right = tmp_suppressed_file_name.replace( '.fastq', '_2.fastq' )
-                    shutil.move( left, options.output_suppressed_reads_l )
-                    shutil.move( right, options.output_suppressed_reads_r )
-                except Exception, e:
-                    sys.stdout.write( 'Error producing the suppressed output file.\n' )
-            if options.paired == 'paired' and tmp_unmapped_file_name and \
-                               options.output_unmapped_reads_l and options.output_unmapped_reads_r:
-                try:
-                    left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
-                    right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
-                    shutil.move( left, options.output_unmapped_reads_l )
-                    shutil.move( right, options.output_unmapped_reads_r )
-                except Exception, e:
-                    sys.stdout.write( 'Error producing the unmapped output file.\n' )
-            # check that there are results in the output file
-            if os.path.getsize( options.output ) == 0:
-                raise Exception, 'The output file is empty; there may be an error with your input file or settings.'
-        except Exception, e:
-            stop_err( 'Error aligning sequence. ' + str( e ) )
-    finally:
-        # clean up temp dir
-        if os.path.exists( tmp_index_dir ):
-            shutil.rmtree( tmp_index_dir )
-    stdout += 'Sequence file aligned.\n'
-    sys.stdout.write( stdout )
-
-if __name__=="__main__": __main__()
--- a/tools/sr_mapping/bowtie_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,838 +0,0 @@
-<tool id="bowtie_wrapper" name="Map with Bowtie for Illumina" version="1.1.2">
-  <requirements><requirement type='package'>bowtie</requirement></requirements>
-  <description></description>
-  <parallelism method="basic"></parallelism>
-  <command interpreter="python">
-    bowtie_wrapper.py
-      ## Hackish setting of number of threads
-      --threads="4"
-      ## Outputs
-      --output=$output
-      #if str( $singlePaired.sPaired ) == "single"
-        #if $output_unmapped_reads_l
-          --output_unmapped_reads=$output_unmapped_reads_l
-        #end if
-        #if $output_suppressed_reads_l
-          --output_suppressed_reads=$output_suppressed_reads_l
-        #end if
-        --galaxy_input_format="${singlePaired.sInput1.ext}"
-      #else
-        #if $output_unmapped_reads_l and $output_unmapped_reads_r
-          --output_unmapped_reads_l=$output_unmapped_reads_l
-          --output_unmapped_reads_r=$output_unmapped_reads_r
-        #end if
-        #if $output_suppressed_reads_l and $output_suppressed_reads_r
-          --output_suppressed_reads_l=$output_suppressed_reads_l
-          --output_suppressed_reads_r=$output_suppressed_reads_r
-        #end if
-        --galaxy_input_format="${singlePaired.pInput1.ext}"
-      #end if
-      ## Inputs
-      --dataType="solexa" ##this indicates that nucleotide base space is used in the wrapper
-      --suppressHeader=$suppressHeader
-      --genomeSource=$refGenomeSource.genomeSource
-      #if $refGenomeSource.genomeSource == "history":
-        ##index already exists
-        #if $refGenomeSource.ownFile.extension.startswith( 'bowtie_' ):
-          ##user previously built
-          --ref="${refGenomeSource.ownFile.extra_files_path}/${refGenomeSource.ownFile.metadata.base_name}"
-          --do_not_build_index
-        #else:
-          ##build index on the fly
-          --ref=$refGenomeSource.ownFile
-          --indexSettings=$refGenomeSource.indexParams.indexSettings
-          #if $refGenomeSource.indexParams.indexSettings == "indexFull":
-            --iautoB=$refGenomeSource.indexParams.autoBehavior.autoB
-            #if $refGenomeSource.indexParams.autoBehavior.autoB == "set":
-              --ipacked=$refGenomeSource.indexParams.autoBehavior.packed
-              --ibmax=$refGenomeSource.indexParams.autoBehavior.bmax
-              --ibmaxdivn=$refGenomeSource.indexParams.autoBehavior.bmaxdivn
-              --idcv=$refGenomeSource.indexParams.autoBehavior.dcv
-            #end if
-            --inodc=$refGenomeSource.indexParams.nodc
-            --inoref=$refGenomeSource.indexParams.noref
-            --ioffrate=$refGenomeSource.indexParams.offrate
-            --iftab=$refGenomeSource.indexParams.ftab
-            --intoa=$refGenomeSource.indexParams.ntoa
-            --iendian=$refGenomeSource.indexParams.endian
-            --iseed=$refGenomeSource.indexParams.seed
-            --icutoff=$refGenomeSource.indexParams.cutoff 
-          #end if
-        #end if
-      #else
-        ##use pre-built index
-        ##--ref="${ filter( lambda x: str( x[0] ) == str( $refGenomeSource.index ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1] }"
-        --ref="${ refGenomeSource.index.fields.path }"
-      #end if
-      --paired=$singlePaired.sPaired
-      #if $singlePaired.sPaired == "single":
-        --input1=$singlePaired.sInput1
-        --params=$singlePaired.sParams.sSettingsType
-        #if $singlePaired.sParams.sSettingsType == "full":
-          --skip=$singlePaired.sParams.sSkip
-          --alignLimit=$singlePaired.sParams.sAlignLimit
-          --trimH=$singlePaired.sParams.sTrimH
-          --trimL=$singlePaired.sParams.sTrimL
-          --mismatchSeed=$singlePaired.sParams.sMismatchSeed
-          --mismatchQual=$singlePaired.sParams.sMismatchQual
-          --seedLen=$singlePaired.sParams.sSeedLen
-          --rounding=$singlePaired.sParams.sRounding
-          --maqSoapAlign=$singlePaired.sParams.sMaqSoapAlign
-          --tryHard=$singlePaired.sParams.sTryHard
-          --valAlign=$singlePaired.sParams.sValAlign
-          --allValAligns=$singlePaired.sParams.sAllValAligns
-          --suppressAlign=$singlePaired.sParams.sSuppressAlign
-          --best=$singlePaired.sParams.sBestOption.sBest
-          #if $singlePaired.sParams.sBestOption.sBest == "doBest":
-            --maxBacktracks=$singlePaired.sParams.sBestOption.sdMaxBacktracks
-            --strata=$singlePaired.sParams.sBestOption.sdStrata
-          #else:
-            --maxBacktracks=$singlePaired.sParams.sBestOption.snMaxBacktracks
-          #end if
-          --offrate=$singlePaired.sParams.sOffrate
-          --seed=$singlePaired.sParams.sSeed
-        #end if
-      #else:
-        --input1=$singlePaired.pInput1
-        --input2=$singlePaired.pInput2
-        --maxInsert=$singlePaired.pMaxInsert
-        --mateOrient=$singlePaired.pMateOrient
-        --params=$singlePaired.pParams.pSettingsType
-        #if $singlePaired.pParams.pSettingsType == "full":
-          --skip=$singlePaired.pParams.pSkip
-          --alignLimit=$singlePaired.pParams.pAlignLimit
-          --trimH=$singlePaired.pParams.pTrimH
-          --trimL=$singlePaired.pParams.pTrimL
-          --mismatchSeed=$singlePaired.pParams.pMismatchSeed
-          --mismatchQual=$singlePaired.pParams.pMismatchQual
-          --seedLen=$singlePaired.pParams.pSeedLen
-          --rounding=$singlePaired.pParams.pRounding
-          --maqSoapAlign=$singlePaired.pParams.pMaqSoapAlign
-          --minInsert=$singlePaired.pParams.pMinInsert
-          --maxAlignAttempt=$singlePaired.pParams.pMaxAlignAttempt
-          --forwardAlign=$singlePaired.pParams.pForwardAlign
-          --reverseAlign=$singlePaired.pParams.pReverseAlign
-          --tryHard=$singlePaired.pParams.pTryHard
-          --valAlign=$singlePaired.pParams.pValAlign
-          --allValAligns=$singlePaired.pParams.pAllValAligns
-          --suppressAlign=$singlePaired.pParams.pSuppressAlign
-          --best=$singlePaired.pParams.pBestOption.pBest
-          #if $singlePaired.pParams.pBestOption.pBest == "doBest":
-            --maxBacktracks=$singlePaired.pParams.pBestOption.pdMaxBacktracks
-            --strata=$singlePaired.pParams.pBestOption.pdStrata
-          #else:
-            --maxBacktracks=$singlePaired.pParams.pBestOption.pnMaxBacktracks
-          #end if
-          --offrate=$singlePaired.pParams.pOffrate
-          --seed=$singlePaired.pParams.pSeed
-        #end if
-      #end if
-  </command>
-  <inputs>
-    <conditional name="refGenomeSource">
-      <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-        <option value="indexed">Use a built-in index</option>
-        <option value="history">Use one from the history</option>
-      </param>
-      <when value="indexed">
-        <param name="index" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact Galaxy team">
-          <options from_data_table="bowtie_indexes">
-            <filter type="sort_by" column="2" />
-            <validator type="no_options" message="No indexes are available" />
-          </options>
-        </param>
-      </when>
-      <when value="history">
-        <param name="ownFile" type="data" format="bowtie_base_index,fasta" metadata_name="dbkey" label="Select the reference genome" />
-        <conditional name="indexParams">
-          <param name="indexSettings" type="select" label="Choose whether to use Default options for building indices or to Set your own" help="These settings are ignored when using a prebuilt index">
-            <option value="indexPreSet">Default</option>
-            <option value="indexFull">Set your own</option>
-          </param> 
-          <when value="indexPreSet" />
-          <when value="indexFull">
-            <conditional name="autoBehavior">
-              <param name="autoB" type="select" label="Choose to use automatic or specified behavior for some parameters (-a)" help="Allows you to set --packed, --bmax, --bmaxdivn, and --dcv">
-                <option value="auto">Automatic behavior</option>
-                <option value="set">Set values (sets --noauto and allows others to be set)</option>
-              </param>
-              <when value="auto" />
-              <when value="set">
-                <param name="packed" type="select" label="Whether or not to use a packed representation for DNA strings (--packed)">
-                  <option value="unpacked">Use regular representation</option>
-                  <option value="packed">Use packed representation</option>
-                </param>
-                <param name="bmax" type="integer" value="-1" label="Maximum number of suffixes allowed in a block (--bmax)" help="-1 for not specified. Must be at least 1" />
-                <param name="bmaxdivn" type="integer" value="4" label="Maximum number of suffixes allowed in a block as a fraction of the length of the reference (--bmaxdivn)" />
-                <param name="dcv" type="integer" value="1024" label="The period for the difference-cover sample (--dcv)" />
-              </when>
-            </conditional>
-            <param name="nodc" type="select" label="Whether or not to disable the use of the difference-cover sample (--nodc)" help="Suffix sorting becomes quadratic-time in the worst case (with a very repetitive reference)">
-              <option value="dc">Use difference-cover sample</option>
-              <option value="nodc">Disable difference-cover sample</option>
-            </param>
-            <param name="noref" type="select" label="Whether or not to build the part of the reference index used only in paired-end alignment (-r)">
-              <option value="ref">Build all index files</option>
-              <option value="noref">Do not build paired-end alignment index files</option>
-            </param>
-            <param name="offrate" type="integer" value="5" label="How many rows get marked during annotation of some or all of the Burrows-Wheeler rows (-o)" />
-            <param name="ftab" type="integer" value="10" label="The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query (-t)" help="ftab is 4^(n+1) bytes" />
-            <param name="ntoa" type="select" label="Whether or not to convert Ns in the reference sequence to As (--ntoa)">
-              <option value="no">Do not convert Ns</option>
-              <option value="yes">Convert Ns to As</option>
-            </param>
-            <param name="endian" type="select" label="Endianness to use when serializing integers to the index file (--big/--little)" help="Little is most appropriate for Intel- and AMD-based architecture">
-              <option value="little">Little</option>
-              <option value="big">Big</option>
-            </param> 
-            <param name="seed" type="integer" value="-1" label="Seed for the pseudorandom number generator (--seed)" help="Use -1 to use default" />
-            <param name="cutoff" type="integer" value="-1" label="Number of first bases of the reference sequence to index (--cutoff)" help="Use -1 to use default" />
-          </when>  <!-- indexFull -->
-        </conditional>  <!-- indexParams -->
-      </when>  <!-- history -->
-    </conditional>  <!-- refGenomeSource -->
-    <conditional name="singlePaired">
-      <param name="sPaired" type="select" label="Is this library mate-paired?">
-        <option value="single">Single-end</option>
-        <option value="paired">Paired-end</option>
-      </param>
-      <when value="single">
-        <param name="sInput1" type="data" format="fastqsanger,fastqillumina,fastqsolexa" label="FASTQ file" help="Must have ASCII encoded quality scores"/>
-        <conditional name="sParams">
-          <param name="sSettingsType" type="select" label="Bowtie settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
-            <option value="preSet">Commonly used</option>
-            <option value="full">Full parameter list</option>
-            </param>
-          <when value="preSet" />
-          <when value="full">
-            <param name="sSkip" type="integer" value="0" label="Skip the first n reads (-s)" />  
-            <param name="sAlignLimit" type="integer" value="-1" label="Only align the first n reads (-u)" help="-1 for off" />  
-            <param name="sTrimH" type="integer" value="0" label="Trim n bases from high-quality (left) end of each read before alignment (-5)" />
-            <param name="sTrimL" type="integer" value="0" label="Trim n bases from low-quality (right) end of each read before alignment (-3)" />
-            <param name="sMismatchSeed" type="integer" value="2" label="Maximum number of mismatches permitted in the seed (-n)" help="May be 0, 1, 2, or 3" />
-            <param name="sMismatchQual" type="integer" value="70" label="Maximum permitted total of quality values at mismatched read positions (-e)" />
-            <param name="sSeedLen" type="integer" value="28" label="Seed length (-l)" help="Minimum value is 5" />
-            <param name="sRounding" type="select" label="Whether or not to round to the nearest 10 and saturating at 30 (--nomaqround)">
-              <option value="round">Round to nearest 10</option>
-              <option value="noRound">Do not round to nearest 10</option>
-            </param>
-            <param name="sMaqSoapAlign" type="integer" value="-1" label="Number of mismatches for SOAP-like alignment policy (-v)" help="-1 for default MAQ-like alignment policy" />
-            <param name="sTryHard" type="select" label="Whether or not to try as hard as possible to find valid alignments when they exist (-y)" help="Tryhard mode is much slower than regular mode">
-              <option value="noTryHard">Do not try hard</option>
-              <option value="doTryHard">Try hard</option>
-            </param>
-            <param name="sValAlign" type="integer" value="1" label="Report up to n valid alignments per read (-k)" />
-            <param name="sAllValAligns" type="select" label="Whether or not to report all valid alignments per read (-a)">
-              <option value="noAllValAligns">Do not report all valid alignments</option>
-              <option value="doAllValAligns">Report all valid alignments</option>
-            </param>
-            <param name="sSuppressAlign" type="integer" value="-1" label="Suppress all alignments for a read if more than n reportable alignments exist (-m)" help="-1 for no limit" />
-            <param name="sMaxFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads with a number of valid alignments exceeding the limit set with the -m option to a file (--max)" />
-            <param name="sUnmappedFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads that could not be aligned to a file (--un)" />
-            <conditional name="sBestOption">
-              <param name="sBest" type="select" label="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions (--best)" help="Removes all strand bias. Only affects which alignments are reported by Bowtie. Runs slower with best option">
-                <option value="noBest">Do not use best</option>
-                <option value="doBest">Use best</option>
-              </param>
-              <when value="noBest">
-                <param name="snMaxBacktracks" type="integer" value="125" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" />
-              </when>
-              <when value="doBest">
-                <param name="sdMaxBacktracks" type="integer" value="800" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" />
-                <param name="sdStrata" type="select" label="Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable (--strata)">
-                  <option value="noStrata">Do not use strata option</option>
-                  <option value="doStrata">Use strata option</option>
-                </param>
-              </when>
-            </conditional> <!-- bestOption -->
-            <param name="sOffrate" type="integer" value="-1" label="Override the offrate of the index to n (-o)" help="-1 for default" />
-            <param name="sSeed" type="integer" value="-1" label="Seed for pseudo-random number generator (--seed)" help="-1 for default" />
-          </when> <!-- full -->
-        </conditional> <!-- sParams -->
-      </when> <!-- single -->
-      <when value="paired">
-        <param name="pInput1" type="data" format="fastqsanger,fastqillumina,fastqsolexa" label="Forward FASTQ file" help="Must have ASCII encoded quality scores"/>
-        <param name="pInput2" type="data" format="fastqsanger,fastqillumina,fastqsolexa" label="Reverse FASTQ file" help="File format must match the Forward FASTQ file">
-            <options options_filter_attribute="ext" from_parameter="tool.app.datatypes_registry.datatypes_by_extension" transform_lines="obj.keys()">
-               <column name="name" index="0"/>
-               <column name="value" index="0"/>
-               <filter type="param_value" ref="pInput1" ref_attribute="ext" column="0"/> 
-           </options>
-        </param>
-        <param name="pMaxInsert" type="integer" value="1000" label="Maximum insert size for valid paired-end alignments (-X)" />
-        <param name="pMateOrient" type="select" label="The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand (--fr/--rf/--ff)">
-          <option value="fr">FR (for Illumina)</option>
-          <option value="rf">RF</option>
-          <option value="ff">FF (for SOLiD)</option>
-        </param>
-        <conditional name="pParams">
-          <param name="pSettingsType" type="select" label="Bowtie settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
-            <option value="preSet">Commonly used</option>
-            <option value="full">Full parameter list</option>
-          </param>
-          <when value="preSet" />
-          <when value="full">
-            <param name="pSkip" type="integer" value="0" label="Skip the first n pairs (-s)" />  
-            <param name="pAlignLimit" type="integer" value="-1" label="Only align the first n pairs (-u)" help="-1 for off" />  
-            <param name="pTrimH" type="integer" value="0" label="Trim n bases from high-quality (left) end of each read before alignment (-5)" />
-            <param name="pTrimL" type="integer" value="0" label="Trim n bases from low-quality (right) end of each read before alignment (-3)" />
-            <param name="pMismatchSeed" type="integer" value="2" label="Maximum number of mismatches permitted in the seed (-n)" help="May be 0, 1, 2, or 3" />
-            <param name="pMismatchQual" type="integer" value="70" label="Maximum permitted total of quality values at mismatched read positions (-e)" />
-            <param name="pSeedLen" type="integer" value="28" label="Seed length (-l)" help="Minimum value is 5" />
-            <param name="pRounding" type="select" label="Whether or not to round to the nearest 10 and saturating at 30 (--nomaqround)">
-              <option value="round">Round to nearest 10</option>
-              <option value="noRound">Do not round to nearest 10</option>
-            </param>
-            <param name="pMaqSoapAlign" type="integer" value="-1" label="Number of mismatches for SOAP-like alignment policy (-v)" help="-1 for default MAQ-like alignment policy" />
-            <param name="pMinInsert" type="integer" value="0" label="Minimum insert size for valid paired-end alignments (-I)" />
-            <param name="pMaxAlignAttempt" type="integer" value="100" label="Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate (--pairtries)" />
-            <param name="pForwardAlign" type="select" label="Choose whether or not to attempt to align the forward reference strand (--nofw)">
-              <option value="forward">Align against the forward reference strand</option>
-              <option value="noForward">Do not align against the forward reference strand</option>
-            </param>
-            <param name="pReverseAlign" type="select" label="Choose whether or not to align against the reverse-complement reference strand (--norc)">
-              <option value="reverse">Align against the reverse-complement reference strand</option>
-              <option value="noReverse">Do not align against the reverse-complement reference strand</option>
-            </param>
-            <param name="pTryHard" type="select" label="Whether or not to try as hard as possible to find valid alignments when they exist (-y)" help="Tryhard mode is much slower than regular mode">
-              <option value="noTryHard">Do not try hard</option>
-              <option value="doTryHard">Try hard</option>
-            </param>
-            <param name="pValAlign" type="integer" value="1" label="Report up to n valid arguments per pair (-k)" />
-            <param name="pAllValAligns" type="select" label="Whether or not to report all valid alignments per pair (-a)">
-              <option value="noAllValAligns">Do not report all valid alignments</option>
-              <option value="doAllValAligns">Report all valid alignments</option>
-            </param>
-            <param name="pSuppressAlign" type="integer" value="-1" label="Suppress all alignments for a pair if more than n reportable alignments exist (-m)" help="-1 for no limit" />
-            <param name="pMaxFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads with a number of valid alignments exceeding the limit set with the -m option to a file (--max)" />
-            <param name="pUnmappedFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads that could not be aligned to a file (--un)" />
-            <conditional name="pBestOption">
-              <param name="pBest" type="select" label="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions (--best)" help="Removes all strand bias. Only affects which alignments are reported by Bowtie. Runs slower with best option">
-                <option value="noBest">Do not use best</option>
-                <option value="doBest">Use best</option>
-              </param>
-              <when value="noBest">
-                <param name="pnMaxBacktracks" type="integer" value="125" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" />
-              </when>
-              <when value="doBest">
-                <param name="pdMaxBacktracks" type="integer" value="800" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" />
-                <param name="pdStrata" type="select" label="Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable (--strata)">
-                  <option value="noStrata">Do not use strata option</option>
-                  <option value="doStrata">Use strata option</option>
-                </param>
-              </when>
-            </conditional>
-            <param name="pOffrate" type="integer" value="-1" label="Override the offrate of the index to n (-o)" help="-1 for default" />
-            <param name="pSeed" type="integer" value="-1" label="Seed for pseudo-random number generator (--seed)" help="-1 for default" />
-          </when> <!-- full -->
-        </conditional> <!-- pParams -->
-      </when> <!-- paired -->
-    </conditional> <!-- singlePaired -->
-    <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Suppress the header in the output SAM file" help="Bowtie produces SAM with several lines of header information by default" />
-  </inputs>
-  <outputs>
-    <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
-      <actions>
-        <conditional name="refGenomeSource.genomeSource">
-          <when value="indexed">
-            <action type="metadata" name="dbkey">
-              <option type="from_data_table" name="bowtie_indexes" column="1" offset="0">
-                <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-                <filter type="param_value" ref="refGenomeSource.index" column="0"/>
-              </option>
-            </action>
-          </when>
-          <when value="history">
-            <action type="metadata" name="dbkey">
-              <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-    <data format="fastq" name="output_suppressed_reads_l" label="${tool.name} on ${on_string}: suppressed reads (L)">
-      <filter>((
-          singlePaired['sPaired'] == "single" and
-          singlePaired['sParams']['sSettingsType'] == "full" and
-          singlePaired['sParams']['sMaxFile'] is True
-        ) or (
-          singlePaired['sPaired'] == "paired" and
-          singlePaired['pParams']['pSettingsType'] == "full" and
-          singlePaired['pParams']['pMaxFile'] is True
-        ))
-      </filter>
-      <actions>
-        <conditional name="singlePaired.sPaired">
-          <when value="single">
-            <action type="format">
-              <option type="from_param" name="singlePaired.sInput1" param_attribute="ext" />
-            </action>
-          </when>
-          <when value="paired">
-            <action type="format">
-              <option type="from_param" name="singlePaired.pInput1" param_attribute="ext" />
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-    <data format="fastq" name="output_suppressed_reads_r" label="${tool.name} on ${on_string}: suppressed reads (R)">
-      <filter>singlePaired['sPaired'] == "paired"</filter>
-      <filter>singlePaired['pParams']['pSettingsType'] == "full"</filter>
-      <filter>singlePaired['pParams']['pMaxFile'] is True</filter>
-      <actions>
-        <conditional name="singlePaired.sPaired">
-          <when value="single">
-            <action type="format">
-              <option type="from_param" name="singlePaired.sInput1" param_attribute="ext" />
-            </action>
-          </when>
-          <when value="paired">
-            <action type="format">
-              <option type="from_param" name="singlePaired.pInput1" param_attribute="ext" />
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-    <data format="fastq" name="output_unmapped_reads_l" label="${tool.name} on ${on_string}: unmapped reads (L)">
-      <filter>
-        ((
-          singlePaired['sPaired'] == "single" and
-          singlePaired['sParams']['sSettingsType'] == "full" and
-          singlePaired['sParams']['sUnmappedFile'] is True
-        ) or (
-          singlePaired['sPaired'] == "paired" and
-          singlePaired['pParams']['pSettingsType'] == "full" and
-          singlePaired['pParams']['pUnmappedFile'] is True
-        ))
-      </filter>
-      <actions>
-        <conditional name="singlePaired.sPaired">
-          <when value="single">
-            <action type="format">
-              <option type="from_param" name="singlePaired.sInput1" param_attribute="ext" />
-            </action>
-          </when>
-          <when value="paired">
-            <action type="format">
-              <option type="from_param" name="singlePaired.pInput1" param_attribute="ext" />
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-    <data format="fastq" name="output_unmapped_reads_r" label="${tool.name} on ${on_string}: unmapped reads (R)">
-      <filter>singlePaired['sPaired'] == "paired"</filter>
-      <filter>singlePaired['pParams']['pSettingsType'] == "full"</filter>
-      <filter>singlePaired['pParams']['pUnmappedFile'] is True</filter>
-      <actions>
-        <conditional name="singlePaired.sPaired">
-          <when value="single">
-            <action type="format">
-              <option type="from_param" name="singlePaired.sInput1" param_attribute="ext" />
-            </action>
-          </when>
-          <when value="paired">
-            <action type="format">
-              <option type="from_param" name="singlePaired.pInput1" param_attribute="ext" />
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <!--
-      Bowtie command:
-      bowtie -q -p 4 -S +sam-nohead chrM_base test-data/bowtie_in2.fastqsanger > bowtie_out6_u.sam
-      sort bowtie_out6_u.sam > bowtie_out6.sam
-      -p is the number of threads, which is hardcoded above. You need to replace the + with 2 dashes. 
-      chrM_base needs to be the base location/name of the index files.
-      -->
-      <param name="genomeSource" value="indexed" />
-      <!-- this is the backwards-compatible "unique value" for this index, not an actual path -->
-      <param name="index" value="equCab2chrM" />
-      <param name="sPaired" value="single" />
-      <param name="sInput1" ftype="fastqsanger" value="bowtie_in2.fastqsanger" />
-      <param name="sSettingsType" value="preSet" />
-      <param name="suppressHeader" value="true" />
-      <output name="output" ftype="sam" file="bowtie_out6.sam" sort="True" />
-    </test>
-    <test>
-      <!--
-      Bowtie command:
-      bowtie-build -f test-data/phiX.fasta phiX_base
-      bowtie -q -X 1000 +ff -p 4 -S +sam-nohead -n 2 -e 70 -l 28 +pairtries 100 +maxbts 800 +best +un bowtie_out8_u.fastq phiX_base -1 test-data/bowtie_in5.fastqsanger -2 test-data/bowtie_in6.fastqsanger > bowtie_out7_u.sam
-      sort bowtie_out7_u.sam > bowtie_out7.sam
-      sort bowtie_out8_u_1.fastq > bowtie_out8_1.fastq
-      sort bowtie_out8_u_2.fastq > bowtie_out8_2.fastq
-      Then you also need to modify bowtie_out8_1.fastq and bowtie_out8_2.fastq so that all @ lines come before the sequence lines.
-      -p is the number of threads, hardcoded above. You need to replace the + with 2 dashes.
-      The two unmapped output files will be named bowtie_out8_1.fastq and bowtie_out8_2.fastq.
-      phiX_base is the index files' location/base name.
-      -->
-      <param name="genomeSource" value="history" />
-      <param name="ownFile" value="phiX.fasta" />
-      <param name="indexSettings" value="indexPreSet" />
-      <param name="sPaired" value="paired" />
-      <param name="pInput1" ftype="fastqsanger" value="bowtie_in5.fastqsanger" />
-      <param name="pInput2" ftype="fastqsanger" value="bowtie_in6.fastqsanger" />
-      <param name="pMaxInsert" value="1000" />
-      <param name="pMateOrient" value="ff" />
-      <param name="pSettingsType" value="full" />
-      <param name="pSkip" value="0" />
-      <param name="pAlignLimit" value="-1" />
-      <param name="pTrimH" value="0" />
-      <param name="pTrimL" value="0" />
-      <param name="pMismatchSeed" value="2" />
-      <param name="pMismatchQual" value="70" />
-      <param name="pSeedLen" value="28" />
-      <param name="pRounding" value="round" />
-      <param name="pMaqSoapAlign" value="-1" />
-      <param name="pMinInsert" value="0" />
-      <param name="pMaxAlignAttempt" value="100" />
-      <param name="pForwardAlign" value="forward" />
-      <param name="pReverseAlign" value="reverse" />
-      <param name="pTryHard" value="noTryHard" />
-      <param name="pValAlign" value="1" />
-      <param name="pAllValAligns" value="noAllValAligns" />
-      <param name="pSuppressAlign" value="-1" />
-      <param name="pUnmappedFile" value="true" />
-      <param name="pMaxFile" value="false" />
-      <param name="pBest" value="doBest" />
-      <param name="pdMaxBacktracks" value="800" />
-      <param name="pdStrata" value="noStrata" />
-      <param name="pOffrate" value="-1" />
-      <param name="pSeed" value="-1" />
-      <param name="suppressHeader" value="true" />
-      <output name="output" ftype="sam" file="bowtie_out7.sam" sort="True" />
-      <output name="output_unmapped_reads_l" ftype="fastqsanger" file="bowtie_out8_1.fastq" sort="True" />
-      <output name="output_unmapped_reads_r" ftype="fastqsanger" file="bowtie_out8_2.fastq" sort="True" />
-    </test>
-    <!-- start testing of non-sanger variant fastq reads -->
-    <test>
-      <param name="genomeSource" value="history" />
-      <param name="ownFile" value="phiX.fasta" />
-      <param name="indexSettings" value="indexPreSet" />
-      <param name="sPaired" value="paired" />
-      <param name="pInput1" ftype="fastqillumina" value="bowtie_in5.fastqillumina" />
-      <param name="pInput2" ftype="fastqillumina" value="bowtie_in6.fastqillumina" />
-      <param name="pMaxInsert" value="1000" />
-      <param name="pMateOrient" value="ff" />
-      <param name="pSettingsType" value="full" />
-      <param name="pSkip" value="0" />
-      <param name="pAlignLimit" value="-1" />
-      <param name="pTrimH" value="0" />
-      <param name="pTrimL" value="0" />
-      <param name="pMismatchSeed" value="2" />
-      <param name="pMismatchQual" value="70" />
-      <param name="pSeedLen" value="28" />
-      <param name="pRounding" value="round" />
-      <param name="pMaqSoapAlign" value="-1" />
-      <param name="pMinInsert" value="0" />
-      <param name="pMaxAlignAttempt" value="100" />
-      <param name="pForwardAlign" value="forward" />
-      <param name="pReverseAlign" value="reverse" />
-      <param name="pTryHard" value="noTryHard" />
-      <param name="pValAlign" value="1" />
-      <param name="pAllValAligns" value="noAllValAligns" />
-      <param name="pSuppressAlign" value="-1" />
-      <param name="pUnmappedFile" value="true" />
-      <param name="pMaxFile" value="false" />
-      <param name="pBest" value="doBest" />
-      <param name="pdMaxBacktracks" value="800" />
-      <param name="pdStrata" value="noStrata" />
-      <param name="pOffrate" value="-1" />
-      <param name="pSeed" value="-1" />
-      <param name="suppressHeader" value="true" />
-      <output name="output" ftype="sam" file="bowtie_out7.sam" sort="True" />
-      <output name="output_unmapped_reads_l" ftype="fastqillumna" file="bowtie_out8_1.fastqillumina.sorted" sort="True" />
-      <output name="output_unmapped_reads_r" ftype="fastqillumna" file="bowtie_out8_2.fastqillumina.sorted" sort="True" />
-    </test>
-    <test>
-      <param name="genomeSource" value="history" />
-      <param name="ownFile" value="phiX.fasta" />
-      <param name="indexSettings" value="indexPreSet" />
-      <param name="sPaired" value="paired" />
-      <param name="pInput1" ftype="fastqsolexa" value="bowtie_in5.fastqsolexa" />
-      <param name="pInput2" ftype="fastqsolexa" value="bowtie_in6.fastqsolexa" />
-      <param name="pMaxInsert" value="1000" />
-      <param name="pMateOrient" value="ff" />
-      <param name="pSettingsType" value="full" />
-      <param name="pSkip" value="0" />
-      <param name="pAlignLimit" value="-1" />
-      <param name="pTrimH" value="0" />
-      <param name="pTrimL" value="0" />
-      <param name="pMismatchSeed" value="2" />
-      <param name="pMismatchQual" value="70" />
-      <param name="pSeedLen" value="28" />
-      <param name="pRounding" value="round" />
-      <param name="pMaqSoapAlign" value="-1" />
-      <param name="pMinInsert" value="0" />
-      <param name="pMaxAlignAttempt" value="100" />
-      <param name="pForwardAlign" value="forward" />
-      <param name="pReverseAlign" value="reverse" />
-      <param name="pTryHard" value="noTryHard" />
-      <param name="pValAlign" value="1" />
-      <param name="pAllValAligns" value="noAllValAligns" />
-      <param name="pSuppressAlign" value="-1" />
-      <param name="pUnmappedFile" value="true" />
-      <param name="pMaxFile" value="false" />
-      <param name="pBest" value="doBest" />
-      <param name="pdMaxBacktracks" value="800" />
-      <param name="pdStrata" value="noStrata" />
-      <param name="pOffrate" value="-1" />
-      <param name="pSeed" value="-1" />
-      <param name="suppressHeader" value="true" />
-      <output name="output" ftype="sam" file="bowtie_out7.sam" sort="True" />
-      <output name="output_unmapped_reads_l" ftype="fastqsolexa" file="bowtie_out8_1.fastqsolexa.sorted" sort="True" />
-      <output name="output_unmapped_reads_r" ftype="fastqsolexa" file="bowtie_out8_2.fastqsolexa.sorted" sort="True" />
-    </test>
-    <!-- end testing of non-sanger variant fastq reads -->    
-    <test>
-      <!--
-      Bowtie command:
-      bowtie -q -p 4 -S +sam-nohead -n 2 -e 70 -l 28 +maxbts 125 -y -k 1 chrM_base test-data/bowtie_in2.fastqsanger > bowtie_out9_u.sam
-      sort bowtie_out9_u.sam > bowtie_out9.sam
-      -p is the number of threads, hardcoded above. You need to replace the + with 2 dashes.
-      chrM_base is the index files' location/base name. 
-      -->
-      <param name="genomeSource" value="indexed" />
-      <!-- this is the backwards-compatible "unique value" for this index, not an actual path -->
-      <param name="index" value="equCab2chrM" />
-      <param name="sPaired" value="single" />
-      <param name="sInput1" ftype="fastqsanger" value="bowtie_in2.fastqsanger" />
-      <param name="sSettingsType" value="full" />
-      <param name="sSkip" value="0" />
-      <param name="sAlignLimit" value="-1" />
-      <param name="sTrimH" value="0" />
-      <param name="sTrimL" value="0" />
-      <param name="sMismatchSeed" value="2" />
-      <param name="sMismatchQual" value="70" />
-      <param name="sSeedLen" value="28" />
-      <param name="sRounding" value="round" />
-      <param name="sMaqSoapAlign" value="-1" />
-      <param name="sTryHard" value="doTryHard" />
-      <param name="sValAlign" value="1" />
-      <param name="sAllValAligns" value="noAllValAligns" />
-      <param name="sSuppressAlign" value="-1" />
-      <param name="sUnmappedFile" value="false" />
-      <param name="sMaxFile" value="false" />
-      <param name="sBest" value="noBest" />
-      <param name="snMaxBacktracks" value="125" />
-      <param name="sOffrate" value="-1" />
-      <param name="sSeed" value="-1" />
-      <param name="suppressHeader" value="true" />
-      <output name="output" ftype="sam" file="bowtie_out9.sam" sort="True" />
-    </test>
-    <test>
-      <!--
-      Bowtie command:
-      bowtie-build +offrate 5 +ftabchars 10 +little -f test-data/phiX.fasta phiX_base
-      bowtie -q -X 1000 +ff -p 4 -S +sam-nohead phiX_base -1 test-data/bowtie_in5.fastqsanger -2 test-data/bowtie_in6.fastqsanger > bowtie_out10_u.sam
-      sort bowtie_out10_u.sam > bowtie_out10.sam
-      -p is the number of threads, hardcoded above. You need to replace the + with 2 dashes.
-      phiX_base is the index files' location/base name.
-      -->
-      <param name="genomeSource" value="history" />
-      <param name="ownFile" value="phiX.fasta" />
-      <param name="indexSettings" value="indexFull" />
-      <param name="autoB" value="auto" />
-      <param name="nodc" value="dc" />
-      <param name="noref" value="ref" />
-      <param name="offrate" value="5" />
-      <param name="ftab" value="10" />
-      <param name="ntoa" value="no" />
-      <param name="endian" value="little" />
-      <param name="seed" value="-1" />
-      <param name="cutoff" value="-1" />
-      <param name="sPaired" value="paired" />
-      <param name="pInput1" ftype="fastqsanger" value="bowtie_in5.fastqsanger" />
-      <param name="pInput2" ftype="fastqsanger" value="bowtie_in6.fastqsanger" />
-      <param name="pMaxInsert" value="1000" />
-      <param name="pMateOrient" value="ff" />
-      <param name="pSettingsType" value="preSet" />
-      <param name="suppressHeader" value="true" />
-      <output name="output" ftype="sam" file="bowtie_out10.sam" sort="True" />
-    </test>
-  </tests>
-
-  <help>
-
-**What it does**
-
-Bowtie_ is a short read aligner designed to be ultrafast and memory-efficient. It is developed by Ben Langmead and Cole Trapnell. Please cite: Langmead B, Trapnell C, Pop M, Salzberg SL. Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. Genome Biology 10:R25.
-
-.. _Bowtie: http://bowtie-bio.sourceforge.net/index.shtml
-
-------
-
-**Know what you are doing**
-
-.. class:: warningmark
-
-There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
-
- .. __: http://bowtie-bio.sourceforge.net/index.shtml
-
-------
-
-**Input formats**
-
-Bowtie accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files.
-
-------
-
-**A Note on Built-in Reference Genomes**
-
-Some genomes have multiple variants. If only one "type" of genome is listed, it is the Full version, which contains everything that came in the original genome data download (possibly with mitochondrial and plasmid DNA added if it wasn't already included). The Full version is available for every genome. Some genomes also come in the Canonical variant, which contains only the "canonical" (well-defined) chromosomes or segments, such as chr1-chr22, chrX, chrY, and chrM for human. Other variations include gender. These come in the canonical form only, so the general Canonical variant is actually Canonical Female, and the other is Canonical Male (identical to Female excluding chrX).
-
-------
-
-**Outputs**
-
-The output is in SAM format, and has the following columns::
-
-    Column  Description
-  --------  --------------------------------------------------------   
-   1 QNAME  Query (pair) NAME
-   2 FLAG   bitwise FLAG
-   3 RNAME  Reference sequence NAME
-   4 POS    1-based leftmost POSition/coordinate of clipped sequence
-   5 MAPQ   MAPping Quality (Phred-scaled)
-   6 CIGAR  extended CIGAR string
-   7 MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
-   8 MPOS   1-based Mate POSition
-   9 ISIZE  Inferred insert SIZE
-  10 SEQ    query SEQuence on the same strand as the reference
-  11 QUAL   query QUALity (ASCII-33 gives the Phred base quality)
-  12 OPT    variable OPTional fields in the format TAG:VTYPE:VALUE
-  
-The flags are as follows::
-
-    Flag  Description
-  ------  -------------------------------------
-  0x0001  the read is paired in sequencing
-  0x0002  the read is mapped in a proper pair
-  0x0004  the query sequence itself is unmapped
-  0x0008  the mate is unmapped
-  0x0010  strand of the query (1 for reverse)
-  0x0020  strand of the mate
-  0x0040  the read is the first read in a pair
-  0x0080  the read is the second read in a pair
-  0x0100  the alignment is not primary
-
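-Flags combine by bitwise OR: for example, a read that is paired (0x0001), mapped in a proper pair (0x0002), whose mate is on the reverse strand (0x0020), and which is the first read in its pair (0x0040) carries FLAG 0x0063, i.e. 99 in decimal. Below is a minimal Python sketch of decoding a FLAG value (the short bit names are illustrative, not part of the SAM specification)::
-
-  flag = 99   # 0x0063
-  bits = { 0x0001: 'paired', 0x0002: 'proper pair', 0x0004: 'unmapped',
-           0x0008: 'mate unmapped', 0x0010: 'reverse strand',
-           0x0020: 'mate reverse strand', 0x0040: 'first in pair',
-           0x0080: 'second in pair', 0x0100: 'not primary' }
-  for bit, name in sorted( bits.items() ):
-      if flag // bit % 2:   # a flag bit is set iff (flag // bit) is odd
-          print( name )     # prints: paired, proper pair, mate reverse strand, first in pair
-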
-It looks like this (scroll sideways to see the entire example)::
-
-  QNAME	FLAG	RNAME	POS	MAPQ	CIGAR	MRNM	MPOS	ISIZE	SEQ	QUAL	OPT
-  HWI-EAS91_1_30788AAXX:1:1:1761:343	4	*	0	0	*	*	0	0	AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG	hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
-  HWI-EAS91_1_30788AAXX:1:1:1578:331	4	*	0	0	*	*	0	0	GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG	hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
-
--------
-
-**Bowtie settings**
-
-All of the options have a default value. You can change any of them. Most of the options in Bowtie have been implemented here.
-
-------
-
-**Bowtie parameter list**
-
-This is an exhaustive list of Bowtie options:
-
-For indexing (bowtie-build)::
-
-  -a                 No auto behavior. Disable the default behavior where bowtie automatically 
-                     selects values for --bmax/--bmaxdivn/--dcv/--packed parameters according 
-                     to the memory available. [off]
-  --packed           Packing. Use a packed representation for DNA strings. [auto] 
-  --bmax INT         Suffix maximum. The maximum number of suffixes allowed in a block. [auto]
-  --bmaxdivn INT     Suffix maximum fraction. The maximum number of suffixes allowed in a block 
-                     expressed as a fraction of the length of the reference. [4]
-  --dcv INT          Difference-cover sample. Use INT as the period for the difference-cover 
-                     sample. [1024]
-  --nodc             No difference-cover sample. Disable the difference-cover sample. [off]
-  -r                 No reference indexes. Do not build the NAME.3.ebwt and NAME.4.ebwt portions 
-                     of the index. Used only for paired-end alignment. [off]
-  -o                 Offrate. How many Burrows-Wheeler rows get marked by the indexer. The 
-                     indexer will mark every 2^INT rows. The marked rows correspond to rows on 
-                     the genome. [5]
-  -t INT             Ftab. The lookup table used to calculate an initial Burrows-Wheeler range 
-                     with respect to the first INT characters of the query. The ftab has size 
-                     4^(INT+1) bytes. [10]
-  --ntoa             N conversion. Convert Ns to As before building the index. Otherwise, Ns are 
-                     simply excluded from the index and Bowtie will not find alignments that 
-                     overlap them. [off]
-  --big              Endianness. Endianness to use when serializing integers to the index file. [off]
-  --little           Endianness. [--little]
-  --seed INT         Random seed. Use INT as the seed for the pseudo-random number generator. [off]
-  --cutoff INT       Cutoff. Index only the first INT bases of the reference sequences (cumulative 
-                     across sequences) and ignore the rest. [off]
-
-For aligning (bowtie)::
-
-  -s INT             Skip. Do not align the first INT reads or pairs in the input. [off]
-  -u INT             Align limit. Only align the first INT reads/pairs from the input. [no limit]
-  -5 INT             High-quality trim. Trim INT bases from the high-quality (left) end of each 
-                     read before alignment. [0]
-  -3 INT             Low-quality trim. Trim INT bases from the low-quality (right) end of each 
-                     read before alignment. [0]
-  -n INT             Mismatch seed. Maximum number of mismatches permitted in the seed (defined 
-                     with seed length option). Can be 0, 1, 2, or 3. [2]
-  -e INT             Mismatch quality. Maximum permitted total of quality values at mismatched 
-                     read positions. Bowtie rounds quality values to the nearest 10 and saturates 
-                     at 30. [70]
-  -l INT             Seed length. The number of bases on the high-quality end of the read to 
-                     which the -n ceiling applies. Must be at least 5. [28]
-  --nomaqround       Suppress MAQ rounding. Values are internally rounded to the nearest 10 and 
-                     saturate at 30. This option turns off that rounding. [off] 
-  -v INT             MAQ- or SOAP-like alignment policy. This option turns off the default 
-                     MAQ-like alignment policy in favor of a SOAP-like one. End-to-end alignments 
-                     with at most INT mismatches. [off]
-  -I INT             Minimum insert. The minimum insert size for valid paired-end alignments. 
-                     The check is performed on untrimmed reads if -5 or -3 is used. [0]
-  -X INT             Maximum insert. The maximum insert size for valid paired-end alignments. 
-                     The check is performed on untrimmed reads if -5 or -3 is used. [250]
-  --fr               Mate orientation. The upstream/downstream mate orientations for a valid 
-                     paired-end alignment against the forward reference strand. [--fr]
-  --rf               Mate orientation. [off]
-  --ff               Mate orientation. [off]
-  --pairtries INT    Maximum alignment attempts for paired-end data. [100] 
-  --nofw             No forward aligning. Choosing this option means that Bowtie will not attempt 
-                     to align against the forward reference strand. [off]
-  --norc             No reverse-complement aligning. Setting this will mean that Bowtie will not 
-                     attempt to align against the reverse-complement reference strand. [off]
-  --un FILENAME      Write all reads that could not be aligned to FILENAME. [off]
-  --max FILENAME     Write all reads whose number of valid alignments exceeds the limit
-                     set with the -m option to FILENAME. [off]
-  --maxbts INT       Maximum backtracks. The maximum number of backtracks permitted when aligning 
-                     a read in -n 2 or -n 3 mode. [125 without --best] [800 with --best]
-  -y                 Try hard. Try as hard as possible to find valid alignments when they exist, 
-                     including paired-end alignments. [off]
-  --chunkmbs INT     Thread memory. The number of megabytes of memory a given thread is given to 
-                     store path descriptors in --best mode. [32]
-  -k INT             Valid alignments. The number of valid alignments per read or pair. [off] 
-  -a                 All valid alignments. Choosing this means that all valid alignments per read 
-                     or pair will be reported. [off]
-  -m INT             Suppress alignments. Suppress all alignments for a particular read or pair 
-                     if more than INT reportable alignments exist for it. [no limit]
-  --best             Best mode. Make Bowtie guarantee that reported singleton alignments are 
-                     "best" in terms of stratum (the number of mismatches) and quality values at 
-                     mismatched positions. [off]
-  --strata           Best strata. When running in best mode and alignments fall into more than 
-                     one stratum, report only those that fall into the best stratum. [off]
-  -o INT             Offrate override. Override the offrate of the index with INT. Some row 
-                     markings are discarded when the index is read into memory. INT must be 
-                     greater than the value used to build the index (default: 5). [off]
-  --seed INT         Random seed. Use INT as the seed for the pseudo-random number generator. [off]
-  --snpphred INT     Use INT as the SNP penalty for decoding colorspace alignments. It should 
-                     correspond to the true ratio of SNPs per base in the subject genome. [see --snpfrac]
-  --snpfrac DEC      Use DEC as the estimated ratio of SNPs per base when decoding colorspace 
-                     alignments. [0.001]
-  --col-keepends     Keep the extreme-end nucleotides and qualities when decoding colorspace 
-                     alignments. [off]
-
-  </help>
-</tool>
--- a/tools/sr_mapping/bwa_color_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,482 +0,0 @@
-<tool id="bwa_color_wrapper" name="Map with BWA for SOLiD" version="1.0.1">
-  <description></description>
-  <parallelism method="basic"></parallelism>
-  <command interpreter="python">
-    bwa_wrapper.py 
-      --threads="4"
-      --color-space
-
-      ## reference source
-      --fileSource=$genomeSource.refGenomeSource
-      #if $genomeSource.refGenomeSource == "history":
-        ##build index on the fly
-        --ref="${genomeSource.ownFile}"
-        --dbkey=$dbkey
-      #else:
-        ##use precomputed indexes
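-        ## the filter expression below selects the bwa_indexes_color row whose
-        ## key column matches the chosen index and takes its last column (the path)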
-        --ref="${ filter( lambda x: str( x[0] ) == str( $genomeSource.indices ), $__app__.tool_data_tables[ 'bwa_indexes_color' ].get_fields() )[0][-1] }"
-        --do_not_build_index
-      #end if
-
-      ## input file(s)
-      --input1=$paired.input1
-      #if $paired.sPaired == "paired":
-        --input2=$paired.input2
-      #end if
-
-      ## output file
-      --output=$output
-
-      ## run parameters
-      --genAlignType=$paired.sPaired
-      --params=$params.source_select
-      #if $params.source_select != "pre_set":
-        --maxEditDist=$params.maxEditDist
-        --fracMissingAligns=$params.fracMissingAligns
-        --maxGapOpens=$params.maxGapOpens
-        --maxGapExtens=$params.maxGapExtens
-        --disallowLongDel=$params.disallowLongDel
-        --disallowIndel=$params.disallowIndel
-        --seed=$params.seed
-        --maxEditDistSeed=$params.maxEditDistSeed
-        --mismatchPenalty=$params.mismatchPenalty
-        --gapOpenPenalty=$params.gapOpenPenalty
-        --gapExtensPenalty=$params.gapExtensPenalty
-        --suboptAlign=$params.suboptAlign
-        --noIterSearch=$params.noIterSearch
-        --outputTopN=$params.outputTopN
-        --outputTopNDisc=$params.outputTopNDisc
-        --maxInsertSize=$params.maxInsertSize
-        --maxOccurPairing=$params.maxOccurPairing
-        #if $params.readGroup.specReadGroup == "yes"
-          --rgid="$params.readGroup.rgid"
-          --rgcn="$params.readGroup.rgcn"
-          --rgds="$params.readGroup.rgds"
-          --rgdt="$params.readGroup.rgdt"
-          --rgfo="$params.readGroup.rgfo"
-          --rgks="$params.readGroup.rgks"
-          --rglb="$params.readGroup.rglb"
-          --rgpg="$params.readGroup.rgpg"
-          --rgpi="$params.readGroup.rgpi"
-          --rgpl="$params.readGroup.rgpl"
-          --rgpu="$params.readGroup.rgpu"
-          --rgsm="$params.readGroup.rgsm"
-        #end if
-      #end if
-
-      ## suppress output SAM header
-      --suppressHeader=$suppressHeader
-  </command>
-  <requirements>
-    <requirement type="package">bwa</requirement>
-  </requirements>
-  <inputs>
-    <conditional name="genomeSource">
-      <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
-        <option value="indexed">Use a built-in index</option>
-        <option value="history">Use one from the history</option>
-      </param>
-      <when value="indexed">
-        <param name="indices" type="select" label="Select a reference genome">
-          <options from_data_table="bwa_indexes_color">
-            <filter type="sort_by" column="2" />
-            <validator type="no_options" message="No indexes are available for the selected input dataset" />
-          </options>
-        </param>
-      </when>
-      <when value="history">
-        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
-      </when>
-    </conditional>
-    <conditional name="paired">
-      <param name="sPaired" type="select" label="Is this library mate-paired?">
-        <option value="single">Single-end</option>
-        <option value="paired">Paired-end</option>
-      </param>
-      <when value="single">
-        <param name="input1" type="data" format="fastqcssanger" label="FASTQ file (Nucleotide-space recoded from color-space)">
-          <help>Convert color-space data to nucleotide-space (see help section below for steps). Must have Sanger-scaled quality values with ASCII offset 33</help>
-        </param>
-      </when>
-      <when value="paired">
-        <param name="input1" type="data" format="fastqcssanger" label="Forward FASTQ file (Nucleotide-space recoded from color-space)" help="Must have Sanger-scaled quality values with ASCII offset 33">
-          <help>Convert color-space data to nucleotide-space (see help section below for steps). Must have Sanger-scaled quality values with ASCII offset 33</help>
-        </param>
-        <param name="input2" type="data" format="fastqcssanger" label="Reverse FASTQ file (Nucleotide-space recoded from color-space)" help="Must have Sanger-scaled quality values with ASCII offset 33">
-          <help>Convert color-space data to nucleotide-space (see help section below for steps). Must have Sanger-scaled quality values with ASCII offset 33</help>
-        </param>
-      </when>
-    </conditional>
-    <conditional name="params">
-      <param name="source_select" type="select" label="BWA settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
-        <option value="pre_set">Commonly Used</option>
-        <option value="full">Full Parameter List</option>
-      </param>
-      <when value="pre_set" />
-      <when value="full">
-        <param name="maxEditDist" type="integer" value="0" label="Maximum edit distance (aln -n)" help="Enter this value OR a fraction of missing alignments, not both" />
-        <param name="fracMissingAligns" type="float" value="0.04" label="Fraction of missing alignments given 2% uniform base error rate (aln -n)" help="Enter this value OR maximum edit distance, not both" />
-        <param name="maxGapOpens" type="integer" value="1" label="Maximum number of gap opens (aln -o)" />
-        <param name="maxGapExtens" type="integer" value="-1" label="Maximum number of gap extensions (aln -e)" help="-1 for k-difference mode (disallowing long gaps)" />
-        <param name="disallowLongDel" type="integer" value="16" label="Disallow long deletion within [value] bp towards the 3'-end (aln -d)" />
-        <param name="disallowIndel" type="integer" value="5" label="Disallow insertion/deletion within [value] bp towards the end (aln -i)" />
-        <param name="seed" type="integer" value="-1" label="Number of first subsequences to take as seed (aln -l)" help="Enter -1 for infinity" />
-        <param name="maxEditDistSeed" type="integer" value="2" label="Maximum edit distance in the seed (aln -k)" />
-        <param name="mismatchPenalty" type="integer" value="3" label="Mismatch penalty (aln -M)" help="BWA will not search for suboptimal hits with a score lower than [value]" />
-        <param name="gapOpenPenalty" type="integer" value="11" label="Gap open penalty (aln -O)" />
-        <param name="gapExtensPenalty" type="integer" value="4" label="Gap extension penalty (aln -E)" />
-        <param name="suboptAlign" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Proceed with suboptimal alignments even if the top hit is a repeat (aln -R)" help="For paired-end reads only. By default, BWA only searches for suboptimal alignments if the top hit is unique. Using this option has no effect on accuracy for single-end reads. It is mainly designed for improving the alignment accuracy of paired-end reads. However, the pairing procedure will be slowed down, especially for very short reads (~32bp)" />
-        <param name="noIterSearch" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Disable iterative search (aln -N)" help="All hits with no more than maxDiff differences will be found. This mode is much slower than the default" />
-        <param name="outputTopN" type="integer" value="3" label="Maximum number of alignments to output in the XA tag for reads paired properly (samse/sampe -n)" help="If a read has more than INT hits, the XA tag will not be written" />
-        <param name="outputTopNDisc" type="integer" value="10" label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) (sampe -N)" help="For paired-end reads only. If a read has more than INT hits, the XA tag will not be written" />
-        <param name="maxInsertSize" type="integer" value="500" label="Maximum insert size for a read pair to be considered as being mapped properly (sampe -a)" help="For paired-end reads only. Only used when there are not enough good alignments to infer the distribution of insert sizes" />
-        <param name="maxOccurPairing" type="integer" value="100000" label="Maximum occurrences of a read for pairing (sampe -o)" help="For paired-end reads only. A read with more occurrences will be treated as a single-end read. Reducing this parameter helps faster pairing" />
-        <conditional name="readGroup">
-          <param name="specReadGroup" type="select" label="Specify the read group for this file? (samse/sampe -r)">
-            <option value="yes">Yes</option>
-            <option value="no" selected="True">No</option>
-          </param>
-          <when value="yes">
-            <param name="rgid" type="text" size="25" label="Read group identifier (ID). Each @RG line must have a unique ID. The value of ID is used in the RG 
-tags of alignment records. Must be unique among all read groups in header section." help="Required if RG specified. Read group 
-IDs may be modified when merging SAM files in order to handle collisions." />
-            <param name="rgcn" type="text" size="25" label="Sequencing center that produced the read (CN)" help="Optional" />
-            <param name="rgds" type="text" size="25" label="Description (DS)" help="Optional" />
-            <param name="rgdt" type="text" size="25" label="Date that run was produced (DT)" help="Optional. ISO8601 format date or date/time, like YYYY-MM-DD" />
-            <param name="rgfo" type="text" size="25" label="Flow order (FO). The array of nucleotide bases that correspond to the nucleotides used for each 
-flow of each read." help="Optional. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by 
-various other characters. Format : /\*|[ACMGRSVTWYHKDBN]+/" />
-            <param name="rgks" type="text" size="25" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" help="Optional" />
-            <param name="rglb" type="text" size="25" label="Library name (LB)" help="Required if RG specified" />
-            <param name="rgpg" type="text" size="25" label="Programs used for processing the read group (PG)" help="Optional" />
-            <param name="rgpi" type="text" size="25" label="Predicted median insert size (PI)" help="Optional" />
-            <param name="rgpl" type="text" size="25" label="Platform/technology used to produce the reads (PL)" help="Required if RG specified. Valid values : CAPILLARY, LS454, ILLUMINA, 
-SOLID, HELICOS, IONTORRENT and PACBIO" />
-            <param name="rgpu" type="text" size="25" label="Platform unit (PU)" help="Optional. Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" />
-            <param name="rgsm" type="text" size="25" label="Sample (SM)" help="Required if RG specified. Use pool name where a pool is being sequenced" />
-          </when>
-          <when value="no" />
-        </conditional>
-      </when>
-    </conditional>
-    <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Suppress the header in the output SAM file" help="BWA produces SAM with several lines of header information" />
-  </inputs>
-  <outputs>
-    <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
-      <actions>
-        <conditional name="genomeSource.refGenomeSource">
-          <when value="indexed">
-            <action type="metadata" name="dbkey">
-              <option type="from_data_table" name="bwa_indexes_color" column="1">
-                <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-                <filter type="param_value" ref="genomeSource.indices" column="0" />
-              </option>
-            </action>
-          </when>
-          <when value="history">
-            <action type="metadata" name="dbkey">
-              <option type="from_param" name="genomeSource.ownFile" param_attribute="dbkey" />
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <!--
-      BWA commands:
-      cp test-data/hg19chrX_midpart.fasta hg19chrX_midpart.fasta
-      bwa index -c -a is hg19chrX_midpart.fasta
-      bwa aln -t 4 -c hg19chrX_midpart.fasta test-data/bwa_wrapper_in4.fastqcssanger > bwa_wrapper_out4.sai
-      bwa samse hg19chrX_midpart.fasta bwa_wrapper_out4.sai test-data/bwa_wrapper_in4.fastqcssanger > bwa_wrapper_out4.u.sam
-      hg19chrX_midpart.fasta is the prefix for the reference files (hg19chrX_midpart.fasta.amb, hg19chrX_midpart.fasta.ann, hg19chrX_midpart.fasta.bwt, ...)
-      It's just part of hg19 chrX, from the middle of the chromosome
-      plain old sort doesn't handle underscores like python:
-      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out4.u.sam bwa_wrapper_out4.sam
-      -->
-      <param name="refGenomeSource" value="history" />
-      <param name="ownFile" value="hg19chrX_midpart.fasta" />
-      <param name="sPaired" value="single" />
-      <param name="input1" value="bwa_wrapper_in4.fastqcssanger" ftype="fastqcssanger" />
-      <param name="source_select" value="pre_set" />
-      <param name="suppressHeader" value="false" />
-      <output name="output" file="bwa_wrapper_out4.sam" ftype="sam" sort="True" lines_diff="2" />
-    </test>
-    <test>
-      <!--
-      BWA commands:
-      bwa aln -t 4 -c equCab2chrM_cs.fa test-data/bwa_wrapper_in5.fastqcssanger > bwa_wrapper_out5a.sai
-      bwa aln -t 4 -c equCab2chrM_cs.fa test-data/bwa_wrapper_in6.fastqcssanger > bwa_wrapper_out5b.sai
-      bwa sampe equCab2chrM_cs.fa bwa_wrapper_out5a.sai bwa_wrapper_out5b.sai test-data/bwa_wrapper_in5.fastqcssanger test-data/bwa_wrapper_in6.fastqcssanger > bwa_wrapper_out5.u.sam
-      equCab2chrM_cs.fa is the prefix of the index files (equCab2chrM_cs.fa.amb, equCab2chrM_cs.fa.ann, ...)
-      remove the comment lines (beginning with '@') from the resulting sam file
-      plain old sort doesn't handle underscores like python:
-      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out5.u.sam bwa_wrapper_out5.sam
-      -->
-      <param name="refGenomeSource" value="indexed" />
-      <param name="indices" value="equCab2chrM" />
-      <param name="sPaired" value="paired" />
-      <param name="input1" value="bwa_wrapper_in5.fastqcssanger" ftype="fastqcssanger" />
-      <param name="input2" value="bwa_wrapper_in6.fastqcssanger" ftype="fastqcssanger" />
-      <param name="source_select" value="pre_set" />
-      <param name="suppressHeader" value="true" />
-      <output name="output" file="bwa_wrapper_out5.sam" ftype="sam" sort="True" />
-    </test>
-    <test>
-      <!--
-      BWA commands:
-      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N -c hg19chrX_midpart.fasta test-data/bwa_wrapper_in4.fastqcssanger > bwa_wrapper_out6.sai
-      bwa samse -n 3 -r "@RG\tID:474747\tDS:description\tDT:2011-03-14\tLB:lib-child-1-A\tPI:200\tPL:SOLID\tSM:child-1" hg19chrX_midpart.fasta bwa_wrapper_out6.sai test-data/bwa_wrapper_in4.fastqcssanger > bwa_wrapper_out6.u.sam
-      hg19chrX_midpart.fasta is the prefix of the index files (hg19chrX_midpart.fasta.amb, hg19chrX_midpart.fasta.ann, ...)
-      (It's just part of hg19 chrX, from the middle of the chromosome)
-      plain old sort doesn't handle underscores like python:
-      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out6.u.sam bwa_wrapper_out6.sam
-      -->
-      <param name="refGenomeSource" value="indexed" />
-      <param name="indices" value="hg19chrX_midpart" />
-      <param name="sPaired" value="single" />
-      <param name="input1" value="bwa_wrapper_in4.fastqcssanger" ftype="fastqcssanger" />
-      <param name="source_select" value="full" />
-      <param name="maxEditDist" value="0" />  
-      <param name="fracMissingAligns" value="0.04" />
-      <param name="maxGapOpens" value="1" />
-      <param name="maxGapExtens" value="-1" />
-      <param name="disallowLongDel" value="16" />
-      <param name="disallowIndel" value="5" />
-      <param name="seed" value="-1" />
-      <param name="maxEditDistSeed" value="2" />
-      <param name="mismatchPenalty" value="3" />
-      <param name="gapOpenPenalty" value="11" />
-      <param name="gapExtensPenalty" value="4" />
-      <param name="suboptAlign" value="true" />
-      <param name="noIterSearch" value="true" />
-      <param name="outputTopN" value="3" />
-      <param name="outputTopNDisc" value="10" />
-      <param name="maxInsertSize" value="500" />
-      <param name="maxOccurPairing" value="100000" />
-      <param name="specReadGroup" value="yes" />
-      <param name="rgid" value="474747" />
-      <param name="rgcn" value="" />
-      <param name="rgds" value="description" />
-      <param name="rgdt" value="2011-03-14" />
-      <param name="rgfo" value="" />
-      <param name="rgks" value="" />
-      <param name="rglb" value="lib-child-1-A" />
-      <param name="rgpg" value="" />
-      <param name="rgpi" value="200" />
-      <param name="rgpl" value="SOLID" />
-      <param name="rgpu" value="" />
-      <param name="rgsm" value="child-1" />
-      <param name="suppressHeader" value="false" />
-      <output name="output" file="bwa_wrapper_out6.sam" ftype="sam" sort="True" lines_diff="2" />
-    </test>
-    <test>
-      <!--
-      BWA commands:
-      cp test-data/chr_m.fasta chr_m.fasta
-      bwa index -c -a is chr_m.fasta
-      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N -c chr_m.fasta test-data/bwa_wrapper_in5.fastqcssanger > bwa_wrapper_out7a.sai
-      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N -c chr_m.fasta test-data/bwa_wrapper_in6.fastqcssanger > bwa_wrapper_out7b.sai
-      bwa sampe -a 100 -o 2 -n 3 -N 10 chr_m.fasta bwa_wrapper_out7a.sai bwa_wrapper_out7b.sai test-data/bwa_wrapper_in5.fastqcssanger test-data/bwa_wrapper_in6.fastqcssanger > bwa_wrapper_out7.u.sam
-      chr_m.fasta is the prefix of the index files (chr_m.fasta.amb, chr_m.fasta.ann, ...)
-      plain old sort doesn't handle underscores like python:
-      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out7.u.sam bwa_wrapper_out7.sam
-      -->
-      <param name="refGenomeSource" value="history" />
-      <param name="ownFile" value="chr_m.fasta" />
-      <param name="sPaired" value="paired" />
-      <param name="input1" value="bwa_wrapper_in5.fastqcssanger" ftype="fastqcssanger" />
-      <param name="input2" value="bwa_wrapper_in6.fastqcssanger" ftype="fastqcssanger" />
-      <param name="source_select" value="full" />
-      <param name="maxEditDist" value="0" />  
-      <param name="fracMissingAligns" value="0.04" />
-      <param name="maxGapOpens" value="1" />
-      <param name="maxGapExtens" value="-1" />
-      <param name="disallowLongDel" value="16" />
-      <param name="disallowIndel" value="5" />
-      <param name="seed" value="-1" />
-      <param name="maxEditDistSeed" value="2" />
-      <param name="mismatchPenalty" value="3" />
-      <param name="gapOpenPenalty" value="11" />
-      <param name="gapExtensPenalty" value="4" />
-      <param name="suboptAlign" value="true" />
-      <param name="noIterSearch" value="true" />
-      <param name="outputTopN" value="3" />
-      <param name="outputTopNDisc" value="10" />
-      <param name="maxInsertSize" value="100" />
-      <param name="maxOccurPairing" value="2" />
-      <param name="specReadGroup" value="no" />
-      <param name="suppressHeader" value="false" />
-      <output name="output" file="bwa_wrapper_out7.sam" ftype="sam" sort="True" lines_diff="2" />
-    </test>
-  </tests> 
-  <help>
-
-**What it does**
-
-BWA is a fast, lightweight tool that aligns relatively short sequences (queries) to a large sequence database, such as the human reference genome. It is developed by Heng Li at the Sanger Institute. Please cite: Li H. and Durbin R. (2009) Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 25, 1754-60. 
-
-------
-
-**Know what you are doing**
-
-.. class:: warningmark
-
-There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
-
- .. __: http://bio-bwa.sourceforge.net/
-
-------
-
-**Input formats**
-
-BWA accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files, set to either FASTQ Sanger or FASTQ Color Space Sanger as appropriate. 
-
-If you have Color Space Sanger data, it must be converted to nucleotide-space first. To do this, use the Manipulate FASTQ tool under NGS: QC and manipulation, with the following settings (a sketch of the same conversion follows the list):
-    1. Manipulate reads on Sequence Content, choosing Change Adapter Base, leaving the text box empty.
-    2. Manipulate reads on Sequence Content, doing a String Translate from "01234." to "ACGTN".
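-
-A minimal Python sketch (Python 2, matching the wrapper script) of the same two steps, using a made-up read; note that the translation table below maps both "4" and "." to "N"::
-
-  import string
-
-  read = 'T01230.'    # color-space read: a primer base followed by color calls
-  read = read[ 1: ]   # step 1: drop the adapter/primer base
-  # step 2: translate the color digits to double-encoded bases
-  read = read.translate( string.maketrans( '01234.', 'ACGTNN' ) )
-  print( read )       # ACGTAN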
-
-
-------
-
-**A Note on Built-in Reference Genomes**
-
-Some genomes have multiple variants. If only one "type" of genome is listed, it is the Full version, which contains everything that came in the original genome data download (possibly with mitochondrial and plasmid DNA added if it wasn't already included). The Full version is available for every genome. Some genomes also come in the Canonical variant, which contains only the "canonical" (well-defined) chromosomes or segments, such as chr1-chr22, chrX, chrY, and chrM for human. Other variations include gender. These come in the canonical form only, so the general Canonical variant is actually Canonical Female, and the other is Canonical Male (identical to Female excluding chrX).
-
-------
-
-**Outputs**
-
-The output is in SAM format, and has the following columns::
-
-    Column  Description
-  --------  --------------------------------------------------------
-  1  QNAME  Query (pair) NAME
-  2  FLAG   bitwise FLAG
-  3  RNAME  Reference sequence NAME
-  4  POS    1-based leftmost POSition/coordinate of clipped sequence
-  5  MAPQ   MAPping Quality (Phred-scaled)
-  6  CIGAR  extended CIGAR string
-  7  MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
-  8  MPOS   1-based Mate POSition
-  9  ISIZE  Inferred insert SIZE
-  10 SEQ    query SEQuence on the same strand as the reference
-  11 QUAL   query QUALity (ASCII-33 gives the Phred base quality)
-  12 OPT    variable OPTional fields in the format TAG:VTYPE:VALUE
-  
-The flags are as follows::
-
-    Flag  Description
-  ------  -------------------------------------
-  0x0001  the read is paired in sequencing
-  0x0002  the read is mapped in a proper pair
-  0x0004  the query sequence itself is unmapped
-  0x0008  the mate is unmapped
-  0x0010  strand of the query (1 for reverse)
-  0x0020  strand of the mate
-  0x0040  the read is the first read in a pair
-  0x0080  the read is the second read in a pair
-  0x0100  the alignment is not primary
-
-It looks like this (scroll sideways to see the entire example)::
-
-  QNAME	FLAG	RNAME	POS	MAPQ	CIGAR	MRNM	MPOS	ISIZE	SEQ	QUAL	OPT
-  HWI-EAS91_1_30788AAXX:1:1:1761:343	4	*	0	0	*	*	0	0	AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG	hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
-  HWI-EAS91_1_30788AAXX:1:1:1578:331	4	*	0	0	*	*	0	0	GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG	hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
-
--------
-
-**BWA settings**
-
-All of the options have a default value, and you can change any of them. All of the BWA options have been implemented here.
-
-------
-
-**BWA parameter list**
-
-This is an exhaustive list of BWA options:
-
-For **aln**::
-
-  -n NUM  Maximum edit distance if the value is INT, or the fraction of missing
-          alignments given 2% uniform base error rate if FLOAT. In the latter
-          case, the maximum edit distance is automatically chosen for different 
-          read lengths. [0.04]
-  -o INT  Maximum number of gap opens [1]
-  -e INT  Maximum number of gap extensions, -1 for k-difference mode
-          (disallowing long gaps) [-1]
-  -d INT  Disallow a long deletion within INT bp towards the 3'-end [16]
-  -i INT  Disallow an indel within INT bp towards the ends [5]
-  -l INT  Take the first INT subsequence as seed. If INT is larger than the
-          query sequence, seeding will be disabled. For long reads, this option 
-          typically ranges from 25 to 35 for '-k 2'. [inf]
-  -k INT  Maximum edit distance in the seed [2]
-  -t INT  Number of threads (multi-threading mode) [1]
-  -M INT  Mismatch penalty. BWA will not search for suboptimal hits with a score
-          lower than (bestScore-misMsc). [3]
-  -O INT  Gap open penalty [11]
-  -E INT  Gap extension penalty [4]
-  -c      Reverse query but not complement it, which is required for alignment
-          in the color space.
-  -R      Proceed with suboptimal alignments even if the top hit is a repeat. By
-          default, BWA only searches for suboptimal alignments if the top hit is
-          unique. Using this option has no effect on accuracy for single-end
-          reads. It is mainly designed for improving the alignment accuracy of
-          paired-end reads. However, the pairing procedure will be slowed down,
-          especially for very short reads (~32bp).
-  -N      Disable iterative search. All hits with no more than maxDiff
-          differences will be found. This mode is much slower than the default.
-
-For **samse**::
-
-  -n INT  Maximum number of alignments to output in the XA tag for reads paired
-          properly. If a read has more than INT hits, the XA tag will not be
-          written. [3]
-  -r STR  Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
-
-For **sampe**::
-
-  -a INT  Maximum insert size for a read pair to be considered as being mapped
-          properly. Since version 0.4.5, this option is only used when there
-          are not enough good alignments to infer the distribution of insert
-          sizes. [500]
-  -n INT  Maximum number of alignments to output in the XA tag for reads paired
-          properly. If a read has more than INT hits, the XA tag will not be
-          written. [3]
-  -N INT  Maximum number of alignments to output in the XA tag for disconcordant
-          read pairs (excluding singletons). If a read has more than INT hits,
-          the XA tag will not be written. [10]
-  -o INT  Maximum occurrences of a read for pairing. A read with more
-          occurrences will be treated as a single-end read. Reducing this
-          parameter helps faster pairing. [100000]
-  -r STR  Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
-
-For specifying the read group in **samse** or **sampe**, use the following::
-
-  @RG   Read group. Unordered multiple @RG lines are allowed. 
-  ID    Read group identifier. Each @RG line must have a unique ID. The value of
-        ID is used in the RG tags of alignment records. Must be unique among all
-        read groups in header section. Read group IDs may be modified when
-        merging SAM files in order to handle collisions. 
-  CN    Name of sequencing center producing the read. 
-  DS    Description. 
-  DT    Date the run was produced (ISO8601 date or date/time). 
-  FO    Flow order. The array of nucleotide bases that correspond to the
-        nucleotides used for each flow of each read. Multi-base flows are encoded
-        in IUPAC format, and non-nucleotide flows by various other characters.
-        Format : /\*|[ACMGRSVTWYHKDBN]+/ 
-  KS    The array of nucleotide bases that correspond to the key sequence of each read. 
-  LB    Library. 
-  PG    Programs used for processing the read group. 
-  PI    Predicted median insert size. 
-  PL    Platform/technology used to produce the reads. Valid values : CAPILLARY,
-        LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO. 
-  PU    Platform unit (e.g. flowcell-barcode.lane for Illumina or slide for
-        SOLiD). Unique identifier. 
-  SM    Sample. Use pool name where a pool is being sequenced. 
-
-  </help>
-</tool>
-
-
--- a/tools/sr_mapping/bwa_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,342 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Runs BWA on single-end or paired-end data.
-Produces a SAM file containing the mappings.
-Works with BWA version 0.5.9.
-
-usage: bwa_wrapper.py [options]
-
-See below for options
-"""
-
-import optparse, os, shutil, subprocess, sys, tempfile
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-def check_is_double_encoded( fastq ):
-    # check that first read is bases, not one base followed by numbers
-    bases = [ 'A', 'C', 'G', 'T', 'a', 'c', 'g', 't', 'N' ]
-    nums = [ '0', '1', '2', '3' ]
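-    # a raw (single-encoded) color-space read is a primer base followed by color
-    # digits, so any digit in a sequence line means the file is not double-encoded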
-    for line in file( fastq, 'rb'):
-        if not line.strip() or line.startswith( '@' ):
-            continue
-        if len( [ b for b in line.strip() if b in nums ] ) > 0:
-            return False
-        elif line.strip()[0] in bases and len( [ b for b in line.strip() if b in bases ] ) == len( line.strip() ):
-            return True
-        else:
-            raise Exception, 'First line in first read does not appear to be a valid FASTQ read in either base-space or color-space'
-    raise Exception, 'There is no non-comment and non-blank line in your FASTQ file'
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to use' )
-    parser.add_option( '-c', '--color-space', dest='color_space', action='store_true', help='If the input files are SOLiD format' )
-    parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
-    parser.add_option( '-f', '--input1', dest='fastq', help='The (forward) fastq file to use for the mapping' )
-    parser.add_option( '-F', '--input2', dest='rfastq', help='The reverse fastq file to use for mapping if paired-end data' )
-    parser.add_option( '-u', '--output', dest='output', help='The file to save the output (SAM format)' )
-    parser.add_option( '-g', '--genAlignType', dest='genAlignType', help='The type of pairing (single or paired)' )
-    parser.add_option( '-p', '--params', dest='params', help='Parameter setting to use (pre_set or full)' )
-    parser.add_option( '-s', '--fileSource', dest='fileSource', help='Whether to use a previously indexed reference sequence or one from the history (indexed or history)' )
-    parser.add_option( '-n', '--maxEditDist', dest='maxEditDist', help='Maximum edit distance if integer' )
-    parser.add_option( '-m', '--fracMissingAligns', dest='fracMissingAligns', help='Fraction of missing alignments given 2% uniform base error rate if fraction' )
-    parser.add_option( '-o', '--maxGapOpens', dest='maxGapOpens', help='Maximum number of gap opens' )
-    parser.add_option( '-e', '--maxGapExtens', dest='maxGapExtens', help='Maximum number of gap extensions' )
-    parser.add_option( '-d', '--disallowLongDel', dest='disallowLongDel', help='Disallow a long deletion within specified bps' )
-    parser.add_option( '-i', '--disallowIndel', dest='disallowIndel', help='Disallow indel within specified bps' )
-    parser.add_option( '-l', '--seed', dest='seed', help='Take the first specified subsequences' )
-    parser.add_option( '-k', '--maxEditDistSeed', dest='maxEditDistSeed', help='Maximum edit distance to the seed' )
-    parser.add_option( '-M', '--mismatchPenalty', dest='mismatchPenalty', help='Mismatch penalty' )
-    parser.add_option( '-O', '--gapOpenPenalty', dest='gapOpenPenalty', help='Gap open penalty' )
-    parser.add_option( '-E', '--gapExtensPenalty', dest='gapExtensPenalty', help='Gap extension penalty' )
-    parser.add_option( '-R', '--suboptAlign', dest='suboptAlign', help='Proceed with suboptimal alignments even if the top hit is a repeat' )
-    parser.add_option( '-N', '--noIterSearch', dest='noIterSearch', help='Disable iterative search' )
-    parser.add_option( '-T', '--outputTopN', dest='outputTopN', help='Maximum number of alignments to output in the XA tag for reads paired properly' )
-    parser.add_option( '', '--outputTopNDisc', dest='outputTopNDisc', help='Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons)' )
-    parser.add_option( '-S', '--maxInsertSize', dest='maxInsertSize', help='Maximum insert size for a read pair to be considered properly mapped' )
-    parser.add_option( '-P', '--maxOccurPairing', dest='maxOccurPairing', help='Maximum occurrences of a read for pairings' )
-    parser.add_option( '', '--rgid', dest='rgid', help='Read group identifier' )
-    parser.add_option( '', '--rgcn', dest='rgcn', help='Sequencing center that produced the read' )
-    parser.add_option( '', '--rgds', dest='rgds', help='Description' )
-    parser.add_option( '', '--rgdt', dest='rgdt', help='Date that run was produced (ISO8601 format date or date/time, like YYYY-MM-DD)' )
-    parser.add_option( '', '--rgfo', dest='rgfo', help='Flow order' )
-    parser.add_option( '', '--rgks', dest='rgks', help='The array of nucleotide bases that correspond to the key sequence of each read' )
-    parser.add_option( '', '--rglb', dest='rglb', help='Library name' )
-    parser.add_option( '', '--rgpg', dest='rgpg', help='Programs used for processing the read group' )
-    parser.add_option( '', '--rgpi', dest='rgpi', help='Predicted median insert size' )
-    parser.add_option( '', '--rgpl', dest='rgpl', choices=[ 'CAPILLARY', 'LS454', 'ILLUMINA', 'SOLID', 'HELICOS', 'IONTORRENT', 'PACBIO' ], help='Platform/technology used to produce the reads' )
-    parser.add_option( '', '--rgpu', dest='rgpu', help='Platform unit (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)' )
-    parser.add_option( '', '--rgsm', dest='rgsm', help='Sample' )
-    parser.add_option( '-D', '--dbkey', dest='dbkey', help='Dbkey for reference genome' )
-    parser.add_option( '-X', '--do_not_build_index', dest='do_not_build_index', action='store_true', help="Don't build index" )
-    parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
-    parser.add_option( '-I', '--illumina1.3', dest='illumina13qual', help='Input FASTQ files have Illumina 1.3 quality scores' )
-    (options, args) = parser.parse_args()
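-    # Editor's note: a minimal single-end invocation outside Galaxy might look
-    # like this (paths hypothetical; Galaxy fills these in from the tool XML):
-    #     python bwa_wrapper.py --threads=4 --fileSource=indexed \
-    #         --ref=/path/to/hg19.fa --input1=reads.fastqsanger \
-    #         --output=out.sam --genAlignType=single --params=pre_set \
-    #         --suppressHeader=true --do_not_build_index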
-
-    # output version # of tool
-    try:
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='bwa 2>&1', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'version' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( 'BWA %s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine BWA version\n' )
-
-    # check for color space fastq that's not double-encoded and exit if appropriate
-    if options.color_space:
-        if not check_is_double_encoded( options.fastq ):
-            stop_err( 'Your file must be double-encoded (it must be converted from "numbers" to "bases"). See the help section for details' )
-        if options.genAlignType == 'paired':
-            if not check_is_double_encoded( options.rfastq ):
-                stop_err( 'Your reverse reads file must also be double-encoded (it must be converted from "numbers" to "bases"). See the help section for details' )
-
-    fastq = options.fastq
-    if options.rfastq:
-        rfastq = options.rfastq
-
-    # set color space variable
-    if options.color_space:
-        color_space = '-c'
-    else:
-        color_space = ''
-
-    # make temp directory for placement of indices
-    tmp_index_dir = tempfile.mkdtemp()
-    tmp_dir = tempfile.mkdtemp()
-    # index if necessary
-    if options.fileSource == 'history' and not options.do_not_build_index:
-        ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
-        ref_file_name = ref_file.name
-        ref_file.close()
-        os.symlink( options.ref, ref_file_name )
-        # determine which indexing algorithm to use, based on size
-        try:
-            size = os.stat( options.ref ).st_size
-            if size <= 2**30: 
-                indexingAlg = 'is'
-            else:
-                indexingAlg = 'bwtsw'
-        except:
-            indexingAlg = 'is'
-        indexing_cmds = '%s -a %s' % ( color_space, indexingAlg )
-        cmd1 = 'bwa index %s %s' % ( indexing_cmds, ref_file_name )
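-        # Editor's note (illustrative): with these settings the command is
-        # equivalent to, e.g., 'bwa index -a is ref.fa' for references up to
-        # 2^30 bytes, or 'bwa index -a bwtsw ref.fa' for larger ones.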
-        try:
-            tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
-            tmp_stderr = open( tmp, 'wb' )
-            proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
-            returncode = proc.wait()
-            tmp_stderr.close()
-            # get stderr, allowing for case where it's very large
-            tmp_stderr = open( tmp, 'rb' )
-            stderr = ''
-            buffsize = 1048576
-            try:
-                while True:
-                    stderr += tmp_stderr.read( buffsize )
-                    if not stderr or len( stderr ) % buffsize != 0:
-                        break
-            except OverflowError:
-                pass
-            tmp_stderr.close()
-            if returncode != 0:
-                raise Exception, stderr
-        except Exception, e:
-            # clean up temp dirs
-            if os.path.exists( tmp_index_dir ):
-                shutil.rmtree( tmp_index_dir )
-            if os.path.exists( tmp_dir ):
-                shutil.rmtree( tmp_dir )
-            stop_err( 'Error indexing reference sequence. ' + str( e ) )
-    else:
-        ref_file_name = options.ref
-    if options.illumina13qual:
-        illumina_quals = "-I"
-    else:
-        illumina_quals = ""
-
-    # set up aligning and generate aligning command options
-    if options.params == 'pre_set':
-        aligning_cmds = '-t %s %s %s' % ( options.threads, color_space, illumina_quals )
-        gen_alignment_cmds = ''
-    else:
-        if options.maxEditDist != '0':
-            editDist = options.maxEditDist
-        else:
-            editDist = options.fracMissingAligns
-        if options.seed != '-1':
-            seed = '-l %s' % options.seed
-        else:
-            seed = ''
-        if options.suboptAlign == 'true':
-            suboptAlign = '-R'
-        else:
-            suboptAlign = ''
-        if options.noIterSearch == 'true':
-            noIterSearch = '-N'
-        else:
-            noIterSearch = ''
-        aligning_cmds = '-n %s -o %s -e %s -d %s -i %s %s -k %s -t %s -M %s -O %s -E %s %s %s %s %s' % \
-                        ( editDist, options.maxGapOpens, options.maxGapExtens, options.disallowLongDel,
-                          options.disallowIndel, seed, options.maxEditDistSeed, options.threads,
-                          options.mismatchPenalty, options.gapOpenPenalty, options.gapExtensPenalty,
-                          suboptAlign, noIterSearch, color_space, illumina_quals )
-        if options.genAlignType == 'paired':
-            gen_alignment_cmds = '-a %s -o %s' % ( options.maxInsertSize, options.maxOccurPairing )
-            if options.outputTopNDisc:
-                gen_alignment_cmds += ' -N %s' % options.outputTopNDisc
-        else:
-            gen_alignment_cmds = ''
-        if options.rgid:
-            if not options.rglb or not options.rgpl or not options.rgsm:
-                stop_err( 'If you want to specify read groups, you must include the ID, LB, PL, and SM tags.' )
-            readGroup = '@RG\tID:%s\tLB:%s\tPL:%s\tSM:%s' % ( options.rgid, options.rglb, options.rgpl, options.rgsm )
-            if options.rgcn:
-                readGroup += '\tCN:%s' % options.rgcn
-            if options.rgds:
-                readGroup += '\tDS:%s' % options.rgds
-            if options.rgdt:
-                readGroup += '\tDT:%s' % options.rgdt
-            if options.rgfo:
-                readGroup += '\tFO:%s' % options.rgfo
-            if options.rgks:
-                readGroup += '\tKS:%s' % options.rgks
-            if options.rgpg:
-                readGroup += '\tPG:%s' % options.rgpg
-            if options.rgpi:
-                readGroup += '\tPI:%s' % options.rgpi
-            if options.rgpu:
-                readGroup += '\tPU:%s' % options.rgpu
-            gen_alignment_cmds += ' -r "%s"' % readGroup
-        if options.outputTopN:
-            gen_alignment_cmds += ' -n %s' % options.outputTopN
-    # set up output files
-    tmp_align_out = tempfile.NamedTemporaryFile( dir=tmp_dir )
-    tmp_align_out_name = tmp_align_out.name
-    tmp_align_out.close()
-    tmp_align_out2 = tempfile.NamedTemporaryFile( dir=tmp_dir )
-    tmp_align_out2_name = tmp_align_out2.name
-    tmp_align_out2.close()
-    # prepare actual aligning and generate aligning commands
-    cmd2 = 'bwa aln %s %s %s > %s' % ( aligning_cmds, ref_file_name, fastq, tmp_align_out_name )
-    cmd2b = ''
-    if options.genAlignType == 'paired':
-        cmd2b = 'bwa aln %s %s %s > %s' % ( aligning_cmds, ref_file_name, rfastq, tmp_align_out2_name )
-        cmd3 = 'bwa sampe %s %s %s %s %s %s >> %s' % ( gen_alignment_cmds, ref_file_name, tmp_align_out_name, tmp_align_out2_name, fastq, rfastq, options.output )
-    else:
-        cmd3 = 'bwa samse %s %s %s %s >> %s' % ( gen_alignment_cmds, ref_file_name, tmp_align_out_name, fastq, options.output )
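-    # Editor's note (illustrative, hypothetical file names): with the pre-set
-    # settings and single-end data, the pipeline above is equivalent to:
-    #     bwa aln -t 4 ref.fa reads.fastq > reads.sai
-    #     bwa samse ref.fa reads.sai reads.fastq >> output.sam
-    # and for pre-set paired-end data:
-    #     bwa aln -t 4 ref.fa reads_1.fastq > 1.sai
-    #     bwa aln -t 4 ref.fa reads_2.fastq > 2.sai
-    #     bwa sampe ref.fa 1.sai 2.sai reads_1.fastq reads_2.fastq >> output.sam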
-    # perform alignments
-    buffsize = 1048576
-    try:
-        # need to nest try-except in try-finally to handle 2.4
-        try:
-            # align
-            try:
-                tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-                tmp_stderr = open( tmp, 'wb' )
-                proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-                returncode = proc.wait()
-                tmp_stderr.close()
-                # get stderr, allowing for case where it's very large
-                tmp_stderr = open( tmp, 'rb' )
-                stderr = ''
-                try:
-                    while True:
-                        stderr += tmp_stderr.read( buffsize )
-                        if not stderr or len( stderr ) % buffsize != 0:
-                            break
-                except OverflowError:
-                    pass
-                tmp_stderr.close()
-                if returncode != 0:
-                    raise Exception, stderr
-            except Exception, e:
-                raise Exception, 'Error aligning sequence. ' + str( e )
-            # and again if paired data
-            try:
-                if cmd2b:
-                    tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-                    tmp_stderr = open( tmp, 'wb' )
-                    proc = subprocess.Popen( args=cmd2b, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-                    returncode = proc.wait()
-                    tmp_stderr.close()
-                    # get stderr, allowing for case where it's very large
-                    tmp_stderr = open( tmp, 'rb' )
-                    stderr = ''
-                    try:
-                        while True:
-                            stderr += tmp_stderr.read( buffsize )
-                            if not stderr or len( stderr ) % buffsize != 0:
-                                break
-                    except OverflowError:
-                        pass
-                    tmp_stderr.close()
-                    if returncode != 0:
-                        raise Exception, stderr
-            except Exception, e:
-                raise Exception, 'Error aligning second sequence. ' + str( e )
-            # generate align
-            try:
-                tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-                tmp_stderr = open( tmp, 'wb' )
-                proc = subprocess.Popen( args=cmd3, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-                returncode = proc.wait()
-                tmp_stderr.close()
-                # get stderr, allowing for case where it's very large
-                tmp_stderr = open( tmp, 'rb' )
-                stderr = ''
-                try:
-                    while True:
-                        stderr += tmp_stderr.read( buffsize )
-                        if not stderr or len( stderr ) % buffsize != 0:
-                            break
-                except OverflowError:
-                    pass
-                tmp_stderr.close()
-                if returncode != 0:
-                    raise Exception, stderr
-            except Exception, e:
-                raise Exception, 'Error generating alignments. ' + str( e ) 
-            # remove header if necessary
-            if options.suppressHeader == 'true':
-                tmp_out = tempfile.NamedTemporaryFile( dir=tmp_dir )
-                tmp_out_name = tmp_out.name
-                tmp_out.close()
-                try:
-                    shutil.move( options.output, tmp_out_name )
-                except Exception, e:
-                    raise Exception, 'Error moving output file before removing headers. ' + str( e )
-                fout = file( options.output, 'w' )
-                for line in file( tmp_out.name, 'r' ):
-                    if not ( line.startswith( '@HD' ) or line.startswith( '@SQ' ) or line.startswith( '@RG' ) or line.startswith( '@PG' ) or line.startswith( '@CO' ) ):
-                        fout.write( line )
-                fout.close()
-            # check that there are results in the output file
-            if os.path.getsize( options.output ) > 0:
-                sys.stdout.write( 'BWA run on %s-end data\n' % options.genAlignType )
-            else:
-                raise Exception, 'The output file is empty. You may simply have no matches, or there may be an error with your input file or settings.'
-        except Exception, e:
-            stop_err( 'The alignment failed.\n' + str( e ) )
-    finally:
-        # clean up temp dir
-        if os.path.exists( tmp_index_dir ):
-            shutil.rmtree( tmp_index_dir )
-        if os.path.exists( tmp_dir ):
-            shutil.rmtree( tmp_dir )
-
-if __name__=="__main__": __main__()
--- a/tools/sr_mapping/bwa_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,474 +0,0 @@
-<tool id="bwa_wrapper" name="Map with BWA for Illumina" version="1.2.2">
-  <description></description>
-  <parallelism method="basic"></parallelism>
-  <command interpreter="python">
-    bwa_wrapper.py 
-      --threads="4"
-
-      #if $input1.ext == "fastqillumina":
-            --illumina1.3
-      #end if
-
-      ## reference source
-      --fileSource=$genomeSource.refGenomeSource
-      #if $genomeSource.refGenomeSource == "history":
-        ##build index on the fly
-        --ref="${genomeSource.ownFile}"
-        --dbkey=$dbkey
-      #else:
-        ##use precomputed indexes
-        --ref="${ filter( lambda x: str( x[0] ) == str( $genomeSource.indices ), $__app__.tool_data_tables[ 'bwa_indexes' ].get_fields() )[0][-1] }"
-        --do_not_build_index
-      #end if
-
-      ## input file(s)
-      --input1=$paired.input1
-      #if $paired.sPaired == "paired":
-        --input2=$paired.input2
-      #end if
-
-      ## output file
-      --output=$output
-
-      ## run parameters
-      --genAlignType=$paired.sPaired
-      --params=$params.source_select
-      #if $params.source_select != "pre_set":
-        --maxEditDist=$params.maxEditDist
-        --fracMissingAligns=$params.fracMissingAligns
-        --maxGapOpens=$params.maxGapOpens
-        --maxGapExtens=$params.maxGapExtens
-        --disallowLongDel=$params.disallowLongDel
-        --disallowIndel=$params.disallowIndel
-        --seed=$params.seed
-        --maxEditDistSeed=$params.maxEditDistSeed
-        --mismatchPenalty=$params.mismatchPenalty
-        --gapOpenPenalty=$params.gapOpenPenalty
-        --gapExtensPenalty=$params.gapExtensPenalty
-        --suboptAlign=$params.suboptAlign
-        --noIterSearch=$params.noIterSearch
-        --outputTopN=$params.outputTopN
-        --outputTopNDisc=$params.outputTopNDisc
-        --maxInsertSize=$params.maxInsertSize
-        --maxOccurPairing=$params.maxOccurPairing
-        #if $params.readGroup.specReadGroup == "yes"
-          --rgid="$params.readGroup.rgid"
-          --rgcn="$params.readGroup.rgcn"
-          --rgds="$params.readGroup.rgds"
-          --rgdt="$params.readGroup.rgdt"
-          --rgfo="$params.readGroup.rgfo"
-          --rgks="$params.readGroup.rgks"
-          --rglb="$params.readGroup.rglb"
-          --rgpg="$params.readGroup.rgpg"
-          --rgpi="$params.readGroup.rgpi"
-          --rgpl="$params.readGroup.rgpl"
-          --rgpu="$params.readGroup.rgpu"
-          --rgsm="$params.readGroup.rgsm"
-        #end if
-      #end if
-
-      ## suppress output SAM header
-      --suppressHeader=$suppressHeader
-  </command>
-  <requirements>
-    <requirement type="package">bwa</requirement>
-  </requirements>
-  <inputs>
-    <conditional name="genomeSource">
-      <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
-        <option value="indexed">Use a built-in index</option>
-        <option value="history">Use one from the history</option>
-      </param>
-      <when value="indexed">
-        <param name="indices" type="select" label="Select a reference genome">
-          <options from_data_table="bwa_indexes">
-            <filter type="sort_by" column="2" />
-            <validator type="no_options" message="No indexes are available" />
-          </options>
-        </param>
-      </when>
-      <when value="history">
-        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
-      </when>
-    </conditional>
-    <conditional name="paired">
-      <param name="sPaired" type="select" label="Is this library mate-paired?">
-        <option value="single">Single-end</option>
-        <option value="paired">Paired-end</option>
-      </param>
-      <when value="single">
-        <param name="input1" type="data" format="fastqsanger,fastqillumina" label="FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
-      </when>
-      <when value="paired">
-        <param name="input1" type="data" format="fastqsanger,fastqillumina" label="Forward FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
-        <param name="input2" type="data" format="fastqsanger,fastqillumina" label="Reverse FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
-      </when>
-    </conditional>
-    <conditional name="params">
-      <param name="source_select" type="select" label="BWA settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
-        <option value="pre_set">Commonly Used</option>
-        <option value="full">Full Parameter List</option>
-      </param>
-      <when value="pre_set" />
-      <when value="full">
-        <param name="maxEditDist" type="integer" value="0" label="Maximum edit distance (aln -n)" help="Enter this value OR a fraction of missing alignments, not both" />
-        <param name="fracMissingAligns" type="float" value="0.04" label="Fraction of missing alignments given 2% uniform base error rate (aln -n)" help="Enter this value OR maximum edit distance, not both" />
-        <param name="maxGapOpens" type="integer" value="1" label="Maximum number of gap opens (aln -o)" />
-        <param name="maxGapExtens" type="integer" value="-1" label="Maximum number of gap extensions (aln -e)" help="-1 for k-difference mode (disallowing long gaps)" />
-        <param name="disallowLongDel" type="integer" value="16" label="Disallow long deletion within [value] bp towards the 3'-end (aln -d)" />
-        <param name="disallowIndel" type="integer" value="5" label="Disallow insertion/deletion within [value] bp towards the end (aln -i)" />
-        <param name="seed" type="integer" value="-1" label="Number of first subsequences to take as seed (aln -l)" help="Enter -1 for infinity" />
-        <param name="maxEditDistSeed" type="integer" value="2" label="Maximum edit distance in the seed (aln -k)" />
-        <param name="mismatchPenalty" type="integer" value="3" label="Mismatch penalty (aln -M)" help="BWA will not search for suboptimal hits with a score lower than [value]" />
-        <param name="gapOpenPenalty" type="integer" value="11" label="Gap open penalty (aln -O)" />
-        <param name="gapExtensPenalty" type="integer" value="4" label="Gap extension penalty (aln -E)" />
-        <param name="suboptAlign" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Proceed with suboptimal alignments even if the top hit is a repeat (aln -R)" help="For paired-end reads only. By default, BWA only searches for suboptimal alignments if the top hit is unique. Using this option has no effect on accuracy for single-end reads. It is mainly designed for improving the alignment accuracy of paired-end reads. However, the pairing procedure will be slowed down, especially for very short reads (~32bp)" />
-        <param name="noIterSearch" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Disable iterative search (aln -N)" help="All hits with no more than maxDiff differences will be found. This mode is much slower than the default" />
-        <param name="outputTopN" type="integer" value="3" label="Maximum number of alignments to output in the XA tag for reads paired properly (samse/sampe -n)" help="If a read has more than INT hits, the XA tag will not be written" />
-        <param name="outputTopNDisc" type="integer" value="10" label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) (sampe -N)" help="For paired-end reads only. If a read has more than INT hits, the XA tag will not be written" />
-        <param name="maxInsertSize" type="integer" value="500" label="Maximum insert size for a read pair to be considered as being mapped properly (sampe -a)" help="For paired-end reads only. Only used when there are not enough good alignments to infer the distribution of insert sizes" />
-        <param name="maxOccurPairing" type="integer" value="100000" label="Maximum occurrences of a read for pairing (sampe -o)" help="For paired-end reads only. A read with more occurrences will be treated as a single-end read. Reducing this parameter helps faster pairing" />
-        <conditional name="readGroup">
-          <param name="specReadGroup" type="select" label="Specify the read group for this file? (samse/sampe -r)">
-            <option value="yes">Yes</option>
-            <option value="no" selected="True">No</option>
-          </param>
-          <when value="yes">
-            <param name="rgid" type="text" size="25" label="Read group identifier (ID). Each @RG line must have a unique ID. The value of ID is used in the RG 
-tags of alignment records. Must be unique among all read groups in header section." help="Required if RG specified. Read group 
-IDs may be modified when merging SAM files in order to handle collisions." />
-            <param name="rgcn" type="text" size="25" label="Sequencing center that produced the read (CN)" help="Optional" />
-            <param name="rgds" type="text" size="25" label="Description (DS)" help="Optional" />
-            <param name="rgdt" type="text" size="25" label="Date that run was produced (DT)" help="Optional. ISO8601 format date or date/time, like YYYY-MM-DD" />
-            <param name="rgfo" type="text" size="25" label="Flow order (FO). The array of nucleotide bases that correspond to the nucleotides used for each 
-flow of each read." help="Optional. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by 
-various other characters. Format : /\*|[ACMGRSVTWYHKDBN]+/" />
-            <param name="rgks" type="text" size="25" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" help="Optional" />
-            <param name="rglb" type="text" size="25" label="Library name (LB)" help="Required if RG specified" />
-            <param name="rgpg" type="text" size="25" label="Programs used for processing the read group (PG)" help="Optional" />
-            <param name="rgpi" type="text" size="25" label="Predicted median insert size (PI)" help="Optional" />
-            <param name="rgpl" type="text" size="25" label="Platform/technology used to produce the reads (PL)" help="Required if RG specified. Valid values : CAPILLARY, LS454, ILLUMINA, 
-SOLID, HELICOS, IONTORRENT and PACBIO" />
-            <param name="rgpu" type="text" size="25" label="Platform unit (PU)" help="Optional. Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" />
-            <param name="rgsm" type="text" size="25" label="Sample (SM)" help="Required if RG specified. Use pool name where a pool is being sequenced" />
-          </when>
-          <when value="no" />
-        </conditional>
-      </when>
-    </conditional>
-    <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Suppress the header in the output SAM file" help="BWA produces SAM with several lines of header information" />
-  </inputs>
-  <outputs>
-    <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
-      <actions>
-        <conditional name="genomeSource.refGenomeSource">
-          <when value="indexed">
-            <action type="metadata" name="dbkey">
-              <option type="from_data_table" name="bwa_indexes" column="1">
-                <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-                <filter type="param_value" ref="genomeSource.indices" column="0"/>
-              </option>
-            </action>
-          </when>
-          <when value="history">
-            <action type="metadata" name="dbkey">
-              <option type="from_param" name="genomeSource.ownFile" param_attribute="dbkey" />
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <!--
-      BWA commands:
-      bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out1.sai
-      bwa samse phiX.fasta bwa_wrapper_out1.sai test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out1.sam
-      phiX.fasta is the prefix for the reference files (phiX.fasta.amb, phiX.fasta.ann, phiX.fasta.bwt, ...)
-      remove the comment lines (beginning with '@') from the resulting sam file
-      plain old sort doesn't handle underscores like python:
-      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out1.u.sam bwa_wrapper_out1.sam
-      -->
-      <param name="refGenomeSource" value="indexed" />
-      <param name="indices" value="phiX" />
-      <param name="sPaired" value="single" />
-      <param name="input1" value="bwa_wrapper_in1.fastqsanger" ftype="fastqsanger" />
-      <param name="source_select" value="pre_set" />
-      <param name="suppressHeader" value="true" />
-      <output name="output" file="bwa_wrapper_out1.sam" ftype="sam" sort="True" />
-    </test>
-    <test>
-      <!--
-      BWA commands:
-      cp test-data/phiX.fasta phiX.fasta
-      bwa index -a is phiX.fasta
-      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out2.sai
-      bwa samse -n 3 phiX.fasta bwa_wrapper_out2.sai test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out2.u.sam
-      phiX.fasta is the prefix for the reference files (phiX.fasta.amb, phiX.fasta.ann, phiX.fasta.bwt, ...)
-      remove the comment lines (beginning with '@') from the resulting sam file
-      plain old sort doesn't handle underscores like python:
-      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out2.u.sam bwa_wrapper_out2.sam
-      -->
-      <param name="refGenomeSource" value="history" />
-      <param name="ownFile" value="phiX.fasta" />
-      <param name="sPaired" value="single" />
-      <param name="input1" value="bwa_wrapper_in1.fastqsanger" ftype="fastqsanger" />
-      <param name="source_select" value="full" />
-      <param name="maxEditDist" value="0" />  
-      <param name="fracMissingAligns" value="0.04" />
-      <param name="maxGapOpens" value="1" />
-      <param name="maxGapExtens" value="-1" />
-      <param name="disallowLongDel" value="16" />
-      <param name="disallowIndel" value="5" />
-      <param name="seed" value="-1" />
-      <param name="maxEditDistSeed" value="2" />
-      <param name="mismatchPenalty" value="3" />
-      <param name="gapOpenPenalty" value="11" />
-      <param name="gapExtensPenalty" value="4" />
-      <param name="suboptAlign" value="true" />
-      <param name="noIterSearch" value="true" />
-      <param name="outputTopN" value="3" />
-      <param name="outputTopNDisc" value="10" />
-      <param name="maxInsertSize" value="500" />
-      <param name="maxOccurPairing" value="100000" />
-      <param name="specReadGroup" value="no" />
-      <param name="suppressHeader" value="true" />
-      <output name="output" file="bwa_wrapper_out2.sam" ftype="sam" sort="True" />
-    </test>
-    <test>
-      <!--
-      BWA commands:
-      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in2.fastqsanger > bwa_wrapper_out3a.sai
-      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out3b.sai
-      bwa sampe -a 500 -o 100000 -n 3 -N 10 -r "@RG\tID:abcdefg\tDS:descrip\tDT:2010-11-01\tLB:lib-mom-A\tPI:400\tPL:ILLUMINA\tSM:mom" phiX.fasta bwa_wrapper_out3a.sai bwa_wrapper_out3b.sai test-data/bwa_wrapper_in2.fastqsanger test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out3.u.sam
-      phiX.fasta is the prefix for the reference
-      plain old sort doesn't handle underscores like python:
-      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out3.u.sam bwa_wrapper_out3.sam
-      -->
-      <param name="refGenomeSource" value="indexed" />
-      <param name="indices" value="phiX" />
-      <param name="sPaired" value="paired" />
-      <param name="input1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" />
-      <param name="input2" value="bwa_wrapper_in3.fastqsanger" ftype="fastqsanger" />
-      <param name="source_select" value="full" />
-      <param name="maxEditDist" value="0" />
-      <param name="fracMissingAligns" value="0.04" />
-      <param name="maxGapOpens" value="1" />
-      <param name="maxGapExtens" value="-1" />
-      <param name="disallowLongDel" value="16" />
-      <param name="disallowIndel" value="5" />
-      <param name="seed" value="-1" />
-      <param name="maxEditDistSeed" value="2" />
-      <param name="mismatchPenalty" value="3" />
-      <param name="gapOpenPenalty" value="11" />
-      <param name="gapExtensPenalty" value="4" />
-      <param name="suboptAlign" value="true" />
-      <param name="noIterSearch" value="true" />
-      <param name="outputTopN" value="3" />
-      <param name="outputTopNDisc" value="10" />
-      <param name="maxInsertSize" value="500" />
-      <param name="maxOccurPairing" value="100000" />
-      <param name="specReadGroup" value="yes" />
-      <param name="rgid" value="abcdefg" />
-      <param name="rgcn" value="" />
-      <param name="rgds" value="descrip" />
-      <param name="rgdt" value="2010-11-01" />
-      <param name="rgfo" value="" />
-      <param name="rgks" value="" />
-      <param name="rglb" value="lib-mom-A" />
-      <param name="rgpg" value="" />
-      <param name="rgpi" value="400" />
-      <param name="rgpl" value="ILLUMINA" />
-      <param name="rgpu" value="" />
-      <param name="rgsm" value="mom" />
-      <param name="suppressHeader" value="false" />
-      <output name="output" file="bwa_wrapper_out3.sam" ftype="sam" sort="True" lines_diff="2" />
-    </test>
-    <test>
-      <!--
-      BWA commands:
-      cp test-data/phiX.fasta phiX.fasta
-      bwa index -a is phiX.fasta
-      bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in2.fastqsanger > bwa_wrapper_out8a.sai
-      bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out8b.sai
-      bwa sampe -a 500 -o 100000 phiX.fasta bwa_wrapper_out8a.sai bwa_wrapper_out8b.sai test-data/bwa_wrapper_in2.fastqsanger test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out8.u.sam
-      phiX.fasta is the prefix for the reference
-      remove the comment lines (beginning with '@') from the resulting sam file
-      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out8.u.sam bwa_wrapper_out8.sam
-      -->
-      <param name="refGenomeSource" value="history" />
-      <!-- this is the backwards-compatible "unique value" for this index, not an actual path -->
-      <param name="ownFile" value="phiX.fasta" />
-      <param name="sPaired" value="paired" />
-      <param name="input1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" />
-      <param name="input2" value="bwa_wrapper_in3.fastqsanger" ftype="fastqsanger" />
-      <param name="source_select" value="preSet" />
-      <param name="suppressHeader" value="true" />
-      <output name="output" file="bwa_wrapper_out8.sam" ftype="sam" sort="True" />
-    </test>
-  </tests> 
-  <help>
-
-**What it does**
-
-BWA is a fast, lightweight tool that aligns relatively short sequences (queries) to a large sequence database, such as the human reference genome. It was developed by Heng Li at the Sanger Institute. Li H. and Durbin R. (2009) Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 25, 1754-60.
-
-------
-
-**Know what you are doing**
-
-.. class:: warningmark
-
-There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
-
- .. __: http://bio-bwa.sourceforge.net/
-
-------
-
-**Input formats**
-
-BWA accepts files in either Sanger FASTQ format (galaxy type *fastqsanger*) or Illumina FASTQ format (galaxy type *fastqillumina*). Use the FASTQ Groomer to prepare your files.
-
-------
-
-**A Note on Built-in Reference Genomes**
-
-Some genomes have multiple variants. If only one "type" of genome is listed, it is the Full version, which contains everything that came in the original genome data download (possibly with mitochondrial and plasmid DNA added if it wasn't already included). The Full version is available for every genome. Some genomes also come in the Canonical variant, which contains only the "canonical" (well-defined) chromosomes or segments, such as chr1-chr22, chrX, chrY, and chrM for human. Other variations include gender; these come in the canonical form only, so the general Canonical variant is actually Canonical Female, and the other is Canonical Male (identical to female excluding chrX).
-
-------
-
-**Outputs**
-
-The output is in SAM format, and has the following columns::
-
-    Column  Description
-  --------  --------------------------------------------------------
-  1  QNAME  Query (pair) NAME
-  2  FLAG   bitwise FLAG
-  3  RNAME  Reference sequence NAME
-  4  POS    1-based leftmost POSition/coordinate of clipped sequence
-  5  MAPQ   MAPping Quality (Phred-scaled)
-  6  CIGAR  extended CIGAR string
-  7  MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
-  8  MPOS   1-based Mate POSition
-  9  ISIZE  Inferred insert SIZE
-  10 SEQ    query SEQuence on the same strand as the reference
-  11 QUAL   query QUALity (ASCII-33 gives the Phred base quality)
-  12 OPT    variable OPTional fields in the format TAG:VTYPE:VALUE
-  
-The flags are as follows::
-
-    Flag  Description
-  ------  -------------------------------------
-  0x0001  the read is paired in sequencing
-  0x0002  the read is mapped in a proper pair
-  0x0004  the query sequence itself is unmapped
-  0x0008  the mate is unmapped
-  0x0010  strand of the query (1 for reverse)
-  0x0020  strand of the mate
-  0x0040  the read is the first read in a pair
-  0x0080  the read is the second read in a pair
-  0x0100  the alignment is not primary
-
-It looks like this (scroll sideways to see the entire example)::
-
-  QNAME	FLAG	RNAME	POS	MAPQ	CIGAR	MRNM	MPOS	ISIZE	SEQ	QUAL	OPT
-  HWI-EAS91_1_30788AAXX:1:1:1761:343	4	*	0	0	*	*	0	0	AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG	hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
-  HWI-EAS91_1_30788AAXX:1:1:1578:331	4	*	0	0	*	*	0	0	GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG	hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
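-
-Both example reads above have FLAG 4 (query unmapped). A quick way to decode
-any FLAG value is to test it against the bit masks in the table, for example
-in Python (illustrative)::
-
-  flag = 4
-  print bool( flag & 0x0001 )  # paired in sequencing?  False
-  print bool( flag & 0x0004 )  # query unmapped?        True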
-
--------
-
-**BWA settings**
-
-All of BWA's options are implemented here, and each has a default value that you can change.
-
-------
-
-**BWA parameter list**
-
-This is an exhaustive list of BWA options:
-
-For **aln**::
-
-  -n NUM  Maximum edit distance if the value is INT, or the fraction of missing
-          alignments given 2% uniform base error rate if FLOAT. In the latter
-          case, the maximum edit distance is automatically chosen for different 
-          read lengths. [0.04]
-  -o INT  Maximum number of gap opens [1]
-  -e INT  Maximum number of gap extensions, -1 for k-difference mode
-          (disallowing long gaps) [-1]
-  -d INT  Disallow a long deletion within INT bp towards the 3'-end [16]
-  -i INT  Disallow an indel within INT bp towards the ends [5]
-  -l INT  Take the first INT subsequence as seed. If INT is larger than the
-          query sequence, seeding will be disabled. For long reads, this option
-          typically ranges from 25 to 35 for '-k 2'. [inf]
-  -k INT  Maximum edit distance in the seed [2]
-  -t INT  Number of threads (multi-threading mode) [1]
-  -M INT  Mismatch penalty. BWA will not search for suboptimal hits with a score
-          lower than (bestScore-misMsc). [3]
-  -O INT  Gap open penalty [11]
-  -E INT  Gap extension penalty [4]
-  -c      Reverse query but not complement it, which is required for alignment
-          in the color space.
-  -R      Proceed with suboptimal alignments even if the top hit is a repeat. By
-          default, BWA only searches for suboptimal alignments if the top hit is
-          unique. Using this option has no effect on accuracy for single-end
-          reads. It is mainly designed for improving the alignment accuracy of
-          paired-end reads. However, the pairing procedure will be slowed down,
-          especially for very short reads (~32bp).
-  -N      Disable iterative search. All hits with no more than maxDiff
-          differences will be found. This mode is much slower than the default.
-
-For **samse**::
-
-  -n INT  Maximum number of alignments to output in the XA tag for reads paired
-          properly. If a read has more than INT hits, the XA tag will not be
-          written. [3]
-  -r STR  Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
-
-For **sampe**::
-
-  -a INT  Maximum insert size for a read pair to be considered as being mapped
-          properly. Since version 0.4.5, this option is only used when there
-          are not enough good alignments to infer the distribution of insert
-          sizes. [500]
-  -n INT  Maximum number of alignments to output in the XA tag for reads paired
-          properly. If a read has more than INT hits, the XA tag will not be
-          written. [3]
-  -N INT  Maximum number of alignments to output in the XA tag for discordant
-          read pairs (excluding singletons). If a read has more than INT hits,
-          the XA tag will not be written. [10]
-  -o INT  Maximum occurrences of a read for pairing. A read with more
-          occurrences will be treated as a single-end read. Reducing this
-          parameter speeds up pairing. [100000]
-  -r STR  Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
-
-For specifying the read group in **samse** or **sampe**, use the following::
-
-  @RG   Read group. Unordered multiple @RG lines are allowed. 
-  ID    Read group identifier. Each @RG line must have a unique ID. The value of
-        ID is used in the RG tags of alignment records. Must be unique among all
-        read groups in header section. Read group IDs may be modified when
-        merging SAM files in order to handle collisions. 
-  CN    Name of sequencing center producing the read. 
-  DS    Description. 
-  DT    Date the run was produced (ISO8601 date or date/time). 
-  FO    Flow order. The array of nucleotide bases that correspond to the
-        nucleotides used for each flow of each read. Multi-base flows are encoded
-        in IUPAC format, and non-nucleotide flows by various other characters.
-        Format : /\*|[ACMGRSVTWYHKDBN]+/ 
-  KS    The array of nucleotide bases that correspond to the key sequence of each read. 
-  LB    Library. 
-  PG    Programs used for processing the read group. 
-  PI    Predicted median insert size. 
-  PL    Platform/technology used to produce the reads. Valid values : CAPILLARY,
-        LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO. 
-  PU    Platform unit (e.g. flowcell-barcode.lane for Illumina or slide for
-        SOLiD). Unique identifier. 
-  SM    Sample. Use pool name where a pool is being sequenced. 
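-
-For example (values hypothetical), a complete paired-end command with a read
-group might look like::
-
-  bwa sampe -r "@RG\tID:run42\tLB:lib1\tPL:ILLUMINA\tSM:sampleA" ref.fa 1.sai 2.sai reads_1.fastq reads_2.fastq > out.sam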
-
-  </help>
-</tool>
-
-
--- a/tools/sr_mapping/fastq_statistics.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-<tool id="cshl_fastq_statistics" name="FASTQ Statistics">
-  <description>for Solexa file</description>
-  <command>cat $input | solexa_quality_statistics -o $output</command>
-  <inputs>
-    <param format="fastqsolexa" name="input" type="data" label="Library to analyze" />
-  </inputs>
-  <outputs>
-    <data format="txt" name="output" />
-  </outputs>
-  <help>
-
-**What it does**
-
-Creates quality statistics report for the given Solexa/FASTQ library.
-
------
-
-**The output file will contain the following fields:**
-
-* column  = Column number (1 to 36 for a 36-cycle Solexa read file).
-* count   = Number of bases found in this column.
-* min     = Lowest quality score value found in this column.
-* max     = Highest quality score value found in this column.
-* sum     = Sum of quality score values for this column.
-* mean    = Mean quality score value for this column.
-* Q1      = 1st quartile quality score.
-* med     = Median quality score.
-* Q3      = 3rd quartile quality score.
-* IQR     = Inter-quartile range (Q3-Q1).
-* lW      = 'Left-whisker' value (for boxplotting).
-* rW      = 'Right-whisker' value (for boxplotting).
-* A_Count = Count of 'A' nucleotides found in this column.
-* C_Count = Count of 'C' nucleotides found in this column.
-* G_Count = Count of 'G' nucleotides found in this column.
-* T_Count = Count of 'T' nucleotides found in this column.
-* N_Count = Count of 'N' nucleotides found in this column.
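-
-To make the fields concrete, the first few statistics (column, count, min,
-max, mean) can be derived from a FASTQ file with a short script along these
-lines (an illustrative sketch assuming Solexa qualities at ASCII offset 64,
-not the solexa_quality_statistics program itself)::
-
-  columns = {}
-  for i, line in enumerate( open( 'input.fastqsolexa' ) ):
-      if i % 4 == 3:  # the quality line of each 4-line FASTQ record
-          for col, ch in enumerate( line.rstrip( '\n' ) ):
-              columns.setdefault( col, [] ).append( ord( ch ) - 64 )
-  for col in sorted( columns ):
-      quals = columns[ col ]
-      print col + 1, len( quals ), min( quals ), max( quals ), \
-            float( sum( quals ) ) / len( quals )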
-
-
-.. class:: infomark
-
-**TIP:** This statistics report can be used as input for **Quality Score** and **Nucleotides Distribution** tools.
-
-**Output Example**::
-
-    column	count	min	max	sum	mean	Q1	med	Q3	IQR	lW	rW	A_Count	C_Count	G_Count	T_Count	N_Count
-    1	6362991	-4	40	250734117	39.41	40	40	40	0	40	40	1396976	1329101	678730	2958184	0
-    2	6362991	-5	40	250531036	39.37	40	40	40	0	40	40	1786786	1055766	1738025	1782414	0
-    3	6362991	-5	40	248722469	39.09	40	40	40	0	40	40	2296384	984875	1443989	1637743	0
-    4	6362991	-5	40	247654797	38.92	40	40	40	0	40	40	1683197	1410855	1722633	1546306	0
-    5	6362991	-4	40	248214827	39.01	40	40	40	0	40	40	2536861	1167423	1248968	1409739	0
-    6	6362991	-5	40	248499903	39.05	40	40	40	0	40	40	1598956	1236081	1568608	1959346	0
-    7	6362991	-4	40	247719760	38.93	40	40	40	0	40	40	1692667	1822140	1496741	1351443	0
-    8	6362991	-5	40	245745205	38.62	40	40	40	0	40	40	2230936	1343260	1529928	1258867	0
-    9	6362991	-5	40	245766735	38.62	40	40	40	0	40	40	1702064	1306257	1336511	2018159	0
-    10	6362991	-5	40	245089706	38.52	40	40	40	0	40	40	1519917	1446370	1450995	1945709	0
-    11	6362991	-5	40	242641359	38.13	40	40	40	0	40	40	1717434	1282975	1387804	1974778	0
-    12	6362991	-5	40	242026113	38.04	40	40	40	0	40	40	1662872	1202041	1519721	1978357	0
-    13	6362991	-5	40	238704245	37.51	40	40	40	0	40	40	1549965	1271411	1973291	1566681	1643
-    14	6362991	-5	40	235622401	37.03	40	40	40	0	40	40	2101301	1141451	1603990	1515774	475
-    15	6362991	-5	40	230766669	36.27	40	40	40	0	40	40	2344003	1058571	1440466	1519865	86
-    16	6362991	-5	40	224466237	35.28	38	40	40	2	35	40	2203515	1026017	1474060	1651582	7817
-    17	6362991	-5	40	219990002	34.57	34	40	40	6	25	40	1522515	1125455	2159183	1555765	73
-    18	6362991	-5	40	214104778	33.65	30	40	40	10	15	40	1479795	2068113	1558400	1249337	7346
-    19	6362991	-5	40	212934712	33.46	30	40	40	10	15	40	1432749	1231352	1769799	1920093	8998
-    20	6362991	-5	40	212787944	33.44	29	40	40	11	13	40	1311657	1411663	2126316	1513282	73
-    21	6362991	-5	40	211369187	33.22	28	40	40	12	10	40	1887985	1846300	1300326	1318380	10000
-    22	6362991	-5	40	213371720	33.53	30	40	40	10	15	40	542299	3446249	516615	1848190	9638
-    23	6362991	-5	40	221975899	34.89	36	40	40	4	30	40	347679	1233267	926621	3855355	69
-    24	6362991	-5	40	194378421	30.55	21	40	40	19	-5	40	433560	674358	3262764	1992242	67
-    25	6362991	-5	40	199773985	31.40	23	40	40	17	-2	40	944760	325595	1322800	3769641	195
-    26	6362991	-5	40	179404759	28.20	17	34	40	23	-5	40	3457922	156013	1494664	1254293	99
-    27	6362991	-5	40	163386668	25.68	13	28	40	27	-5	40	1392177	281250	3867895	821491	178
-    28	6362991	-5	40	156230534	24.55	12	25	40	28	-5	40	907189	981249	4174945	299437	171
-    29	6362991	-5	40	163236046	25.65	13	28	40	27	-5	40	1097171	3418678	1567013	280008	121
-    30	6362991	-5	40	151309826	23.78	12	23	40	28	-5	40	3514775	2036194	566277	245613	132
-    31	6362991	-5	40	141392520	22.22	10	21	40	30	-5	40	1569000	4571357	124732	97721	181
-    32	6362991	-5	40	143436943	22.54	10	21	40	30	-5	40	1453607	4519441	38176	351107	660
-    33	6362991	-5	40	114269843	17.96	6	14	30	24	-5	40	3311001	2161254	155505	734297	934
-    34	6362991	-5	40	140638447	22.10	10	20	40	30	-5	40	1501615	1637357	18113	3205237	669
-    35	6362991	-5	40	138910532	21.83	10	20	40	30	-5	40	1532519	3495057	23229	1311834	352
-    36	6362991	-5	40	117158566	18.41	7	15	30	23	-5	40	4074444	1402980	63287	822035	245
-    
-
-</help>
-</tool>
--- a/tools/sr_mapping/lastz_paired_reads_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,847 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Runs Lastz paired read alignment process
-Written for Lastz v. 1.02.00.
-
-# Author(s): based on various scripts written by Bob Harris (rsharris@bx.psu.edu),
-# then tweaked to this form by Greg Von Kuster (greg@bx.psu.edu)
-
-This tool takes the following input:
-a. A collection of 454 paired end reads ( a fasta file )
-b. A linker sequence ( a very small fasta file )
-c. A reference genome ( nob, 2bit or fasta )
-
-and uses the following process:
-1. Split reads into mates:  the input to this step is the read file XXX.fasta, and the output is three
-   files; XXX.short.fasta, XXX.long.fasta and XXX.mapping.  The mapping file records the information necessary
-   to convert mate coordinates back into the original read, which is needed later in the process.
-
-2. Align short mates to the reference: this runs lastz against every chromosome.  The input is XXX.short.fasta
-   and the reference genome, and the output is a SAM file, XXX.short.sam.
-
-3. Align long mates to the reference: this runs lastz against every chromosome.  The input is XXX.long.fasta
-   and the reference genome, and the output is a SAM file, XXX.long.sam.
-
-4. Combine, and convert mate coordinates back to read coordinates.  The input is XXX.mapping, XXX.short.sam and
-   XXX.long.sam, and the output is XXX.sam.
-
-usage: lastz_paired_reads_wrapper.py [options]
-    --ref_name: The reference name to change all output matches to
-    --ref_source: The reference is cached or from the history
-    --source_select: Use pre-set or cached reference file
-    --input1: The name of the reference file if using history or reference base name if using cached
-    --input2: The reads file to align
-    --input3: The sequencing linker file
-    --input4: The 454 base quality score file
-    --ref_sequences: The number of sequences in the reference file if using one from history 
-    --output: The name of the output file
-    --lastz_seqs_file_dir: Directory of local lastz_seqs.loc file
-"""
-import optparse, os, subprocess, shutil, sys, tempfile, time
-from string import maketrans
-
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( 'bx-python' )
-from bx.seq.twobit import *
-from bx.seq.fasta import FastaReader
-from galaxy.util.bunch import Bunch
-from galaxy.util import string_as_bool
-
-# Column indexes for SAM required fields
-SAM_QNAME_COLUMN = 0
-SAM_FLAG_COLUMN  = 1
-SAM_RNAME_COLUMN = 2
-SAM_POS_COLUMN   = 3
-SAM_MAPQ_COLUMN  = 4
-SAM_CIGAR_COLUMN = 5
-SAM_MRNM_COLUMN  = 6
-SAM_MPOS_COLUMN  = 7
-SAM_ISIZE_COLUMN = 8
-SAM_SEQ_COLUMN   = 9
-SAM_QUAL_COLUMN  = 10
-SAM_MIN_COLUMNS  = 11
-# SAM bit-encoded flags
-BAM_FPAIRED      =    1    # the read is paired in sequencing, no matter whether it is mapped in a pair
-BAM_FPROPER_PAIR =    2    # the read is mapped in a proper pair
-BAM_FUNMAP       =    4    # the read itself is unmapped; conflictive with BAM_FPROPER_PAIR
-BAM_FMUNMAP      =    8    # the mate is unmapped
-BAM_FREVERSE     =   16    # the read is mapped to the reverse strand
-BAM_FMREVERSE    =   32    # the mate is mapped to the reverse strand
-BAM_FREAD1       =   64    # this is read1
-BAM_FREAD2       =  128    # this is read2
-BAM_FSECONDARY   =  256    # not primary alignment
-BAM_FQCFAIL      =  512    # QC failure
-BAM_FDUP         = 1024    # optical or PCR duplicate
-
-# Keep track of all created temporary files so they can be deleted
-global tmp_file_names
-tmp_file_names = []
-# The values in the skipped_lines dict are lists (mutable, since skip_line()
-# updates them in place) consisting of:
-# - the number of skipped lines for that error
-# If not a sequence error:
-# - the 1st line number on which the error was found
-# - the text of the 1st line on which the error was found
-# If a sequence error:
-# - The number of the sequence in the file
-# - the sequence name on which the error occurred
-# We may need to improve dealing with file position and text as
-# much of it comes from temporary files that are created from the
-# inputs, and not the inputs themselves, so this could be confusing
-# to the user.
-global skipped_lines
-skipped_lines = dict( bad_interval=[ 0, 0, '' ],
-                      inconsistent_read_lengths=[ 0, 0, '' ],
-                      inconsistent_reads=[ 0, 0, '' ],
-                      inconsistent_sizes=[ 0, 0, '' ],
-                      missing_mate=[ 0, 0, '' ],
-                      missing_quals=[ 0, 0, '' ],
-                      missing_seq=[ 0, 0, '' ],
-                      multiple_seqs=[ 0, 0, '' ],
-                      no_header=[ 0, 0, '' ],
-                      num_fields=[ 0, 0, '' ],
-                      reads_paired=[ 0, 0, '' ],
-                      sam_flag=[ 0, 0, '' ],
-                      sam_headers=[ 0, 0, '' ],
-                      sam_min_columns=[ 0, 0, '' ],
-                      two_mate_names=[ 0, 0, '' ],
-                      wrong_seq_len=[ 0, 0, '' ] )
-global total_skipped_lines
-total_skipped_lines = 0
-
-def stop_err( msg ):
-    sys.stderr.write( "%s" % msg )
-    sys.exit()
-
-def skip_line( error_key, position, text ):
-    # total_skipped_lines is reassigned below, so it must be declared global
-    global total_skipped_lines
-    if not skipped_lines[ error_key ][2]:
-        skipped_lines[ error_key ][1] = position
-        skipped_lines[ error_key ][2] = text
-    skipped_lines[ error_key ][0] += 1
-    total_skipped_lines += 1
-
-def get_tmp_file_name( dir=None, suffix=None ):
-    """
-    Return a unique temporary file name that can be managed.  The
-    file must be manually removed after use.
-    """
-    if dir and suffix:
-        tmp_fd, tmp_name = tempfile.mkstemp( dir=dir, suffix=suffix )
-    elif dir:
-        tmp_fd, tmp_name = tempfile.mkstemp( dir=dir )
-    elif suffix:
-        tmp_fd, tmp_name = tempfile.mkstemp( suffix=suffix )
-    else:
-        tmp_fd, tmp_name = tempfile.mkstemp()
-    os.close( tmp_fd )
-    tmp_file_names.append( tmp_name )
-    return tmp_name
-
-def run_command( command ):
-    proc = subprocess.Popen( args=command, shell=True, stderr=subprocess.PIPE )
-    # communicate() drains stderr while waiting, avoiding the deadlock that
-    # wait()-then-read() can cause when the pipe buffer fills up
-    stderr = proc.communicate()[1]
-    if stderr:
-        stop_err( stderr )
-
-def split_paired_reads( input2, combined_linker_file_name ):
-    """
-    Given a fasta file of allegedly paired end reads ( input2 ), and a list of intervals
-    showing where the linker is on each read ( combined_linker_file_name ), split the reads into left and right
-    halves.
-    
-    The input intervals look like this.  Note that they may include multiple intervals for the same read
-    ( which should overlap ), and we use the union of them as the linker interval.  Non-overlaps are
-    reported to the user, and those reads are not processed.  Starts are origin zero.
-    
-        #name     strand start len size
-        FG3OYDA05FTEES +   219  42 283
-        FG3OYDA05FVOLL +   263  41 416
-        FG3OYDA05FFL7J +    81  42 421
-        FG3OYDA05FOQWE +    55  42 332
-        FG3OYDA05FV4DW +   297  42 388
-        FG3OYDA05FWAQV +   325  42 419
-        FG3OYDA05FVLGA +    90  42 367
-        FG3OYDA05FWJ71 +    58  42 276
-    
-    The output gives each half-sequence on a separate line, like this.  This allows easy sorting of the
-    sequences by length, after the fact.
-    
-        219 FG3OYDA05FTEES_L TTTAGTTACACTTAACTCACTTCCATCCTCTAAATACGTGATTACCTTTC...
-        22  FG3OYDA05FTEES_R CCTTCCTTAAGTCCTAAAACTG
-    """
-    # Bob says these should be hard-coded.
-    seq_len_lower_threshold = 17
-    short_mate_cutoff = 50
-    # We need to pass the name of this file back to the caller.
-    tmp_mates_file_name = get_tmp_file_name( suffix='mates.txt' )
-    mates_file = file( tmp_mates_file_name, "w+b" )
-    # Read the linker intervals
-    combined_linker_file = file( combined_linker_file_name, "rb" )
-    read_to_linker_dict = {}
-    i = 0
-    for i, line in enumerate( combined_linker_file ):
-        line = line.strip()
-        if line.startswith( "#" ):
-            continue
-        if line.find( '#' ) >= 0:
-            line = line.split( "#", 1 )[0].rstrip()
-        fields = line.split()
-        if len( fields ) != 4:
-            skip_line( 'num_fields', i+1, line )
-            continue
-        name, start, length, size = fields
-        start = int( start )
-        length = int( length )
-        size = int( size )
-        end = start + length
-        if end > size:
-            skip_line( 'bad_interval', i+1, line )
-            continue
-        if name not in read_to_linker_dict:
-            read_to_linker_dict[ name ] = ( start, end, size )
-            continue
-        if read_to_linker_dict[ name ] is None:
-            # Read previously marked as non-overlapping intervals, so skip this sequence - see below
-            continue
-        ( s, e, sz ) = read_to_linker_dict[ name ]
-        if sz != size:
-            skip_line( 'inconsistent_sizes', i+1, name )
-            continue
-        if s > end or e < start:
-            # Non-overlapping intervals, so skip this sequence
-            read_to_linker_dict[ name ] = None
-            continue
-        read_to_linker_dict[ name ] = ( min( s, start ), max( e, end ), size )
-    combined_linker_file.close()
-    # We need to pass the name of this file back to the caller.
-    tmp_mates_mapping_file_name = get_tmp_file_name( suffix='mates.mapping' )
-    mates_mapping_file = file( tmp_mates_mapping_file_name, 'w+b' )
-    # Process the sequences
-    seqs = 0
-    fasta_reader = FastaReader( file( input2, 'rb' ) )
-    while True:
-        seq = fasta_reader.next()
-        if not seq:
-            break
-        seqs += 1
-        if seq.name not in read_to_linker_dict:
-            if seq.length > seq_len_lower_threshold:
-                mates_file.write( "%-3d %s   %s\n" % ( seq.length, seq.name, seq.text ) )
-            read_to_linker_dict[ seq.name ] = ""
-            continue
-        if read_to_linker_dict[ seq.name ] == "":
-            skip_line( 'multiple_seqs', seqs, seq.name )
-            continue
-        if read_to_linker_dict[ seq.name ] is None:
-            # Read previously marked as non-overlapping intervals, so skip this sequence - see above
-            continue
-        ( start, end, size ) = read_to_linker_dict[ seq.name ]
-        if seq.length != size:
-            skip_line( 'wrong_seq_len', seqs, seq.name )
-            continue
-        left = seq.text[ :start ]
-        right = seq.text[ end: ]
-        left_is_small = len( left ) <= seq_len_lower_threshold
-        right_is_small = len( right ) <= seq_len_lower_threshold
-        if left_is_small and right_is_small:
-            continue
-        if not left_is_small:
-            mates_file.write( "%-3d %s %s\n" % ( len( left ), seq.name + "_L", left ) )
-            mates_mapping_file.write( "%s %s %s %s\n" % ( seq.name + "_L", seq.name, 0, size - start ) )
-        if not right_is_small:
-            mates_file.write( "%-3d %s %s\n" % ( len( right ), seq.name + "_R", right ) )
-            mates_mapping_file.write( "%s %s %s %s\n" % ( seq.name + "_R", seq.name, end, 0 ) )
-        read_to_linker_dict[ seq.name ] = ""
-    mates_file.close()
-    mates_mapping_file.close()
-    # Create temporary files for short and long mates
-    tmp_mates_short_file_name = get_tmp_file_name( suffix='mates.short' )
-    tmp_mates_long_file_name = get_tmp_file_name( suffix='mates.long' )
-    tmp_mates_short = open( tmp_mates_short_file_name, 'w+b' )
-    tmp_mates_long = open( tmp_mates_long_file_name, 'w+b' )
-    i = 0
-    for i, line in enumerate( file( tmp_mates_file_name, 'rb' ) ):
-        fields = line.split()
-        seq_len = int( fields[0] )
-        seq_name = fields[1]
-        seq_text = fields[2]
-        if seq_len <= short_mate_cutoff:
-            tmp_mates_short.write( ">%s\n%s\n" % ( seq_name, seq_text ) )
-        else:
-            tmp_mates_long.write( ">%s\n%s\n" % ( seq_name, seq_text ) )
-    tmp_mates_short.close()
-    tmp_mates_long.close()
-    return tmp_mates_mapping_file_name, tmp_mates_file_name, tmp_mates_short_file_name, tmp_mates_long_file_name
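-
-# A minimal usage sketch for the function above (hypothetical file names):
-#
-#     mapping, mates, short_mates, long_mates = split_paired_reads(
-#         'reads.fasta', 'linker_intervals.txt' )
-#
-# 'short_mates' and 'long_mates' are temporary fasta files partitioned at the
-# hard-coded 50 bp cutoff, and are the inputs that align_mates() expects.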
-
-def align_mates( input1, ref_source, ref_name, ref_sequences, tmp_mates_short_file_name, tmp_mates_long_file_name ):
-    tmp_align_file_names = []
-    if ref_source == 'history':
-        # Reference is a fasta dataset from the history
-        # Create temporary files to contain the output from lastz executions
-        tmp_short_file_name = get_tmp_file_name( suffix='short_out' )
-        tmp_align_file_names.append( tmp_short_file_name )
-        tmp_long_file_name = get_tmp_file_name( suffix='long_out' )
-        tmp_align_file_names.append( tmp_long_file_name )
-        seqs = 0
-        fasta_reader = FastaReader( open( input1 ) )
-        while True:
-            # Read the next sequence from the reference dataset.  Note that if the reference contains
-            # a small number of chromosomes this loop is ok, but in many cases the genome has a bunch
-            # of small straggler scaffolds and contigs and it is a computational waste to do each one
-            # of these in its own run.  There is an I/O downside to running by subsets (even if they are
-            # one sequence per subset), compared to splitting the reference into chunks of 250 MB.  With
-            # the subset action, lastz still has to read and parse the entire file for every run (this
-            # is true for fasta, but for .2bit files it can access each sequence directly within the file,
-            # so the overhead is minimal).
-            """
-            :> output_file  (this creates the output file, empty)
-            while there are more sequences to align
-                find the next sequences that add up to 250M, put their names in farf.names
-                lastz ${refFile}[subset=farf.names][multi][unmask] ${matesPath}/${matesFile} ... 
-                  >> output_file
-            """
-            seq = fasta_reader.next()
-            if not seq:
-                break
-            seqs += 1
-            # Create a temporary file to contain the current sequence as input to lastz.
-            # We're doing this a bit differently here since we could be generating a huge
-            # number of temporary files.
-            tmp_in_fd, tmp_in_file_name = tempfile.mkstemp( suffix='seq_%d_in' % seqs )
-            tmp_in_file = os.fdopen( tmp_in_fd, 'w+b' )
-            tmp_in_file.write( '>%s\n%s\n' % ( seq.name, seq.text ) )
-            tmp_in_file.close()
-            # Align short mates
-            command = 'lastz %s[unmask]%s %s ' % ( tmp_in_file_name, ref_name, tmp_mates_short_file_name )
-            command += 'Z=1 --seed=1111111011111 --notrans --maxwordcount=90% --match=1,3 O=1 E=3 X=15 K=10 Y=12 L=18 --ambiguousn --noytrim --identity=95 --coverage=80 --continuity=95 --format=softsam- '
-            command += '>> %s' % tmp_short_file_name
-            run_command( command )
-            # Align long mates
-            command = 'lastz %s[unmask]%s %s ' % ( tmp_in_file_name, ref_name, tmp_mates_long_file_name )
-            command += 'Z=15 W=13 --notrans --exact=18 --maxwordcount=90% --match=1,3 O=1 E=3 Y=10 L=18 --ambiguousn --noytrim --identity=95 --coverage=90 --continuity=95 --format=softsam- '
-            command += '>> %s' % tmp_long_file_name
-            run_command( command )
-            # Remove the temporary file that contains the current sequence
-            os.remove( tmp_in_file_name )
-    else:
-        # Reference is a locally cached 2bit file, split lastz calls across number of chroms in 2bit file
-        tbf = TwoBitFile( open( input1, 'rb' ) )
-        for chrom in tbf.keys():
-            # Align short mates
-            tmp_short_file_name = get_tmp_file_name( suffix='short_vs_%s' % chrom )
-            tmp_align_file_names.append( tmp_short_file_name )
-            command = 'lastz %s/%s[unmask]%s %s ' % ( input1, chrom, ref_name, tmp_mates_short_file_name )
-            command += 'Z=1 --seed=1111111011111 --notrans --maxwordcount=90% --match=1,3 O=1 E=3 X=15 K=10 Y=12 L=18 --ambiguousn --noytrim --identity=95 --coverage=80 --continuity=95 --format=softsam- '
-            command += '> %s' % tmp_short_file_name
-            run_command( command )
-            # Align long mates
-            tmp_long_file_name = get_tmp_file_name( suffix='long_vs_%s' % chrom )
-            tmp_align_file_names.append( tmp_long_file_name )
-            command = 'lastz %s/%s[unmask]%s %s ' % ( input1, chrom, ref_name, tmp_mates_long_file_name )
-            command += 'Z=15 W=13 --notrans --exact=18 --maxwordcount=90% --match=1,3 O=1 E=3 Y=10 L=18 --ambiguousn --noytrim --identity=95 --coverage=90 --continuity=95 --format=softsam- '
-            command += '> %s' % tmp_long_file_name
-            run_command( command )
-    return tmp_align_file_names
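-
-# Note (added commentary): align_mates() runs two differently tuned lastz
-# passes.  For example, with hypothetical values tmp_in_file_name='/tmp/seq_1_in'
-# and ref_name='[nickname=hg18]', the short-mate command above expands to:
-#
-#     lastz /tmp/seq_1_in[unmask][nickname=hg18] mates.short Z=1 \
-#         --seed=1111111011111 ... --coverage=80 --format=softsam- >> short_out
-#
-# while long mates use the heavier seeding (Z=15 W=13 --exact=18) and a 90%
-# coverage floor.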
-
-def paired_mate_unmapper( input2, input4, tmp_mates_mapping_file_name, tmp_align_file_name_list, output ):
-    """
-    Given a SAM file corresponding to alignments of *subsegments* of paired 'reads' to a reference sequence,
-    convert the positions on the subsegments to positions on the reads.  Also (optionally) add quality values.
-    
-    The input file is in SAM format, as shown below.  Each line represents the alignment of a part of a read
-    to a reference sequence.  Read pairs are indicated by suffixes in their names.  Normally, the suffixes _L
-    and _R indicate the left and right mates of reads (this can be overridden with the --left and --right
-    options).  Reads that were not mates have no suffix.
-    
-        (SAM header lines omitted)
-        F2YP0BU02G7LK5_R 16 chr21 15557360 255 40M          * 0 0 ATTTTATTCTCTTTGAAGCAATTGTGAATGGGAGTTTACT           *
-        F2YP0BU02HXV58_L 16 chr21 15952091 255 40M6S        * 0 0 GCAAATTGTGCTGCTTTAAACATGCGTGTGCAAGTATCTTtttcat     *
-        F2YP0BU02HREML_R 0  chr21 16386077 255 33M5S        * 0 0 CCAAAGTTCTGGGATTACAGGCGTGAGCCATCGcgccc             *
-        F2YP0BU02IOF1F_L 0  chr21 17567321 255 7S28M        * 0 0 taaagagAAGAATTCTCAACCCAGAATTTCATATC                *
-        F2YP0BU02IKX84_R 16 chr21 18491628 255 22M1D18M9S   * 0 0 GTCTCTACCAAAAAATACAAAAATTAGCCGGGCGTGGTGGcatgtctgt  *
-        F2YP0BU02GW5VA_L 16 chr21 20255344 255 6S32M        * 0 0 caagaaCAAACACATTCAAAAGCTAGTAGAAGGCAAGA             *
-        F2YP0BU02JIMJ4_R 0  chr21 22383051 255 19M          * 0 0 CCCTTTATCATTTTTTATT                                *
-        F2YP0BU02IXZGF_L 16 chr21 23094798 255 13M1I18M     * 0 0 GCAAGCTCCACTTCCCGGGTTCACGCCATTCT                   *
-        F2YP0BU02IODR5_L 0  chr21 30935325 255 37M          * 0 0 GAAATAAAGGGTATTCAATTAGGAAAAGAGGAAGTCA              *
-        F2YP0BU02IMZBL_L 16 chr21 31603486 255 28M1D1M      * 0 0 ATACAAAAATTAGCCGGGCACAGTGGCAG                      *
-        F2YP0BU02JA9PR_L 16 chr21 31677159 255 23M          * 0 0 CACACCTGTAACCCCAGCACTTT                            *
-        F2YP0BU02HKC61_R 0  chr21 31678718 255 40M          * 0 0 CACTGCACTCCAGCCTGGGTGACAAAGCAAGACTCTGTCT           *
-        F2YP0BU02HKC61_R 0  chr21 31678718 255 40M          * 0 0 CACTGCACTCCAGCCTGGGTGACAAAGCAAGACTCTGTCT           *
-        F2YP0BU02HVA88   16 chr21 31703558 255 1M1D35M8S    * 0 0 TGGGATTACAGGCGTGAGCTACCACACCCAGCCAGAgttcaaat       *
-        F2YP0BU02JDCF1_L 0  chr21 31816600 255 38M          * 0 0 AGGAGAATCGCTTGAACCCAGGAGGCAGAGGTTGCGGT             *
-        F2YP0BU02GZ1GO_R 0  chr21 33360122 255 6S38M        * 0 0 cctagaCTTCACACACACACACACACACACACACACACACACAC       *
-        F2YP0BU02FX387_L 16 chr22 14786201 255 26M          * 0 0 TGGATGAAGCTGGAAACCATCATTCT                         *
-        F2YP0BU02IF2NE_R 0  chr22 16960842 255 40M10S       * 0 0 TGGCATGCACCTGTAGTCTCAGCTACTTGGGAGGCTGAGGtgggaggatc *
-        F2YP0BU02F4TVA   0  chr22 19200522 255 49M          * 0 0 CCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCACGCCATTGCACTCCA  *
-        F2YP0BU02HKC61_R 16 chr22 29516998 255 8S32M        * 0 0 agacagagTCTTGCTTTGTCACCCAGGCTGGAGTGCAGTG           *
-        F2YP0BU02FS4EM_R 0  chr22 30159364 255 29M          * 0 0 CTCCTGCCTCAGCCTCCCGAGTAGTTGGG                      *
-        F2YP0BU02G197P_L 0  chr22 32044496 255 40M10S       * 0 0 TTGTTGGACATTTGGGTTGGTTCCAAGTCTTTGCTATTGTgaataatgcc *
-        F2YP0BU02FIING   16 chr22 45959944 255 3M1I11M1I26M * 0 0 AGCTATGGTACTGGCTATGAAAGCAGACACATAGACCAATGG         *
-        F2YP0BU02GUB9L_L 16 chr22 49198404 255 16M1I20M     * 0 0 CACCACGCTCGGCTAATTTTTGTATTTTTAGTAGAGA              *
-    
-    The user must provide a mapping file (which might better be called an unmapping file).  This file is usually
-    created by split_paired_reads, and tells us how to map the subsegments back to original coordinates in a single
-    read (this means the left and right mates were part of a single read).  The mapping file contains four columns.
-    The first two give the mate's name (including the suffix) and the read name.  The last two columns describe how
-    much of the full original sequence is missing from the mate.  For example, in the read below, the left mate is
-    missing 71 on the right (42 for the linker and 29 for the right half).  The right mate is missing 339 on the left.
-    
-        left half:  TTTCAACATATGCAAATCAATAAATGTAATCCAGCATATAAACAGAACCA
-                    AAGACAAAAACCACATGATTATCTCAATAGATGCAGAAAAGGCCTTCGGC
-                    AAAATTCAACAAAACTCCATGCTAAAACTCTCAATAAGGTATTGATGGGA
-                    CATGCCGCATAATAATAAGACATATCTATGACAAACCCACAGCCAATATC
-                    ATGCTGAATGCACAAAAATTGGAAGCATTCCCTTTGAAAACTGGCACAAG
-                    ACTGGGATGCCCTCTCTCACAACTCCTATTCAACATAGTGTTGGAAG
-        linker:     CGTAATAACTTCGTATAGCATACATTATACGAAGTCATACGA
-        right half: CTCCTGCCTCAGCCTCCCGAGTAGTTGGG
-    
-        mate_name        read_name      offset_to_start offset_from_end
-        F2YP0BU02FS4EM_L F2YP0BU02FS4EM         0              71
-        F2YP0BU02FS4EM_R F2YP0BU02FS4EM       339               0
-    
-    The user can also specify a quality scores file, which should look something like this.  Quality values are presumed
-    to be PHRED scores, written in space-delimited decimal.
-    
-        >F2YP0BU02FS4EM
-        38 38 38 40 40 40 40 40 40 40 40 40 40 39 39 39 40 40 40 40 38 21 21 21 40
-        40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 39 39 40 40 40 40 40 40 40 33
-        32 32 40 40 40 21 21 18 18 21 34 34 31 40 40 40 40 40 40 40 40 40 40 40 40
-        40 40 40 40 40 40 40 40 40 40 40 32 32 32 32 40 40 40 40 40 40 40 34 34 35
-        31 31 28 28 33 33 33 36 36 36 17 17 17 19 26 36 36 36 40 40 40 40 40 33 34
-        34 34 39 39 39 40 40 40 40 40 33 33 34 34 40 40 40 40 40 40 40 39 39 39 40
-        40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40
-        40 40 40 40 40 40 40 39 39 39 39 39 39 40 40 40 39 39 39 40 40 40 40 40 40
-        40 40 40 40 40 40 40 40 40 40 40 40 40 26 26 26 26 26 40 40 38 38 37 35 33
-        36 40 19 17 17 17 17 19 19 23 30 20 20 20 23 35 40 36 36 36 36 36 36 36 36
-        39 40 34 20 27 27 35 39 40 37 40 40 40 40 40 40 40 40 40 40 34 34 35 39 40
-        40 40 40 40 40 40 39 39 39 40 40 40 40 36 36 32 32 28 28 29 30 36 40 30 26
-        26 26 34 39 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 39 39
-        40 39 35 34 34 40 40 40 40 30 30 30 35 40 40 40 40 40 39 39 36 40 40 40 40
-        39 39 39 39 30 30 28 35 35 39 40 40 40 40 40 35 35 35
-        >F2YP0BU02G197P
-        40 40 40 40 40 40 40 40 40 40 39 39 39 39 39 39 40 40 40 40 40 40 40 40 40
-        40 40 40 40 26 26 26 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40
-        40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40
-        40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40
-        40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 34 34 34 40 40
-        40 40 40 40 40 40 39 39 39 40 40 40 40 40 40 40 40 40 40 39 39 39 40 40 40
-        40 40 40 40 40 40 40 34 34 34 34 40 40 40 40 34 34 34 34 40 40 40 40 40 40
-        40 40 40 40 40 39 39 39 34 34 34 34 40 40 40 40 39 39 25 25 26 39 40 40 40
-        40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40
-        33 33 33 33 40 35 21 21 21 30 38 40 40 40 40 40 40 40 40 35 35 30 30 30 40
-        40 40 39 39 39 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40
-        40 40 40 40 40 40 40 40 40 40 40 40 39 39 39 40 40 40 40 40 40 40 40 40 40
-        40 40 40 39 39 39 40 40
-        >F2YP0BU02FIING
-        32 32 32 25 25 25 25 24 25 30 31 30 27 27 27 28 28 21 19 19 13 13 13 14 19
-        19 17 19 16 16 25 28 22 21 17 17 18 25 24 25 25 25
-    
-    The output file is also SAM:
-    
-        (SAM header lines omitted)
-        F2YP0BU02G7LK5 81  chr21 15557360 255 40M303H        * 0 0 ATTTTATTCTCTTTGAAGCAATTGTGAATGGGAGTTTACT           D>>>>IIIIIIHHG???IIIIIIIIIHHHFFEIH999HII
-        F2YP0BU02HXV58 145 chr21 15952091 255 226H40M6S      * 0 0 GCAAATTGTGCTGCTTTAAACATGCGTGTGCAAGTATCTTtttcat     AA===DDDDAAAAD???:::ABBBBBAAA:888ECF;F>>>?8??@
-        F2YP0BU02HREML 65  chr21 16386077 255 320H33M5S      * 0 0 CCAAAGTTCTGGGATTACAGGCGTGAGCCATCGcgccc             HH???HHIIIHFHIIIIIIICDDHHIIIIIIHHHHHHH
-        F2YP0BU02IOF1F 129 chr21 17567321 255 7S28M409H      * 0 0 taaagagAAGAATTCTCAACCCAGAATTTCATATC                4100<<A>4113:<EFGGGFFFHHHHHHDFFFFED
-        F2YP0BU02IKX84 81  chr21 18491628 255 22M1D18M9S341H * 0 0 GTCTCTACCAAAAAATACAAAAATTAGCCGGGCGTGGTGGcatgtctgt  ;;;=7@.55------?2?11112GGB=CCCCDIIIIIIIIIHHHHHHII
-        F2YP0BU02GW5VA 145 chr21 20255344 255 286H6S32M      * 0 0 caagaaCAAACACATTCAAAAGCTAGTAGAAGGCAAGA             IIIIIIIHHHIIIIIIICCCCIIIIIIIIIIIIIIIII
-        F2YP0BU02JIMJ4 65  chr21 22383051 255 208H19M        * 0 0 CCCTTTATCATTTTTTATT                                555544E?GE113344I22
-        F2YP0BU02IXZGF 145 chr21 23094798 255 291H13M1I18M   * 0 0 GCAAGCTCCACTTCCCGGGTTCACGCCATTCT                   IIIIIIIIIIIGG;;;GGHIIIIIGGGIIIII
-        F2YP0BU02IODR5 129 chr21 30935325 255 37M154H        * 0 0 GAAATAAAGGGTATTCAATTAGGAAAAGAGGAAGTCA              6...7/--..,30;9<<>@BFFFAAAAHIIIIIH@@@
-        F2YP0BU02IMZBL 145 chr21 31603486 255 342H28M1D1M    * 0 0 ATACAAAAATTAGCCGGGCACAGTGGCAG                      BB1552222<<>9==8;;?AA=??A???A
-        F2YP0BU02JA9PR 145 chr21 31677159 255 229H23M        * 0 0 CACACCTGTAACCCCAGCACTTT                            IIIIIIIIIIICCCCIIIIIHHH
-        F2YP0BU02HKC61 65  chr21 31678718 255 300H40M        * 0 0 CACTGCACTCCAGCCTGGGTGACAAAGCAAGACTCTGTCT           AA@BD:::==AAA@A?8888:<90004<>>?><<<<4442
-        F2YP0BU02HKC61 65  chr21 31678718 255 300H40M        * 0 0 CACTGCACTCCAGCCTGGGTGACAAAGCAAGACTCTGTCT           AA@BD:::==AAA@A?8888:<90004<>>?><<<<4442
-        F2YP0BU02HVA88 16  chr21 31703558 255 1M1D35M8S      * 0 0 TGGGATTACAGGCGTGAGCTACCACACCCAGCCAGAgttcaaat       >8888DFFHHGFHHHH@@?@?DDC96666HIIIFFFFFFFFFFF
-        F2YP0BU02JDCF1 129 chr21 31816600 255 38M103H        * 0 0 AGGAGAATCGCTTGAACCCAGGAGGCAGAGGTTGCGGT             IIIIIIIIIIIHHHIIHHHIIIIIIIIIIIIIIIIIII
-        F2YP0BU02GZ1GO 65  chr21 33360122 255 76H6S38M       * 0 0 cctagaCTTCACACACACACACACACACACACACACACACACAC       BBBBD?:688CFFFFFFFFFFFFFFFFFFFFFFFFFFDDBBB51
-        F2YP0BU02FX387 145 chr22 14786201 255 201H26M        * 0 0 TGGATGAAGCTGGAAACCATCATTCT                         IIHHHHHHHHHHHHHFFFFFFFFFFF
-        F2YP0BU02IF2NE 65  chr22 16960842 255 209H40M10S     * 0 0 TGGCATGCACCTGTAGTCTCAGCTACTTGGGAGGCTGAGGtgggaggatc BAAADDDDFDDDDDDBBA889<A?4444000@<>AA?9444;;8>77<7-
-        F2YP0BU02F4TVA 0   chr22 19200522 255 49M            * 0 0 CCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCACGCCATTGCACTCCA  FFF???FFFFFIIIIIIIIIIIIIIIIIIIIIIIHHIIFHFFFGDDB=5
-        F2YP0BU02HKC61 81  chr22 29516998 255 8S32M300H      * 0 0 agacagagTCTTGCTTTGTCACCCAGGCTGGAGTGCAGTG           2444<<<<>?>><40009<:8888?A@AAA==:::DB@AA
-        F2YP0BU02FS4EM 65  chr22 30159364 255 339H29M        * 0 0 CTCCTGCCTCAGCCTCCCGAGTAGTTGGG                      IIIIHHEIIIIHHHH??=DDHIIIIIDDD
-        F2YP0BU02G197P 129 chr22 32044496 255 40M10S258H     * 0 0 TTGTTGGACATTTGGGTTGGTTCCAAGTCTTTGCTATTGTgaataatgcc IIIIIIIIIIHHHHHHIIIIIIIIIIIII;;;IIIIIIIIIIIIIIIIII
-        F2YP0BU02FIING 16  chr22 45959944 255 3M1I11M1I26M   * 0 0 AGCTATGGTACTGGCTATGAAAGCAGACACATAGACCAATGG         :::9:32267=:114244/...446==<<<?@?:9::::AAA
-        F2YP0BU02GUB9L 145 chr22 49198404 255 176H16M1I20M   * 0 0 CACCACGCTCGGCTAATTTTTGTATTTTTAGTAGAGA              IIIIIIIIIHAAC;<</////@4F5778;IIIIIIII
-    
-    """
-    left_suffix       = "_L"
-    right_suffix      = "_R"
-    # Read the mapping
-    mate_to_read_dict = {}
-    i = 0
-    for i, line in enumerate( file( tmp_mates_mapping_file_name, 'rb' ) ):
-        line = line.strip()
-        if not line.startswith( "#" ):
-            fields = line.split()
-            if len( fields ) != 4:
-                skip_line( "num_fields", i+1, line )
-                continue
-            mate_name, read_name, s_offset, e_offset = fields
-            if mate_name in mate_to_read_dict:
-                skip_line( 'two_mate_names', i+1, mate_name )
-                continue
-            mate_to_read_dict[ mate_name ] = ( read_name, int( s_offset ), int( e_offset ) )
-    # Read sequence data
-    read_to_nucs_dict = {}
-    seqs = 0
-    fasta_reader = FastaReader( file( input2, 'rb' ) )
-    while True:
-        seq = fasta_reader.next()
-        if not seq:
-            break
-        seqs += 1
-        seq_text_upper = seq.text.upper()
-        if seq.name in read_to_nucs_dict:
-            if seq_text_upper != read_to_nucs_dict[ seq.name ]:
-                skip_line( 'inconsistent_reads', seqs, seq.name )
-                continue
-        read_to_nucs_dict[ seq.name ] = seq_text_upper
-    # Read quality data
-    def quality_sequences( f ):
-        seq_name  = None
-        seq_quals = None
-        line_number = 0
-        for line in f:
-            line_number += 1
-            line = line.strip()
-            if line.startswith( ">" ):
-                if seq_name is not None:
-                    yield ( seq_name, seq_quals, seq_line )
-                seq_name  = sequence_name( line )
-                seq_line  = line_number
-                seq_quals = []
-            elif seq_name is None:
-                skip_line( 'no_header', line_number, line )
-                continue
-            else:
-                seq_quals += [ int( q ) for q in line.split() ]
-        if seq_name is not None:
-            yield ( seq_name, seq_quals, seq_line )
-    def sequence_name( s ):
-        s = s[ 1: ].strip()
-        if not s:
-            return ""
-        else:
-            return s.split()[ 0 ]
-    read_to_quals_dict = {}
-    # TODO: should we use Dan's fastaNamedReader here?
-    for seq_name, quals, line_number in quality_sequences( file( input4 ) ):
-        quals = samify_phred_scores( quals )
-        if seq_name in read_to_quals_dict:
-            if quals != read_to_quals_dict[ seq_name ]:
-                skip_line( 'inconsistent_reads', line_number, seq_name )
-            continue
-        if seq_name not in read_to_nucs_dict:
-            skip_line( 'missing_seq', line_number, seq_name )
-            continue
-        if len( quals ) != len( read_to_nucs_dict[ seq_name ] ):
-            skip_line( 'inconsistent_read_lengths', line_number, seq_name )
-            continue
-        read_to_quals_dict[ seq_name ] = quals
-    # process the SAM file
-    tmp_align_file_names = ' '.join( tmp_align_file_name_list )
-    combined_chrom_file_name = get_tmp_file_name( suffix='combined_chrom' )
-    command = 'cat %s | grep -v "^@" | sort -k 1 > %s' % ( tmp_align_file_names, combined_chrom_file_name )
-    run_command( command )
-    fout = file( output, 'w+b' )
-    has_non_header = False
-    i = 0
-    for i, line in enumerate( file( combined_chrom_file_name, 'rb' ) ):
-        line = line.strip()
-        if line.startswith( "@" ):
-            if has_non_header:
-                skip_line( 'sam_headers', i+1, line )
-                continue
-            fout.write( "%s\n" % line )
-            continue
-        has_non_header = True
-        fields = line.split()
-        num_fields = len( fields )
-        if num_fields < SAM_MIN_COLUMNS:
-            skip_line( 'sam_min_columns', i+1, line )
-            continue
-        # Set flags for mates
-        try:
-            flag = int( fields[ SAM_FLAG_COLUMN ] )
-        except ValueError:
-            skip_line( 'sam_flag', i+1, line )
-            continue
-        if flag & ( BAM_FPAIRED | BAM_FREAD1 | BAM_FREAD2 ):
-            skip_line( 'reads_paired', i+1, line )
-            continue
-        mate_name = fields[ SAM_QNAME_COLUMN ]
-        unmap_it = False
-        half = None
-        if mate_name.endswith( left_suffix ):
-            flag += BAM_FPAIRED + BAM_FREAD2
-            fields[ SAM_FLAG_COLUMN ] = "%d" % flag
-            unmap_it = True
-            half = "L"
-        elif mate_name.endswith( right_suffix ):
-            flag += BAM_FPAIRED + BAM_FREAD1
-            fields[ SAM_FLAG_COLUMN ] = "%d" % flag
-            unmap_it = True
-            half = "R"
-        on_plus_strand = ( flag & BAM_FREVERSE == 0 )
-        # Convert position from mate to read by adding clipping to cigar
-        if not unmap_it:
-            read_name = mate_name
-        else:
-            try:
-                read_name, s_offset, e_offset = mate_to_read_dict[ mate_name ]
-            except KeyError:
-                skip_line( 'missing_mate', i+1, mate_name )
-                continue
-            cigar = fields[ SAM_CIGAR_COLUMN ]
-            cigar_prefix = None
-            cigar_suffix = None
-            if half == "L": 
-                if on_plus_strand:
-                    if s_offset > 0:
-                        cigar_prefix = ( s_offset, "S" )
-                    if e_offset > 0:
-                        cigar_suffix = ( e_offset, "H" )
-                else:
-                    if e_offset > 0:
-                        cigar_prefix = ( e_offset, "H" )
-                    if s_offset > 0:
-                        cigar_suffix = ( s_offset, "S" )
-            elif half == "R": 
-                if on_plus_strand:
-                    if s_offset > 0:
-                        cigar_prefix = ( s_offset, "H" )
-                    if e_offset > 0:
-                        cigar_suffix = ( e_offset, "S" )
-                else:
-                    if e_offset > 0:
-                        cigar_prefix = ( e_offset, "S" )
-                    if s_offset > 0:
-                        cigar_suffix = ( s_offset, "H" )
-            else:               
-                if on_plus_strand:
-                    if s_offset > 0:
-                        cigar_prefix = ( s_offset, "S" )
-                    if e_offset > 0:
-                        cigar_suffix = ( e_offset, "S" )
-                else:
-                    if e_offset > 0:
-                        cigar_prefix = ( e_offset, "S" )
-                    if s_offset > 0:
-                        cigar_suffix = ( s_offset, "S" )
-            if cigar_prefix is not None:
-                count, op = cigar_prefix
-                cigar = prefix_cigar( "%d%s" % ( count, op ), cigar )
-                if op == "S":
-                    refPos = int( fields[ SAM_POS_COLUMN ] ) - count
-                    fields[ SAM_POS_COLUMN ] = "%d" % refPos
-            if cigar_suffix is not None:
-                count, op = cigar_suffix
-                cigar = suffix_cigar( cigar, "%d%s" % ( count, op ) )
-            fields[ SAM_QNAME_COLUMN ] = read_name
-            fields[ SAM_CIGAR_COLUMN ] = cigar
-        # Fetch sequence and quality values, and flip/clip them
-        if read_name not in read_to_nucs_dict:
-            skip_line( 'missing_seq', i+1, read_name )
-            continue
-        nucs = read_to_nucs_dict[ read_name ]
-        if not on_plus_strand:
-            nucs = reverse_complement( nucs )
-        quals = None
-        if read_to_quals_dict is not None:
-            if read_name not in read_to_quals_dict:
-                skip_line( 'missing_quals', i+1, read_name )
-                continue
-            quals = read_to_quals_dict[ read_name ]
-            if not on_plus_strand:
-                quals = reverse_string( quals )
-        cigar = split_cigar( fields[ SAM_CIGAR_COLUMN ] )
-        nucs, quals = clip_for_cigar( cigar, nucs, quals )
-        fields[ SAM_SEQ_COLUMN ] = nucs
-        if quals is not None:
-            fields[ SAM_QUAL_COLUMN ] = quals
-        # Output the line
-        fout.write( "%s\n" % "\t".join( fields ) )
-    fout.close()
-
-def prefix_cigar( prefix, cigar ):
-    ix = 0
-    while cigar[ ix ].isdigit():
-        ix += 1
-    if cigar[ ix ] != prefix[ -1 ]:
-        return prefix + cigar
-    count = int( prefix[ :-1 ] ) + int( cigar[ :ix ] )
-    return "%d%s%s" % ( count, prefix[ -1 ], cigar[ ix+1: ] )
-
-def suffix_cigar( cigar, suffix ):
-    if cigar[ -1 ] != suffix[ -1 ]:
-        return cigar + suffix
-    ix = len( cigar ) - 2
-    while cigar[ix].isdigit():
-        ix -= 1
-    ix += 1
-    count = int( cigar[ ix:-1 ] ) + int( suffix[ :-1 ] )
-    return "%s%d%s" % ( cigar[ :ix ], count, suffix[ -1 ] )
-
-def split_cigar( text ):
-    fields = []
-    field  = []
-    for ch in text:
-        if ch not in "MIDHS":
-            field += ch
-            continue
-        if field == []:
-            raise ValueError
-        fields += [ ( int( "".join( field ) ), ch ) ]
-        field = []
-    if field != []:
-        raise ValueError
-    return fields
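-
-# Illustration (added): split_cigar( '22M1D18M9S' ) returns
-# [ ( 22, 'M' ), ( 1, 'D' ), ( 18, 'M' ), ( 9, 'S' ) ]; an op with no count
-# ( 'M10' ) or a trailing count with no op ( '10M5' ) raises ValueError.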
-
-def clip_for_cigar( cigar, nucs, quals ):
-    # Hard clip prefix
-    count, op = cigar[0]
-    if op == "H":
-        nucs = nucs[ count: ]
-        if quals is not None:
-            quals = quals[ count: ]
-        count, op = cigar[ 1 ]
-    # Soft clip prefix
-    if op == "S":
-        nucs = nucs[ :count ].lower() + nucs[ count: ]
-    # Hard clip suffix
-    count,op = cigar[ -1 ]
-    if op == "H":
-        nucs = nucs[ :-count ]
-        if quals is not None:
-            quals = quals[ :-count ]
-        count, op = cigar[ -2 ]
-    # Soft clip suffix
-    if op == "S":
-        nucs = nucs[ :-count ] + nucs[ -count: ].lower()
-    return nucs, quals
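-
-# Behaviour sketch (added): for cigar [ ( 8, 'S' ), ( 32, 'M' ), ( 300, 'H' ) ]
-# the first 8 bases are lowercased (soft clip) and the last 300 bases and their
-# quality values are removed outright (hard clip); soft clips never alter the
-# quality string.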
-
-def samify_phred_scores( quals ):
-    """
-    Convert a decimal list of phred base-quality scores to a sam quality string.
-    Note that if a quality is outside the dynamic range of sam's ability to
-    represent it, we clip the value to the max allowed.  SAM quality scores
-    range from chr(33) to chr(126).
-    """
-    if min( quals ) >= 0 and max( quals ) <= 126-33:
-        return "".join( [ chr( 33 + q ) for q in quals ] )
-    else:
-        return "".join( [ chr( max( 33, min( 126, 33+q ) ) ) for q in quals ] )
-
-def reverse_complement( nucs ):
-    complementMap = maketrans( "ACGTacgt", "TGCAtgca" )
-    return nucs[ ::-1 ].translate( complementMap )
-
-def reverse_string( s ):
-    return s[ ::-1 ]
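-
-# Quick checks for the two helpers above (added):
-#
-#     >>> reverse_complement( 'AGGCat' )
-#     'atGCCT'
-#     >>> reverse_string( 'IIHH@@' )
-#     '@@HHII'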
-
-def __main__():
-    # Parse command line
-    # input1: a reference genome ( 2bit or fasta )
-    # input2: a collection of 454 paired end reads ( a fasta file )
-    # input3: a linker sequence ( a very small fasta file )
-    # input4: a base quality score 454 file ( qual454 )
-    parser = optparse.OptionParser()
-    parser.add_option( '', '--ref_name', dest='ref_name', help='The reference name to change all output matches to' )
-    parser.add_option( '', '--ref_source', dest='ref_source', help='The reference is cached or from the history' )
-    parser.add_option( '', '--ref_sequences', dest='ref_sequences', help='Number of sequences in the reference dataset' )
-    parser.add_option( '', '--source_select', dest='source_select', help='Use pre-set or cached reference file' )
-    parser.add_option( '', '--input1', dest='input1', help='The name of the reference file if using history or reference base name if using cached' )
-    parser.add_option( '', '--input2', dest='input2', help='The 454 reads file to align' )
-    parser.add_option( '', '--input3', dest='input3', help='The sequencing linker file' )
-    parser.add_option( '', '--input4', dest='input4', help='The base quality score 454 file' )
-    parser.add_option( '', '--output', dest='output', help='The output file' )
-    parser.add_option( '', '--lastz_seqs_file_dir', dest='lastz_seqs_file_dir', help='Directory of local lastz_seqs.loc file' )
-    ( options, args ) = parser.parse_args()
-
-    # output version # of tool
-    try:
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='lastz -v', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'version' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( '%s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine Lastz version\n' )
-
-    if options.ref_name:
-        ref_name = '[nickname=%s]' % options.ref_name
-    else:
-        ref_name = ''
-    if options.ref_source == 'history':
-        # Reference is a fasta dataset from the history
-        try:
-            # Ensure there is at least 1 sequence in the dataset ( this may not be necessary ).
-            error_msg = "The reference dataset is missing metadata, click the pencil icon in the history item and 'auto-detect' the metadata attributes."
-            ref_sequences = int( options.ref_sequences )
-            if ref_sequences < 1:
-                stop_err( error_msg )
-        except:
-            stop_err( error_msg )
-    else:
-        ref_sequences = 0
-    tmp_w12_name = get_tmp_file_name( suffix='vs_linker.W12' )
-    tmp_T1_name = get_tmp_file_name( suffix='vs_linker.T1' )
-    # Run lastz twice ( with different options ) on the linker sequence and paired end reads,
-    # looking for the linker ( each run finds some the other doesn't )
-    command = 'lastz %s %s W=12 --notrans --exact=18 --match=1,3 O=1 E=3 Y=10 L=18 --ambiguousn --coverage=85 --format=general-:name2,zstart2+,length2,size2 > %s' % \
-        ( options.input3, options.input2, tmp_w12_name )
-    run_command( command )
-    command = 'lastz %s %s T=1 --match=1,2 O=1 E=2 X=15 K=10 Y=15 L=18 --ambiguousn --coverage=85 --format=general-:name2,zstart2+,length2,size2 > %s' % \
-        ( options.input3, options.input2, tmp_T1_name )
-    run_command( command )
-    # Combine the alignment output from the two lastz runs
-    tmp_combined_linker_file_name = get_tmp_file_name( suffix='vs_linker' )
-    command = 'cat %s %s | sort -u > %s' % ( tmp_w12_name, tmp_T1_name, tmp_combined_linker_file_name )
-    run_command( command )
-    # Use the alignment info to split reads into left and right mates
-    tmp_mates_mapping_file_name, tmp_mates_file_name, tmp_mates_short_file_name, tmp_mates_long_file_name = split_paired_reads( options.input2, tmp_combined_linker_file_name )
-    # Align mates to the reference - tmp_align_file_names is a list of file names created by align_mates()
-    tmp_align_file_name_list = align_mates( options.input1, options.ref_source, ref_name, ref_sequences, tmp_mates_short_file_name, tmp_mates_long_file_name )
-    # Combine and convert mate coordinates back to read coordinates
-    paired_mate_unmapper( options.input2, options.input4, tmp_mates_mapping_file_name, tmp_align_file_name_list, options.output )
-    # Delete all temporary files
-    for file_name in tmp_file_names:
-        os.remove( file_name )
-    # Handle any invalid lines in the input data
-    if total_skipped_lines:
-        msgs = dict( bad_interval="Bad interval in line",
-                     inconsistent_read_lengths="Inconsistent read/quality lengths for seq #",
-                     inconsistent_reads="Inconsistent reads for seq #",
-                     inconsistent_sizes="Inconsistent sizes for seq #",
-                     missing_mate="Mapping file does not include mate on line",
-                     missing_quals="Missing quality values for name on line",
-                     missing_seq="Missing sequence for name on line",
-                     multiple_seqs="Multiple names for seq #",
-                     no_header="First quality sequence has no header",
-                     num_fields="Must have 4 fields in line",
-                     reads_paired="SAM flag indicates reads already paired on line",
-                     sam_flag="Bad SAM flag on line",
-                     sam_headers="SAM headers on line",
-                     sam_min_columns="Need 11 columns on line",
-                     two_mate_names="Mate name already seen, line",
-                     wrong_seq_len="Size differs from length of seq #" )
-        print "Skipped %d invalid lines: "
-        msg = ""
-        for k, v in skipped_lines.items():
-            if v[0]:
-                # v[0] is the number of times the error occurred
-                # v[1] is the position of the line or sequence in the file
-                # v[2] is the name of the sequence or the text of the line
-                msg += "(%d)%s %d:%s. " % ( v[0], msgs[k], v[1], v[2] )
-        print msg
-
-if __name__=="__main__": __main__()
--- a/tools/sr_mapping/lastz_paired_reads_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,278 +0,0 @@
-<tool id="lastz_paired_reads_wrapper" name="Lastz paired reads" version="1.1.1">
-    <description> map short paired reads against reference sequence</description>
-    <command interpreter="python">lastz_paired_reads_wrapper.py
-      #if $seq_name.how_to_name=="yes":
-        --ref_name=$seq_name.ref_name 
-      #end if
-      --ref_source=$source.ref_source
-      --input2=$input2
-      --input3=$input3
-      --input4=$input4
-      #if $source.ref_source=="history":
-        --input1=$source.input1
-        --ref_sequences=$source.input1.metadata.sequences
-      #else:
-        --input1="${ filter( lambda x: str( x[0] ) == str( $source.input1_2bit ), $__app__.tool_data_tables[ 'lastz_seqs' ].get_fields() )[0][-1] }"
-      #end if
-      --output=$output1
-      --lastz_seqs_file_dir=${GALAXY_DATA_INDEX_DIR}
-    </command>
-    <inputs>
-        <param name="input2" format="fasta" type="data" label="Align sequencing reads in" />
-        <conditional name="source">
-            <param name="ref_source" type="select" label="Against reference sequences that are">
-                <option value="cached">locally cached</option>
-                <option value="history">in your history</option>
-            </param>
-            <when value="cached">
-                <param name="input1_2bit" type="select" label="Using reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
-                    <options from_data_table="lastz_seqs" />
-                </param>
-            </when>
-            <when value="history">
-                <param name="input1" type="data" format="fasta" label="Select a reference dataset" />
-            </when>
-        </conditional>
-        <param name="input3" format="fasta" type="data" label="Linker file" />
-        <param name="input4" format="qual454" type="data" label="Select a base quality score 454 dataset" />
-        <conditional name="seq_name">
-            <param name="how_to_name" type="select" label="Do you want to modify the reference name?">
-                <option value="no">No</option>
-                <option value="yes">Yes</option>
-            </param>
-            <when value="yes">
-                <param name="ref_name" type="text" size="25" value="Type sequence name here" label="Enter name for the Reference sequence"/>
-            </when>
-            <when value="no" />
-        </conditional>
-    </inputs>
-    <outputs>
-        <data format="sam" name="output1" label="${tool.name} on ${on_string}: mapped reads" />
-    </outputs>
-    <requirements>
-        <requirement type="package">lastz</requirement>
-    </requirements>
-    <tests>
-        <test>
-            <!--
-                input1: a reference genome ( 2bit or fasta )
-                input2: a collection of 454 paired end reads ( a fasta file )
-                input3: a linker sequence ( a very small fasta file )
-                input4: a base quality score 454 file ( qual454 )
-            -->
-            <param name="input2" value="lastz_paired_input2.fasta" ftype="fasta" />
-            <param name="ref_source" value="cached" />
-            <param name="input1_2bit" value="/galaxy/data/hg18/seq/chr21.2bit" />
-            <param name="input3" value="lastz_paired_input3.fasta" ftype="fasta" />
-            <param name="input4" value="lastz_paired_input4.qual454" ftype="qual454" />
-            <param name="how_to_name" value="no" />
-            <output name="output1" file="lastz_paired_out1.sam" />
-        </test>
-    </tests>
-    <help>
-        
-**What it does**    
-        
-**LASTZ** is a high performance pairwise sequence aligner derived from BLASTZ. It is written by Bob Harris in Webb Miller's laboratory at Penn State University. Special scoring sets were derived to improve runtime performance and quality. This Galaxy version of LASTZ is geared towards aligning short (Illumina/Solexa, AB/SOLiD) and medium (Roche/454) paired reads against a reference sequence. There is excellent, extensive documentation on LASTZ available here_. 
-
- .. _here: http://www.bx.psu.edu/miller_lab/dist/README.lastz-1.02.00/README.lastz-1.02.00.html
- 
-------
-
-**Input formats**
-
-LASTZ accepts reference and reads in FASTA format. However, because Galaxy supports implicit format conversion, the tool will also recognize fastq and other method-specific formats.
-
-------
-
-**Outputs**
-
-This LASTZ tool produces a SAM file showing sequence alignments.
-
-**SAM output**
-
-SAM has 12 columns::
-
-                                   1     2     3         4   5    6  7         8     9                                    10                                     11  12
-  ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-  HWI-EAS91_1_30788AAXX:1:2:1670:915    99  chr9  58119878  60  36M  =  58120234   392  GACCCCTACCCCACCGTGCTCTGGATCTCAGTGTTT   IIIIIIIIIIIIIIIIEIIIIIII7IIIIIIIIIII  XT:A:U  NM:i:0  SM:i:37  AM:i:37  X0:i:1  X1:i:0  XM:i:0  XO:i:0  XG:i:0  MD:Z:36
-  HWI-EAS91_1_30788AAXX:1:2:1670:915   147  chr9  58120234  60  36M  =  58119878  -392  ATGAGTCGAATTCTATTTTCCAAACTGTTAACAAAA   IFIIDI;IIICIIIIIIIIIIIIIIIIIIIIIIIII  XT:A:U  NM:i:0  SM:i:37  AM:i:37  X0:i:1  X1:i:0  XM:i:0  XO:i:0  XG:i:0  MD:Z:36
-
-
-where::
-
-     Column  Description
-  ---------  ---------------------------------------------------------------------   
-   1. QNAME  Query (pair) NAME
-   2. FLAG   bitwise FLAG
-   3. RNAME  Reference sequence NAME
-   4. POS    1-based leftmost POSition/coordinate of clipped sequence
-   5. MAPQ   MAPping Quality (Phred-scaled)
-   6. CIGAR  extended CIGAR string
-   7. MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
-   8. MPOS   1-based Mate POSition
-   9. ISIZE  Inferred insert SIZE
-  10. SEQ    query SEQuence on the same strand as the reference
-  11. QUAL   query QUALity (ASCII-33 gives the Phred base quality)
-  12. OPT    variable OPTional fields in the format TAG:VTYPE:VALUE, tab-separated
-  
-The flags are as follows::
-
-    Flag  Description
-  ------  -------------------------------------
-  0x0001  the read is paired in sequencing
-  0x0002  the read is mapped in a proper pair
-  0x0004  the query sequence itself is unmapped
-  0x0008  the mate is unmapped
-  0x0010  strand of the query (1 for reverse)
-  0x0020  strand of the mate
-  0x0040  the read is the first read in a pair
-  0x0080  the read is the second read in a pair
-  0x0100  the alignment is not primary
-
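-For example, the FLAG of 99 on the first sample line decodes as 0x0001 + 0x0002 + 0x0020 + 0x0040: a properly paired first-in-pair read whose mate maps to the reverse strand; its mate's FLAG of 147 swaps in 0x0010 and 0x0080.
-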
-------
-
-**Do you want to modify the reference name?**
-
-This option allows you to set the name of the reference sequence manually. This is helpful when, for example, you would like to make the reference name compatible with the UCSC naming conventions to be able to display your lastz results as a custom track at the UCSC Genome Browser.
-
-------
-
-**LASTZ parameter list**
-
-This is an exhaustive list of LASTZ options. Once again, please note that not all parameters are included in this interface. If you would like to make additional options available through Galaxy, e-mail us at galaxy-bugs@bx.psu.edu::
-
-  target[[s..e]][-]       spec/file containing target sequence (fasta or nib)
-                          [s..e] defines a subrange of the file
-                          - indicates reverse-complement
-                          (use --help=files for more details)
-  query[[s..e]][-]        spec/file containing query sequences (fasta or nib)
-                          if absent, queries come from stdin (unless they
-                          aren't needed, as for --self or --tableonly)
-                          (use --help=files for more details)
-  --self                  the target sequence is also the query
-  --quantum               the query sequence contains quantum DNA
-  --seed=match&lt;length&gt;    use a word with no gaps instead of a seed pattern
-  --seed=half&lt;length&gt;     use space-free half-weight word instead of seed pattern
-  --match=&lt;reward&gt;[,&lt;penalty&gt;]   set the score values for a match (+&lt;reward&gt;)
-                          and mismatch (-&lt;penalty&gt;)
-  --[no]trans[ition][=2]         allow one or two transitions in a seed hit
-                          (by default a transition is allowed)
-  --word=&lt;bits&gt;           set max bits for word hash;  use this to trade time for
-                          memory, eliminating thrashing for heavy seeds
-                          (default is 28 bits)
-  --[no]filter=[&lt;T&gt;:]&lt;M&gt;     filter half-weight seed hits, requiring at least M
-                          matches and allowing no more than T transversions
-                          (default is no filtering)
-  --notwins               require just one seed hit
-  --twins=[&lt;min&gt;:]&lt;maxgap&gt;   require two nearby seed hits on the same diagonal
-                          (default is twins aren't required)
-  --notwins               allow single, isolated seeds
-  --[no]recoverseeds      avoid losing seeds in hash collisions. Cannot be used with --twins
-  --seedqueue=&lt;entries&gt;   set number of entries in seed hit queue
-                          (default is 262144)
-  --anchors=&lt;file&gt;        read anchors from a file, instead of discovering anchors
-                          via seeding
-  --recoverhits           recover hash-collision seed hits
-                          (default is not to recover seed hits)
-  --step=&lt;length&gt;         set step length (default is 1)
-  --maxwordcount=&lt;limit&gt;  words occurring more often than &lt;limit&gt; in the target
-                          are not eligible for seeds
-  --strand=both           search both strands
-  --strand=plus           search + strand only (matching strand of query spec)
-  --strand=minus          search - strand only (opposite strand of query spec)
-                          (by default both strands are searched)
-  --ambiguousn            treat N as an ambiguous nucleotide
-                          (by default N is treated as a sequence splicing character)
-  --[no]gfextend          perform gap-free extension of seed hits to HSPs
-                          (by default no extension is performed)
-  --[no]chain             perform chaining
-  --chain=&lt;diag,anti&gt;     perform chaining with given penalties for diagonal and
-                          anti-diagonal
-                          (by default no chaining is performed)
-  --[no]gapped            perform gapped alignment (instead of gap-free)
-                          (by default gapped alignment is performed)
-  --score[s]=&lt;file&gt;         read substitution scores from a file
-                          (default is HOXD70)
-  --unitscore[s]          scores are +1/-1 for match/mismatch
-  --gap=&lt;[open,]extend&gt;   set gap open and extend penalties (default is 400,30)
-  --xdrop=&lt;score&gt;         set x-drop threshold (default is 10*sub[A][A])
-  --ydrop=&lt;score&gt;         set y-drop threshold (default is open+300extend)
-  --infer[=&lt;control&gt;]     infer scores from the sequences, then use them
-  --inferonly[=&lt;control&gt;]   infer scores, but don't use them (requires --infscores)
-                          all inference options are read from the control file
-  --infscores[=&lt;file&gt;]    write inferred scores to a file
-  --hspthresh=&lt;score&gt;     set threshold for high scoring pairs (default is 3000)
-                          ungapped extensions scoring lower are discarded
-                          &lt;score&gt; can also be a percentage or base count
-  --entropy               adjust for entropy when qualifying HSPs in the x-drop extension 
-                          method
-  --noentropy             don't adjust for entropy when qualifying HSPs
-  --exact=&lt;length&gt;        set threshold for exact matches
-                          if specified, exact matches are found rather than high
-                          scoring pairs (replaces --hspthresh)
-  --inner=&lt;score&gt;         set threshold for HSPs during interpolation
-                          (default is no interpolation)
-  --gappedthresh=&lt;score&gt;  set threshold for gapped alignments
-                          gapped extensions scoring lower are discarded
-                          &lt;score&gt; can also be a percentage or base count
-                          (default is to use same value as --hspthresh)
-  --ball=&lt;score&gt;          set minimum score required of words 'in' a quantum ball
-  --[no]entropy           involve entropy in filtering high scoring pairs
-                          (default is "entropy")
-  --[no]mirror            report/use mirror image of all gap-free alignments
-                          (default is "mirror" for self-alignments only)
-  --traceback=&lt;bytes&gt;     space for trace-back information
-                          (default is 80.0M)
-  --masking=&lt;count&gt;       mask any position in target hit this many times
-                          zero indicates no masking
-                          (default is no masking)
-  --targetcapsule=&lt;capsule_file&gt;   the target seed word position table and seed
-                          (as well as the target sequence) are read from the specified file
-  --segments=&lt;segment_file&gt;   read segments from a file, instead of discovering
-                          them via seeding. Replaces other seeding or gap-free extension
-                          options
-  --[no]census[=&lt;file&gt;]     count/report how many times each target base aligns
-                          (default is to not report census)
-  --identity=&lt;min&gt;[..&lt;max&gt;]   filter alignments by percent identity
-                          0&lt;=min&lt;=max&lt;=100;  blocks (or HSPs) outside min..max
-                          are discarded
-                          (default is no identity filtering)
-  --coverage=&lt;min&gt;[..&lt;max&gt;]   filter alignments by percentage of query covered
-                          0&lt;=min&lt;=max&lt;=100;  blocks (or HSPs) outside min..max
-                          are discarded
-                          (default is no query coverage filtering)
-  --notrivial             do not output trivial self-alignment block if the target and query 
-                          sequences are identical. Using --self enables this option automatically
-  --output=&lt;output_file&gt;  write the alignments to the specified file name instead of stdout
-  --code=&lt;file&gt;           give quantum code for query sequence (only for display)
-  --format=&lt;type&gt;         specify output format; one of lav, axt, maf, maf+, maf-, text,
-                          lav+text, cigar, text, rdplot, general, or general:&lt;fields&gt;
-                          (by default output is LAV)
-  --rdotplot=&lt;file&gt;       create an additional output file suitable for plotting the alignments 
-                          with the R statistical package.
-  --markend               Just before normal completion, write "# lastz end-of-file" to output file
-  --census[=&lt;output_file&gt;]    count and report how many times each target base aligns, up 
-                          to 255. Ns are included in the count
-  --census16[=&lt;output_file&gt;]  count and report how many times each target base aligns, up
-                          to 65 thousand
-  --census32[=&lt;output_file&gt;]  count and report how many times each target base aligns, up
-                          to 4 billion
-  --writecapsule=&lt;capsule_file&gt;    just write out a target capsule file and quit; don't 
-                          search for seeds or perform subsequent stages
-  --verbosity=&lt;level&gt;     set info level (0 is minimum, 10 is everything)
-                          (default is 0)
-  --[no]runtime           report runtime in the output file
-                          (default is to not report runtime)
-  --tableonly[=count]     just produce the target position table, don't
-                          search for seeds
-  --[no]stats[=&lt;file&gt;]    show search statistics (or don't)
-                          (not available in this build)
-  --version               report the program version and quit
-  --help                  list all options
-  --help=files            list information about file specifiers
-  --help=short[cuts]      list blastz-compatible shortcuts
-  --help=yasra            list yasra-specific shortcuts
-
-    </help>
-</tool>
--- a/tools/sr_mapping/lastz_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,290 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Runs Lastz
-Written for Lastz v. 1.01.88.
-
-usage: lastz_wrapper.py [options]
-    --ref_name: The reference name to change all output matches to
-    --ref_source: Whether the reference is cached or from the history
-    --source_select: Whether to use a pre-set or cached reference file
-    --input1: The name of the reference file if using history or reference base name if using cached
-    --input2: The reads file to align 
-    --ref_sequences: The number of sequences in the reference file if using one from history 
-    --pre_set_options: Which of the pre set options to use, if using pre-sets
-    --strand: Which strand of the read to search, if specifying all parameters
-    --seed: Seeding settings, if specifying all parameters
-    --gfextend: Whether to perform gap-free extension of seed hits to HSPs (high scoring segment pairs), if specifying all parameters
-    --chain: Whether to perform chaining of HSPs, if specifying all parameters
-    --transition: Number of transitions to allow in each seed hit, if specifying all parameters
-    --O: Gap opening penalty, if specifying all parameters
-    --E: Gap extension penalty, if specifying all parameters
-    --X: X-drop threshold, if specifying all parameters
-    --Y: Y-drop threshold, if specifying all parameters
-    --K: Threshold for HSPs, if specifying all parameters
-    --L: Threshold for gapped alignments, if specifying all parameters
-    --entropy: Whether to involve entropy when filtering HSPs, if specifying all parameters
-    --identity_min: Minimum identity (don't report matches under this identity)
-    --identity_max: Maximum identity (don't report matches above this identity)
-    --coverage: The minimum coverage value (don't report matches covering less than this) 
-    --unmask: Whether to convert lowercase bases to uppercase
-    --out_format: The format of the output file (sam, diffs, or tabular (general))
-    --output: The name of the output file
-    --lastzSeqsFileDir: Directory of local lastz_seqs.loc file
-"""
-import optparse, os, subprocess, shutil, sys, tempfile, threading, time
-from Queue import Queue
-
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( 'bx-python' )
-from bx.seq.twobit import *
-from bx.seq.fasta import FastaReader
-from galaxy.util.bunch import Bunch
-
-STOP_SIGNAL = object()
-WORKERS = 4
-SLOTS = 128
-
-def stop_err( msg ):
-    sys.stderr.write( "%s" % msg )
-    sys.exit()
-
-def stop_queues( lastz, combine_data ):
-    # This method should only be called if an error has been encountered.
-    # Send STOP_SIGNAL to all worker threads
-    for t in lastz.threads:
-        lastz.put( STOP_SIGNAL, True )
-    combine_data.put( STOP_SIGNAL, True )
-
-class BaseQueue( object ):
-    def __init__( self, num_threads, slots=-1 ):
-        # Initialize the queue and worker threads
-        self.queue = Queue( slots )
-        self.threads = []
-        for i in range( num_threads ):
-            worker = threading.Thread( target=self.run_next )
-            worker.start()
-            self.threads.append( worker )
-    def run_next( self ):
-        # Run the next job, waiting until one is available if necessary
-        while True:
-            job = self.queue.get()
-            if job is STOP_SIGNAL:
-                return self.shutdown()
-            self.run_job( job )
-            time.sleep( 1 )
-    def run_job( self, job ):
-        stop_err( 'Not Implemented' )
-    def put( self, job, block=False ):
-        # Add a job to the queue
-        self.queue.put( job, block )
-    def shutdown( self ):
-        return
-
-class LastzJobQueue( BaseQueue ):
-    """
-    A queue that runs commands in parallel.  Blocking is done so the queue will
-    not consume much memory.
-    """
-    def run_job( self, job ):
-        # Execute the job's command
-        proc = subprocess.Popen( args=job.command, shell=True, stderr=subprocess.PIPE, )
-        # Read stderr in full before waiting so a filled pipe cannot deadlock the child
-        stderr = proc.stderr.read()
-        proc.wait()
-        if stderr:
-            stop_queues( self, job.combine_data_queue )
-            stop_err( stderr )
-        job.combine_data_queue.put( job )
-
-class CombineDataQueue( BaseQueue ):
-    """
-    A queue that concatenates files in serial.  Blocking is not done since this
-    queue is not expected to grow larger than the command queue.
-    """
-    def __init__( self, output_filename, num_threads=1 ):
-        BaseQueue.__init__( self, num_threads )
-        self.CHUNK_SIZE = 2**20 # 1Mb
-        self.output_file = open( output_filename, 'wb' )
-    def run_job( self, job ):
-        in_file = open( job.output, 'rb' )
-        while True:
-            chunk = in_file.read( self.CHUNK_SIZE )
-            if not chunk:
-                in_file.close()
-                break
-            self.output_file.write( chunk )
-        for file_name in job.cleanup:
-            os.remove( file_name )
-    def shutdown( self ):
-        self.output_file.close()
-        return
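# ---------------------------------------------------------------------------
# Editor's sketch (not part of the deleted file): the bounded producer/
# consumer pattern implemented by the two queue classes above, reduced to
# the standard library.  `tasks` is assumed to be a list of callables.
#
#     import threading
#     from Queue import Queue   # named 'queue' on Python 3
#
#     STOP = object()
#
#     def run_all( tasks, num_workers=4, slots=128 ):
#         q = Queue( slots )                # bounded, so producers block
#         def worker():
#             while True:
#                 task = q.get()
#                 if task is STOP:
#                     return
#                 task()
#         threads = [ threading.Thread( target=worker ) for _ in range( num_workers ) ]
#         for t in threads:
#             t.start()
#         for task in tasks:
#             q.put( task, True )           # same blocking put as LastzJobQueue
#         for t in threads:
#             q.put( STOP, True )           # one sentinel per worker
#         for t in threads:
#             t.join()                      # deterministic alternative to polling activeCount()
# ---------------------------------------------------------------------------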
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option( '', '--ref_name', dest='ref_name', help='The reference name to change all output matches to' )
-    parser.add_option( '', '--ref_source', dest='ref_source', help='Whether the reference is cached or from the history' )
-    parser.add_option( '', '--ref_sequences', dest='ref_sequences', help='Number of sequences in the reference dataset' )
-    parser.add_option( '', '--source_select', dest='source_select', help='Whether to use the pre-set or the full parameter settings' )
-    parser.add_option( '', '--input1', dest='input1', help='The name of the reference file if using history or reference base name if using cached' )
-    parser.add_option( '', '--input2', dest='input2', help='The reads file to align' )
-    parser.add_option( '', '--pre_set_options', dest='pre_set_options', help='Which of the pre set options to use, if using pre-sets' )
-    parser.add_option( '', '--strand', dest='strand', help='Which strand of the read to search, if specifying all parameters' )
-    parser.add_option( '', '--seed', dest='seed', help='Seeding settings, if specifying all parameters' )
-    parser.add_option( '', '--transition', dest='transition', help='Number of transitions to allow in each seed hit, if specifying all parameters' )
-    parser.add_option( '', '--gfextend', dest='gfextend', help='Whether to perform gap-free extension of seed hits to HSPs (high scoring segment pairs), if specifying all parameters' )
-    parser.add_option( '', '--chain', dest='chain', help='Whether to perform chaining of HSPs, if specifying all parameters' )
-    parser.add_option( '', '--O', dest='O', help='Gap opening penalty, if specifying all parameters' )
-    parser.add_option( '', '--E', dest='E', help='Gap extension penalty, if specifying all parameters' )
-    parser.add_option( '', '--X', dest='X', help='X-drop threshold, if specifying all parameters' )
-    parser.add_option( '', '--Y', dest='Y', help='Y-drop threshold, if specifying all parameters' )
-    parser.add_option( '', '--K', dest='K', help='Threshold for HSPs, if specifying all parameters' )
-    parser.add_option( '', '--L', dest='L', help='Threshold for gapped alignments, if specifying all parameters' )
-    parser.add_option( '', '--entropy', dest='entropy', help='Whether to involve entropy when filtering HSPs, if specifying all parameters' )
-    parser.add_option( '', '--identity_min', dest='identity_min', help="Minimum identity (don't report matches under this identity)" )
-    parser.add_option( '', '--identity_max', dest='identity_max', help="Maximum identity (don't report matches above this identity)" )
-    parser.add_option( '', '--coverage', dest='coverage', help="The minimum coverage value (don't report matches covering less than this)" )
-    parser.add_option( '', '--unmask', dest='unmask', help='Whether to convert lowercase bases to uppercase' )
-    parser.add_option( '', '--out_format', dest='format', help='The format of the output file (sam, diffs, or tabular (general))' )
-    parser.add_option( '', '--output', dest='output', help='The output file' )
-    parser.add_option( '', '--lastzSeqsFileDir', dest='lastzSeqsFileDir', help='Directory of local lastz_seqs.loc file' )
-    ( options, args ) = parser.parse_args()
-
-    # output version # of tool
-    try:
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='lastz -v', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'version' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( '%s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine Lastz version\n' )
-
-    if options.unmask == 'yes':
-        unmask = '[unmask]'
-    else:
-        unmask = ''
-    if options.ref_name:
-        ref_name = '[nickname=%s]' % options.ref_name
-    else:
-        ref_name = ''
-    # Prepare for commonly-used preset options
-    if options.source_select == 'pre_set':
-        set_options = '--%s' % options.pre_set_options
-    # Prepare for user-specified options
-    else:
-        set_options = '--%s --%s --gapped --strand=%s --seed=%s --%s O=%s E=%s X=%s Y=%s K=%s L=%s --%s' % \
-                    ( options.gfextend, options.chain, options.strand, options.seed, options.transition,
-                      options.O, options.E, options.X, options.Y, options.K, options.L, options.entropy )
-    # Specify input2 and add [fullnames] modifier if output format is diffs
-    if options.format == 'diffs':
-        input2 = '%s[fullnames]' % options.input2
-    else:
-        input2 = options.input2
-    if options.format == 'tabular':
-        # Change output format to general if it's tabular and add field names for tabular output
-        format = 'general-'
-        tabular_fields = ':score,name1,strand1,size1,start1,zstart1,end1,length1,text1,name2,strand2,size2,start2,zstart2,end2,start2+,zstart2+,end2+,length2,text2,diff,cigar,identity,coverage,gaprate,diagonal,shingle'
-    elif options.format == 'sam':
-        # We currently ALWAYS suppress SAM headers.
-        format = 'sam-'
-        tabular_fields = ''
-    else:
-        format = options.format
-        tabular_fields = ''
-
-    # Set up our queues
-    lastz_job_queue = LastzJobQueue( WORKERS, slots=SLOTS )
-    combine_data_queue = CombineDataQueue( options.output )
-
-    if options.ref_source == 'history':
-        # Reference is a fasta dataset from the history, so split job across
-        # the number of sequences in the dataset ( this could be a HUGE number )
-        try:
-            # Ensure there is at least 1 sequence in the dataset ( this may not be necessary ).
-            error_msg = "The reference dataset is missing metadata, click the pencil icon in the history item and 'auto-detect' the metadata attributes."
-            ref_sequences = int( options.ref_sequences )
-            if ref_sequences < 1:
-                stop_queues( lastz_job_queue, combine_data_queue )
-                stop_err( error_msg )
-        except:
-            stop_queues( lastz_job_queue, combine_data_queue )
-            stop_err( error_msg )
-        seqs = 0
-        fasta_reader = FastaReader( open( options.input1 ) )
-        while True:
-            # Read the next sequence from the reference dataset
-            seq = fasta_reader.next()
-            if not seq:
-                break
-            seqs += 1
-            # Create a temporary file to contain the current sequence as input to lastz
-            tmp_in_fd, tmp_in_name = tempfile.mkstemp( suffix='.in' )
-            tmp_in = os.fdopen( tmp_in_fd, 'wb' )
-            # Write the current sequence to the temporary input file
-            tmp_in.write( '>%s\n%s\n' % ( seq.name, seq.text ) )
-            tmp_in.close()
-            # Create a 2nd temporary file to contain the output from lastz execution on the current sequence
-            tmp_out_fd, tmp_out_name = tempfile.mkstemp( suffix='.out' )
-            os.close( tmp_out_fd )
-            # Generate the command line for calling lastz on the current sequence
-            command = 'lastz %s%s%s %s %s --ambiguousn --nolaj --identity=%s..%s --coverage=%s --format=%s%s > %s' % \
-                ( tmp_in_name, unmask, ref_name, input2, set_options, options.identity_min, 
-                  options.identity_max, options.coverage, format, tabular_fields, tmp_out_name )
-            # Create a job object
-            job = Bunch()
-            job.command = command
-            job.output = tmp_out_name
-            job.cleanup = [ tmp_in_name, tmp_out_name ]
-            job.combine_data_queue = combine_data_queue
-            # Add another job to the lastz_job_queue. Execution 
-            # will wait at this point if the queue is full.
-            lastz_job_queue.put( job, block=True )
-        # Make sure the value of sequences in the metadata is the same as the
-        # number of sequences read from the dataset ( this may not be necessary ).
-        if ref_sequences != seqs:
-            stop_queues( lastz_job_queue, combine_data_queue )
-            stop_err( "The value of metadata.sequences (%d) differs from the number of sequences read from the reference (%d)." % ( ref_sequences, seqs ) )
-    else:
-        # Reference is a locally cached 2bit file, split job across number of chroms in 2bit file
-        tbf = TwoBitFile( open( options.input1, 'r' ) )
-        for chrom in tbf.keys():
-            # Create a temporary file to contain the output from lastz execution on the current chrom
-            tmp_out_fd, tmp_out_name = tempfile.mkstemp( suffix='.out' )
-            os.close( tmp_out_fd )
-            command = 'lastz %s/%s%s%s %s %s --ambiguousn --nolaj --identity=%s..%s --coverage=%s --format=%s%s >> %s' % \
-                ( options.input1, chrom, unmask, ref_name, input2, set_options, options.identity_min, 
-                  options.identity_max, options.coverage, format, tabular_fields, tmp_out_name )
-            # Create a job object
-            job = Bunch()
-            job.command = command
-            job.output = tmp_out_name
-            job.cleanup = [ tmp_out_name ]
-            job.combine_data_queue = combine_data_queue
-            # Add another job to the lastz_job_queue. Execution 
-            # will wait at this point if the queue is full.
-            lastz_job_queue.put( job, block=True )
-
-    # Stop the lastz_job_queue
-    for t in lastz_job_queue.threads:
-        lastz_job_queue.put( STOP_SIGNAL, True )
-    # Although all jobs are submitted to the queue, we can't shut down the combine_data_queue
-    # until we know that all jobs have been submitted to its queue.  We do this by checking
-    # whether all of the threads in the lastz_job_queue have terminated.
-    while threading.activeCount() > 2:
-        time.sleep( 1 )
-    # Now it's safe to stop the combine_data_queue
-    combine_data_queue.put( STOP_SIGNAL )
-
-if __name__=="__main__": __main__()
--- a/tools/sr_mapping/lastz_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,524 +0,0 @@
-<tool id="lastz_wrapper_2" name="Lastz" version="1.2.2">
-    <description>map short reads against a reference sequence</description>
-    <command interpreter="python">lastz_wrapper.py
-      #if $seq_name.how_to_name=="yes":
-        --ref_name=$seq_name.ref_name 
-      #end if
-      --ref_source=$source.ref_source
-      --source_select=$params.source_select
-      --out_format=$out_format
-      --input2=$input2 
-      #if $source.ref_source=="history":
-        --input1=$source.input1
-        --ref_sequences=$input1.metadata.sequences 
-      #else:
-        --input1="${ filter( lambda x: str( x[0] ) == str( $source.input1_2bit ), $__app__.tool_data_tables[ 'lastz_seqs' ].get_fields() )[0][-1] }"
-        --ref_sequences="None" 
-      #end if
-      #if $params.source_select=="pre_set":
-        --pre_set_options=${params.pre_set_options}
-      #else:
-        --strand=$params.strand
-        --seed=$params.seed
-        --gfextend=$params.gfextend
-        --chain=$params.chain
-        --transition="$params.transition"
-        --O=$params.O
-        --E=$params.E
-        --X=$params.X
-        --Y=$params.Y
-        --K=$params.K
-        --L=$params.L
-        --entropy=$params.entropy 
-      #end if
-      --identity_min=$min_ident
-      --identity_max=$max_ident
-      --coverage=$min_cvrg
-      --output=$output1
-      --unmask=$unmask
-      --lastzSeqsFileDir=${GALAXY_DATA_INDEX_DIR}
-    </command>
-    <inputs>
-        <param name="input2" format="fasta" type="data" label="Align sequencing reads in" />
-        <conditional name="source">
-            <param name="ref_source" type="select" label="Against reference sequences that are">
-                <option value="cached">locally cached</option>
-                <option value="history">in your history</option>
-            </param>
-            <when value="cached">
-                <param name="input1_2bit" type="select" label="Using reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
-                    <options from_data_table="lastz_seqs" />
-                </param>
-            </when>
-            <when value="history">
-                <param name="input1" type="data" format="fasta" label="Select a reference dataset" />
-            </when>
-        </conditional>
-        <param name="out_format" type="select" label="Output format">
-            <option value="sam">SAM</option>
-            <option value="diffs">Polymorphisms</option>
-            <option value="tabular">Tabular</option>
-        </param>
-        <conditional name="params">
-            <param name="source_select" type="select" label="Lastz settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full List">
-                <option value="pre_set">Commonly used</option>
-                <option value="full">Full Parameter List</option>
-            </param>
-            <when value="pre_set">
-                <param name="pre_set_options" type="select" label="Select mapping mode">
-                    <option value="yasra98">Roche-454 98% identity</option>
-                    <option value="yasra95">Roche-454 95% identity</option>
-                    <option value="yasra90">Roche-454 90% identity</option>
-                    <option value="yasra85">Roche-454 85% identity</option>
-                    <option value="yasra75">Roche-454 75% identity</option>
-                    <option value="yasra95short">Illumina 95% identity</option>
-                    <option value="yasra85short">Illumina 85% identity</option>
-                </param>
-            </when>
-            <when value="full">
-                <param name="strand" type="select" label="Which strand to search?">
-                    <option value="both">Both</option>
-                    <option value="plus">Search forward strand only (the one in the reference)</option>
-                    <option value="minus">Search the reverse complement strand only (opposite of the reference)</option>
-                </param>
-                <param name="seed" type="select" label="Select seeding settings" help="allows you set word size and number of mismatches">
-                    <option value="12of19">Seed hits require a 19 bp word with matches in 12 specific positions</option>
-                    <option value="14of22">Seed hits require a 22 bp word with matches in 14 specific positions</option>
-                </param>
-                <param name="transition" type="select" label="Select transition settings" help="affects the number of allowed transition substitutions">
-                    <option value="transition">Allow one transition in each seed hit</option>
-                    <option value="transition=2">Allow two transitions in a seed hit </option>
-                    <option value="notransition">Don't allow any transitions in seed hits</option>
-                </param>
-                <param name="gfextend" type="select" label="Perform gap-free extension of seed hits to HSPs (high scoring segment pairs)?">
-                    <option value="nogfextend">No</option>
-                    <option value="gfextend">Yes</option>
-                </param>
-                <param name="chain" type="select" label="Perform chaining of HSPs?">
-                    <option value="nochain">No</option>
-                    <option value="chain">Yes</option>
-                </param>
-                <param name="O" type="integer" size="5" value="400" label="Gap opening penalty"/>
-                <param name="E" type="integer" size="5" value="30" label="Gap extension penalty"/>
-                <param name="X" type="integer" size="5" value="910" label="X-drop threshold"/>
-                <param name="Y" type="integer" size="5" value="9370" label="Y-drop threshold"/>
-                <param name="K" type="integer" size="5" value="3000" label="Set the threshold for HSPs (ungapped extensions scoring lower are discarded)"/>
-                <param name="L" type="integer" size="5" value="3000" label="Set the threshold for gapped alignments (gapped extensions scoring lower are discarded)"/>
-                <param name="entropy" type="select" label="Involve entropy when filtering HSPs?">
-                    <option value="noentropy">No</option>
-                    <option value="entropy">Yes</option>
-                </param>
-            </when>   
-        </conditional>
-        <conditional name="seq_name">
-            <param name="how_to_name" type="select" label="Do you want to modify the reference name?">
-                <option value="no">No</option>
-                <option value="yes">Yes</option>
-            </param>
-            <when value="yes">
-                <param name="ref_name" type="text" size="25" value="Type sequence name here" label="Enter name for the Reference sequence"/>
-            </when>
-            <when value="no" />
-        </conditional>
-        <param name="min_ident" type="integer" size="3" value="0" label="Do not report matches below this identity (%)"/>
-        <param name="max_ident" type="integer" size="3" value="100" label="Do not report matches above this identity (%)"/>
-        <param name="min_cvrg" type="integer" size="3" value="0" label="Do not report matches that cover less than this percentage of each read"/>
-        <param name="unmask" type="select" label="Convert lowercase bases to uppercase">
-            <option value="yes">Yes</option>
-            <option value="no">No</option>
-        </param>
-    </inputs>
-    <outputs>
-        <data format="tabular" name="output1" label="${tool.name} on ${on_string}: mapped reads">
-            <change_format>
-                <when input="out_format" value="sam" format="sam" />
-            </change_format>
-        </data>
-    </outputs>
-    <requirements>
-        <requirement type="package">lastz</requirement>
-    </requirements>
-    <tests>
-        <test>
-            <!--
-            Lastz command:
-            lastz phiX.2bit/phiX174[nickname=Ref] test-data/b1.fasta +nogfextend +nochain +gapped +strand=both +seed=12of19 +transition O=400 E=30 X=910 Y=9370 K=3000 L=3000 +noentropy +ambiguousn +nolaj +identity=0..100 +coverage=0 +format=sam- > lastz_wrapper_out2.sam 
-            You need to point to phiX.2bit somewhere on your system. b1.fasta is located in galaxy's test-data.  You will have to replace all the pluses before the
-            commands with 2 dashes, as double-dash can't appear in an XML comment.
-            -->
-            <param name="input2" value="b1.fasta" ftype="fasta" />
-            <param name="ref_source" value="cached" />
-            <!-- this is the backwards-compatible "unique value" for this file, not an actual path -->
-            <param name="input1_2bit" value="/galaxy/data/phiX/seq/phiX.2bit" />
-            <param name="out_format" value="sam" />
-            <param name="source_select" value="full" />
-            <param name="strand" value="both" />
-            <param name="seed" value="12of19" />
-            <param name="transition" value="transition" />
-            <param name="gfextend" value="nogfextend" />
-            <param name="chain" value="nochain" />
-            <param name="O" value="400" />
-            <param name="E" value="30" />
-            <param name="X" value="910" />
-            <param name="Y" value="9370" />
-            <param name="K" value="3000" />
-            <param name="L" value="3000" />
-            <param name="entropy" value="noentropy" />
-            <!--
-            how_to_name is not the default. It is changed to modify 
-            input1_2bit by adding the ref_name as a nickname
-            -->
-            <param name="how_to_name" value="yes" />
-            <param name="ref_name" value="Ref" />
-            <param name="min_ident" value="0" />
-            <param name="max_ident" value="100" />
-            <param name="min_cvrg" value="0" />
-            <param name="unmask" value="yes" />
-            <output name="output1" file="lastz_wrapper_out2.sam" />
-        </test>
-        <test>
-            <!--
-            Lastz command:
-            lastz test-data/phiX.fasta test-data/b1.fasta[fullnames] +yasra95short +ambiguousn +nolaj +identity=0..100 +coverage=0 +format=diffs > lastz_wrapper_out3.tabular 
-            phiX.fasta and b1.fasta are located in galaxy's test-data.  You will have to replace all the pluses before the commands with 2 dashes, 
-            as double-dash can't appear in an XML comment.
-            -->
-            <param name="input2" value="b1.fasta" ftype="fasta" />
-            <param name="ref_source" value="history" />
-            <param name="input1" value="phiX.fasta" ftype="fasta" />
-            <param name="out_format" value="diffs" />
-            <param name="source_select" value="pre_set" />
-            <param name="pre_set_options" value="yasra95short" />
-            <param name="how_to_name" value="no" />
-            <param name="min_ident" value="0" />
-            <param name="max_ident" value="100" />
-            <param name="min_cvrg" value="0" />
-            <param name="unmask" value="yes" />
-            <output name="output1" file="lastz_wrapper_out3.tabular" />
-        </test>
-        <test>
-            <!--
-            Lastz command: first you will need to split the file phiX_split.fasta into two files, 
-            phiX1.fasta and phiX2.fasta, each with 1 sequence (phiX1 and phiX2, respectively). Then:
-            lastz phiX1.fasta test-data/b1.fasta *yasra95short *ambiguousn *nolaj *identity=0..100 *coverage=0 *format=general-:score,name1,strand1,size1,start1,zstart1,end1,length1,text1,name2,strand2,size2,start2,zstart2,end2,start2+,zstart2+,end2+,length2,text2,diff,cigar,identity,coverage,gaprate,diagonal,shingle > lastz_wrapper_out4.tabular 
-            lastz phiX2.fasta test-data/b1.fasta *yasra95short *ambiguousn *nolaj *identity=0..100 *coverage=0 *format=general-:score,name1,strand1,size1,start1,zstart1,end1,length1,text1,name2,strand2,size2,start2,zstart2,end2,start2+,zstart2+,end2+,length2,text2,diff,cigar,identity,coverage,gaprate,diagonal,shingle >> lastz_wrapper_out4.tabular 
-            You need to point to phiX1.fasta and phiX2.fasta somewhere on your system. 
-            phiX_split.fasta and b1.fasta are located in galaxy's test-data 
-            You will have to replace all the asterisks before the commands with 2 dashes, 
-            as double-dash can't appear in an XML comment 
-
-            NOTE: since the input file includes more than 1 sequence, the output must be sorted in
-            order for the functional test to pass.  This is done using the sort="True" attribute on the output.
-            -->
-            <param name="input2" value="b1.fasta" ftype="fasta" />
-            <param name="ref_source" value="history" />
-            <param name="input1" value="phiX_split.fasta" ftype="fasta"  />
-            <param name="out_format" value="tabular" />
-            <param name="source_select" value="pre_set" />
-            <param name="pre_set_options" value="yasra95short" />
-            <param name="how_to_name" value="no" />
-            <param name="min_ident" value="0" />
-            <param name="max_ident" value="100" />
-            <param name="min_cvrg" value="0" />
-            <param name="unmask" value="yes" />
-            <output name="output1" file="lastz_wrapper_out4.tabular" sort="True" />
-        </test>
-    </tests>
-    <help>
-        
-**What it does**
-        
-**LASTZ** is a high performance pairwise sequence aligner derived from BLASTZ. It is written by Bob Harris in Webb Miller's laboratory at Penn State University. Special scoring sets were derived to improve runtime performance and quality. This Galaxy version of LASTZ is geared towards aligning short (Illumina/Solexa, AB/SOLiD) and medium (Roche/454) reads against a reference sequence. There is excellent, extensive documentation on LASTZ available here_.
-
- .. _here: http://www.bx.psu.edu/miller_lab/dist/README.lastz-1.02.00/README.lastz-1.02.00.html
- 
-------
-
-**Input formats**
-
-LASTZ accepts the reference and reads in FASTA format. However, because Galaxy supports implicit format conversion, the tool will also recognize fastq and other method-specific formats.
-
-------
-
-**Outputs**
-
-LASTZ generates one output. Depending on the choice you make in the *Select output format* drop-down, LASTZ will produce a SAM file showing sequence alignments, a list of differences between the reads and reference (Polymorphisms), or a general table with one line per alignment block (Tabular). Examples of these outputs are shown below.
-
-**SAM output**
-
-SAM has 12 columns::
-
-                                   1     2     3         4   5    6  7         8     9                                    10                                     11  12
-  ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-  HWI-EAS91_1_30788AAXX:1:2:1670:915    99  chr9  58119878  60  36M  =  58120234   392  GACCCCTACCCCACCGTGCTCTGGATCTCAGTGTTT   IIIIIIIIIIIIIIIIEIIIIIII7IIIIIIIIIII  XT:A:U  NM:i:0  SM:i:37  AM:i:37  X0:i:1  X1:i:0  XM:i:0  XO:i:0  XG:i:0  MD:Z:36
-  HWI-EAS91_1_30788AAXX:1:2:1670:915   147  chr9  58120234  60  36M  =  58119878  -392  ATGAGTCGAATTCTATTTTCCAAACTGTTAACAAAA   IFIIDI;IIICIIIIIIIIIIIIIIIIIIIIIIIII  XT:A:U  NM:i:0  SM:i:37  AM:i:37  X0:i:1  X1:i:0  XM:i:0  XO:i:0  XG:i:0  MD:Z:36
-
-
-where::
-
-     Column  Description
-  ---------  ---------------------------------------------------------------------   
-   1. QNAME  Query (pair) NAME
-   2. FLAG   bitwise FLAG
-   3. RNAME  Reference sequence NAME
-   4. POS    1-based leftmost POSition/coordinate of clipped sequence
-   5. MAPQ   MAPping Quality (Phred-scaled)
-   6. CIGAR  extended CIGAR string
-   7. MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
-   8. MPOS   1-based Mate POSition
-   9. ISIZE  Inferred insert SIZE
-  10. SEQ    query SEQuence on the same strand as the reference
-  11. QUAL   query QUALity (ASCII-33 gives the Phred base quality)
-  12. OPT    variable OPTional fields in the format TAG:VTYPE:VALUE, tab-separated
-  
-The flags are as follows::
-
-    Flag  Description
-  ------  -------------------------------------
-  0x0001  the read is paired in sequencing
-  0x0002  the read is mapped in a proper pair
-  0x0004  the query sequence itself is unmapped
-  0x0008  the mate is unmapped
-  0x0010  strand of the query (1 for reverse)
-  0x0020  strand of the mate
-  0x0040  the read is the first read in a pair
-  0x0080  the read is the second read in a pair
-  0x0100  the alignment is not primary
-
-**Polymorphism (SNP or differences) output**
-
-Polymorphism output contains 14 columns::
-
-     1     2     3  4     5                                   6   7   8  9  10  11 12                                   13                                    14
-  --------------------------------------------------------------------------------------------------------------------------------------------------------------
-  chrM  2490  2491  +  5386  HWI-EAS91_1_306UPAAXX:6:1:486:822   10  11  -  36  C  A  ACCTGTTTTACAGACACCTAAAGCTACATCGTCAAC  ACCTGTTTTAAAGACACCTAAAGCTACATCGTCAAC
-  chrM  2173  2174  +  5386  HWI-EAS91_1_306UPAAXX:6:1:259:1389  26  27  +  36  G  T  GCGTACTTATTCGCCACCATGATTATGACCAGTGTT  GCGTACTTATTCGCCACCATGATTATTACCAGTGTT
-
-where::
-
-  1. (chrM)   - Reference sequence id
-  2. (2490)   - Start position of the difference in the reference
-  3. (2491)   - End position of the difference in the reference
-  4. (+)      - Strand of the reference (always plus)
-  5. (5386)   - Length of the reference sequence
-  6. (HWI...) - read id
-  7. (10)     - Start position of the difference in the read
-  8. (11)     - End position of the difference in the read
-  9. (+)      - Strand of the read
- 10. (36)     - Length of the read
- 11. (C)      - Nucleotide in the reference
- 12. (A)      - Nucleotide in the read
- 13. (ACC...) - Reference side of the alignment
- 14. (ACC...) - Read side of the alignment
- 
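Each line of this output is tab-separated, so it can be picked apart directly; a minimal
sketch (editor's illustration; the field names and the file name lastz_diffs.tabular are
informal placeholders)::

  cols = ( 'ref_id', 'ref_start', 'ref_end', 'ref_strand', 'ref_len',
           'read_id', 'read_start', 'read_end', 'read_strand', 'read_len',
           'ref_base', 'read_base', 'ref_text', 'read_text' )
  for line in open( 'lastz_diffs.tabular' ):
      snp = dict( zip( cols, line.rstrip( '\n' ).split( '\t' ) ) )
      print '%s:%s %s->%s' % ( snp['ref_id'], snp['ref_start'], snp['ref_base'], snp['read_base'] )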
-**Tabular output**
-
-Tabular output is a tab-separated format with 30 columns::
-
-   1        2  3     4     5     6     7   8                 9              10  11   12   13   14   15   16   17   18  19                20                21   22     23      24      25    26    27    28    29  30
-  -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-  14  PHIX174  +  5386  4648  4647  4661  14  ATTTTCGTGATATT    EYKX4VC01BV8HS  +   204  154  153  167  154  153  167  14  ATTTTCGTGATATT    ..............    14M  14/14  100.0%  14/204  6.9%  0/14  0.0%  4494  NA
-  16  PHIX174  +  5386  3363  3362  3378  16  GACGCCGGATTTGAGA  EYKX4VC01AWJ88  -   259   36   35   51  209  208  224  16  GACGCCGGATTTGAGA  ................  16M  16/16  100.0%  16/259  6.2%  0/16  0.0%  3327  NA
-
-The following columns are present::
-
-             Field  Meaning
-  ----------------  -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-   1.        score  Score of the alignment block. The scale and meaning of this number will vary, depending on the final stage performed and other command-line options.
-   2.        name1  Name of the target sequence.
-   3.      strand1  Target sequence strand, either "+" or "−".
-   4.        size1  Size of the entire target sequence.
-   5.       start1  Starting position of the alignment block in the target, origin-one.
-   6.      zstart1  Starting position of the alignment block in the target, origin-zero.
-   7.         end1  Ending position of the alignment block in the target, expressed either as origin-one closed or origin-zero half-open (the ending value is the same in both systems).
-   8.      length1  Length of the alignment block in the target (excluding gaps).
-   9.        text1  Aligned characters in the target, including gap characters.
-  10.        name2  Name of the query sequence.
-  11.      strand2  Query sequence strand, either "+" or "−".
-  12.        size2  Size of the entire query sequence.
-  13.       start2  Starting position of the alignment block in the query, origin-one.
-  14.      zstart2  Starting position of the alignment block in the query, origin-zero.
-  15.         end2  Ending position of the alignment block in the query, expressed either as origin-one closed or origin-zero half-open (the ending value is the same in both systems).
-  16.      start2+  Starting position of the alignment block in the query, counting along the query sequence's positive strand (regardless of which query strand was aligned), origin-one. Note that if strand2 is "−", then this is the other end of the block from start2.
-  17.     zstart2+  Starting position of the alignment block in the query, counting along the query sequence's positive strand (regardless of which query strand was aligned), origin-zero. Note that if strand2 is "−", then this is the other end of the block from zstart2.
-  18.        end2+  Ending position of the alignment block in the query, counting along the query sequence's positive strand (regardless of which query strand was aligned), expressed either as origin-one closed or origin-zero half-open (the ending value is the same in both systems). Note that if strand2 is "−", then this is the other end of the block from end2.
-  19.      length2  Length of the alignment block in the query (excluding gaps).
-  20.        text2  Aligned characters in the query, including gap characters.
-  21.         diff  Differences between what would be written for text1 and text2. Matches are written as . (period), transitions as : (colon), transversions as X, and gaps as - (hyphen).
-  22.        cigar  A CIGAR-like representation of the alignment's path through the Dynamic Programming matrix. This is the short representation, without spaces, described in the Ensembl CIGAR specification.
-  23./24. identity  Fraction of aligned bases in the block that are matches (see Identity). This is written as two fields. The first field is a fraction, written as &lt;n&gt;/&lt;d&gt;. The second field contains the same value, computed as a percentage.
-  25./26. coverage  Fraction of the entire input sequence (target or query, whichever is shorter) that is covered by the alignment block (see Coverage). This is written as two fields. The first field is a fraction, written as &lt;n&gt;/&lt;d&gt;. The second field contains the same value, computed as a percentage.
-  27./28.  gaprate  Rate of gaps (also called indels) in the alignment block. This is written as two fields. The first field is a fraction, written as &lt;n&gt;/&lt;d&gt;, with the numerator being the number of alignment columns containing gaps and the denominator being the number without gaps. The second field contains the same value, computed as a percentage.
-  29.     diagonal  The diagonal of the start of the alignment block in the dynamic programming matrix, expressed as an identifying number start1-start2.
-  30.      shingle  A measurement of the shingle overlap between the target and the query. This is intended for the case where both the target and query are relatively short, and their ends are expected to overlap.  
-
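Because columns 23/24 report identity both as a fraction and as a percentage, alignment
blocks can be re-filtered after the run; a minimal sketch, assuming the 30-column layout
above::

  def keep_high_identity( path, min_pct=95.0 ):
      for line in open( path ):
          fields = line.rstrip( '\n' ).split( '\t' )
          pct = float( fields[23].rstrip( '%' ) )   # column 24: identity as a percentage
          if pct >= min_pct:
              yield line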
--------
-
-**LASTZ Settings**
-
-There are two setting modes: (1) **Commonly used settings** and (2) **Full Parameter List**.
-
-**Commonly used settings**
-
-There are seven modes::
-
-  Illumina-Solexa/AB-SOLiD 95% identity
-  Illumina-Solexa/AB-SOLiD 85% identity
-  Roche-454 98% identity
-  Roche-454 95% identity
-  Roche-454 90% identity
-  Roche-454 85% identity
-  Roche-454 75% identity
-
-When deciding which one to use, consider the following: a 36 bp read with two differences will be 34/36 = 94% identical to the reference.  
-
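That figure follows directly from the read length and the number of differences::

  read_len, diffs = 36, 2
  identity = 100.0 * ( read_len - diffs ) / read_len   # 94.4, i.e. roughly 94%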
-**Full Parameter List**
-
-This mode gives you fuller control over lastz. The description of these and other parameters is found at the end of this page. Note that not all parameters are included in this interface. If you would like to make additional options available through Galaxy, e-mail us at galaxy-bugs@bx.psu.edu.
-
-------
-
-**Do you want to modify the reference name?**
-
-This option allows you to set the name of the reference sequence manually. This is helpful when, for example, you would like to make the reference name compatible with the UCSC naming conventions to be able to display your lastz results as a custom track at the UCSC Genome Browser.
-
-------
-
-**LASTZ parameter list**
-
-This is an exhaustive list of LASTZ options. Once again, please note that not all options are included in this interface. If you would like to make additional options available through Galaxy, e-mail us at galaxy-bugs@bx.psu.edu::
-
-  target[[s..e]][-]       spec/file containing target sequence (fasta or nib)
-                          [s..e] defines a subrange of the file
-                          - indicates reverse-complement
-                          (use --help=files for more details)
-  query[[s..e]][-]        spec/file containing query sequences (fasta or nib)
-                          if absent, queries come from stdin (unless they
-                          aren't needed, as for --self or --tableonly)
-                          (use --help=files for more details)
-  --self                  the target sequence is also the query
-  --quantum               the query sequence contains quantum DNA
-  --seed=match&lt;length&gt;    use a word with no gaps instead of a seed pattern
-  --seed=half&lt;length&gt;     use space-free half-weight word instead of seed pattern
-  --match=&lt;reward&gt;[,&lt;penalty&gt;]   set the score values for a match (+&lt;reward&gt;)
-                          and mismatch (-&lt;penalty&gt;)
-  --[no]trans[ition][=2]         allow one or two transitions in a seed hit
-                          (by default a transition is allowed)
-  --word=&lt;bits&gt;           set max bits for word hash;  use this to trade time for
-                          memory, eliminating thrashing for heavy seeds
-                          (default is 28 bits)
-  --[no]filter=[&lt;T&gt;:]&lt;M&gt;     filter half-weight seed hits, requiring at least M
-                          matches and allowing no more than T transversions
-                          (default is no filtering)
-  --notwins               require just one seed hit
-  --twins=[&lt;min&gt;:]&lt;maxgap&gt;   require two nearby seed hits on the same diagonal
-                          (default is twins aren't required)
-  --[no]recoverseeds      avoid losing seeds in hash collisions. Cannot be used with --twins
-  --seedqueue=&lt;entries&gt;   set number of entries in seed hit queue
-                          (default is 262144)
-  --anchors=&lt;file&gt;        read anchors from a file, instead of discovering anchors
-                          via seeding
-  --recoverhits           recover hash-collision seed hits
-                          (default is not to recover seed hits)
-  --step=&lt;length&gt;         set step length (default is 1)
-  --maxwordcount=&lt;limit&gt;  words occurring more often than &lt;limit&gt; in the target
-                          are not eligible for seeds
-  --strand=both           search both strands
-  --strand=plus           search + strand only (matching strand of query spec)
-  --strand=minus          search - strand only (opposite strand of query spec)
-                          (by default both strands are searched)
-  --ambiguousn            treat N as an ambiguous nucleotide
-                          (by default N is treated as a sequence splicing character)
-  --[no]gfextend          perform gap-free extension of seed hits to HSPs
-                          (by default no extension is performed)
-  --[no]chain             perform chaining
-  --chain=&lt;diag,anti&gt;     perform chaining with given penalties for diagonal and
-                          anti-diagonal
-                          (by default no chaining is performed)
-  --[no]gapped            perform gapped alignment (instead of gap-free)
-                          (by default gapped alignment is performed)
-  --score[s]=&lt;file&gt;         read substitution scores from a file
-                          (default is HOXD70)
-  --unitscore[s]          scores are +1/-1 for match/mismatch
-  --gap=&lt;[open,]extend&gt;   set gap open and extend penalties (default is 400,30)
-  --xdrop=&lt;score&gt;         set x-drop threshold (default is 10*sub[A][A])
-  --ydrop=&lt;score&gt;         set y-drop threshold (default is open+300extend)
-  --infer[=&lt;control&gt;]     infer scores from the sequences, then use them
-  --inferonly[=&lt;control&gt;]   infer scores, but don't use them (requires --infscores)
-                          all inference options are read from the control file
-  --infscores[=&lt;file&gt;]    write inferred scores to a file
-  --hspthresh=&lt;score&gt;     set threshold for high scoring pairs (default is 3000)
-                          ungapped extensions scoring lower are discarded
-                          &lt;score&gt; can also be a percentage or base count
-  --entropy               adjust for entropy when qualifying HSPs in the x-drop extension 
-                          method
-  --noentropy             don't adjust for entropy when qualifying HSPs
-  --exact=&lt;length&gt;        set threshold for exact matches
-                          if specified, exact matches are found rather than high
-                          scoring pairs (replaces --hspthresh)
-  --inner=&lt;score&gt;         set threshold for HSPs during interpolation
-                          (default is no interpolation)
-  --gappedthresh=&lt;score&gt;  set threshold for gapped alignments
-                          gapped extensions scoring lower are discarded
-                          &lt;score&gt; can also be a percentage or base count
-                          (default is to use same value as --hspthresh)
-  --ball=&lt;score&gt;          set minimum score required of words 'in' a quantum ball
-  --[no]entropy           involve entropy in filtering high scoring pairs
-                          (default is "entropy")
-  --[no]mirror            report/use mirror image of all gap-free alignments
-                          (default is "mirror" for self-alignments only)
-  --traceback=&lt;bytes&gt;     space for trace-back information
-                          (default is 80.0M)
-  --masking=&lt;count&gt;       mask any position in target hit this many times
-                          zero indicates no masking
-                          (default is no masking)
-  --targetcapsule=&lt;capsule_file&gt;   the target seed word position table and seed
-                          (as well as the target sequence) are read from the specified file
-  --segments=&lt;segment_file&gt;   read segments from a file, instead of discovering
-                          them via seeding. Replaces other seeding or gap-free extension
-                          options
-  --[no]census[=&lt;file&gt;]     count/report how many times each target base aligns
-                          (default is to not report census)
-  --identity=&lt;min&gt;[..&lt;max&gt;]   filter alignments by percent identity
-                          0&lt;=min&lt;=max&lt;=100;  blocks (or HSPs) outside min..max
-                          are discarded
-                          (default is no identity filtering)
-  --coverage=&lt;min&gt;[..&lt;max&gt;]   filter alignments by percentage of query covered
-                          0&lt;=min&lt;=max&lt;=100;  blocks (or HSPs) outside min..max
-                          are discarded
-                          (default is no query coverage filtering)
-  --notrivial             do not output trivial self-alignment block if the target and query 
-                          sequences are identical. Using --self enables this option automatically
-  --output=&lt;output_file&gt;  write the alignments to the specified file name instead of stdout
-  --code=&lt;file&gt;           give quantum code for query sequence (only for display)
-  --format=&lt;type&gt;         specify output format; one of lav, axt, maf, maf+, maf-, text,
-                          lav+text, cigar, text, rdplot, general, or general:&lt;fields&gt;
-                          (by default output is LAV)
-  --rdotplot=&lt;file&gt;       create an additional output file suitable for plotting the alignments 
-                          with the R statistical package.
-  --markend               Just before normal completion, write "# lastz end-of-file" to output file
-  --census[=&lt;output_file&gt;]    count and report how many times each target base aligns, up 
-                          to 255. Ns are included in the count
-  --census16[=&lt;output_file&gt;]  count and report how many times each target base aligns, up
-                          to 65 thousand
-  --census32[=&lt;output_file&gt;]  count and report how many times each target base aligns, up
-                          to 4 billion
-  --writecapsule=&lt;capsule_file&gt;    just write out a target capsule file and quit; don't 
-                          search for seeds or perform subsequent stages
-  --verbosity=&lt;level&gt;     set info level (0 is minimum, 10 is everything)
-                          (default is 0)
-  --[no]runtime           report runtime in the output file
-                          (default is to not report runtime)
-  --tableonly[=count]     just produce the target position table, don't
-                          search for seeds
-  --[no]stats[=&lt;file&gt;]    show search statistics (or don't)
-                          (not available in this build)
-  --version               report the program version and quit
-  --help                  list all options
-  --help=files            list information about file specifiers
-  --help=short[cuts]      list blastz-compatible shortcuts
-  --help=yasra            list yasra-specific shortcuts
-
-    </help>
-</tool>
--- a/tools/sr_mapping/mosaik.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,120 +0,0 @@
-<?xml version="1.0"?>
-<tool id="mosaik_wrapper" name="Map with Mosaik" version="1.1.1">
-  <description/>
-  <requirements><requirement type="package">mosaik</requirement></requirements>
-  <command>
-    #set $processors = '-p 4'
-    #set $lm = ''
-    #if $paired.kind == 'single':
-        #set $mfl = ''
-        #set $ls  = ''
-    #else:
-        #set $ls = '-ls $mfl'
-    #end if
-    MosaikBuild -fr
-    #if $genomeSource.refGenomeSource == 'indexed':
-        ${ filter( lambda x: str( x[0] ) == str( $genomeSource.indexReference ), $__app__.tool_data_tables[ 'mosaik_indexes' ].get_fields() )[0][-1] }
-    #else:
-        $genomeSource.historyReference
-    #end if
-        -oa mosaik_ref_file;
-    MosaikBuild  -q $reads $mfl -st $st -out mosaik_reads_file;
-    MosaikAligner -ia mosaik_ref_file -in mosaik_reads_file -out mosaik_aligned_file $ls -mm $mm -mhp $mhp -act $act -bw $bw $processors $lm -hs 15;
-    MosaikText -in mosaik_aligned_file -$outFormat sam_bam_file;
-    #if str($outFormat) == 'bam':
-        samtools sort sam_bam_file sorted_bam;
-        mv sorted_bam.bam $output
-    #else:
-        gunzip sam_bam_file.gz;
-        mv sam_bam_file $output
-    #end if
-  </command>
-  <inputs>
-    <conditional name="genomeSource">
-      <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
-        <option value="indexed">Use a built-in index</option>
-        <option value="history">Use one from the history</option>
-      </param>
-      <when value="indexed">
-        <param name="indexReference" type="select" label="Select a reference genome">
-          <options from_data_table="mosaik_indexes">
-            <filter type="sort_by" column="2"/>
-            <validator type="no_options" message="No indexes are available" />
-          </options>
-        </param>
-      </when>
-      <when value="history">
-        <param format="fasta" name="historyReference" type="data" metadata_name="dbkey" label="Select a reference from history"/>
-      </when>
-    </conditional>
-    <param format="fastq" name="reads" type="data" label="Fastq Reads File"/>
-    <param name="outFormat" type="select" label="Output Format">
-      <option value="sam">Sam</option>
-      <option value="bam">Bam</option>
-    </param>
-    <param name="st" type="select" label="Sequencing Technology Used">
-      <option value="454">454</option>
-      <option value="illumina">Illumina</option>
-      <option value="solid">Solid</option>
-      <option value="sanger">Sanger</option>
-      <option value="helicos">Helicos</option>
-    </param>
-    <conditional name="paired">
-      <param name="kind" type="select" label="Is this library mate-paired?">
-        <option value="single">Single-end</option>
-        <option value="paired">Paired-end</option>
-      </param>
-      <when value="single"/>
-      <when value="paired">
-        <param name="mfl" type="integer" value="200" label="Insert Size" help="the length between the paired reads"/>
-        <param name="ls" type="integer" value="50" label="Realignment Window" help="Window size to realign mate pairs that are out of position. Large values slow down performance"/>
-      </when>
-    </conditional>
-    <param name="mm" size="5" type="integer" value="6" label="Mismatches allowed" help="mismatches allowed per sequence"/>
-    <param name="act" size="5" type="integer" value="35" label="Alignment Candidate Threshold" help="determines which hash regions will be aligned with Smith Waterman"/>
-    <param name="bw" size="5" type="integer" value="19" label="Smith-Waterman band width"/>
-    <param name="mhp" size="5" type="integer" value="100" label="Maximum # Of Positions Stored Per Seed" help="number of places in the reference the aligner will try to place a particular hash"/>
-  </inputs>
-  <outputs>
-    <data format="sam" name="output">
-      <change_format>
-        <when input="outFormat" value="bam" format="bam" />
-      </change_format>
-      <actions>
-        <conditional name="genomeSource.refGenomeSource">
-          <when value="indexed">
-            <action type="metadata" name="dbkey">
-              <option type="from_data_table" name="mosaik_indexes" column="1">
-                <filter type="param_value" column="0" value="#" compare="startswith" keep="False" />
-                <filter type="param_value" ref="genomeSource.indexReference" column="0" />
-              </option>
-            </action>
-          </when>
-          <when value="history">
-            <action type="metadata" name="dbkey">
-              <option type="from_param" name="genomeSource.historyReference" param_attribute="dbkey" />
-            </action>
-          </when>
-        </conditional>
-      </actions>
-   </data>
-  </outputs>
-  <tests>
-    <test>
-      <param name="refGenomeSource" value="history"/>
-      <param name="historyReference" ftype="fasta" value="mosaik_test_ref.fasta"/>
-      <param name="reads" ftype="fastq" value="mosaik_test_input.fastq"/>
-      <param name="outFormat" value="sam"/>
-      <param name="st" value="454"/>
-      <param name="kind" value="single"/>
-      <param name="mm" value="6"/>
-      <param name="act" value="35"/>
-      <param name="bw" value="19"/>
-      <param name="mhp" value="100"/>
-      <output name="output" file="mosaik_test_out.sam" compare="sim_size" delta="0"/>
-    </test>
-  </tests>
-  <help>
-This tool uses Mosaik to align reads to a reference sequence.
-  </help>
-</tool>
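
Editor's sketch (not part of the changeset): the four Mosaik steps chained in the tool's
command block above, driven outside Galaxy; ref.fasta and reads.fastq are placeholder
inputs, and the numeric settings are the tool's defaults::

  import subprocess

  def run( cmd ):
      # abort the pipeline as soon as any step fails
      subprocess.check_call( cmd, shell=True )

  run( 'MosaikBuild -fr ref.fasta -oa mosaik_ref_file' )
  run( 'MosaikBuild -q reads.fastq -st illumina -out mosaik_reads_file' )
  run( 'MosaikAligner -ia mosaik_ref_file -in mosaik_reads_file -out mosaik_aligned_file'
       ' -mm 6 -mhp 100 -act 35 -bw 19 -p 4 -hs 15' )
  run( 'MosaikText -in mosaik_aligned_file -sam sam_bam_file' )   # writes sam_bam_file.gz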
--- a/tools/sr_mapping/srma_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,195 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Runs SRMA on a SAM/BAM file;
-TODO: more documentation
-
-usage: srma_wrapper.py [options]
-
-See below for options
-"""
-
-import optparse, os, shutil, subprocess, sys, tempfile
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-def parseRefLoc( refLoc, refUID ):
-    for line in open( refLoc ):
-        if not line.startswith( '#' ):
-            fields = line.strip().split( '\t' )
-            if len( fields ) >= 3:
-                if fields[0] == refUID:
-                    return fields[1]
-    return None
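# Editor's illustration: the .loc file this parser expects is tab-separated with at
# least three fields, the first being the unique ID and the second the value returned.
# A hypothetical line:
#
#     hg19<TAB>/data/srma/hg19.fa<TAB>Human (hg19)
#
# parseRefLoc( loc_path, 'hg19' ) would then return '/data/srma/hg19.fa'.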
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to index and use' )
-    parser.add_option( '-u', '--refUID', dest='refUID', help='The pre-index reference genome unique Identifier' )
-    parser.add_option( '-L', '--refLocations', dest='refLocations', help='The filepath to the srma indices location file' )
-    parser.add_option( '-i', '--input', dest='input', help='The SAM/BAM input file' )
-    parser.add_option( '-I', '--inputIndex', dest='inputIndex', help='The SAM/BAM input index file' )
-    parser.add_option( '-o', '--output', dest='output', help='The SAM/BAM output file' )
-    parser.add_option( '-O', '--offset', dest='offset', help='The alignment offset' )
-    parser.add_option( '-Q', '--minMappingQuality', dest='minMappingQuality', help='The minimum mapping quality' )
-    parser.add_option( '-P', '--minAlleleProbability', dest='minAlleleProbability', help='The minimum allele probability conditioned on coverage (for the binomial quantile).' )
-    parser.add_option( '-C', '--minAlleleCoverage', dest='minAlleleCoverage', help='The minimum haploid coverage for the consensus' )
-    parser.add_option( '-R', '--range', dest='range', help='A range to examine' )
-    parser.add_option( '-c', '--correctBases', dest='correctBases', help='Correct bases ' )
-    parser.add_option( '-q', '--useSequenceQualities', dest='useSequenceQualities', help='Use sequence qualities ' )
-    parser.add_option( '-M', '--maxHeapSize', dest='maxHeapSize', help='The maximum number of nodes on the heap before re-alignment is ignored' )
-    parser.add_option( '-s', '--fileSource', dest='fileSource', help='Whether to use a previously indexed reference sequence or one from history (indexed or history)' )
-    parser.add_option( '-p', '--params', dest='params', help='Parameter setting to use (pre_set or full)' )
-    parser.add_option( '-j', '--jarBin', dest='jarBin', default='', help='The path to where jars are stored' )
-    parser.add_option( '-f', '--jarFile', dest='jarFile', help='The file name of the jar file to use')
-    (options, args) = parser.parse_args()
-
-    # make temp directory for srma
-    tmp_dir = tempfile.mkdtemp()
-    buffsize = 1048576
-
-    # set up reference filenames
-    reference_filepath_name = None
-    # need to create SRMA dict and Samtools fai files for custom genome
-    if options.fileSource == 'history':
-        try:
-            reference_filepath = tempfile.NamedTemporaryFile( dir=tmp_dir, suffix='.fa' )
-            reference_filepath_name = reference_filepath.name
-            reference_filepath.close()
-            fai_filepath_name = '%s.fai' % reference_filepath_name
-            dict_filepath_name = reference_filepath_name.replace( '.fa', '.dict' )
-            os.symlink( options.ref, reference_filepath_name )
-            # create fai file using Samtools
-            index_fai_cmd = 'samtools faidx %s' % reference_filepath_name
-            try:
-                tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-                tmp_stderr = open( tmp, 'wb' )
-                proc = subprocess.Popen( args=index_fai_cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-                returncode = proc.wait()
-                tmp_stderr.close()
-                # get stderr, allowing for case where it's very large
-                tmp_stderr = open( tmp, 'rb' )
-                stderr = ''
-                try:
-                    while True:
-                        chunk = tmp_stderr.read( buffsize )
-                        # an empty read means the stream is exhausted
-                        if not chunk:
-                            break
-                        stderr += chunk
-                except OverflowError:
-                    pass
-                tmp_stderr.close()
-                if returncode != 0:
-                    raise Exception, stderr
-            except Exception, e:
-                # clean up temp dir
-                if os.path.exists( tmp_dir ):
-                    shutil.rmtree( tmp_dir )
-                stop_err( 'Error creating Samtools index for custom genome file: %s\n' % str( e ) )
-            # create dict file using SRMA
-            dict_cmd = 'java -cp "%s" net.sf.picard.sam.CreateSequenceDictionary R=%s O=%s' % ( os.path.join( options.jarBin, options.jarFile ), reference_filepath_name, dict_filepath_name )
-            try:
-                tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-                tmp_stderr = open( tmp, 'wb' )
-                proc = subprocess.Popen( args=dict_cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-                returncode = proc.wait()
-                tmp_stderr.close()
-                # get stderr, allowing for case where it's very large
-                tmp_stderr = open( tmp, 'rb' )
-                stderr = ''
-                try:
-                    while True:
-                        stderr += tmp_stderr.read( buffsize )
-                        if not stderr or len( stderr ) % buffsize != 0:
-                            break
-                except OverflowError:
-                    pass
-                tmp_stderr.close()
-                if returncode != 0:
-                    raise Exception, stderr
-            except Exception, e:
-                # clean up temp dir
-                if os.path.exists( tmp_dir ):
-                    shutil.rmtree( tmp_dir )
-                stop_err( 'Error creating index for custom genome file: %s\n' % str( e ) )
-        except Exception, e:
-            # clean up temp dir
-            if os.path.exists( tmp_dir ):
-                shutil.rmtree( tmp_dir )
-            stop_err( 'Problem handling SRMA index (dict file) for custom genome file: %s\n' % str( e ) )
-    # using built-in dict/index files
-    else:
-        if options.ref:
-            reference_filepath_name = options.ref
-        else:
-            reference_filepath_name = parseRefLoc( options.refLocation, options.refUID )
-    if reference_filepath_name is None:
-        raise ValueError( 'A valid genome reference was not provided.' )
-
-    # set up aligning and generate aligning command options
-    if options.params == 'pre_set':
-        srma_cmds = ''
-    else:
-        if options.useSequenceQualities == 'true':
-            useSequenceQualities = 'true'
-        else:
-            useSequenceQualities = 'false'
-        ranges = 'null'
-        # use a local name to avoid shadowing the builtin range()
-        if options.range == 'None':
-            range_setting = 'null'
-        else:
-            range_setting = options.range
-        srma_cmds = "OFFSET=%s MIN_MAPQ=%s MINIMUM_ALLELE_PROBABILITY=%s MINIMUM_ALLELE_COVERAGE=%s RANGES=%s RANGE=%s CORRECT_BASES=%s USE_SEQUENCE_QUALITIES=%s MAX_HEAP_SIZE=%s" % ( options.offset, options.minMappingQuality, options.minAlleleProbability, options.minAlleleCoverage, ranges, range_setting, options.correctBases, useSequenceQualities, options.maxHeapSize )
-
-    # perform alignments
-    buffsize = 1048576
-    try:
-        # symlink the input BAM and index files to satisfy the naming conventions SRMA requires
-        input_bam_filename = os.path.join( tmp_dir, '%s.bam' % os.path.split( options.input )[-1] )
-        os.symlink( options.input, input_bam_filename )
-        input_bai_filename = "%s.bai" % os.path.splitext( input_bam_filename )[0]
-        os.symlink( options.inputIndex, input_bai_filename )
-
-        # create a temp output name ending in .bam to match SRMA's naming conventions (unclear whether strictly required)
-        output_bam_filename = os.path.join( tmp_dir, "%s.bam" % os.path.split( options.output )[-1] )
-        # generate commandline
-        cmd = 'java -jar %s I=%s O=%s R=%s %s' % ( os.path.join( options.jarBin, options.jarFile ), input_bam_filename, output_bam_filename, reference_filepath_name, srma_cmds )
-
-        # need to nest try-except in try-finally to handle 2.4
-        try:
-            try:
-                tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-                tmp_stderr = open( tmp, 'wb' )
-                proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-                returncode = proc.wait()
-                tmp_stderr.close()
-                # get stderr, allowing for case where it's very large
-                tmp_stderr = open( tmp, 'rb' )
-                stderr = ''
-                try:
-                    while True:
-                        stderr += tmp_stderr.read( buffsize )
-                        if not stderr or len( stderr ) % buffsize != 0:
-                            break
-                except OverflowError:
-                    pass
-                tmp_stderr.close()
-                if returncode != 0:
-                    raise Exception, stderr
-            except Exception, e:
-                raise Exception, 'Error executing SRMA. ' + str( e )
-            # move file from temp location (with .bam name) to provided path
-            shutil.move( output_bam_filename, options.output )
-            # check that there are results in the output file
-            if os.path.getsize( options.output ) <= 0:
-                raise Exception, 'The output file is empty. You may simply have no matches, or there may be an error with your input file or settings.'
-        except Exception, e:
-            stop_err( 'The re-alignment failed.\n' + str( e ) )
-    finally:
-        # clean up temp dir
-        if os.path.exists( tmp_dir ):
-            shutil.rmtree( tmp_dir )
-
-if __name__=="__main__": __main__()
--- a/tools/sr_mapping/srma_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,218 +0,0 @@
-<tool id="srma_wrapper" name="Re-align with SRMA" version="0.2.5">
-  <description></description>
-  <command interpreter="python">srma_wrapper.py 
-    #if $refGenomeSource.refGenomeSource_type == "history":
-      --ref=$refGenomeSource.ownFile
-    #else:
-      --ref="${ filter( lambda x: str( x[0] ) == str( $refGenomeSource.ref ), $__app__.tool_data_tables[ 'srma_indexes' ].get_fields() )[0][-1] }"
-      --refUID=$refGenomeSource.ref
-      --refLocations=${GALAXY_DATA_INDEX_DIR}/srma_index.loc
-    #end if
-    --input=$input
-    --inputIndex=${input.metadata.bam_index}
-    --output=$output
-    --params=$params.source_select
-    --fileSource=$refGenomeSource.refGenomeSource_type
-    --jarBin="${GALAXY_DATA_INDEX_DIR}/shared/jars"
-    #if $params.source_select == "full":
-      --offset=$params.offset
-      --minMappingQuality=$params.minMappingQuality
-      --minAlleleProbability=$params.minAlleleProbability
-      --minAlleleCoverage=$params.minAlleleCoverage
-      --range=$params.range
-      --correctBases=$params.correctBases
-      --useSequenceQualities=$params.useSequenceQualities
-      --maxHeapSize=$params.maxHeapSize
-    #end if
-    --jarFile="srma.jar"
-  </command>
-  <inputs>
-    <conditional name="refGenomeSource">
-      <param name="refGenomeSource_type" type="select" label="Will you select a reference genome from your history or use a built-in reference?">
-        <option value="built-in">Use a built-in reference</option>
-        <option value="history">Use one from the history</option>
-      </param>
-      <when value="built-in">
-        <param name="ref" type="select" label="Select a reference genome">
-          <options from_data_table="srma_indexes">
-            <filter type="sort_by" column="2" />
-            <validator type="no_options" message="No indexes are available" />
-          </options>
-        </param>
-      </when>
-      <when value="history">
-        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
-      </when>
-    </conditional>
-    <param name="input" type="data" format="bam" label="Input BAM file" help="The input BAM file to re-align"/>
-    <conditional name="params">
-      <param name="source_select" type="select" label="SRMA settings to use" help="For most re-alignment needs, use Commonly Used settings. If you want full control use Full Parameter List">
-        <option value="pre_set">Commonly Used</option>
-        <option value="full">Full Parameter List</option>
-      </param>
-      <when value="pre_set" />
-      <when value="full">
-        <param name="offset" type="integer" value="20" label="Offset" help="The alignment offset" />
-        <param name="minMappingQuality" type="integer" value="0" label="Minimum mapping quality" help="The minimum mapping quality" />
-        <param name="minAlleleProbability" type="float" value="0.1" label="Minimum allele probability" help="The minimum allele probability conditioned on coverage (for the binomial quantile)." />
-        <param name="minAlleleCoverage" type="integer" value="2" label="Minimum allele coverage" help="The minimum haploid coverage for the consensus. Default value: 3. This option can be set " />
-        <param name="range" type="text" value="null" label="Range" help="A range to examine" />
-        <param name="correctBases" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Correct bases" help="Correct bases " />
-        <param name="useSequenceQualities" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Use sequence qualities" help="Use sequence qualities " />
-        <param name="maxHeapSize" type="integer" value="8192" label="Maximum heap size" help="The maximum number of nodes on the heap before re-alignment is ignored" />
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="bam" name="output" label="${tool.name} on ${on_string}: re-aligned reads">
-      <actions>
-        <conditional name="refGenomeSource.refGenomeSource_type">
-          <when value="built-in">
-            <action type="metadata" name="dbkey">
-              <option type="from_data_table" name="srma_indexes" column="1" offset="0">
-                <filter type="param_value" column="0" value="#" compare="startswith" keep="False" />
-                <filter type="param_value" ref="refGenomeSource.ref" column="0" />
-              </option>
-            </action>
-          </when>
-          <when value="history">
-            <action type="metadata" name="dbkey">
-              <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
-            </action>
-          </when>
-        </conditional>
-      </actions>
-    </data>
-  </outputs>
-  <tests>
-      <test>
-          <!-- Commands to run to prepare test files (uses built-in index)
-          Prepare bam index file:
-          samtools index srma_in1.bam
-          Run SRMA:
-          java -jar srma.jar I=srma_in1.bam O=srma_out1.bam R=/afs/bx.psu.edu/depot/data/genome/hg18/srma_index/chr21.fa
-          To create the bam file first, start with a sam file (srma_in1.sam) generated with a run using the chr21 fasta file and which contains the header. Run before samtools index:
-          samtools view -bt /afs/bx.psu.edu/depot/data/genome/hg18/sam_index/chr21.fa -o srma_in1.u.bam srma_in1.sam
-          samtools sort srma_in1.u.bam srma_in1
-          -->
-          <param name="refGenomeSource_type" value="built-in" />
-          <param name="ref" value="hg18chr21" />
-          <param name="input" value="srma_in1.bam" type="bam" />
-          <param name="source_select" value="pre_set" />
-          <output name="output" file="srma_out1.bam" ftype="bam" lines_diff="2" /><!-- allows tag with version number to be different -->
-      </test>
-      <test>
-          <!-- Commands to run to prepare test files (uses custom genome):
-          Prepare custom dict/index files:
-          samtools faidx srma_in2.fa
-          java -cp srma.jar net.sf.picard.sam.CreateSequenceDictionary R=srma_in2.fa O=srma_in2.dict
-          Prepare bam index file:
-          samtools index srma_in3.bam
-          Run SRMA:
-          java -jar "srma.jar" I=srma_in3.bam O=srma_out2.bam R=srma_in2.fa OFFSET=20 MIN_MAPQ=0 MINIMUM_ALLELE_PROBABILITY=0.1 MINIMUM_ALLELE_COVERAGE=2 RANGES=null RANGE=null CORRECT_BASES=true USE_SEQUENCE_QUALITIES=true MAX_HEAP_SIZE=8192
-          To create the bam file first, the sam file needs to have been run with the same reference file (srma_in2.fa) and have the header present. For instance:
-          samtools view -bT srma_in2.fa -o srma_in3.u.bam srma_in3.sam
-          samtools sort srma_in3.u.bam srma_in3
-          -->
-          <param name="refGenomeSource_type" value="history" />
-          <param name="ownFile" value="srma_in2.fa" ftype="fasta" />
-          <param name="input" value="srma_in3.bam" ftype="bam" />
-          <param name="source_select" value="full" />
-          <param name="offset" value="20" />
-          <param name="minMappingQuality" value="0" />
-          <param name="minAlleleProbability" value="0.1" />
-          <param name="minAlleleCoverage" value="2" />
-          <param name="range" value="null" />
-          <param name="correctBases" value="true" />
-          <param name="useSequenceQualities" value="true" />
-          <param name="maxHeapSize" value="8192" />
-          <output name="output" file="srma_out2.bam" ftype="bam" lines_diff="2" /><!-- allows tag with version number to be different -->
-      </test>
-  </tests>
-  <help>
-**What it does**
-
-SRMA is a short read micro re-aligner for next-generation high throughput sequencing data.
-
-Sequence alignment algorithms examine each read independently. When indels occur towards the ends of reads, the alignment can lead to false SNPs as well as improperly placed indels. This tool aims to perform a re-alignment of each read to a graphical representation of all alignments within a local region to provide a better overall base-resolution consensus.
-
-Currently this tool works well with, and has been tested on, 30x diploid coverage genome sequencing data from Illumina and ABI SOLiD technology. It may not work well with 454 data, for which indels are a significant error mode.
-
-------
-
-Please cite the website "http://srma.sourceforge.net" as well as:
-
-Homer N and Nelson SF. SRMA: short read micro re-aligner. 2010.
-
-------
-
-**Know what you are doing**
-
-.. class:: warningmark
-
-There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
-
-.. __: http://srma.sourceforge.net/
-
-------
-
-**Input formats**
-
-SRMA accepts a BAM input file. Note that this file should have been generated from a SAM file which contains the header.
-
-------
-
-**Outputs**
-
-The output is in BAM format, see http://samtools.sourceforge.net for more details.
-
--------
-
-**SRMA settings**
-
-All of the options have a default value. You can change any of them. Most of the options in SRMA have been implemented here.
-
-------
-
-**SRMA parameter list**
-
-This is an exhaustive list of SRMA options:
-
-For **SRMA**::
-
-  INPUT=File
-  I=File                        The input SAM or BAM file. Required. 
-  
-  OUTPUT=File
-  O=File                        The output SAM or BAM file. Default value: null. 
-  
-  REFERENCE=File
-  R=File                        The reference FASTA file. Required. 
-  
-  OFFSET=Integer                The alignment offset. Default value: 20. This option can be set to 'null' to clear the 
-                                default value. 
-  
-  MIN_MAPQ=Integer              The minimum mapping quality. Default value: 0. This option can be set to 'null' to clear 
-                                the default value. 
-  
-  MINIMUM_ALLELE_PROBABILITY=Double
-                                The minimum allele probability conditioned on coverage (for the binomial quantile). 
-                                Default value: 0.1. This option can be set to 'null' to clear the default value. 
-  
-  MINIMUM_ALLELE_COVERAGE=Integer
-                                The minimum haploid coverage for the consensus. Default value: 3. This option can be set 
-                                to 'null' to clear the default value. 
-  
-  RANGE=String                  A range to examine. Default value: null. 
-  
-  CORRECT_BASES=Boolean         Correct bases. Default value: false. This option can be set to 'null' to clear the 
-                                default value. Possible values: {true, false} 
-  
-  USE_SEQUENCE_QUALITIES=Boolean
-                                Use sequence qualities. Default value: true. This option can be set to 'null' to clear 
-                                the default value. Possible values: {true, false} 
-  
-  MAX_HEAP_SIZE=Integer         The maximum number of nodes on the heap before re-alignment is ignored. Default value: 
-                                8192. This option can be set to 'null' to clear the default value. 
-
-  </help>
-</tool>
--- a/tools/stats/aggregate_binned_scores_in_intervals.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,113 +0,0 @@
-<tool id="aggregate_scores_in_intervals2" description="such as phastCons, GERP, binCons, and others for a set of genomic intervals" name="Aggregate datapoints" version="1.1.3">
-  <description>Appends the average, min, max of datapoints per interval</description>
-  <command interpreter="python">
-    #if $score_source_type.score_source == "user" #aggregate_scores_in_intervals.py $score_source_type.input2 $input1 ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} $out_file1 --chrom_buffer=3
-    #else                                         #aggregate_scores_in_intervals.py $score_source_type.datasets $input1 ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} $out_file1 -b
-    #end if#
-  </command>
-  <inputs>
-    <param format="interval" name="input1" type="data" label="Interval file"/>
-    <conditional name="score_source_type">
-      <param name="score_source" type="select" label="Score Source">
-        <option value="cached" selected="true">Locally Cached Scores</option>
-        <option value="user">Scores in Your History</option>
-      </param>
-      <when value="cached">
-        <param name="datasets" type="select" label="Available datasets" display="radio">
-          <options from_file="binned_scores.loc">
-            <column name="name" index="1"/>
-            <column name="value" index="2"/>
-            <column name="dbkey" index="0"/>
-            <filter type="data_meta" ref="input1" key="dbkey" column="0" />
-          </options>
-        </param>
-      </when>
-      <when value="user">
-        <param format="wig" name="input2" type="data" label="Score file">
-          <options>
-            <filter type="data_meta" ref="input1" key="dbkey" />
-          </options>
-        </param>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="interval" name="out_file1" metadata_source="input1"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="6.bed" dbkey="hg17" ftype="bed"/>
-      <param name="score_source" value="cached"/>
-      <param name="datasets" value="/galaxy/data/binned_scores/hg17/phastcons_encode_sep2005_tba" />
-      <output name="out_file1" file="aggregate_binned_scores_in_intervals.out" />
-    </test>
-    <test>
-      <param name="input1" value="9_hg18.bed" dbkey="hg18" ftype="bed"/>
-      <param name="score_source" value="cached"/>
-      <param name="datasets" value="/galaxy/data/binned_scores/hg18/phastCons17way/ba" />
-      <output name="out_file1" file="aggregate_binned_scores_in_intervals2.interval" />
-    </test>
-    <test>
-      <param name="input1" value="6.bed" dbkey="hg17" ftype="bed"/>
-      <param name="score_source" value="user"/>
-      <param name="input2" value="aggregate_binned_scores_3.wig" dbkey="hg17" ftype="wig"/>
-      <output name="out_file1" file="aggregate_binned_scores_in_intervals3.out"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-This tool currently only has cached data for genome builds hg16, hg17 and hg18. However, you may use your own datapoint (wiggle) data, such as the datasets available from UCSC. If you are trying to use your own datapoint file and it is not appearing as an option, make sure that the builds of your history items match.
-
-.. class:: warningmark
-
-This tool assumes that the input dataset is in interval format and contains at least a chrom column, a start column and an end column.  These 3 columns can be dispersed throughout any number of other data columns. 
-
------
-
-.. class:: infomark
-
-**TIP:** Computing summary information may throw exceptions if the data type (e.g., string, integer) in every line of the columns is not appropriate for the computation (e.g., attempting numerical calculations on strings).  If an exception is thrown when computing summary information for a line, that line is skipped as invalid for the computation.  The number of invalid skipped lines is documented in the resulting history item as a "Data issue".
-
------
-
-**Syntax**
-
-This tool appends columns of summary information for each interval matched against a selected dataset.  For each interval, the average, minimum and maximum for the data falling within the interval is computed.
-
-- Several quantitative scores are provided for the ENCODE regions.
-
-  - Various Scores
-      - Regulatory Potential
-      - Neutral rate (Ancestral Repeats)
-      - GC fraction
-  - Conservation Scores
-      - PhastCons
-      - binCons
-      - GERP
-
------
-
-**Example**
-
-If your original data has the following format:
-
-+------+-----+-----+---+------+
-|other1|chrom|start|end|other2|
-+------+-----+-----+---+------+
-
-and you choose to aggregate phastCons scores, your output will look like this:
-
-+------+-----+-----+---+------+---+---+---+
-|other1|chrom|start|end|other2|avg|min|max|
-+------+-----+-----+---+------+---+---+---+
-
-where:
-
-* **avg** - average phastCons score for each region
-* **min** - minimum phastCons score for each region
-* **max** - maximum phastCons score for each region
-
-  </help>
-</tool>
--- a/tools/stats/aggregate_scores_in_intervals.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,243 +0,0 @@
-#!/usr/bin/env python
-# Greg Von Kuster
-"""
-usage: %prog score_file interval_file chrom start stop [out_file] [options] 
-    -b, --binned: 'score_file' is actually a directory of binned array files
-    -m, --mask=FILE: bed file containing regions not to consider valid
-    -c, --chrom_buffer=INT: number of chromosomes (default is 3) to keep in memory when using a user supplied score file
-"""
-
-from __future__ import division
-from galaxy import eggs
-import pkg_resources 
-pkg_resources.require( "bx-python" )
-pkg_resources.require( "lrucache" )
-try:
-    pkg_resources.require( "python-lzo" )
-except:
-    pass
-
-import psyco_full
-import sys
-import os, os.path
-from UserDict import DictMixin
-import bx.wiggle
-from bx.binned_array import BinnedArray, FileBinnedArray
-from bx.bitset import *
-from bx.bitset_builders import *
-from fpconst import isNaN
-from bx.cookbook import doc_optparse
-from galaxy.tools.exception_handling import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-import tempfile, struct
-class PositionalScoresOnDisk:
-    fmt = 'f'
-    fmt_size = struct.calcsize( fmt )
-    default_value = float( 'nan' )
-    
-    def __init__( self ):
-        self.file = tempfile.TemporaryFile( 'w+b' )
-        self.length = 0
-    def __getitem__( self, i ):
-        if i < 0: i = self.length + i
-        if i < 0 or i >= self.length: return self.default_value
-        try:
-            self.file.seek( i * self.fmt_size )
-            return struct.unpack( self.fmt, self.file.read( self.fmt_size ) )[0]
-        except Exception, e:
-            raise IndexError, e
-    def __setitem__( self, i, value ):
-        if i < 0: i = self.length + i
-        if i < 0: raise IndexError, 'Negative assignment index out of range'
-        if i >= self.length:
-            self.file.seek( self.length * self.fmt_size )
-            self.file.write( struct.pack( self.fmt, self.default_value ) * ( i - self.length ) )
-            self.length = i + 1
-        self.file.seek( i * self.fmt_size )
-        self.file.write( struct.pack( self.fmt, value ) )
-    def __len__( self ):
-        return self.length
-    def __repr__( self ):
-        rep = "[ "
-        for i in xrange( self.length ):
-            rep = "%s %s," % ( rep, self[i] )
-        return "%s ]" % ( rep )
-
-class FileBinnedArrayDir( DictMixin ):
-    """
-    Adapter that makes a directory of FileBinnedArray files look like
-    a regular dict of BinnedArray objects. 
-    """
-    def __init__( self, dir ):
-        self.dir = dir
-        self.cache = dict()
-    def __getitem__( self, key ):
-        value = None
-        if key in self.cache:
-            value = self.cache[key]
-        else:
-            fname = os.path.join( self.dir, "%s.ba" % key )
-            if os.path.exists( fname ):
-                value = FileBinnedArray( open( fname ) )
-                self.cache[key] = value
-        if value is None:
-            raise KeyError( "File does not exist: " + fname )
-        return value
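-    # Illustrative use (`score_dir` stands for any directory of <chrom>.ba files):
-    #   scores = FileBinnedArrayDir( score_dir )
-    #   scores[ "chr21" ][ 1000 ]  # score at position 1000 on chr21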
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-    
-def load_scores_wiggle( fname, chrom_buffer_size = 3 ):
-    """
-    Read a wiggle file and return a dict of BinnedArray objects keyed 
-    by chromosome.
-    """ 
-    scores_by_chrom = dict()
-    try:
-        for chrom, pos, val in bx.wiggle.Reader( UCSCOutWrapper( open( fname ) ) ):
-            if chrom not in scores_by_chrom:
-                if chrom_buffer_size:
-                    scores_by_chrom[chrom] = BinnedArray()
-                    chrom_buffer_size -= 1
-                else:
-                    scores_by_chrom[chrom] = PositionalScoresOnDisk()
-            scores_by_chrom[chrom][pos] = val
-    except UCSCLimitException:
-        # Wiggle data was truncated, at the very least need to warn the user.
-        print 'Encountered message from UCSC: "Reached output limit of 100000 data values", so be aware your data was truncated.'
-    except IndexError:
-        stop_err('Data error: one or more column data values is missing in "%s"' %fname)
-    except ValueError:
-        stop_err('Data error: invalid data type for one or more values in "%s".' %fname)
-    return scores_by_chrom
-
-def load_scores_ba_dir( dir ):
-    """
-    Return a dict-like object (keyed by chromosome) that returns 
-    FileBinnedArray objects created from "key.ba" files in `dir`
-    """
-    return FileBinnedArrayDir( dir )
-    
-def main():
-
-    # Parse command line
-    options, args = doc_optparse.parse( __doc__ )
-
-    try:
-        score_fname = args[0]
-        interval_fname = args[1]
-        chrom_col = args[2]
-        start_col = args[3]
-        stop_col = args[4]
-        if len( args ) > 5:
-            out_file = open( args[5], 'w' )
-        else:
-            out_file = sys.stdout
-        binned = bool( options.binned )
-        mask_fname = options.mask
-    except:
-        doc_optparse.exit()
-
-    if score_fname == 'None':
-        stop_err( 'This tool works with data from genome builds hg16, hg17 or hg18.  Click the pencil icon in your history item to set the genome build if appropriate.' )
-    
-    try:
-        chrom_col = int(chrom_col) - 1
-        start_col = int(start_col) - 1
-        stop_col = int(stop_col) - 1
-    except:
-        stop_err( 'Chrom, start & end column not properly set, click the pencil icon in your history item to set these values.' )
-
-    if chrom_col < 0 or start_col < 0 or stop_col < 0:
-        stop_err( 'Chrom, start & end column not properly set, click the pencil icon in your history item to set these values.' )
-        
-    if binned:
-        scores_by_chrom = load_scores_ba_dir( score_fname )
-    else:
-        try:
-            chrom_buffer = int( options.chrom_buffer )
-        except:
-            chrom_buffer = 3
-        scores_by_chrom = load_scores_wiggle( score_fname, chrom_buffer )
-
-    if mask_fname:
-        masks = binned_bitsets_from_file( open( mask_fname ) )
-    else:
-        masks = None
-
-    skipped_lines = 0
-    first_invalid_line = 0
-    invalid_line = ''
-
-    for i, line in enumerate( open( interval_fname )):
-        valid = True
-        line = line.rstrip('\r\n')
-        if line and not line.startswith( '#' ):
-            fields = line.split()
-            
-            try:
-                chrom, start, stop = fields[chrom_col], int( fields[start_col] ), int( fields[stop_col] )
-            except:
-                # invalid lines are counted and recorded once, in the else branch below
-                valid = False
-            if valid:
-                total = 0
-                count = 0
-                min_score = 100000000
-                max_score = -100000000
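-                # +/-1e8 sentinel bounds; safe for the bundled conservation-type scores, which are far smaller in magnitude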
-                for j in range( start, stop ):
-                    if chrom in scores_by_chrom:
-                        try:
-                            # Skip if base is masked
-                            if masks and chrom in masks:
-                                if masks[chrom][j]:
-                                    continue
-                            # Get the score, only count if not 'nan'
-                            score = scores_by_chrom[chrom][j]
-                            if not isNaN( score ):
-                                total += score
-                                count += 1
-                                max_score = max( score, max_score )
-                                min_score = min( score, min_score )
-                        except:
-                            continue
-                if count > 0:
-                    avg = total/count
-                else:
-                    avg = "nan"
-                    min_score = "nan"
-                    max_score = "nan"
-                
-                # Build the resulting line of data
-                out_line = []
-                for k in range(0, len(fields)):
-                    out_line.append(fields[k])
-                out_line.append(avg)
-                out_line.append(min_score)
-                out_line.append(max_score)
-                
-                print >> out_file, "\t".join( map( str, out_line ) )
-            else:
-                skipped_lines += 1
-                if not invalid_line:
-                    first_invalid_line = i + 1
-                    invalid_line = line
-        elif line.startswith( '#' ):
-            # We'll save the original comments
-            print >> out_file, line
-            
-    out_file.close()
-
-    if skipped_lines > 0:
-        print 'Data issue: skipped %d invalid lines starting at line #%d which is "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
-        if skipped_lines == i + 1:
-            print 'Consider changing the metadata for the input dataset by clicking on the pencil icon in the history item.'
-
-if __name__ == "__main__": main()
--- a/tools/stats/column_maker.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,118 +0,0 @@
-#!/usr/bin/env python
-# This tool takes a tab-delimited textfile as input and creates another column in the file which is the result of
-# a computation performed on every row in the original file.  The tool will skip over invalid lines within the file,
-# informing the user about the number of lines skipped.  
-import sys, re, os.path
-from galaxy import eggs
-from galaxy.tools import validation
-from galaxy.datatypes import metadata
-from math import log,exp,sqrt,ceil,floor
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-inp_file = sys.argv[1]
-out_file = sys.argv[2]
-expr = sys.argv[3]
-round_result = sys.argv[4]
-try:
-    in_columns = int( sys.argv[5] )
-except:
-    stop_err( "Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data." )
-if in_columns < 2:
-    # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method.
-    stop_err( "Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data." )
-try:
-    in_column_types = sys.argv[6].split( ',' )
-except:
-    stop_err( "Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data." )
-if len( in_column_types ) != in_columns:
-    stop_err( "The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data." )
-    
-# Unescape if input has been escaped
-mapped_str = {
-    '__lt__': '<',
-    '__le__': '<=',
-    '__eq__': '==',
-    '__ne__': '!=',
-    '__gt__': '>',
-    '__ge__': '>=',
-    '__sq__': '\'',
-    '__dq__': '"',
-}
-for key, value in mapped_str.items():
-    expr = expr.replace( key, value )
-
-# Prepare the column variable names and wrappers for column data types
-cols, type_casts = [], []
-for col in range( 1, in_columns + 1 ):
-    col_name = "c%d" % col
-    cols.append( col_name )
-    col_type = in_column_types[ col - 1 ].strip()
-    if round_result == 'no' and col_type == 'int':
-        col_type = 'float'
-    type_cast = "%s(%s)" % ( col_type, col_name )
-    type_casts.append( type_cast )
-        
-col_str = ', '.join( cols )    # 'c1, c2, c3, c4'
-type_cast_str = ', '.join( type_casts )  # 'str(c1), int(c2), int(c3), str(c4)'
-assign = "%s = line.split( '\\t' )" % col_str
-wrap = "%s = %s" % ( col_str, type_cast_str )
-skipped_lines = 0
-first_invalid_line = 0
-invalid_line = None
-lines_kept = 0
-total_lines = 0
-out = open( out_file, 'wt' )
-
-# Read input file, skipping invalid lines, and perform computation that will result in a new column
-code = '''
-for i, line in enumerate( file( inp_file ) ):
-    total_lines += 1
-    line = line.rstrip( '\\r\\n' )
-    if not line or line.startswith( '#' ):
-        skipped_lines += 1
-        if not invalid_line:
-            first_invalid_line = i + 1
-            invalid_line = line
-        continue
-    try:
-        %s
-        %s
-        new_val = %s
-        if round_result == "yes":
-            new_val = int( round( new_val ) )
-        new_line = line + '\\t' + str( new_val )
-        print >> out, new_line
-        lines_kept += 1
-    except:
-        skipped_lines += 1
-        if not invalid_line:
-            first_invalid_line = i + 1
-            invalid_line = line
-''' % ( assign, wrap, expr )
-
-valid_expr = True
-try:
-    exec code
-except Exception, e:
-    out.close()
-    if str( e ).startswith( 'invalid syntax' ):
-        valid_expr = False
-        stop_err( 'Expression "%s" likely invalid. See tool tips, syntax and examples.' % expr )
-    else:
-        stop_err( str( e ) )
-
-if valid_expr:
-    out.close()
-    valid_lines = total_lines - skipped_lines
-    print 'Creating column %d with expression %s' % ( in_columns + 1, expr )
-    if valid_lines > 0:
-        print 'kept %4.2f%% of %d valid lines (%d total).' % ( 100.0 * lines_kept / valid_lines, valid_lines, total_lines )
-    else:
-        print 'Possible invalid expression "%s" or non-existent column referenced. See tool tips, syntax and examples.' % expr
-    if skipped_lines > 0:
-        print 'Skipped %d invalid lines starting at line #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
--- a/tools/stats/column_maker.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,83 +0,0 @@
-<tool id="Add_a_column1" name="Compute" version="1.1.0">
-  <description>an expression on every row</description>
-  <command interpreter="python">
-    column_maker.py $input $out_file1 "$cond" $round ${input.metadata.columns} "${input.metadata.column_types}"
-  </command>
-  <inputs>
-    <param name="cond" size="40" type="text" value="c3-c2" label="Add expression"/>
-    <param format="tabular" name="input" type="data" label="as a new column to" help="Dataset missing? See TIP below"/>
-    <param name="round" type="select" label="Round result?">
-      <option value="no">NO</option>
-      <option value="yes">YES</option>
-    </param>    
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="cond" value="c3-c2"/>
-      <param name="input" value="1.bed"/>
-      <param name="round" value="no"/>
-      <output name="out_file1" file="column_maker_out1.interval"/>
-    </test>
-    <test>
-      <param name="cond" value="c4*1"/>
-      <param name="input" value="1.interval"/>
-      <param name="round" value="no"/>
-      <output name="out_file1" file="column_maker_out2.interval"/>
-    </test>
-    <test>
-      <param name="cond" value="c4*1"/>
-      <param name="input" value="1.interval"/>
-      <param name="round" value="yes"/>
-      <output name="out_file1" file="column_maker_out3.interval"/>
-    </test>
-  </tests>
-  <help>
-
- .. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
------
-
-**What it does**
-
-This tool computes an expression for every row of a dataset and appends the result as a new column (field).
-
-- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file
-
-- **c3-c2** will add a length column to the dataset if **c2** and **c3** are start and end positions
-
------
-
-**Example**
-
-If this is your input::
-
-   chr1  151077881  151077918  2  200  -
-   chr1  151081985  151082078  3  500  +
-
-computing "c4*c5" will produce::
-
-   chr1  151077881  151077918  2  200  -   400.0
-   chr1  151081985  151082078  3  500  +  1500.0
-    
-if, at the same time, "Round result?" is set to **YES** results will look like this::
-
-   chr1  151077881  151077918  2  200  -   400
-   chr1  151081985  151082078  3  500  +  1500
-
-You can also use this tool to evaluate expressions. For example, computing "c3>=c2" for Input will result in the following::
-
-   chr1  151077881  151077918  2  200  -  True
-   chr1  151081985  151082078  3  500  +  True
-
-or computing "type(c2)==type('') for Input will return::
-
-   chr1  151077881  151077918  2  200  -  False
-   chr1  151081985  151082078  3  500  +  False
-
-</help>
-</tool>
--- a/tools/stats/cor.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,88 +0,0 @@
-#!/usr/bin/env python
-#Greg Von Kuster
-"""
-Calculate correlations between numeric columns in a tab delim file.
-usage: %prog infile output.txt columns method
-"""
-
-import sys
-from rpy import *
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-    
-def main():
-    method = sys.argv[4]
-    assert method in ( "pearson", "kendall", "spearman" )
-
-    try:
-        columns = map( int, sys.argv[3].split( ',' ) )
-    except:
-        stop_err( "Problem determining columns, perhaps your query does not contain a column of numerical data." )
-    
-    matrix = []
-    skipped_lines = 0
-    first_invalid_line = 0
-    invalid_value = ''
-    invalid_column = 0
-
-    for i, line in enumerate( file( sys.argv[1] ) ):
-        valid = True
-        line = line.rstrip('\n\r')
-
-        if line and not line.startswith( '#' ): 
-            # Extract values and convert to floats
-            row = []
-            for column in columns:
-                column -= 1
-                fields = line.split( "\t" )
-                if len( fields ) <= column:
-                    valid = False
-                else:
-                    val = fields[column]
-                    if val.lower() == "na": 
-                        row.append( float( "nan" ) )
-                    else:
-                        try:
-                            row.append( float( fields[column] ) )
-                        except:
-                            valid = False
-                            skipped_lines += 1
-                            if not first_invalid_line:
-                                first_invalid_line = i+1
-                                invalid_value = fields[column]
-                                invalid_column = column+1
-        else:
-            valid = False
-            skipped_lines += 1
-            if not first_invalid_line:
-                first_invalid_line = i+1
-
-        if valid:
-            matrix.append( row )
-
-    if skipped_lines <= i:
-        try:
-            out = open( sys.argv[2], "w" )
-        except:
-            stop_err( "Unable to open output file" )
-
-        # Run correlation
-        try:
-            value = r.cor( array( matrix ), use="pairwise.complete.obs", method=method )
-        except Exception, exc:
-            out.close()
-            stop_err("%s" %str( exc ))
-        for row in value:
-            print >> out, "\t".join( map( str, row ) )
-        out.close()
-
-    if skipped_lines > 0:
-        msg = "..Skipped %d lines starting with line #%d. " %( skipped_lines, first_invalid_line )
-        if invalid_value and invalid_column > 0:
-            msg += "Value '%s' in column %d is not numeric." % ( invalid_value, invalid_column )
-        print msg
-
-if __name__ == "__main__":
-    main()
--- a/tools/stats/cor.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-<tool id="cor2" name="Correlation">
-  <description>for numeric columns</description>
-  <command interpreter="python">cor.py $input1 $out_file1 $numeric_columns $method</command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Dataset" help="Dataset missing? See TIP below"/>
-    <param name="numeric_columns" label="Numerical columns" type="data_column" numerical="True" multiple="True" data_ref="input1" help="Multi-select list - hold the appropriate key while clicking to select multiple columns" />
-    <param name="method" type="select" label="Method">
-      <option value="pearson">Pearson</option>
-      <option value="kendall">Kendall rank</option>
-      <option value="spearman">Spearman rank</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="txt" name="out_file1" />
-  </outputs>
-  <requirements>
-    <requirement type="python-module">rpy</requirement>
-  </requirements>
-  <tests>
-    <!--
-    Test a tabular input with the first line being a comment without a # character to start
-    -->
-    <test>
-      <param name="input1" value="cor.tabular" />
-      <param name="numeric_columns" value="2,3" />
-      <param name="method" value="pearson" />
-      <output name="out_file1" file="cor_out.txt" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
-.. class:: warningmark
-
-Missing data ("nan") removed from each pairwise comparison
-
------
-
-**Syntax**
-
-This tool computes the matrix of correlation coefficients between numeric columns.
-
-- All invalid, blank and comment lines are skipped when performing computations.  The number of skipped lines is displayed in the resulting history item.
-
-- **Pearson's Correlation** reflects the degree of linear relationship between two variables. It ranges from +1 to -1. A correlation of +1 means that there is a perfect positive linear relationship between variables. The formula for Pearson's correlation is:
-
-    .. image:: ./static/images/pearson.png
-
-    where n is the number of items
-
-- **Kendall's rank correlation** is used to measure the degree of correspondence between two rankings and to assess the significance of this correspondence. The formula for Kendall's rank correlation is:
-
-    .. image:: ./static/images/kendall.png
-
-    where n is the number of items, and P is the sum, over all items, of the number of items ranked after the given item by both rankings.
-
-- **Spearman's rank correlation** assesses how well an arbitrary monotonic function could describe the relationship between two variables, without making any assumptions about the frequency distribution of the variables. The formula for Spearman's rank correlation is
-
-    .. image:: ./static/images/spearman.png
-
-    where D is the difference between the ranks of corresponding values of X and Y, and N is the number of pairs of values.
-
------
-
-**Example**
-
-- Input file::
-
-    #Person	Height	Self Esteem
-    1		68		4.1
-    2 		71 		4.6
-    3 		62 		3.8
-    4 		75 		4.4
-    5 		58 		3.2
-    6 		60 		3.1
-    7 		67 		3.8
-    8 		68 		4.1
-    9 		71 		4.3
-    10 		69 		3.7
-    11 		68 		3.5
-    12 		67 		3.2
-    13 		63 		3.7
-    14 		62 		3.3
-    15 		60 		3.4
-    16 		63 		4.0
-    17 		65 		4.1
-    18 		67 		3.8
-    19 		63 		3.4
-    20 		61 		3.6
-
-- Computing the correlation coefficients between columns 2 and 3 of the above file (using Pearson's Correlation), the output is::
-
-    1.0	0.730635686279
-    0.730635686279	1.0
-
-  So the correlation for our twenty cases is .73, which is a fairly strong positive relationship.
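-
-- For reference, the same coefficient can be reproduced outside Galaxy with a few lines of plain Python (a minimal sketch, independent of this tool's rpy-based implementation)::
-
-    from math import sqrt
-
-    def pearson( xs, ys ):
-        n = len( xs )
-        sx, sy = sum( xs ), sum( ys )
-        sxx = sum( x * x for x in xs )
-        syy = sum( y * y for y in ys )
-        sxy = sum( x * y for x, y in zip( xs, ys ) )
-        return ( n * sxy - sx * sy ) / sqrt( ( n * sxx - sx * sx ) * ( n * syy - sy * sy ) )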
-  </help>
-</tool>
--- a/tools/stats/correlation.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,84 +0,0 @@
-#!/usr/bin/perl
-
-###########################################################################
-# Purpose: To calculate the correlation of two sets of scores in one file.
-# Usage: correlation.pl infile.bed output.txt column1 column2
-#        (column start from 1)
-# Written by: Yi Zhang  (June, 2005)
-###########################################################################
-if (!$ARGV[0] || !$ARGV[1] || !defined($ARGV[2]) || !defined($ARGV[3]) ) {
-   print STDERR "Usage: correlation.pl infile.bed output.txt column1 column2\n";
-   print STDERR "       (column start from 1)\n"; 
-   exit;
-}
-my $file = $ARGV[0];
-my $out = $ARGV[1];
-
-die "<font color=\"yellow\">The input columns contain numerical values: $ARGV[2], $ARGV[3]</font>.\n" if ($ARGV[2] =~ /[a-zA-Z]+/ || $ARGV[3] =~ /[a-zA-Z]+/);
-
-my $col1 = $ARGV[2] - 1;
-my $col2 = $ARGV[3] - 1;
-
-my ($f, $o);
-my (@a, @b);
-
-my $n_t = 0;
-open($f, $file) or die "Couldn't open $file, $!\n";
-while(<$f>) {
-  chomp;
-  my @t = split(/\t/);
-  if ($n_t == 0) { 
-     $n_t = scalar(@t) - 1; 
-     die "<font color=\"yellow\">The input column number exceeds the size of the file: $col1, $col2, $n_t</font>\n" if ( $col1 > $n_t || $col2 > $n_t );
-  }
-  die "<font color=\"yellow\">The columns you have selected contain non numeric characters:$t[$col1] and $t[$col2] \n</font>" if ($t[$col1] =~ /[a-zA-Z]+/ || $t[$col2] =~ /[a-zA-Z]+/);  
-  push(@a, $t[$col1]);
-  push(@b, $t[$col2]);
-}
-close($f);
-
-my $result = correlation(\@a, \@b);
-
-open($o, ">$out") or die "Couldn't open $out, $!\n";
-$col1 = $col1 + 1;
-$col2 = $col2 + 1;
-print $o "The correlation of column $col1 and $col2 is $result\n";
-close($o);
-print "The correlation of column $col1 and $col2 is $result\n";
-
-sub correlation {
-   my ($array1ref, $array2ref) = @_;
-   my ($sum1, $sum2);
-   my ($sum1_squared, $sum2_squared); 
-   foreach (@$array1ref) { $sum1 += $_;  $sum1_squared += $_**2; }
-   foreach (@$array2ref) { $sum2 += $_;  $sum2_squared += $_**2; }
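-   # note: n**2 * covariance(x,y) == n*sum(x*y) - sum(x)*sum(y), the standard Pearson numerator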
-   my $numerator = (@$array1ref**2) * covariance($array1ref, $array2ref);
-   my $denominator = sqrt(((@$array1ref * $sum1_squared) - ($sum1**2)) *
-                          ((@$array1ref * $sum2_squared) - ($sum2**2)));
-   my $r;
-   if ($denominator == 0) {
-      print STDERR "The denominator is 0.\n";
-      exit 0;
-   } else {
-      $r = $numerator / $denominator;
-   }
-   return $r;
-}
-
-sub covariance {
-   my ($array1ref, $array2ref) = @_;
-   my ($i, $result);
-   for ($i = 0; $i < @$array1ref; $i++) {
-       $result += $array1ref->[$i] * $array2ref->[$i];
-   }
-   $result /= @$array1ref;
-   $result -= mean($array1ref) * mean($array2ref);
-}
-
-sub mean {
-  my ($arrayref) = @_;
-  my $result;
-  foreach (@$arrayref) { $result += $_; }
-  return $result/@$arrayref;
-}
-
--- a/tools/stats/correlation.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-<tool id="Pearson_and_apos_Correlation1" name="Pearson and apos Correlation">
-  <description>between any two numeric columns</description>
-  <command interpreter="perl">correlation.pl $input $out_file1 $col1 $col2</command>
-  <inputs>
-<!--    <display>on column $col1 and column $col2 of $input</display> -->
-    <param name="col1" size="3" type="text" value="5" label="Correlate data in column"/>
-    <param name="col2" size="3" type="text" value="6" label="with data in column"/>
-    <param format="txt" name="input" type="data" label="in Query"/>
-  </inputs>
-  <outputs>
-    <data format="txt" name="out_file1" />
-  </outputs>
-  <help>Computes Pearson's correlation coefficient between any two numerical columns. Column numbers start at 1.
-</help>
-</tool>
--- a/tools/stats/count_gff_features.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-#!/usr/bin/env python
-# This tool takes a gff file as input and counts the number of features in it.
-
-import sys, fileinput
-from galaxy import eggs
-from galaxy.datatypes.util.gff_util import GFFReaderWrapper
-from bx.intervals.io import GenomicInterval
-
-# Get args.
-input_file = sys.argv[1:]
-
-# Count features.
-count = 0
-for feature in GFFReaderWrapper( fileinput.FileInput( input_file ), fix_strand=True ):
-    if isinstance( feature, GenomicInterval ):
-        count += 1
-
-print count
\ No newline at end of file
--- a/tools/stats/count_gff_features.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-<tool id="count_gff_features" name="Count GFF Features" version="0.1">
-    <description></description>
-    <command interpreter="python">
-        count_gff_features.py $input &gt; $output
-    </command>
-    <inputs>
-        <param format="gff" name="input" type="data" label="GFF Dataset to Filter"/>
-    </inputs>
-    <outputs>
-        <data format="txt" name="output"/>
-    </outputs>
-    <tests>
-        <test>
-            <param name="input" value="gff2bed_in2.gff"/>
-            <output name="output" file="count_gff_features_out1.txt"/>
-        </test>
-        <test>
-            <param name="input" value="gff_filter_by_feature_count_out1.gff"/>
-            <output name="output" file="count_gff_features_out2.txt"/>
-        </test>
-    </tests>
-    <help>
-        Counts the number of features in a GFF dataset. GFF features are often spread across multiple lines; this tool counts the number of
-        features in the dataset rather than the number of lines.
-    </help>
-</tool>
--- a/tools/stats/dna_filtering.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,216 +0,0 @@
-#!/usr/bin/env python
-
-"""
-This tool takes a tab-delimited text file as input and creates filters on columns based on certain properties. The tool will skip over invalid lines within the file, informing the user about the number of lines skipped.
-
-usage: %prog [options]
-    -i, --input=i: tabular input file
-    -o, --output=o: filtered output file
-    -c, --cond=c: conditions to filter on
-    -n, --n_handling=n: how to handle N and X
-    -l, --columns=l: columns 
-    -t, --col_types=t: column types    
-
-"""
-
-#from __future__ import division
-import os.path, re, string, sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-# Older py compatibility
-try:
-    set()
-except:
-    from sets import Set as set
-
-#assert sys.version_info[:2] >= ( 2, 4 )
-
-def get_operands( filter_condition ):
-    # Note that the order of all_operators is important
-    items_to_strip = [ '==', '!=', ' and ', ' or ' ]
-    for item in items_to_strip:
-        if filter_condition.find( item ) >= 0:
-            filter_condition = filter_condition.replace( item, ' ' )
-    operands = set( filter_condition.split( ' ' ) )
-    return operands
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def __main__():
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    input = options.input
-    output = options.output
-    cond = options.cond
-    n_handling = options.n_handling
-    columns = options.columns
-    col_types = options.col_types
-
-    try:
-        in_columns = int( columns )
-        assert col_types  #check to see that the column types variable isn't null
-        in_column_types = col_types.split( ',' )
-    except:
-        stop_err( "Data does not appear to be tabular.  This tool can only be used with tab-delimited data." )
-
-    # Unescape if input has been escaped
-    cond_text = cond.replace( '__eq__', '==' ).replace( '__ne__', '!=' ).replace( '__sq__', "'" )
-    orig_cond_text = cond_text
-    # Expand to allow for DNA codes
-    dot_letters = [ letter for letter in string.uppercase if letter not in \
-                   [ 'A', 'C', 'G', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'N', 'R', 'S', 'V', 'W', 'X', 'Y' ] ]
-    dot_letters.append( '.' )
-    codes = {'A': [ 'A', 'D', 'H', 'M', 'R', 'V', 'W' ],
-             'C': [ 'C', 'B', 'H', 'M', 'S', 'V', 'Y' ],
-             'G': [ 'G', 'B', 'D', 'K', 'R', 'S', 'V' ],
-             'T': [ 'T', 'U', 'B', 'D', 'H', 'K', 'W', 'Y' ],
-             'U': [ 'T', 'U', 'B', 'D', 'H', 'K', 'W', 'Y' ],
-             'K': [ 'G', 'T', 'U', 'B', 'D', 'H', 'K', 'R', 'S', 'V', 'W', 'Y' ],
-             'M': [ 'A', 'C', 'B', 'D', 'H', 'M', 'R', 'S', 'V', 'W', 'Y' ],
-             'R': [ 'A', 'G', 'B', 'D', 'H', 'K', 'M', 'R', 'S', 'V', 'W' ],
-             'Y': [ 'C', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'S', 'V', 'W', 'Y' ],
-             'S': [ 'C', 'G', 'B', 'D', 'H', 'K', 'M', 'R', 'S', 'V', 'Y' ],
-             'W': [ 'A', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'R', 'V', 'W', 'Y' ],
-             'B': [ 'C', 'G', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'R', 'S', 'V', 'W', 'Y' ],
-             'V': [ 'A', 'C', 'G', 'B', 'D', 'H', 'K', 'M', 'R', 'S', 'V', 'W' ],
-             'H': [ 'A', 'C', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'R', 'S', 'V', 'W', 'Y' ],
-             'D': [ 'A', 'G', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'R', 'S', 'V', 'W', 'Y' ],
-             '.': dot_letters,
-             '-': [ '-' ]}
-    # Add handling for N and X
-    if n_handling == "all":
-        codes[ 'N' ] = [ 'A', 'C', 'G', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'N', 'R', 'S', 'V', 'W', 'X', 'Y' ]
-        codes[ 'X' ] = [ 'A', 'C', 'G', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'N', 'R', 'S', 'V', 'W', 'X', 'Y' ]
-        for code in codes.keys():
-            if code != '.' and code != '-':
-                codes[code].append( 'N' )
-                codes[code].append( 'X' )
-    else:
-        codes[ 'N' ] = dot_letters
-        codes[ 'X' ] = dot_letters
-        codes[ '.' ].extend( [ 'N', 'X' ] )
-    # Expand conditions to allow for DNA codes
-    try:
-        match_replace = {}
-        pat = re.compile( 'c\d+\s*[!=]=\s*[\w\d"\'+-.]+' )
-        matches = pat.findall( cond_text )
-        for match in matches:
-            if match.find( 'chr' ) >= 0 or match.find( 'scaffold' ) >= 0 or match.find( '+' ) >= 0:
-                if match.find( '==' ) >= 0:
-                    match_parts = match.split( '==' )
-                elif match.find( '!=' ) >= 0:
-                    match_parts = match.split( '!=' )
-                else:
-                    raise Exception, "The operators '==' and '!=' were not found."
-                left = match_parts[0].strip()
-                right = match_parts[1].strip()
-                new_match = "(%s)" % ( match )
-            elif match.find( '==' ) > 0:
-                match_parts = match.split( '==' )
-                left = match_parts[0].strip()
-                right = match_parts[1].strip()
-                new_match = '(%s in codes[%s] and %s in codes[%s])' % ( left, right, right, left )
-            elif match.find( '!=' ) > 0 :
-                match_parts = match.split( '!=' )
-                left = match_parts[0].strip()
-                right = match_parts[1].strip()
-                new_match = '(%s not in codes[%s] or %s not in codes[%s])' % ( left, right, right, left )
-            else:
-                raise Exception, "The operators '==' and '!=' were not found." 
-            assert left.startswith( 'c' ), 'The column names should start with c (lowercase)'
-            if right.find( "'" ) >= 0 or right.find( '"' ) >= 0:
-                test = right.replace( "'", '' ).replace( '"', '' )
-                assert test in string.uppercase or test.find( '+' ) >= 0 or test.find( '.' ) >= 0 or test.find( '-' ) >= 0\
-                        or test.startswith( 'chr' ) or test.startswith( 'scaffold' ), \
-                        'The value to search for should be a valid base, code, plus sign, chromosome (like "chr1") or scaffold (like "scaffold5"). ' \
-                        'Use the general filter tool to filter on anything else first'
-            else:
-                assert right.startswith( 'c' ), 'The column names should start with c (lowercase)'
-            match_replace[match] = new_match
-        if len( match_replace.keys() ) == 0:
-            raise Exception, 'There do not appear to be any valid conditions'
-        for match in match_replace.keys():
-            cond_text = cond_text.replace( match, match_replace[match] )
-    except Exception, e:
-        stop_err( "At least one of your conditions is invalid. Make sure to use only '!=' or '==', valid column numbers, and valid base values.\n" + str(e) )
-
-    # Attempt to determine if the condition includes executable stuff and, if so, exit
-    secured = dir()
-    operands = get_operands( cond_text )
-    for operand in operands:
-        try:
-            check = int( operand )
-        except:
-            if operand in secured:
-                stop_err( "Illegal value '%s' in condition '%s'" % ( operand, cond_text ) )
-
-    # Prepare the column variable names and wrappers for column data types
-    cols, type_casts = [], []
-    for col in range( 1, in_columns + 1 ):
-        col_name = "c%d" % col
-        cols.append( col_name )
-        col_type = in_column_types[ col - 1 ]
-        type_cast = "%s(%s)" % ( col_type, col_name )
-        type_casts.append( type_cast )
-
-    col_str = ', '.join( cols )    # 'c1, c2, c3, c4'
-    type_cast_str = ', '.join( type_casts )  # 'str(c1), int(c2), int(c3), str(c4)'
-    assign = "%s = line.split( '\\t' )" % col_str
-    wrap = "%s = %s" % ( col_str, type_cast_str )
-    skipped_lines = 0
-    first_invalid_line = 0
-    invalid_line = None
-    lines_kept = 0
-    total_lines = 0
-    out = open( output, 'wt' )
-    # Read and filter input file, skipping invalid lines
-    code = '''
-for i, line in enumerate( file( input ) ):
-    total_lines += 1
-    line = line.rstrip( '\\r\\n' )
-    if not line or line.startswith( '#' ):
-        skipped_lines += 1
-        if not invalid_line:
-            first_invalid_line = i + 1
-            invalid_line = line
-        continue
-    try:
-        %s = line.split( '\\t' )
-        %s = %s
-        if %s:
-            lines_kept += 1
-            print >> out, line
-    except Exception, e:
-        skipped_lines += 1
-        if not invalid_line:
-            first_invalid_line = i + 1
-            invalid_line = line
-''' % ( col_str, col_str, type_cast_str, cond_text )
-
-    valid_filter = True
-    try:
-        exec code
-    except Exception, e:
-        out.close()
-        if str( e ).startswith( 'invalid syntax' ):
-            valid_filter = False
-            stop_err( 'Filter condition "%s" likely invalid. See tool tips, syntax and examples. %s' % ( orig_cond_text, str( e ) ) )
-        else:
-            stop_err( str( e ) )
-
-    if valid_filter:
-        out.close()
-        valid_lines = total_lines - skipped_lines
-        print 'Filtering with %s, ' % orig_cond_text
-        if valid_lines > 0:
-            print 'kept %4.2f%% of %d valid lines (%d total lines).' % ( 100.0*lines_kept/valid_lines, valid_lines, total_lines )
-        else:
-            print 'Possible invalid filter condition "%s" or non-existent column referenced. See tool tips, syntax and examples.' % orig_cond_text
-        if skipped_lines > 0:
-            print 'Skipped %d invalid lines starting at line #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
-    
-if __name__ == "__main__" : __main__()
--- a/tools/stats/dna_filtering.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,136 +0,0 @@
-<tool id="dna_filter" name="Filter on ambiguities" version="1.0.0">
-  <description>in polymorphism datasets</description>
-  <command interpreter="python">
-    dna_filtering.py
-      --input=$input 
-      --output=$out_file1 
-      --cond="$cond" 
-      --n_handling=$n_handling
-      --columns=${input.metadata.columns} 
-      --col_types="${input.metadata.column_types}"
-  </command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Filter" help="Dataset missing? See TIP below."/>
-    <param name="cond" size="40" type="text" value="c4 == 'G'" label="With following condition" help="Double equal signs, ==, must be used as shown above. To filter for an arbitrary string, use the Select tool.">
-      <validator type="empty_field" message="Enter a valid filtering condition, see syntax and examples below."/>
-    </param>
-    <param name="n_handling" type="select" label="What is the meaning of N" help="Everything matches everything, Unknown matches nothing">
-      <option value="all">Everything (A, T, C, G)</option>
-      <option value="none">Unknown</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" ftype="tabular" value="dna_filter_in1.tabular" />
-      <param name="cond" value="c8=='G'" />
-      <param name="n_handling" value="all" />
-      <output name="out_file1" ftype="tabular" file="dna_filter_out1.tabular" />
-    </test>
-    <test>
-      <param name="input" value="dna_filter_in1.tabular" />
-      <param name="cond" value="(c10 == c11 or c17 == c18) and c6 != 'C' and c23 == 'R'" />
-      <param name="n_handling" value="all" />
-      <output name="out_file1" file="dna_filter_out2.tabular" />
-    </test>
-    <test>
-      <param name="input" value="dna_filter_in1.tabular" />
-      <param name="cond" value="c4=='B' or c9==c10" />
-      <param name="n_handling" value="none" />
-      <output name="out_file1" file="dna_filter_out3.tabular" />
-    </test>
-    <test>
-      <param name="input" value="dna_filter_in1.tabular" />
-      <param name="cond" value="c1!='chr1' and c7!='Y' and c25!='+'" />
-      <param name="n_handling" value="none" />
-      <output name="out_file1" file="dna_filter_out4.tabular" />
-    </test>
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
-.. class:: warningmark
-
-**TIP:** This tool is intended primarily for comparing column values (such as "c5==c12"), although it is also possible to filter on specific values (like "c6!='G'"). Be aware that when searching for specific values, any possible match is considered. So if you search on "c6!='G'", rows will be excluded when c6 is G, K, R, S, B, V, or D (plus N or X if you set N handling to "Everything"), because any of those values could indicate G.
-
------
-
-**What it does**
-
-This tool is written for a very specific case related to an analysis of polymorphism data. Suppose you have a table of SNP data that looks like this::
-
-  chromosome start end patient1 patient2 patient3 patient4
-  --------------------------------------------------------
-  chr1       100   101 A        M        C        R 
-  chr1       200   201 T        K        C        C 
-  
-and you want to select all rows where patient1 has the same base as patient2. Unfortunately you cannot do this with the *Filter and Sort -> Filter* tool because it does not understand DNA ambiguity codes (see below). For example, at position 100 patient1 is the same as patient2 because M is a mix of As and Cs. This tool is designed to make filtering on ambiguities possible.
-
------
-
-**Syntax**
-
-The filter tool allows you to restrict the dataset using simple conditional statements:
-
-- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file (e.g., **c4 == c5**)
-- When using 'equal-to' operator **double equal sign '==' must be used** ( e.g., **c1=='chr1'** )
-- Non-numerical values must be included in single or double quotes ( e.g., **c6=='C'** )
-- Filtering condition can include logical operators, but **make sure operators are all lower case** ( e.g., **(c1!='chrX' and c1!='chrY') or c6=='+'** )
-
-------
-
-**Allowed types of filtering**
-
-The following types of filtering are allowed:
-
-- Testing columns for equality (e.g., c2 == c4 or c2 != c4)
-- Testing that a column contains a particular base (e.g., c4 == 'C'). Only bases listed in *DNA Codes* below are allowed.
-- Testing that a column represents a plus or a minus strand (e.g., c3 == '+' or c3 != '-')
-- Testing that a column is a chromosome (c1 == 'chrX') or a scaffold (c1 == 'scaffold87976')
-
-All other types of filtering should be done with the *Filter and Sort -> Filter* tool.
-
------
-
-**DNA Codes**
-
-The following are the DNA codes used for filtering::
-
-  Code   Meaning
-  ----   --------------------------
-   A     A
-   T     T
-   U     T
-   G     G
-   C     C
-   K     G or T
-   M     A or C
-   R     A or G
-   Y     C or T
-   S     C or G
-   W     A or T
-   B     C, G or T
-   V     A, C or G
-   H     A, C or T
-   D     A, G or T
-   X     A, C, G or T
-   N     A, C, G or T
-   .     not (A, C, G or T)
-   -     gap of indeterminate length
-
------
-
-**Example**
-
-- **c8=='A'** selects lines in which the eighth column is A, M, R, W, V, H, or D, or N or X if appropriate
-- **c12==c15** selects lines where the value in the twelfth column could be the same as the fifteenth and the fifteenth column could be the same as the twelfth column (based on appropriate codes)
-- **c9!=c19** selects lines where column nine could not be the same as column nineteen or column nineteen could not be the same as column nine (using appropriate codes)
-- **c4 == 'A' and c4 == c5** selects lines where columns 4 and 5 are both A, M, R, W, V, H, D or N, or X if appropriate
-
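-The "could be equal" tests reduce to mutual membership in a code table. As a minimal Python sketch (illustrative only; just two entries of the full IUPAC table are shown)::
-
-  codes = { 'A': [ 'A', 'D', 'H', 'M', 'R', 'V', 'W' ],
-            'M': [ 'A', 'C', 'B', 'D', 'H', 'M', 'R', 'S', 'V', 'W', 'Y' ] }
-
-  def could_match( left, right ):
-      # a condition like c4 == c5 is rewritten to a mutual-membership test
-      return left in codes[right] and right in codes[left]
-
-  could_match( 'A', 'M' )  # True: M means "A or C", which could be A
-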
-</help>
-</tool>
--- a/tools/stats/filtering.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,139 +0,0 @@
-#!/usr/bin/env python
-# This tool takes a tab-delimited text file as input and creates filters on columns based on certain properties.
-# The tool will skip over invalid lines within the file, informing the user about the number of lines skipped.
-
-from __future__ import division
-import sys, re, os.path
-from galaxy import eggs
-
-# Older py compatibility
-try:
-    set()
-except:
-    from sets import Set as set
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def get_operands( filter_condition ):
-    # Note that the order of all_operators is important
-    items_to_strip = ['+', '-', '**', '*', '//', '/', '%', '<<', '>>', '&', '|', '^', '~', '<=', '<', '>=', '>', '==', '!=', '<>', ' and ', ' or ', ' not ', ' is ', ' is not ', ' in ', ' not in ']
-    for item in items_to_strip:
-        if filter_condition.find( item ) >= 0:
-            filter_condition = filter_condition.replace( item, ' ' )
-    operands = set( filter_condition.split( ' ' ) )
-    return operands
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-in_fname = sys.argv[1]
-out_fname = sys.argv[2]
-cond_text = sys.argv[3]
-try:
-    in_columns = int( sys.argv[4] )
-    assert sys.argv[5]  #check to see that the column types variable isn't null
-    in_column_types = sys.argv[5].split( ',' )
-except:
-    stop_err( "Data does not appear to be tabular.  This tool can only be used with tab-delimited data." )
-
-# Unescape if input has been escaped
-mapped_str = {
-    '__lt__': '<',
-    '__le__': '<=',
-    '__eq__': '==',
-    '__ne__': '!=',
-    '__gt__': '>',
-    '__ge__': '>=',
-    '__sq__': '\'',
-    '__dq__': '"',
-}
-for key, value in mapped_str.items():
-    cond_text = cond_text.replace( key, value )
-    
-# Attempt to determine if the condition includes executable stuff and, if so, exit
-secured = dir()
-operands = get_operands(cond_text)
-for operand in operands:
-    try:
-        check = int( operand )
-    except:
-        if operand in secured:
-            stop_err( "Illegal value '%s' in condition '%s'" % ( operand, cond_text ) )
-
-# Work out which columns are used in the filter (save using 1 based counting)
-used_cols = sorted(set(int(match.group()[1:]) \
-                   for match in re.finditer('c(\d+)', cond_text)))
-largest_col_index = max(used_cols)
-
-# Prepare the column variable names and wrappers for column data types. Only 
-# cast columns used in the filter.
-cols, type_casts = [], []
-for col in range( 1, largest_col_index + 1 ):
-    col_name = "c%d" % col
-    cols.append( col_name )
-    col_type = in_column_types[ col - 1 ]
-    if col in used_cols:
-        type_cast = "%s(%s)" % ( col_type, col_name )
-    else:
-        #If we don't use this column, don't cast it.
-        #Otherwise we get errors on things like optional integer columns.
-        type_cast = col_name
-    type_casts.append( type_cast )
- 
-col_str = ', '.join( cols )    # 'c1, c2, c3, c4'
-type_cast_str = ', '.join( type_casts )  # 'str(c1), int(c2), int(c3), str(c4)'
-assign = "%s, = line.split( '\\t' )[:%i]" % ( col_str, largest_col_index )
-wrap = "%s = %s" % ( col_str, type_cast_str )
-skipped_lines = 0
-invalid_lines = 0
-first_invalid_line = 0
-invalid_line = None
-lines_kept = 0
-total_lines = 0
-out = open( out_fname, 'wt' )
-    
-# Read and filter input file, skipping invalid lines
-code = '''
-for i, line in enumerate( file( in_fname ) ):
-    total_lines += 1
-    line = line.rstrip( '\\r\\n' )
-    if not line or line.startswith( '#' ):
-        skipped_lines += 1
-        continue
-    try:
-        %s
-        %s
-        if %s:
-            lines_kept += 1
-            print >> out, line
-    except:
-        invalid_lines += 1
-        if not invalid_line:
-            first_invalid_line = i + 1
-            invalid_line = line
-''' % ( assign, wrap, cond_text )
-
-valid_filter = True
-try:
-    exec code
-except Exception, e:
-    out.close()
-    if str( e ).startswith( 'invalid syntax' ):
-        valid_filter = False
-        stop_err( 'Filter condition "%s" likely invalid. See tool tips, syntax and examples.' % cond_text )
-    else:
-        stop_err( str( e ) )
-
-if valid_filter:
-    out.close()
-    valid_lines = total_lines - skipped_lines
-    print 'Filtering with %s, ' % cond_text
-    if valid_lines > 0:
-        print 'kept %4.2f%% of %d valid lines (%d total lines).' % ( 100.0*lines_kept/valid_lines, valid_lines, total_lines )
-    else:
-        print 'Possible invalid filter condition "%s" or non-existent column referenced. See tool tips, syntax and examples.' % cond_text
-    if invalid_lines:
-        print 'Skipped %d invalid line(s) starting at line #%d: "%s"' % ( invalid_lines, first_invalid_line, invalid_line )
-    if skipped_lines:
-        print 'Skipped %i comment (starting with #) or blank line(s)' % skipped_lines
--- a/tools/stats/filtering.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-<tool id="Filter1" name="Filter" version="1.1.0">
-  <description>data on any column using simple expressions</description>
-  <command interpreter="python">
-    filtering.py $input $out_file1 "$cond" ${input.metadata.columns} "${input.metadata.column_types}"
-  </command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Filter" help="Dataset missing? See TIP below."/>
-    <param name="cond" size="40" type="text" value="c1=='chr22'" label="With following condition" help="Double equal signs, ==, must be used as shown above. To filter for an arbitrary string, use the Select tool.">
-      <validator type="empty_field" message="Enter a valid filtering condition, see syntax and examples below."/>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="1.bed"/>
-      <param name="cond" value="c1=='chr22'"/>
-      <output name="out_file1" file="filter1_test1.bed"/>
-    </test>
-    <test>
-      <param name="input" value="7.bed"/>
-      <param name="cond" value="c1=='chr1' and c3-c2>=2000 and c6=='+'"/>
-      <output name="out_file1" file="filter1_test2.bed"/>
-    </test>
-    <!-- Test filtering of file with a variable number of columns. -->
-    <test>
-      <param name="input" value="filter1_in3.sam"/>
-      <param name="cond" value="c3=='chr1' and c5>5"/>
-      <output name="out_file1" file="filter1_test3.sam"/>
-    </test>
-    <test>
-      <param name="input" value="filter1_inbad.bed"/>
-      <param name="cond" value="c1=='chr22'"/>
-      <output name="out_file1" file="filter1_test4.bed"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-Double equal signs, ==, must be used as *"equal to"* (e.g., **c1 == 'chr22'**)
-
-.. class:: infomark
-
-**TIP:** Attempting to apply a filtering condition may throw exceptions if the data type (e.g., string, integer) in every line of the columns being filtered is not appropriate for the condition (e.g., attempting certain numerical calculations on strings).  If an exception is thrown when applying the condition to a line, that line is skipped as invalid for the filter condition.  The number of invalid skipped lines is documented in the resulting history item as a "Condition/data issue".
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
------
-
-**Syntax**
-
-The filter tool allows you to restrict the dataset using simple conditional statements.
-
-- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file
-- Make sure that multi-character operators contain no white space ( e.g., **&lt;=** is valid while **&lt; =** is not valid )
-- When using 'equal-to' operator **double equal sign '==' must be used** ( e.g., **c1=='chr1'** )
-- Non-numerical values must be included in single or double quotes ( e.g., **c6=='+'** )
-- Filtering condition can include logical operators, but **make sure operators are all lower case** ( e.g., **(c1!='chrX' and c1!='chrY') or not c6=='+'** )
-
------
-
-**Example**
-
-- **c1=='chr1'** selects lines in which the first column is chr1
-- **c3-c2&lt;100*c4** selects lines where the value of column 3 minus column 2 is less than 100 times the value of column 4
-- **len(c2.split(',')) &lt; 4** will select lines where the second column has fewer than four comma-separated elements
-- **c2>=1** selects lines in which the value of column 2 is greater than or equal to 1
-- Numbers should not contain commas - **c2&lt;=44,554,350** will not work, but **c2&lt;=44554350** will
-- Some words in the data can be used, but must be single or double quoted ( e.g., **c3=='exon'** )
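-
-Under the hood, each line is split on TABs, the pieces are bound to the names **c1**, **c2**, ..., and the condition is evaluated as a Python expression. A minimal sketch of that idea (a hypothetical helper, not this tool's actual code)::
-
-  def keep( line, cond ):
-      fields = line.rstrip( '\r\n' ).split( '\t' )
-      env = dict( ( 'c%d' % ( i + 1 ), v ) for i, v in enumerate( fields ) )
-      return eval( cond, {}, env )
-
-  keep( 'chr22\t10\t20', "c1=='chr22'" )  # True
-
-(The real tool additionally screens the condition for anything executable before evaluating it.)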
-
-</help>
-</tool>
--- a/tools/stats/generate_matrix_for_pca_lda.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,147 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-use warnings;
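-
-# Read a feature matrix (samples in rows, features in columns) and a label
-# table; split the samples into two classes by their label, then write the
-# matrix transposed (features in rows, samples in columns) with per-class
-# sums and a final row of X/Y class indicators.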
-
-my $Input_Matrix = $ARGV[0];
-my $Input_Label = $ARGV[1];
-
-my %Hash_X = ();
-my %Hash_Y = ();
-my $My_Num_X = 0;
-my $My_Num_Y = 0;
-
-open (OUT, "> $ARGV[2]");
-
-open (LABEL, "< $Input_Label")     ||
-	die "Sorry, I couldn't open the escape.txt for clone: $!\n";
-
-my $Label_Index = 0;
-my $X_Label;
-my $input_Label;
-while (defined($input_Label = <LABEL>)){
-	chomp($input_Label);
-	my @cArray_Label = $input_Label =~ /(\S+)\s*/g;
-	if ($input_Label =~ /\w/){
-		if ($Label_Index == 0){
-			$Hash_X{$cArray_Label[0]} = $cArray_Label[1];
-			$X_Label = $cArray_Label[1];
-			$Label_Index = 1;
-		}else{
-			if ($cArray_Label[1] eq $X_Label){
-				$Hash_X{$cArray_Label[0]} = $cArray_Label[1];
-			}else{
-				$Hash_Y{$cArray_Label[0]} = $cArray_Label[1];
-			}
-		}
-	}
-}
-close(LABEL);
-
-open (MATRIX, "< $Input_Matrix")     ||
-	die "Sorry, I couldn't open the escape.txt for clone: $!\n";
-
-my %Hash_Matrix = ();
-my %Hash_Features = ();
-my @cArray_Features = ();
-
-my %Hash_Sum = ();
-my $Matrix_Index = 0;
-my $input_Matrix;
-while (defined($input_Matrix = <MATRIX>)){
-	chomp($input_Matrix);
-	my @cArray_Matrix = $input_Matrix =~ /(\S+)\s*/g;
-	if ($input_Matrix =~ /\w/){
-		if ($Matrix_Index == 0){
-			@cArray_Features = @cArray_Matrix;
-			my $Temp_Num_Array = scalar(@cArray_Matrix);
-			my $Temp_Index = 0;
-			for(;$Temp_Index < $Temp_Num_Array; $Temp_Index++){
-				$Hash_Features{$cArray_Matrix[$Temp_Index]} = "BOL";	
-				$Hash_Sum{$cArray_Matrix[$Temp_Index]} = 0;	
-			}
-			$Matrix_Index = 1;
-		}else{
-			$Hash_Matrix{$cArray_Matrix[0]} = $input_Matrix;
-		}
-	}
-}
-close(MATRIX);	
-
-my $Trace_Key;
-
-foreach $Trace_Key (sort {$a cmp $b} keys %Hash_X){
-	my @cArray_Trace_X = $Hash_Matrix{$Trace_Key} =~ /(\S+)\s*/g;
-	my $Num_Array_Feature_X = scalar(@cArray_Features);
-	my $Index_Feature_X = 0;
-	for(;$Index_Feature_X < $Num_Array_Feature_X; $Index_Feature_X++){
-		if ($Hash_Features{$cArray_Features[$Index_Feature_X]} eq "BOL"){
-			$Hash_Features{$cArray_Features[$Index_Feature_X]} = $cArray_Trace_X[$Index_Feature_X + 1];
-		}else{
-			$Hash_Features{$cArray_Features[$Index_Feature_X]} = $Hash_Features{$cArray_Features[$Index_Feature_X]} . "\t" . $cArray_Trace_X[$Index_Feature_X + 1];
-		}
-
-		$Hash_Sum{$cArray_Features[$Index_Feature_X]} += $cArray_Trace_X[$Index_Feature_X + 1];
-	} 		
-	$My_Num_X ++;
-}
-
-my $Append_Key;
-foreach $Append_Key (keys %Hash_Features){
-	$Hash_Features{$Append_Key} = $Hash_Features{$Append_Key} . "\t" . $Hash_Sum{$Append_Key};
-	$Hash_Sum{$Append_Key} = 0;
-}
-
-foreach $Trace_Key (sort {$a cmp $b} keys %Hash_Y){
-	my @cArray_Trace_Y = $Hash_Matrix{$Trace_Key} =~ /(\S+)\s*/g;
-	my $Num_Array_Feature_Y = scalar(@cArray_Features);
-	my $Index_Feature_Y = 0;
-	for(;$Index_Feature_Y < $Num_Array_Feature_Y; $Index_Feature_Y++){
-		if ($Hash_Features{$cArray_Features[$Index_Feature_Y]} eq "BOL"){
-			$Hash_Features{$cArray_Features[$Index_Feature_Y]} = $cArray_Trace_Y[$Index_Feature_Y + 1];
-		}else{
-			$Hash_Features{$cArray_Features[$Index_Feature_Y]} = $Hash_Features{$cArray_Features[$Index_Feature_Y]} . "\t" . $cArray_Trace_Y[$Index_Feature_Y + 1];
-		}
-
-		$Hash_Sum{$cArray_Features[$Index_Feature_Y]} += $cArray_Trace_Y[$Index_Feature_Y + 1];
-	} 		
-	$My_Num_Y ++;
-}
-
-foreach $Append_Key (keys %Hash_Features){
-	$Hash_Features{$Append_Key} = $Hash_Features{$Append_Key} . "\t" . $Hash_Sum{$Append_Key} . "\t" . "EOL";
-}
-
-my $Prt_Key;
-print OUT " \t";
-foreach $Prt_Key (sort {$a cmp $b} keys %Hash_X){
-	print OUT "$Prt_Key \t";
-}
-print OUT "X(SUM) \t";
-
-foreach $Prt_Key (sort {$a cmp $b} keys %Hash_Y){
-	print OUT "$Prt_Key \t";
-}
-print OUT "Y(SUM) \t";
-print OUT "\n";
-
-my $Prt_Index = 0;
-my $Prt_Array_Num = scalar (@cArray_Features);
-for(;$Prt_Index < $Prt_Array_Num; $Prt_Index++){
-	print OUT "$cArray_Features[$Prt_Index] \t$Hash_Features{$cArray_Features[$Prt_Index]}\n";
-}
-
-print OUT " \t";
-my $My_Label_Index = 0;
-for(;$My_Label_Index < $My_Num_X; $My_Label_Index++){
-	print OUT "X \t";
-}
-print OUT " \t";
-
-$My_Label_Index = 0;
-for(;$My_Label_Index < $My_Num_Y; $My_Label_Index++){
-	print OUT "Y \t";
-}
-print OUT " \t\n";
-
-close(OUT);
--- a/tools/stats/generate_matrix_for_pca_lda.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-<tool id="generate_matrix_for_pca_and_lda1" name="Generate A Matrix">
-    <description>for use with PCA and LDA</description>
-    <command interpreter="perl">generate_matrix_for_pca_lda.pl $input_1 $input_2 $output</command>
-
-    <inputs>
-        <param format="tabular" name="input_1" type="data" label="Source file First: a matrix (samples/observations in rows and variables/features in columns)"> </param>
-        <param format="tabular" name="input_2" type="data" label="Source file Second: a table (samples/observations with response/class label)"> </param>
-    </inputs>
-
-    <outputs>
-        <data format="tabular" name="output" />
-    </outputs>
-
-    <tests>
-        <test>
-            <param name="input_1" value="matrix_generator_for_pc_and_lda_input_1.tabular"/>
-            <param name="input_2" value="matrix_generator_for_pc_and_lda_input_2.tabular"/>
-            <output name="output" file="matrix_generator_for_pc_and_lda_output.tabular"/>
-        </test>
-    </tests>
-
-    <help>
-
-.. class:: infomark
-
-**What it does**
-
-This tool generates a matrix to be used for running the Linear Discriminant Analysis described in Carrel et al., 2006 (PMID: 17009873):
-
-*Carrel L, Park C, Tyekucheva S, Dunn J, Chiaromonte F, et al. (2006) Genomic Environment Predicts Expression Patterns on the Human Inactive X Chromosome. PLoS Genet 2(9): e151. doi:10.1371/journal.pgen.0020151*
-
------
-
-**Example**
-
-- Input file (Source file First)
-
-.. image:: ./static/images/tools/lda/first_matrix_generator_example_file.png
-
-
-- Input file (Source file Second)
-
-.. image:: ./static/images/tools/lda/second_matrix_generator_example_file.png
-
-
-</help>
-
-</tool>
--- a/tools/stats/grouping.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,162 +0,0 @@
-#!/usr/bin/env python
-# Guruprasad Ananda
-# Refactored 2011, Kanwei Li
-# Refactored to use numpy instead of rpy
-"""
-This tool provides the SQL "group by" functionality.
-"""
-import sys, commands, tempfile, random
-try:
-    import numpy
-except:
-    from galaxy import eggs
-    eggs.require( "numpy" )
-    import numpy
-
-from itertools import groupby
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
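-# Return the mode(s) of the data; ties are reported as a comma-separated list.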
-def mode(data):
-    counts = {}
-    for x in data:
-        counts[x] = counts.get(x,0) + 1
-    maxcount = max(counts.values())
-    modelist = []
-    for x in counts:
-        if counts[x] == maxcount:
-            modelist.append( str(x) )
-    return ','.join(modelist)
-    
-def main():
-    inputfile = sys.argv[2]
-    ignorecase = int(sys.argv[4])
-    ops = []
-    cols = []
-    round_val = []
-    data_ary = []
-    
-    for var in sys.argv[5:]:
-        op, col, do_round = var.split()
-        ops.append(op)
-        cols.append(col)
-        round_val.append(do_round)
-    """
-    At this point, ops, cols and round_val will look something like this:
-    ops:  ['mean', 'min', 'cat']
-    cols: ['1', '3', '4']
-    round_val: ['no', 'yes', 'no']
-    """
-
-    try:
-        group_col = int( sys.argv[3] )-1
-    except:
-        stop_err( "Group column not specified." )
-    
-    str_ops = ['cat', 'length', 'unique', 'random', 'cat_uniq', 'mode'] #ops that can handle string/non-numeric inputs
-    
-    tmpfile = tempfile.NamedTemporaryFile()
-    
-    try:
-        """
-        The -k option for the Posix sort command is as follows:
-        -k, --key=POS1[,POS2]
-        start a key at POS1, end it at POS2 (origin 1)
-        In other words, column positions start at 1 rather than 0, so 
-        we need to add 1 to group_col.
-        if POS2 is not specified, the newer versions of sort will consider the entire line for sorting. To prevent this, we set POS2=POS1.
-        """
-        case = ''
-        if ignorecase == 1:
-            case = '-f' 
-        command_line = "sort -t '	' %s -k%s,%s -o %s %s" % (case, group_col+1, group_col+1, tmpfile.name, inputfile)
-    except Exception, exc:
-        stop_err( 'Initialization error -> %s' %str(exc) )
-    
-    error_code, stdout = commands.getstatusoutput(command_line)
-    
-    if error_code != 0:
-        stop_err( "Sorting input dataset resulted in error: %s: %s" %( error_code, stdout ))
-        
-    fout = open(sys.argv[1], "w")
-    
-    def is_new_item(line):
-        item = line.strip().split("\t")[group_col]
-        if ignorecase == 1:
-            return item.lower()
-        return item
-        
-    for key, line_list in groupby(tmpfile, key=is_new_item):
-        op_vals = [ [] for op in ops ]
-        out_str = key
-        multiple_modes = False
-        mode_index = None
-        
-        for line in line_list:
-            fields = line.strip().split("\t")
-            for i, col in enumerate(cols):
-                col = int(col)-1 # cXX from galaxy is 1-based
-                try:
-                    val = fields[col].strip()
-                    op_vals[i].append(val)
-                except IndexError:
-                    sys.stderr.write( 'Could not access the value for column %s on line: "%s". Make sure file is tab-delimited.\n' % (col+1, line) )
-                    sys.exit( 1 )
-                
-        # Generate string for each op for this group
-        for i, op in enumerate( ops ):
-            data = op_vals[i]
-            rval = ""
-            if op == "mode":
-                rval = mode( data )
-            elif op == "length":
-                rval = len( data )
-            elif op == "random":
-                rval = random.choice(data)
-            elif op in ['cat', 'cat_uniq']:
-                if op == 'cat_uniq':
-                    data = numpy.unique(data)
-                rval = ','.join(data)
-            elif op == "unique":
-                rval = len( numpy.unique(data) )
-            else:
-                # some kind of numpy fn
-                try:
-                    data = map(float, data)
-                except ValueError:
-                    sys.stderr.write( "Operation %s expected number values but got %s instead.\n" % (op, data) )
-                    sys.exit( 1 )
-                rval = getattr(numpy, op)( data )
-                if round_val[i] == 'yes':
-                    rval = round(rval)
-                else:
-                    rval = '%g' % rval
-                        
-            out_str += "\t%s" % rval
-        
-        fout.write(out_str + "\n")
-    
-    # Generate a useful info message.
-    msg = "--Group by c%d: " %(group_col+1)
-    for i, op in enumerate(ops):
-        if op == 'cat':
-            op = 'concat'
-        elif op == 'cat_uniq':
-            op = 'concat_distinct'
-        elif op == 'length':
-            op = 'count'
-        elif op == 'unique':
-            op = 'count_distinct'
-        elif op == 'random':
-            op = 'randomly_pick'
-        
-        msg += op + "[c" + cols[i] + "] "
-    
-    print msg
-    fout.close()
-    tmpfile.close()
-
-if __name__ == "__main__":
-    main()
--- a/tools/stats/grouping.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
-<tool id="Grouping1" name="Group" version="2.0.0">
-  <description>data by a column and perform aggregate operations on other columns.</description>
-  <command interpreter="python">
-    grouping.py 
-      $out_file1
-      $input1
-      $groupcol
-      $ignorecase
-      #for $op in $operations
-       '${op.optype}
-        ${op.opcol}
-        ${op.opround}'
-      #end for
-  </command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
-    <param name="groupcol" label="Group by column" type="data_column" data_ref="input1" />
-    <param name="ignorecase" type="boolean" truevalue="1" falsevalue="0">
-      <label>Ignore case while grouping?</label>
-    </param>
-    <repeat name="operations" title="Operation">
-      <param name="optype" type="select" label="Type">
-        <option value="mean">Mean</option>
-        <option value="median">Median</option>
-        <option value="mode">Mode</option>
-        <option value="max">Maximum</option>
-        <option value="min">Minimum</option>
-        <option value="sum">Sum</option>
-        <option value="length">Count</option>
-        <option value="unique">Count Distinct</option>
-        <option value="cat">Concatenate</option>
-        <option value="cat_uniq">Concatenate Distinct</option>
-        <option value="random">Randomly pick</option>
-        <option value="std">Standard deviation</option>
-      </param>
-      <param name="opcol" label="On column" type="data_column" data_ref="input1" />
-      <param name="opround" type="select" label="Round result to nearest integer?">
-         <option value="no">NO</option>
-         <option value="yes">YES</option>
-       </param>
-    </repeat>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1" />
-  </outputs>
-  <requirements>
-    <requirement type="python-module">numpy</requirement>
-  </requirements>
-  <tests>
-    <!-- Test valid data -->
-    <test>
-      <param name="input1" value="1.bed"/>
-      <param name="groupcol" value="1"/>
-      <param name="ignorecase" value="true"/>
-      <param name="optype" value="mean"/>
-      <param name="opcol" value="2"/>
-      <param name="opround" value="no"/>
-      <output name="out_file1" file="groupby_out1.dat"/>
-    </test>
-    <!-- Long case but test framework doesn't allow yet
-    <test>
-      <param name="input1" value="1.bed"/>
-      <param name="groupcol" value="1"/>
-      <param name="ignorecase" value="false"/>
-      <param name="operations" value='[{"opcol": "2", "__index__": 0, "optype": "mean", "opround": "no"}, {"opcol": "2", "__index__": 1, "optype": "median", "opround": "no"}, {"opcol": "6", "__index__": 2, "optype": "mode", "opround": "no"}, {"opcol": "2", "__index__": 3, "optype": "max", "opround": "no"}, {"opcol": "2", "__index__": 4, "optype": "min", "opround": "no"}, {"opcol": "2", "__index__": 5, "optype": "sum", "opround": "no"}, {"opcol": "1", "__index__": 6, "optype": "length", "opround": "no"}, {"opcol": "1", "__index__": 7, "optype": "unique", "opround": "no"}, {"opcol": "1", "__index__": 8, "optype": "cat", "opround": "no"}, {"opcol": "6", "__index__": 9, "optype": "cat_uniq", "opround": "no"}, {"opcol": "2", "__index__": 10, "optype": "random", "opround": "no"}, {"opcol": "2", "__index__": 11, "optype": "std", "opround": "no"}]'/>
-      <output name="out_file1" file="groupby_out3.tabular"/>
-    </test>
-    -->
-    <!-- Test data with an invalid value in a column. Can't do it because test framework doesn't allow testing of errors
-    <test>
-      <param name="input1" value="1.tabular"/>
-      <param name="groupcol" value="1"/>
-      <param name="ignorecase" value="true"/>
-      <param name="optype" value="mean"/>
-      <param name="opcol" value="2"/>
-      <param name="opround" value="no"/>
-      <output name="out_file1" file="groupby_out2.dat"/>
-    </test>
-     -->
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
------
-
-**Syntax**
-
-This tool allows you to group the input dataset by a particular column and perform aggregate functions: Mean, Median, Mode, Sum, Max, Min, Count, Concatenate, and Randomly pick on any column(s).
-
-The Concatenate function will take, for each group, each item in the specified column and build a comma-delimited list. Concatenate Distinct will do the same but will build a list of unique items with no repetition.
-
-Count and Count Distinct are equivalent to Concatenate and Concatenate Distinct, but will only count the number of items and will return an integer.
-
-- If multiple modes are present, all are reported.
-
------
-
-**Example**
-
-- For the following input::
-
-   chr22  1000  1003  TTT
-   chr22  2000  2003  aaa
-   chr10  2200  2203  TTT
-   chr10  1200  1203  ttt
-   chr22  1600  1603  AAA
-
-- **Grouping on column 4** while ignoring case, and performing operation **Count on column 1** will return::
-
-   AAA    2
-   TTT    3
-   
-- **Grouping on column 4** while not ignoring case, and performing operation **Count on column 1** will return::
-
-   aaa    1
-   AAA    1
-   ttt    1
-   TTT    2
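-
-For the mechanics, a minimal standard-library sketch (hypothetical; the real tool shells out to sort and then groups) of the case-insensitive count above::
-
-   from itertools import groupby
-
-   # 'input.tabular' is a hypothetical file holding the rows shown above
-   rows = [ line.rstrip( '\n' ).split( '\t' ) for line in open( 'input.tabular' ) ]
-   key = lambda fields: fields[3].lower()   # group by column 4, ignoring case
-   for k, grp in groupby( sorted( rows, key=key ), key=key ):
-       print '%s\t%d' % ( k, len( list( grp ) ) )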
-  </help>
-</tool>
--- a/tools/stats/gsummary.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,114 +0,0 @@
-#!/usr/bin/env python
-
-import sys, re, tempfile
-from rpy import *
-# Older py compatibility
-try:
-    set()
-except:
-    from sets import Set as set
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
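-# Whitelist of R math functions and operators permitted in user expressions.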
-def S3_METHODS( all="key" ):
-    Group_Math =  [ "abs", "sign", "sqrt", "floor", "ceiling", "trunc", "round", "signif",
-        "exp", "log", "cos", "sin", "tan", "acos", "asin", "atan", "cosh", "sinh", "tanh",
-        "acosh", "asinh", "atanh", "lgamma", "gamma", "gammaCody", "digamma", "trigamma",
-        "cumsum", "cumprod", "cummax", "cummin", "c" ]
-    Group_Ops = [ "+", "-", "*", "/", "^", "%%", "%/%", "&", "|", "!", "==", "!=", "<", "<=", ">=", ">", "(", ")", "~", "," ]
-    if all is "key":
-        return { 'Math' : Group_Math, 'Ops' : Group_Ops }
-
-def main():
-    try:
-        datafile = sys.argv[1]
-        outfile_name = sys.argv[2]
-        expression = sys.argv[3]
-    except: 
-        stop_err( 'Usage: python gsummary.py input_file output_file expression' )
-
-    math_allowed = S3_METHODS()[ 'Math' ]
-    ops_allowed = S3_METHODS()[ 'Ops' ]
-
-    # Check for invalid expressions
-    for word in re.compile( '[a-zA-Z]+' ).findall( expression ):
-        if word and not word in math_allowed: 
-            stop_err( "Invalid expression '%s': term '%s' is not recognized or allowed" %( expression, word ) )
-    symbols = set()
-    for symbol in re.compile( '[^a-z0-9\s]+' ).findall( expression ):
-        if symbol and not symbol in ops_allowed:
-            stop_err( "Invalid expression '%s': operator '%s' is not recognized or allowed" % ( expression, symbol ) )
-        else:
-            symbols.add( symbol )
-    if len( symbols ) == 1 and ',' in symbols:
-        # User may have entered a comma-separated list of columns
-        stop_err( "Invalid columns '%s': this tool requires a single column or expression" % expression )
-
-    # Find all column references in the expression
-    cols = []
-    for col in re.compile( 'c[0-9]+' ).findall( expression ):
-        try:
-            cols.append( int( col[1:] ) - 1 )
-        except:
-            pass
- 
-    tmp_file = tempfile.NamedTemporaryFile( 'w+b' )
-    # Write the R header row to the temporary file
-    hdr_str = "\t".join( "c%s" % str( col+1 ) for col in cols )
-    tmp_file.write( "%s\n" % hdr_str )
-    skipped_lines = 0
-    first_invalid_line = 0
-    i = 0
-    for i, line in enumerate( file( datafile ) ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ):
-            valid = True
-            fields = line.split( '\t' )
-            # Write the R data row to the temporary file
-            for col in cols:
-                try:
-                    float( fields[ col ] )
-                except:
-                    skipped_lines += 1
-                    if not first_invalid_line:
-                        first_invalid_line = i + 1
-                    valid = False
-                    break
-            if valid:
-                data_str = "\t".join( fields[ col ] for col in cols )
-                tmp_file.write( "%s\n" % data_str )
-    tmp_file.flush()
-
-    if skipped_lines == i + 1:
-        stop_err( "Invalid column or column data values invalid for computation.  See tool tips and syntax for data requirements." )
-    else:
-        # summary function and return labels
-        summary_func = r( "function( x ) { c( sum=sum( as.numeric( x ), na.rm=T ), mean=mean( as.numeric( x ), na.rm=T ), stdev=sd( as.numeric( x ), na.rm=T ), quantile( as.numeric( x ), na.rm=TRUE ) ) }" )
-        headings = [ 'sum', 'mean', 'stdev', '0%', '25%', '50%', '75%', '100%' ]
-        headings_str = "\t".join( headings )
-        
-        set_default_mode( NO_CONVERSION )
-        r_data_frame = r.read_table( tmp_file.name, header=True, sep="\t" )
-        
-        outfile = open( outfile_name, 'w' )
-
-        for col in re.compile( 'c[0-9]+' ).findall( expression ):
-            r.assign( col, r[ "$" ]( r_data_frame, col ) )
-        try:
-            summary = summary_func( r( expression ) )
-        except RException, s:
-            outfile.close()
-            stop_err( "Computation resulted in the following error: %s" % str( s ) )
-        summary = summary.as_py( BASIC_CONVERSION )
-        outfile.write( "#%s\n" % headings_str )
-        outfile.write( "%s\n" % "\t".join( [ "%g" % ( summary[ k ] ) for k in headings ] ) )
-        outfile.close()
-
-        if skipped_lines:
-            print "Skipped %d invalid lines beginning with line #%d.  See tool tips for data requirements." % ( skipped_lines, first_invalid_line )        
-
-if __name__ == "__main__": main()
--- a/tools/stats/gsummary.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-<tool id="Summary_Statistics1" name="Summary Statistics" version="1.1.0">
-  <description>for any numerical column</description>
-  <command interpreter="python">gsummary.py $input $out_file1 "$cond"</command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Summary statistics on" help="Dataset missing? See TIP below"/>
-    <param name="cond" size="30" type="text" value="c5" label="Column or expression" help="See syntax below">
-      <validator type="empty_field" message="Enter a valid column or expression, see syntax below for examples"/>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="out_file1" />
-  </outputs>
-  <requirements>
-    <requirement type="python-module">rpy</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="input" value="1.bed"/>
-      <output name="out_file1" file="gsummary_out1.tabular"/>
-      <param name="cond" value="c2"/>
-    </test>
-  </tests>
-  <help>
-
-.. class:: warningmark
-
-This tool expects input datasets consisting of tab-delimited columns (blank or comment lines beginning with a # character are automatically skipped).
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert delimiters to TAB*
-
-.. class:: infomark
-
-**TIP:** Computing summary statistics may throw exceptions if the data value in every line of the columns being summarized is not numerical.  If a line is missing a value or contains a non-numerical value in the column being summarized, that line is skipped and the value is not included in the statistical computation.  The number of invalid skipped lines is documented in the resulting history item.
-
-.. class:: infomark
-
-**USING R FUNCTIONS:** Most functions (like *abs*) take only a single expression. *log* can take one or two parameters, like *log(expression,base)*
-
-Currently, these R functions are supported: *abs, sign, sqrt, floor, ceiling, trunc, round, signif, exp, log, cos, sin, tan, acos, asin, atan, cosh, sinh, tanh, acosh, asinh, atanh, lgamma, gamma, gammaCody, digamma, trigamma, cumsum, cumprod, cummax, cummin*
-
------
-
-**Syntax**
-
-This tool computes basic summary statistics on a given column, or on a valid expression containing one or more columns.
-
-- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file.
-
-- For example:
-
-  - **log(c5)** calculates the summary statistics for the natural log of column 5
-  - **(c5 + c6 + c7) / 3** calculates the summary statistics on the average of columns 5-7
-  - **log(c5,10)** summary statistics of the base 10 log of column 5
-  - **sqrt(c5+c9)** summary statistics of the square root of column 5 + column 9
-
------
-
-**Examples**
-
-- Input Dataset::
-
-    c1      c2      c3      c4      c5              c6
-    586     chrX    161416  170887  41108_at        16990
-    73      chrX    505078  532318  35073_at        1700
-    595     chrX    1361578 1388460 33665_s_at      1960
-    74      chrX    1420620 1461919 1185_at         8600
-
-- Summary Statistics on column c6 of the above input dataset::
-
-    #sum       mean      stdev     0%        25%       50%       75%        100%
-    29250.000  7312.500  7198.636  1700.000  1895.000  5280.000  10697.500  16990.000
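-
-- The same numbers can be reproduced with a short numpy sketch (illustrative only; the tool itself computes them in R via rpy)::
-
-    import numpy
-
-    c6 = numpy.array( [ 16990, 1700, 1960, 8600 ], dtype=float )
-    stats = [ c6.sum(), c6.mean(), c6.std( ddof=1 ) ]           # sum, mean, sample stdev
-    stats += [ numpy.percentile( c6, q ) for q in ( 0, 25, 50, 75, 100 ) ]
-    print '\t'.join( '%g' % s for s in stats )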
-
-</help>
-</tool>
--- a/tools/stats/gsummary.xml.groups	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-<tool id="Summary Statistics1" name="Summary Statistics">
-  <description>of a column in a tab-delimited file according to an expression</description>
-  <command interpreter="python">gsummary.py $input $out_file1 "$cond" "$groups"</command>
-  <inputs>
-    <param name="cond" size="40" type="text" value="c5" label="expression"/>
-    <param name="groups" size="40" type="text" value="none" label="group terms (c1,c4,etc.)"/>
-    <param format="txt" name="input" type="data" label="summary statistics on"/>
-
-  </inputs>
-  <outputs>
-    <data format="txt" name="out_file1" />
-  </outputs>
-  <help>
-
-.. class:: warningmark
-
-This tool expects input datasets to consist of tab-delimited columns (blank or comment lines beginning with a # character are automatically skipped).
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
-.. class:: infomark
-
-**TIP:** Computing summary statistics may throw exceptions if the data value in every line of the columns being summarized is not numerical.  If a line is missing a value or contains a non-numerical value in the column being summarized, that line is skipped and the value is not included in the statistical computation.  The number of invalid skipped lines is documented in the resulting history item.
-
-**Syntax**
-
-This tool computes basic summary statistics on a given column, or on an expression containing those columns
-
-- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file
-- To group the summary by the values in a column or columns, specify in the **group terms** box...
-    + **c1**  *group by the values in column 1*
-    + **c1,c4** *group by the values in column 1, then by the values in column 4*
-
-
------
-
-**Expression examples**
-
-- **log(c5)** calculates the summary statistics for the natural log of column 5
-- **(c5 + c6 + c7) / 3** calculates the summary statistics on the average of columns 5-7
-- **log(c5,10)** summary statistics of the base 10 log of column 5
-- **sqrt(c5+c9)** summary statistics of the square root of column 5 + column 9
-
-**Group examples**
-
-- **c1**  group by the values in column 1
-- **c1,c4** group by the values in column 1, then by the values in column 4
-
------
-
-.. class:: infomark
-
-**TIP:** Most functions (like *abs*) take only a single expression. *log* can take one or two parameters, like *log(expression,base)* 
-
-Currently, these R functions are supported: *abs, sign, sqrt, floor, ceiling, trunc, round, signif, exp, log, cos, sin, tan, acos, asin, atan, cosh, sinh, tanh, acosh, asinh, atanh, lgamma, gamma, gammaCody, digamma, trigamma, cumsum, cumprod, cummax, cummin*
-
-.. |INFO| image:: ./static/images/icon_info_sml.gif
-
-</help>
-</tool>
--- a/tools/stats/lda_analy.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,285 +0,0 @@
-<tool id="lda_analy1" name="Perform LDA" version="1.0.1">
-	<description>Linear Discriminant Analysis</description>
-	<command interpreter="sh">r_wrapper.sh $script_file</command>
-	<inputs>
-		<param format="tabular" name="input" type="data" label="Source file"/>
-		<param name="cond" size="30" type="integer" value="3" label="Number of principal components" help="See TIP below">
-			<validator type="empty_field" message="Enter a valid number of principal components, see syntax below for examples"/>
-		</param>
-
-	</inputs>
-	<outputs>
-		<data format="txt" name="output" />
-	</outputs>
-
-	<tests>
-		<test>
-			<param name="input" value="matrix_generator_for_pc_and_lda_output.tabular"/>
-			<output name="output" file="lda_analy_output.txt"/>
-			<param name="cond" value="2"/>
-
-		</test>
-	</tests>
-
-	<configfiles>
-        	<configfile name="script_file">
-
-        rm(list = objects() )
-
-        ############# FORMAT X DATA #########################
-        format&lt;-function(data) {
-            ind=NULL
-            for(i in 1 : ncol(data)){
-                if (is.na(data[nrow(data),i])) {
-                    ind&lt;-c(ind,i)
-                }
-            }
-            #print(is.null(ind))
-            if (!is.null(ind)) {
-                data&lt;-data[,-c(ind)]
-            }
-
-            data
-        }
-
-        ########GET RESPONSES ###############################
-        get_resp&lt;- function(data) {
-            resp1&lt;-as.vector(data[,ncol(data)])
-                resp=numeric(length(resp1))
-            for (i in 1:length(resp1)) {
-                if (resp1[i]=="Y ") {
-                    resp[i] = 0
-                }
-                if (resp1[i]=="X ") {
-                    resp[i] = 1
-                }
-            }
-                return(resp)
-        }
-
-        ######## CHARS TO NUMBERS ###########################
-        f_to_numbers&lt;- function(F) { 
-            ind&lt;-NULL
-            G&lt;-matrix(0,nrow(F), ncol(F))
-            for (i in 1:nrow(F)) {
-                for (j in 1:ncol(F)) {
-                    G[i,j]&lt;-as.integer(F[i,j])
-                }
-            }
-            return(G)
-        }
-
-        ###################NORMALIZING#########################
-        norm &lt;- function(M, a=NULL, b=NULL) {
-            C&lt;-NULL
-            ind&lt;-NULL
-
-            for (i in 1: ncol(M)) {
-                if (sd(M[,i])!=0) {
-                    M[,i]&lt;-(M[,i]-mean(M[,i]))/sd(M[,i])
-                }
-                #   else {print(mean(M[,i]))}   
-            }
-            return(M)
-        }
-
-        ##### LDA DIRECTIONS #################################
-        lda_dec &lt;- function(data, k){
-            priors=numeric(k)
-            grandmean&lt;-numeric(ncol(data)-1)
-            means=matrix(0,k,ncol(data)-1)
-            B = matrix(0, ncol(data)-1, ncol(data)-1)
-            N=nrow(data)
-            for (i in 1:k){
-                priors[i]=sum(data[,1]==i)/N
-                grp=subset(data,data\$group==i)
-                means[i,]=mean(grp[,2:ncol(data)])
-                #print(means[i,])
-                #print(priors[i])
-                #print(priors[i]*means[i,])
-                grandmean = priors[i]*means[i,] + grandmean           
-            }
-
-            for (i in 1:k) {
-                B= B + priors[i]*((means[i,]-grandmean)%*%t(means[i,]-grandmean))
-            }
-    
-            W = var(data[,2:ncol(data)])
-            svdW = svd(W)
-            inv_sqrtW =solve(svdW\$v %*% diag(sqrt(svdW\$d)) %*% t(svdW\$v))
-            B_star= t(inv_sqrtW)%*%B%*%inv_sqrtW
-            B_star_decomp = svd(B_star)
-            directions  = inv_sqrtW%*%B_star_decomp\$v
-            return( list(directions, B_star_decomp\$d) )                          
-        }
-
-        ################ NAIVE BAYES FOR 1D SIR OR LDA ##############
-        naive_bayes_classifier &lt;- function(resp, tr_data, test_data, k=2, tau) {
-            tr_data=data.frame(resp=resp, dir=tr_data)
-            means=numeric(k)
-            #print(k)
-            cl=numeric(k)
-            predclass=numeric(length(test_data))
-            for (i in 1:k) {
-                grp = subset(tr_data, resp==i)
-                means[i] = mean(grp\$dir)
-            #print(i, means[i])  
-            }
-            cutoff = tau*means[1]+(1-tau)*means[2] 
-            #print(tau)
-            #print(means)
-            #print(cutoff)
-            if (cutoff&gt;means[1]) {
-               cl[1]=1 
-               cl[2]=2
-            }
-            else {
-               cl[1]=2 
-               cl[2]=1
-            }
-
-            for (i in 1:length(test_data)) {
-
-                if (test_data[i] &lt;= cutoff) {
-                    predclass[i] = cl[1]
-            }
-                else {
-                    predclass[i] = cl[2] 
-            }  
-                }
-            #print(means)
-            #print(mean(means))
-            #X11()
-            #plot(test_data,pch=predclass, col=resp) 
-            predclass
-        }
-
-        ################# EXTENDED ERROR RATES #################
-        ext_error_rate &lt;- function(predclass, actualclass,msg=c("you forgot the message"), pr=1) {
-                 er=sum(predclass != actualclass)/length(predclass)
-
-                 matr&lt;-data.frame(predclass=predclass,actualclass=actualclass)
-                 escapes = subset(matr, actualclass==1)
-                 subjects = subset(matr, actualclass==2)      
-                 er_esc=sum(escapes\$predclass != escapes\$actualclass)/length(escapes\$predclass) 
-                 er_subj=sum(subjects\$predclass != subjects\$actualclass)/length(subjects\$predclass)   
-
-                 if (pr==1) {
-        #             print(paste(c(msg, 'overall : ', (1-er)*100, "%."),collapse=" "))
-        #             print(paste(c(msg, 'within escapes : ', (1-er_esc)*100, "%."),collapse=" "))
-        #             print(paste(c(msg, 'within subjects: ', (1-er_subj)*100, "%."),collapse=" ")) 
-            }
-            return(c((1-er)*100, (1-er_esc)*100, (1-er_subj)*100))                                                                                    
-        }
-
-        ## Main Function ##
-
-	files&lt;-matrix("${input}", 1,1, byrow=T)
-
-	d&lt;-"${cond}"   # Number of PC
-
-	tau&lt;-seq(0,1, by=0.005)
-	#tau&lt;-seq(0,1, by=0.1)
-	for_curve=matrix(-10, 3,length(tau))
-
-	##############################################################
-
-	test_data_whole_X &lt;-read.delim(files[1,1], row.names=1)
-
-	#### FORMAT TRAINING DATA ####################################
-	# get only necessary columns 
-
-	test_data_whole_X&lt;-format(test_data_whole_X)
-	oligo_labels&lt;-test_data_whole_X[1:(nrow(test_data_whole_X)-1),ncol(test_data_whole_X)]
-	test_data_whole_X&lt;-test_data_whole_X[,1:(ncol(test_data_whole_X)-1)]
-
-	X_names&lt;-colnames(test_data_whole_X)[1:ncol(test_data_whole_X)]
-	test_data_whole_X&lt;-t(test_data_whole_X)
-	resp&lt;-get_resp(test_data_whole_X) 
-	ldaqda_resp = resp + 1
-	a&lt;-sum(resp)		# Number of Subject
-	b&lt;-length(resp) - a	# Number of Escape   
-	## FREQUENCIES #################################################
-	F&lt;-test_data_whole_X[,1:(ncol(test_data_whole_X)-1)]
-	F&lt;-f_to_numbers(F)
-	FN&lt;-norm(F, a, b)
-	ss&lt;-svd(FN)
-	eigvar&lt;-NULL
-	eig&lt;-ss\$d^2
-
-	for ( i in 1:length(ss\$d)) {
-		eigvar[i]&lt;-sum(eig[1:i])/sum(eig)
-	}
-
-	#print(paste(c("Variance explained : ", eigvar[d]*100, "%"), collapse=""))
-	
-	Z&lt;-F%*%ss\$v
-
-	ldaqda_data &lt;- data.frame(group=ldaqda_resp,Z[,1:d])
-	lda_dir&lt;-lda_dec(ldaqda_data,2)
-	train_lda_pred &lt;-Z[,1:d]%*%lda_dir[[1]]
-
-	############# NAIVE BAYES CROSS-VALIDATION #############
-	### LDA #####
-
-	y&lt;-ldaqda_resp
-	X&lt;-F
-	cv&lt;-matrix(c(rep('NA',nrow(test_data_whole_X))), nrow(test_data_whole_X), length(tau))
-	for (i in 1:nrow(test_data_whole_X)) {
-	#	print(i)
-		resp&lt;-y[-i]
-		p&lt;-matrix(X[-i,], dim(X)[1]-1, dim(X)[2])
-		testdata&lt;-matrix(X[i,],1,dim(X)[2])
-		p1&lt;-norm(p)
-		sss&lt;-svd(p1)
-		pred&lt;-(p%*%sss\$v)[,1:d]
-		test&lt;- (testdata%*%sss\$v)[,1:d]
-		lda  &lt;- lda_dec(data.frame(group=resp,pred),2)
-		pred &lt;- pred[,1:d]%*%lda[[1]][,1]
-		test &lt;- test%*%lda[[1]][,1]
-		test&lt;-matrix(test, 1, length(test))
-		for (t in 1:length(tau)) {
-			cv[i, t] &lt;- naive_bayes_classifier (resp, pred, test,k=2, tau[t]) 
-		}
- 	}
-
-	for (t in 1:length(tau)) {
-		tr_err&lt;-ext_error_rate(cv[,t], ldaqda_resp , c("CV"), 1)
-		for_curve[1:3,t]&lt;-tr_err
-	}
-
-	dput(for_curve, file="${output}")
-
-
-		</configfile>
-	</configfiles>
-
-	<help>
-
-.. class:: infomark
-
-**TIP:** If you want to perform Principal Component Analysis (PCA) on the given numeric input data (which corresponds to "Source file First" in the "Generate A Matrix" tool), please use *Multivariate Analysis/Principal Component Analysis*
-
------
-
-.. class:: infomark
-
-**What it does**
-
-This tool performs the Linear Discriminant Analysis (LDA) described in Carrel et al., 2006 (PMID: 17009873):
-
-*Carrel L, Park C, Tyekucheva S, Dunn J, Chiaromonte F, et al. (2006) Genomic Environment Predicts Expression Patterns on the Human Inactive X Chromosome. PLoS Genet 2(9): e151. doi:10.1371/journal.pgen.0020151*
-
------
-
-.. class:: warningmark
-
-**Note**
-
-- Output from "Generate A Matrix" tool is used as input file for this tool 
-- Output of this tool contains LDA classification success rates for different values of the turning parameter tau (from 0 to 1 with 0.005 interval). This output file will be used to establish the ROC plot, and you can obtain more detail information from this plot. 
-
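-The tuning parameter tau slides the decision cutoff between the two class means along the first LDA direction (see naive_bayes_classifier in the script above). A minimal Python sketch of that cutoff rule, with hypothetical names::
-
-    def cutoff_classify(train_proj, train_labels, test_proj, tau):
-        """1-D naive Bayes rule: place a tau-weighted cutoff between class means."""
-        m1 = sum(p for p, y in zip(train_proj, train_labels) if y == 1) / train_labels.count(1)
-        m2 = sum(p for p, y in zip(train_proj, train_labels) if y == 2) / train_labels.count(2)
-        cutoff = tau * m1 + (1 - tau) * m2
-        below, above = (1, 2) if cutoff > m1 else (2, 1)   # which class sits on each side
-        return [below if p &lt;= cutoff else above for p in test_proj]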
-
-</help>
-
-</tool>
--- a/tools/stats/plot_from_lda.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,258 +0,0 @@
-<tool id="plot_for_lda_output1" name="Draw ROC plot" version="1.0.1">
-	<description>on "Perform LDA" output</description>
-	<command interpreter="sh">r_wrapper.sh $script_file</command>
-
-	<inputs>
-		<param format="txt" name="input" type="data" label="Source file"> </param>
-		<param name="my_title" size="30" type="text" value="My Figure" label="Title of your plot" help="See syntax below"> </param>
-		<param name="X_axis" size="30" type="text" value="Text for X axis" label="Legend of X axis in your plot" help="See syntax below"> </param>
-		<param name="Y_axis" size="30" type="text" value="Text for Y axis" label="Legend of Y axis in your plot" help="See syntax below"> </param>
-	</inputs>
-	<outputs>
-		<data format="pdf" name="pdf_output" />
-	</outputs>
-
-	<tests>
-		<test>
-			<param name="input" value="lda_analy_output.txt"/>
-			<param name="my_title" value="Test Plot1"/>
-			<param name="X_axis" value="Test Plot2"/>
-			<param name="Y_axis" value="Test Plot3"/>
-			<output name="pdf_output" file="plot_for_lda_output.pdf"/>
-		</test>
-	</tests>
-
-    <configfiles>
-            <configfile name="script_file">
-
-        rm(list = objects() )
-
-        ############# FORMAT X DATA #########################
-        format&lt;-function(data) {
-            ind=NULL
-            for(i in 1 : ncol(data)){
-                if (is.na(data[nrow(data),i])) {
-                    ind&lt;-c(ind,i)
-                }
-            }
-            #print(is.null(ind))
-            if (!is.null(ind)) {
-                data&lt;-data[,-c(ind)]
-            }
-
-            data
-        }
-
-        ########GET RESPONSES ###############################
-        get_resp&lt;- function(data) {
-            resp1&lt;-as.vector(data[,ncol(data)])
-                resp=numeric(length(resp1))
-            for (i in 1:length(resp1)) {
-                if (resp1[i]=="Control ") {
-                    resp[i] = 0
-                }
-                if (resp1[i]=="XLMR ") {
-                    resp[i] = 1
-                }
-            }
-                return(resp)
-        }
-
-        ######## CHARS TO NUMBERS ###########################
-        f_to_numbers&lt;- function(F) { 
-            ind&lt;-NULL
-            G&lt;-matrix(0,nrow(F), ncol(F))
-            for (i in 1:nrow(F)) {
-                for (j in 1:ncol(F)) {
-                    G[i,j]&lt;-as.integer(F[i,j])
-                }
-            }
-            return(G)
-        }
-
-        ###################NORMALIZING#########################
-        norm &lt;- function(M, a=NULL, b=NULL) {
-            C&lt;-NULL
-            ind&lt;-NULL
-
-            for (i in 1: ncol(M)) {
-                if (sd(M[,i])!=0) {
-                    M[,i]&lt;-(M[,i]-mean(M[,i]))/sd(M[,i])
-                }
-                #   else {print(mean(M[,i]))}   
-            }
-            return(M)
-        }
-
-        ##### LDA DIRECTIONS #################################
-        lda_dec &lt;- function(data, k){
-            priors=numeric(k)
-            grandmean&lt;-numeric(ncol(data)-1)
-            means=matrix(0,k,ncol(data)-1)
-            B = matrix(0, ncol(data)-1, ncol(data)-1)
-            N=nrow(data)
-            for (i in 1:k){
-                priors[i]=sum(data[,1]==i)/N
-                grp=subset(data,data\$group==i)
-                means[i,]=mean(grp[,2:ncol(data)])
-                #print(means[i,])
-                #print(priors[i])
-                #print(priors[i]*means[i,])
-                grandmean = priors[i]*means[i,] + grandmean           
-            }
-
-            for (i in 1:k) {
-                B= B + priors[i]*((means[i,]-grandmean)%*%t(means[i,]-grandmean))
-            }
-    
-            W = var(data[,2:ncol(data)])
-            svdW = svd(W)
-            inv_sqrtW =solve(svdW\$v %*% diag(sqrt(svdW\$d)) %*% t(svdW\$v))
-            B_star= t(inv_sqrtW)%*%B%*%inv_sqrtW
-            B_star_decomp = svd(B_star)
-            directions  = inv_sqrtW%*%B_star_decomp\$v
-            return( list(directions, B_star_decomp\$d) )                          
-        }
-
-        ################ NAIVE BAYES FOR 1D SIR OR LDA ##############
-        naive_bayes_classifier &lt;- function(resp, tr_data, test_data, k=2, tau) {
-            tr_data=data.frame(resp=resp, dir=tr_data)
-            means=numeric(k)
-            #print(k)
-            cl=numeric(k)
-            predclass=numeric(length(test_data))
-            for (i in 1:k) {
-                grp = subset(tr_data, resp==i)
-                means[i] = mean(grp\$dir)
-            #print(i, means[i])  
-            }
-            cutoff = tau*means[1]+(1-tau)*means[2] 
-            #print(tau)
-            #print(means)
-            #print(cutoff)
-            if (cutoff&gt;means[1]) {
-               cl[1]=1 
-               cl[2]=2
-            }
-            else {
-               cl[1]=2 
-               cl[2]=1
-            }
-
-            for (i in 1:length(test_data)) {
-
-                if (test_data[i] &lt;= cutoff) {
-                    predclass[i] = cl[1]
-            }
-                else {
-                    predclass[i] = cl[2] 
-            }  
-                }
-            #print(means)
-            #print(mean(means))
-            #X11()
-            #plot(test_data,pch=predclass, col=resp) 
-            predclass
-        }
-
-        ################# EXTENDED ERROR RATES #################
-        ext_error_rate &lt;- function(predclass, actualclass,msg=c("you forgot the message"), pr=1) {
-                 er=sum(predclass != actualclass)/length(predclass)
-
-                 matr&lt;-data.frame(predclass=predclass,actualclass=actualclass)
-                 escapes = subset(matr, actualclass==1)
-                 subjects = subset(matr, actualclass==2)      
-                 er_esc=sum(escapes\$predclass != escapes\$actualclass)/length(escapes\$predclass) 
-                 er_subj=sum(subjects\$predclass != subjects\$actualclass)/length(subjects\$predclass)   
-
-                 if (pr==1) {
-        #             print(paste(c(msg, 'overall : ', (1-er)*100, "%."),collapse=" "))
-        #             print(paste(c(msg, 'within escapes : ', (1-er_esc)*100, "%."),collapse=" "))
-        #             print(paste(c(msg, 'within subjects: ', (1-er_subj)*100, "%."),collapse=" ")) 
-            }
-            return(c((1-er)*100, (1-er_esc)*100, (1-er_subj)*100))                                                                                    
-        }
-
-        ## Main Function ##
-
-	files_alias&lt;-c("${my_title}")
-	tau=seq(0,1,by=0.005)
-	nfiles=1
-	f = c("${input}")
-
-	rez_ext&lt;-list()
-	for (i in 1:nfiles) {
-		rez_ext[[i]]&lt;-dget(paste(f[i], sep="",collapse=""))
-	}
-
-	tau&lt;-tau[1:(length(tau)-1)]
-	for (i in 1:nfiles) {
-		rez_ext[[i]]&lt;-rez_ext[[i]][,1:(length(tau)-1)]
-	}
-
-	######## OPTIMAL TAU ###########################
-
-	#rez_ext
-
-	rate&lt;-c("Optimal tau","Tr total", "Tr Y", "Tr X")
-
-	m_tr&lt;-numeric(nfiles)
-	m_xp22&lt;-numeric(nfiles)
-	m_x&lt;-numeric(nfiles)
-
-	for (i in 1:nfiles) {
-		r&lt;-rez_ext[[i]]
-		#tr
-	#	rate&lt;-rbind(rate, c(files_alias[i]," "," "," ") )
-		mm&lt;-which((r[3,])==max(r[3,]))
-
-		m_tr[i]&lt;-mm[1]
-		rate&lt;-rbind(rate,c(tau[m_tr[i]],r[,m_tr[i]]))
-	}
-	print(rate)
-
-	pdf(file= paste("${pdf_output}"))
-
-	plot(rez_ext[[i]][2,]~rez_ext[[i]][3,], xlim=c(0,100), ylim=c(0,100), xlab="${X_axis}   [1-FP(False Positive)]", ylab="${Y_axis}   [1-FP(False Positive)]", type="l", lty=1, col="blue", xaxt='n', yaxt='n')
-	for (i in 1:nfiles) {
-		lines(rez_ext[[i]][2,]~rez_ext[[i]][3,], xlab="${X_axis}   [1-FP(False Positive)]", ylab="${Y_axis}   [1-FP(False Positive)]", type="l", lty=1, col=i)   
-		# pt=c(r,)
-		points(x=rez_ext[[i]][3,m_tr[i]],y=rez_ext[[i]][2,m_tr[i]], pch=16, col=i)  
-	}
-
-
-	title(main="${my_title}", adj=0, cex.main=1.1)
-	axis(2, at=c(0,20,40,60,80,100), labels=c('0','20','40','60','80','100%'))
-	axis(1, at=c(0,20,40,60,80,100), labels=c('0','20','40','60','80','100%')) 
-
-	#leg=c("10 kb","50 kb","100 kb")
-	#legend("bottomleft",legend=leg , col=c(1,2,3), lty=c(1,1,1))
-
-	#dev.off()
-
-		</configfile>
-	</configfiles>
-
-
-	<help>
-.. class:: infomark
-
-**What it does**
-
-This tool generates a Receiver Operating Characteristic (ROC) plot that shows LDA classification success rates for different values of the tuning parameter tau, as in Figure 3 of Carrel et al., 2006 (PMID: 17009873).
-
-*Carrel L, Park C, Tyekucheva S, Dunn J, Chiaromonte F, et al. (2006) Genomic Environment Predicts Expression Patterns on the Human Inactive X Chromosome. PLoS Genet 2(9): e151. doi:10.1371/journal.pgen.0020151*
-
------
-
-.. class:: warningmark
-
-**Note**
-
-- Output from "Perform LDA" tool is used as input file for this tool.
-
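-The "optimal tau" reported by this tool is the tau whose column maximizes the third row of the success-rate matrix produced by "Perform LDA" (the which(r[3,]==max(r[3,])) step in the script above). A small Python sketch of that selection, with hypothetical names::
-
-    def optimal_tau(taus, rates):
-        """rates: three rows of success percentages, one column per tau value."""
-        best = max(range(len(taus)), key=lambda t: rates[2][t])
-        return taus[best], [row[best] for row in rates]
-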
-</help>
-
-
-
-</tool>
--- a/tools/stats/r_wrapper.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-#!/bin/sh
-
-### Run R providing the R script in $1 as standard input and passing 
-### the remaining arguments on the command line
-
-# Function that writes a message to stderr and exits
-fail()
-{
-    echo "$@" >&2
-    exit 1
-}
-
-# Ensure R executable is found
-which R > /dev/null || fail "'R' is required by this tool but was not found on path" 
-
-# Extract first argument
-infile=$1; shift
-
-# Ensure the file exists
-test -f "$infile" || fail "R input file '$infile' does not exist"
-
-# Invoke R passing file named by first argument to stdin
-R --vanilla --slave "$@" < "$infile"
--- a/tools/stats/wiggle_to_simple.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Read a wiggle track and print out a series of lines containing
-"chrom position score". Ignores track lines, handles bed, variableStep
-and fixedStep wiggle lines.
-"""
-import sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-import bx.wiggle
-from galaxy.tools.exception_handling import *
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def main():
-    if len( sys.argv ) > 1: 
-        in_file = open( sys.argv[1] )
-    else: 
-        in_file = sys.stdin
-    
-    if len( sys.argv ) > 2:
-        out_file = open( sys.argv[2], "w" )
-    else:
-        out_file = sys.stdout
-    
-    try:
-        for fields in bx.wiggle.IntervalReader( UCSCOutWrapper( in_file ) ):
-            out_file.write( "%s\n" % "\t".join( map( str, fields ) ) )
-    except UCSCLimitException:
-        # Wiggle data was truncated, at the very least need to warn the user.
-        print 'Encountered message from UCSC: "Reached output limit of 100000 data values", so be aware your data was truncated.'
-    except ValueError, e:
-        in_file.close()
-        out_file.close()
-        stop_err( str( e ) )
-
-    in_file.close()
-    out_file.close()
-
-if __name__ == "__main__": main()
--- a/tools/stats/wiggle_to_simple.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,84 +0,0 @@
-<tool id="wiggle2simple1" name="Wiggle-to-Interval">
-  <description>converter</description>
-  <command interpreter="python">wiggle_to_simple.py $input $out_file1 </command>
-  <inputs>
-    <param format="wig" name="input" type="data" label="Convert"/>
-  </inputs>
-  <outputs>
-    <data format="interval" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="2.wig" />
-      <output name="out_file1" file="2.interval"/>
-    </test>
-  </tests>
-  <help>
-**Syntax**
-
-This tool converts wiggle data into interval type.
-
-- **Wiggle format**: The .wig format is line-oriented. Wiggle data is preceded by a UCSC track definition line.  Following the track definition line is the track data, which can be entered in three different formats described below.
-
-  - **BED format** with no declaration line and four columns of data::
-
-      chromA  chromStartA  chromEndA  dataValueA
-      chromB  chromStartB  chromEndB  dataValueB
-
-  - **variableStep** two-column data; started by a declaration line and followed by chromosome positions and data values::
-
-      variableStep  chrom=chrN  [span=windowSize]
-      chromStartA  dataValueA
-      chromStartB  dataValueB
-
-  - **fixedStep** single-column data; started by a declaration line and followed by data values::
-
-      fixedStep  chrom=chrN  start=position  step=stepInterval  [span=windowSize]
-      dataValue1
-      dataValue2
-
------
-
-**Example**
-
-- input wiggle format file::
-
-    #track type=wiggle_0 name="Bed Format" description="BED format"
-    chr19 59302000 59302300 -1.0
-    chr19 59302300 59302600 -0.75
-    chr19 59302600 59302900 -0.50
-    chr19 59302900 59303200 -0.25
-    chr19 59303200 59303500 0.0
-    #track type=wiggle_0 name="variableStep" description="variableStep format"
-    variableStep chrom=chr19 span=150
-    59304701 10.0
-    59304901 12.5
-    59305401 15.0
-    59305601 17.5
-    #track type=wiggle_0 name="fixedStep" description="fixed step" visibility=full
-    fixedStep chrom=chr19 start=59307401 step=300 span=200
-    1000
-    900
-    800
-    700
-    600
-
-- convert the above file to interval file::
-
-    chr19	59302000	59302300	+	-1.0
-    chr19	59302300	59302600	+	-0.75
-    chr19	59302600	59302900	+	-0.5
-    chr19	59302900	59303200	+	-0.25
-    chr19	59303200	59303500	+	0.0
-    chr19	59304701	59304851	+	10.0
-    chr19	59304901	59305051	+	12.5
-    chr19	59305401	59305551	+	15.0
-    chr19	59305601	59305751	+	17.5
-    chr19	59307701	59307901	+	1000.0
-    chr19	59308001	59308201	+	900.0
-    chr19	59308301	59308501	+	800.0
-    chr19	59308601	59308801	+	700.0
-    chr19	59308901	59309101	+	600.0
-
-</help>
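-For reference, a minimal Python sketch of the conversion logic (the tool itself uses the bx-python wiggle reader; span defaults, coordinate conventions, and the strand column are glossed over here)::
-
-    def wig_to_intervals(lines):
-        """Yield (chrom, start, end, value) from BED, variableStep, or fixedStep lines."""
-        mode, chrom, start, step, span = None, None, 0, 1, 1
-        for line in lines:
-            if line.startswith('#') or line.startswith('track'):
-                continue
-            fields = line.split()
-            if not fields:
-                continue
-            if fields[0] == 'variableStep':
-                opts = dict(f.split('=') for f in fields[1:])
-                mode, chrom, span = 'variable', opts['chrom'], int(opts.get('span', 1))
-            elif fields[0] == 'fixedStep':
-                opts = dict(f.split('=') for f in fields[1:])
-                mode, chrom, span = 'fixed', opts['chrom'], int(opts.get('span', 1))
-                start, step = int(opts['start']), int(opts['step'])
-            elif mode == 'variable':
-                pos = int(fields[0])
-                yield chrom, pos, pos + span, float(fields[1])
-            elif mode == 'fixed':
-                yield chrom, start, start + span, float(fields[0])
-                start += step
-            else:   # four-column BED-style line
-                yield fields[0], int(fields[1]), int(fields[2]), float(fields[3])
-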
-</tool>
--- a/tools/taxonomy/find_diag_hits.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,170 +0,0 @@
-#!/usr/bin/env python
-
-"""
-find_diag_hits.py <file in taxonomy format> <id column> <taxonomic ranks> <output format> <output file>
-    finds reads that hit only one taxonomic group. For example, consider the following input:
-    
-    read1   mammalia
-    read1   insecta
-    read2   insecta
-    
-    in this case only read2 will be selected because it stays within insecta
-    
-    This program takes the following options:
-    
-    file in taxonomy format - dataset that complies with Galaxy's taxonomy format
-    id column               - integer specifying the column containing the sequence id (starting with 1)
-    taxonomic ranks         - a comma separated list of ranks from this list:
-    
-         superkingdom
-         kingdom
-         subkingdom
-         superphylum
-         phylum
-         subphylum
-         superclass
-         class
-         subclass
-         superorder
-         order
-         suborder
-         superfamily
-         family
-         subfamily
-         tribe
-         subtribe
-         genus
-         subgenus
-         species
-         subspecies
-    
-    output format           - reads or counts
-
-"""
-
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( 'pysqlite' )
-from pysqlite2 import dbapi2 as sqlite
-import string, sys, tempfile
-
-# This dictionary maps taxonomic ranks to fields of Taxonomy file
-taxRank = {
-        'root'        :2, 
-        'superkingdom':3, 
-        'kingdom'     :4, 
-        'subkingdom'  :5, 
-        'superphylum' :6, 
-        'phylum'      :7, 
-        'subphylum'   :8, 
-        'superclass'  :9, 
-        'class'       :10, 
-        'subclass'    :11, 
-        'superorder'  :12, 
-        'ord'         :13, 
-        'suborder'    :14, 
-        'superfamily' :15,
-        'family'      :16,
-        'subfamily'   :17,
-        'tribe'       :18,
-        'subtribe'    :19,
-        'genus'       :20,
-        'subgenus'    :21,
-        'species'     :22,
-        'subspecies'  :23,
-        'order'       :13
-    }
-
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-
-db = tempfile.NamedTemporaryFile('w')
-
-try:
-    con = sqlite.connect(db.name)
-    cur = con.cursor()
-except:
-    stop_err('Cannot connect to %s\n' % db.name)
-    
-try:
-    tax_file   = open(sys.argv[1], 'r')
-    id_col     = int(sys.argv[2]) - 1
-    taxa       = string.split(sys.argv[3].rstrip(),',')
-    
-    if sys.argv[4] == 'reads':
-        out_format = True
-    elif sys.argv[4] == 'counts':
-        out_format = False
-    else:
-        stop_err('Please specify "reads" or "counts" for output format\n')
-    out_file = open(sys.argv[5], 'w')
-    
-except:
-    stop_err('Check arguments\n')
-    
-if taxa[0] == 'None': stop_err('Please, use checkboxes to specify taxonomic ranks.\n')
-
-sql = ""
-for i in range(len(taxa)):
-        if taxa[i] == 'order': taxa[i] = 'ord' # SQL does not like fields to be named 'order'
-        sql += '%s text, ' % taxa[i]
-
-sql = sql.strip(', ')
-sql = 'create table tax (name varchar(50) not null, ' + sql + ')'
-
-    
-cur.execute(sql)
-
-invalid_line_number = 0
-
-try:
-    for line in tax_file:
-        fields = string.split(line.rstrip(), '\t')
-        if len(fields) < 24: 
-            invalid_line_number += 1
-            continue # Skipping malformed taxonomy lines
-        
-        val_string = '"' + fields[id_col] + '", '
-        
-        for rank in taxa:
-            taxon = fields[taxRank[rank]]
-            val_string += '"%s", ' % taxon
-                
-        val_string = val_string.strip(', ')
-        val_string = "insert into tax values(" + val_string + ")"
-        cur.execute(val_string)
-except Exception, e:
-    stop_err('%s\n' % e)
-
-tax_file.close()    
-
-try:    
-    for rank in taxa:
-        cur.execute('create temporary table %s (name varchar(50), id text, rank text)' % rank  )
-        cur.execute('insert into %s select name, name || %s as id, %s from tax group by id' % ( rank, rank, rank ) )
-        cur.execute('create temporary table %s_count(name varchar(50), id text, rank text, N int)' % rank)
-        cur.execute('insert into %s_count select name, id, rank, count(*) from %s group by name' % ( rank, rank) )
-        
-        if rank == 'ord':
-            rankName = 'order'
-        else:
-            rankName = rank
-    
-        if out_format:
-            cur.execute('select name,rank from %s_count where N = 1 and length(rank)>1' % rank)
-            for item in cur.fetchall():
-                out_string = '%s\t%s\t' % ( item[0], item[1] )
-                out_string += rankName
-                print >>out_file, out_string
-        else:
-            cur.execute('select rank, count(*) from %s_count where N = 1 and length(rank)>1 group by rank' % rank)
-            for item in cur.fetchall():
-                out_string = '%s\t%s\t' % ( item[0], item[1] )
-                out_string += rankName
-                print >>out_file, out_string
-except Exception, e:
-    stop_err("%s\n" % e)
-    
--- a/tools/taxonomy/find_diag_hits.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-<tool id="find_diag_hits" name="Find diagnostic hits" version="1.0.0">
-    <description></description>
-    <requirements>
-        <requirement type="package">taxonomy</requirement>
-    </requirements>
-    <command interpreter="python">find_diag_hits.py $input1 $id_col $rank_list $out_format $out_file1</command>
-    <inputs>
-        <param format="taxonomy" name="input1" type="data" label="Find diagnostic hits in"/>
-        <param name="id_col" type="data_column" data_ref="input1" numerical="False" label="Select column with sequence id" />
-        <param name="rank_list" type="select" display="checkboxes" multiple="true" label="select taxonomic ranks">
-            <option value="superkingdom">Superkingdom</option>
-            <option value="kingdom">Kingdom</option>
-            <option value="subkingdom">Subkingdom</option>
-            <option value="superphylum">Superphylum</option>
-            <option value="phylum">Phylum</option>
-            <option value="subphylum">Subphylum</option>
-            <option value="superclass">Superclass</option>
-            <option value="class">Class</option>
-            <option value="subclass">Subclass</option>
-            <option value="superorder">Superorder</option>
-            <option value="order">Order</option>
-            <option value="suborder">Suborder</option>
-            <option value="superfamily">Superfamily</option>
-            <option value="family">Family</option>
-            <option value="subfamily">Subfamily</option>
-            <option value="tribe">Tribe</option>
-            <option value="subtribe">Subtribe</option>
-            <option value="genus">Genus</option>
-            <option value="subgenus">Subgenus</option>
-            <option selected="true" value="species">Species</option>
-            <option value="subspecies">Subspecies</option>
-        </param>
-        <param name="out_format" type="select" label="Select output format">
-            <option value="reads">Diagnostic read list</option>
-            <option value="counts">Number of diagnostic reads per taxonomic rank</option>
-        </param>
-    </inputs>
-    <outputs>
-        <data format="tabular" name="out_file1" />
-    </outputs>
-      <tests>
-    <test>
-      <param name="input1" value="taxonomyGI.taxonomy" ftype="taxonomy"/>
-      <param name="id_col" value="1" />
-      <param name="rank_list" value="order,genus" />
-      <param name="out_format" value="counts" />
-      <output name="out_file1" file="find_diag_hits.tabular" />
-    </test> 
-  </tests>
-
-    
-<help>
-
-**What it does**
-
-When performing metagenomic analyses it is often necessary to identify sequence reads corresponding to a particular taxonomic group, or, in other words, diagnostic of a particular taxonomic rank. This utility performs this analysis. It takes data generated by *Taxonomy manipulation->Fetch Taxonomic Ranks* as input and outputs either a list of sequence reads unique to a particular taxonomic rank, or a list of taxonomic ranks and the count of unique reads corresponding to each rank. 
-
-------
-
-**Example**
-
-Suppose the *Taxonomy manipulation->Fetch Taxonomic Ranks* tool generated the following taxonomy representation::
-
-    read1 2      root Eukaryota Metazoa n n Chordata   Craniata Gnathostomata Mammalia n        Laurasiatheria   n           Ruminantia  n             Bovidae     Bovinae      n          n          Bos        n Bos taurus        n
-    read2 12585	 root Eukaryota Metazoa n n Chordata   Craniata Gnathostomata Mammalia n        Euarchontoglires Primates	 Haplorrhini Hominoidea    Hominidae   n            n          n          Homo       n Homo sapiens      n 
-    read1 58615  root Eukaryota Metazoa n n Arthropoda n        Hexapoda      Insecta  Neoptera Amphiesmenoptera Lepidoptera Glossata    Papilionoidea Nymphalidae Nymphalinae  Melitaeini Phyciodina Anthanassa n Anthanassa otanes n 
-    read3 56785	 root Eukaryota Metazoa n n Chordata   Craniata Gnathostomata Mammalia n        Euarchontoglires Primates	 Haplorrhini Hominoidea    Hominidae   n            n          n          Homo       n Homo sapiens      n   
-
-Running this tool with the following parameters:
-
-  * *Select column with sequence id* set to **c1**
-  * *Select taxonomic ranks* with **order**, and **genus** checked
-  * *Output format* set to **Diagnostic read list**
-  
-will return::
-
-    read2 Primates order
-    read3 Primates order
-    read2 Homo     genus
-    read3 Homo     genus
-    
-Changing *Output format* set to **Number of diagnostic reads per taxonomic rank** will produce::
-
-    Primates 2       order
-    Homo     2       genus
-    
-.. class:: infomark
-
-Note that **read1** is omitted because it is non-unique: it hits both Mammalia and Insecta.
-
---------
-
-.. class:: warningmark
-
-This tool omits "**n**" corresponding to ranks missing from NCBI taxonomy. In the above example *Homo sapiens* contains the order name (Primates) while *Bos taurus* does not.
-
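-The selection rule itself is simple: a read is diagnostic at a given rank when all of its taxonomy lines agree on a single name at that rank and that name is not "n". A rough Python sketch of the rule (the tool implements it with SQLite; names here are hypothetical)::
-
-    from collections import defaultdict
-
-    def diagnostic_reads(rows, rank_col):
-        """rows: (read_id, fields) pairs; keep reads with one non-"n" name at rank_col."""
-        names_by_read = defaultdict(set)
-        for read_id, fields in rows:
-            names_by_read[read_id].add(fields[rank_col])
-        return dict((r, next(iter(names))) for r, names in names_by_read.items()
-                    if len(names) == 1 and 'n' not in names)
-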
-
-</help>
-</tool>
--- a/tools/taxonomy/gi2taxonomy.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,174 +0,0 @@
-import sys
-import string
-import tempfile
-import subprocess
-from os import path
-
-# -----------------------------------------------------------------------------------
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-# -----------------------------------------------------------------------------------
-def gi_name_to_sorted_list(file_name, gi_col, name_col):
-    """ Suppose input file looks like this:
-        a       2
-        b       4
-        c       5
-        d       5
-        where column 1 is gi_col and column 0 is name_col
-        output of this function will look like this:
-        [[2, 'a'], [4, 'b'], [5, 'c'], [5, 'd']]
-    """
-
-    result = []
-    try:
-        F = open( file_name, 'r' )
-        try:
-            for line in F:
-                file_cols = string.split(line.rstrip(), '\t')
-                file_cols[gi_col] = int(  file_cols[gi_col] )
-                result.append( [ file_cols[gi_col], file_cols[name_col] ] )
-        except:
-            print >>sys.stderr, 'Non-numeric GI field...skipping'
-            
-    except Exception, e:
-        stop_err('%s\n' % e)
-    F.close()
-    result.sort()
-    return result   
-
-# -----------------------------------------------------------------------------------
-
-def collapse_repeating_gis( L ):
-    """ Accepts 2-d array of gi-key pairs such as this
-        L = [
-                [gi1, 'key1'],
-                [gi1, 'key2'],
-                [gi2, 'key3']
-            ]
-
-         Returns this:
-         [      [gi1, 'key1', 'key2'],
-                [gi2, 'key3' ]
-         ]
-         
-         The first value in each sublist MUST be int
-    """
-    gi = []
-    i = 0
-    result = []
-    
-    try:
-        for item in L:
-            if i == 0:
-                prev = item[0]
-            
-            if prev != item[0]:
-                prev_L = []
-                prev_L.append( prev )
-                result.append( prev_L + gi )
-                prev = item[0]
-                gi =[]
-                
-            gi.append( item[1] )
-            i += 1
-            
-    except Exception, e:
-        stop_err('%s\n' % e)
-        
-    prev_L = []
-    prev_L.append( prev )
-    result.append( prev_L + gi )
-    del(L)
-    return result
-
-# -----------------------------------------------------------------------------------
-
-def get_taxId( gi2tax_file, gi_name_list, out_file ):
-    """ Maps GI numbers from gi_name_list to TaxId identifiers from gi2tax_file and
-        prints result to out_file
-
-        gi2tax_file MUST be sorted on GI column
-
-        gi_name_list is a list that looks like this:
-        [[1,'a'], [2,'b','x'], [7,'c'], [10,'d'], [90,'f']]
-        where the first element of each sublist is a GI number
-        this list MUST also be sorted on GI
-
-        This function searches through 117,000,000 rows of gi2taxId file from NCBI
-        in approximately 4 minutes. This time is not dependent on the length of
-        gi_name_list
-    """
-
-    L = gi_name_list.pop(0)
-    my_gi = L[0]
-    F = open( out_file, 'w' )
-    gi = 0
-    for line in file( gi2tax_file ):
-        line = line.rstrip()
-        gi, taxId = string.split( line, '\t' )
-        gi = int( gi )
-        
-        if gi > my_gi:
-            try:
-                while ( my_gi < gi ):
-                    L = gi_name_list.pop(0)
-                    my_gi = L[0]
-            except:
-                break
-    
-        if  gi == my_gi:
-            for i in range( 1,len( L ) ):
-                print >>F, '%s\t%s\t%d' % (L[i], taxId, gi)
-            try:
-                L = gi_name_list.pop(0)
-                my_gi = L[0]
-            except:
-                break
-
-# -----------------------------------------------------------------------------------
-
-
-try:
-    in_f          = sys.argv[1]            # input file with GIs
-    gi_col        = int( sys.argv[2] ) - 1 # column in input containing GIs
-    name_col      = int( sys.argv[3] ) - 1 # column containing sequence names
-    out_f         = sys.argv[4]            # output file
-    tool_data     = sys.argv[5]
-except:
-    stop_err('Check arguments\n')
-
-#  GI2TAX points to a file produced by concatenation of:
-#  ftp://ftp.ncbi.nih.gov/pub/taxonomy/gi_taxid_nucl.zip
-#  and
-#  ftp://ftp.ncbi.nih.gov/pub/taxonomy/gi_taxid_prot.zip
-#  followed by sorting with this command:
-#  sort -n -k 1
-
-GI2TAX = path.join( tool_data, 'taxonomy', 'gi_taxid_sorted.txt' )
-
-#  NAME_FILE and NODE_FILE point to names.dmp and nodes.dmp
-#  files contained within:
-#  ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz
-
-NAME_FILE = path.join( tool_data, 'taxonomy', 'names.dmp' )
-NODE_FILE = path.join( tool_data, 'taxonomy', 'nodes.dmp' )
-
-g2n =  gi_name_to_sorted_list(in_f, gi_col, name_col)
-
-if len(g2n) == 0:
-    stop_err('No valid GI-containing fields. Please, check your column assignments.\n')
-
-tb_F = tempfile.NamedTemporaryFile('w')
-
-get_taxId( GI2TAX, collapse_repeating_gis( g2n ), tb_F.name )
-
-try:
-    tb_cmd = 'taxBuilder %s %s %s %s' % ( NAME_FILE, NODE_FILE, tb_F.name, out_f )
-    retcode = subprocess.call( tb_cmd, shell=True )
-    if retcode < 0:
-        print >>sys.stderr, "Execution of taxBuilder terminated by signal", -retcode
-except OSError, e:
-    print >>sys.stderr, "Execution of taxBuilder2tree failed:", e
--- a/tools/taxonomy/gi2taxonomy.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,102 +0,0 @@
-<tool id="Fetch Taxonomic Ranks" name="Fetch taxonomic representation" version="1.1.0">
-  <description></description>
-    <requirements>
-        <requirement type="package">taxonomy</requirement>
-    </requirements>
-  <command interpreter="python">gi2taxonomy.py $input $giField $idField $out_file1 ${GALAXY_DATA_INDEX_DIR}</command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Show taxonomic representation for"></param>
-    <param name="giField" label="GIs column" type="data_column" data_ref="input" numerical="True" help="select column containing GI numbers"/>
-    <param name="idField" label="Name column" type="data_column" data_ref="input" help="select column containing identifiers you want to include into output"/>
-  </inputs>
-  <outputs>
-    <data format="taxonomy" name="out_file1" />
-  </outputs>
-  <requirements>
-    <requirement type="binary">taxBuilder</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="input" ftype="tabular" value="taxonomy2gi-input.tabular"/>
-      <param name="giField" value="1"/>
-      <param name="idField" value="2"/>
-      <output name="out_file1" file="taxonomy2gi-output.tabular"/>
-    </test>
-  </tests>
-
-  <help>
-
-.. class:: infomark
-
-Use *Filter and Sort->Filter* to restrict output of this tool to desired taxonomic ranks. You can also use *Text Manipulation->Cut* to remove unwanted columns from the output.
-
-------
-
-**What it does**
-
-Fetches taxonomic information for a list of GI numbers (sequences identifiers used by the National Center for Biotechnology Information http://www.ncbi.nlm.nih.gov).
-
--------
-
-**Example**
-
-Suppose you have BLAST output that looks like this::
-  
-   +-----------------------+----------+----------+-----------------+------------+------+--------+
-   | queryId               | targetGI | identity | alignmentLength | mismatches | gaps | score  |
-   +-----------------------+----------+----------+-----------------+------------+------+--------+
-   | 1L_EYKX4VC01BXWX1_265 |  1430919 |    90.09 |             212 |         15 |    6 | 252.00 | 
-   +-----------------------+----------+----------+-----------------+------------+------+--------+
-
-and you want to obtain full taxonomic representation for GIs listed in *targetGI* column. If you set parameters as shown here:
-
-.. image:: ./static/images/fetchTax.png
-
-
-the tool will generate the following output (you may need to scroll sideways to see the entire line)::
-
-  1                     2    3    4         5       6 7 8        9        10            11       12 13               14       15         16          17        18  19  20 21  22  23           24 25
-  1L_EYKX4VC01BXWX1_265 9606 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n  Euarchontoglires Primates Haplorrhini Hominoidea Hominidae n   n   n  Homo n  Homo sapiens n  1430919
-
-In other words the tool printed the *Name column*, the taxonomy Id, appended *root* plus 21 columns of taxonomic ranks (superkingdom through subspecies), and added the *GI* as the last (25th) column. Below is a formal definition of the output columns::
-
-    Column Definition
-   ------- ------------------------------------------
-         1 Name (specified by 'Name column' dropdown)
-         2 Taxonomy Id
-         3 root
-         4 superkingdom
-         5 kingdom
-         6 subkingdom
-         7 superphylum
-         8 phylum
-         9 subphylum
-        10 superclass
-        11 class
-        12 subclass
-        13 superorder
-        14 order
-        15 suborder
-        16 superfamily
-        17 family
-        18 subfamily
-        19 tribe
-        20 subtribe
-        21 genus
-        22 subgenus
-        23 species
-        24 subspecies
-        25 GI   (specified by 'GI column' dropdown)
-
-------
-
-.. class:: warningmark
-
-**Why do I have these "n" things?** 
-
-Be aware that the NCBI taxonomy (ftp://ftp.ncbi.nih.gov/pub/taxonomy/) this tool relies upon is incomplete.  This means that for many species one or more ranks are absent and represented as "**n**". In the above example *subkingdom*, *superphylum* etc. are missing.
-
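-A note on performance: gi2taxonomy.py never loads the GI-to-TaxId table (over 100 million rows) into memory. Because both the table and the query list are sorted by GI, it streams through the table once, merge-joining as it goes. A stripped-down Python sketch of that join, with hypothetical names::
-
-    def merge_join(sorted_queries, sorted_table):
-        """Both inputs sorted by GI; yield (gi, taxid) for each query GI found."""
-        queries = iter(sorted_queries)
-        gi = next(queries, None)
-        for table_gi, taxid in sorted_table:
-            while gi is not None and gi &lt; table_gi:   # this query GI is absent
-                gi = next(queries, None)
-            if gi is None:
-                break
-            if gi == table_gi:
-                yield gi, taxid
-                gi = next(queries, None)
-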
-
-</help>
-</tool>
-
-
--- a/tools/taxonomy/lca.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,193 +0,0 @@
-#!/usr/bin/env python
-#Guruprasad Ananda
-"""
-Least Common Ancestor tool.
-"""
-import sys, string, re, commands, tempfile, random
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-def main():
-    try:
-        inputfile = sys.argv[1]
-        outfile = sys.argv[2]
-        rank_bound = int( sys.argv[3] )
-        """
-        Mapping of ranks:
-        root        :2, 
-        superkingdom:3, 
-        kingdom     :4, 
-        subkingdom  :5, 
-        superphylum :6, 
-        phylum      :7, 
-        subphylum   :8, 
-        superclass  :9, 
-        class       :10, 
-        subclass    :11, 
-        superorder  :12, 
-        order       :13, 
-        suborder    :14, 
-        superfamily :15,
-        family      :16,
-        subfamily   :17,
-        tribe       :18,
-        subtribe    :19,
-        genus       :20,
-        subgenus    :21,
-        species     :22,
-        subspecies  :23,
-        """
-    except:
-        stop_err("Syntax error: Use correct syntax: program infile outfile")
-    
-    fin = open(sys.argv[1],'r')
-    for j, line in enumerate( fin ):
-        elems = line.strip().split('\t')
-        if len(elems) < 24:
-            stop_err("The format of the input dataset is incorrect. Taxonomy datatype should contain at least 24 columns.")
-        if j > 30:
-            break
-        cols = range(1,len(elems))
-    fin.close()
-       
-    group_col = 0
-    tmpfile = tempfile.NamedTemporaryFile()
-
-    try:
-        """
-        The -k option for the Posix sort command is as follows:
-        -k, --key=POS1[,POS2]
-        start a key at POS1, end it at POS2 (origin 1)
-        In other words, column positions start at 1 rather than 0, so 
-        we need to add 1 to group_col.
-        if POS2 is not specified, the newer versions of sort will consider the entire line for sorting. To prevent this, we set POS2=POS1.
-        """
-        command_line = "sort -f -k " + str(group_col+1) +"," + str(group_col+1) + " -o " + tmpfile.name + " " + inputfile
-    except Exception, exc:
-        stop_err( 'Initialization error -> %s' %str(exc) )
-        
-    error_code, stdout = commands.getstatusoutput(command_line)
-    
-    if error_code != 0:
-        stop_err( "Sorting input dataset resulted in error: %s: %s" %( error_code, stdout ))    
-
-    prev_item = ""
-    prev_vals = []
-    remaining_vals = []
-    skipped_lines = 0
-    fout = open(outfile, "w")
-    block_valid = False
-    
-    
-    for ii, line in enumerate( file( tmpfile.name )):
-        if line and not line.startswith( '#' ) and len(line.split('\t')) >= 24: #Taxonomy datatype should have at least 24 columns
-            line = line.rstrip( '\r\n' )
-            try:
-                fields = line.split("\t")
-                item = fields[group_col]
-                if prev_item != "":
-                    # At this level, we're grouping on values (item and prev_item) in group_col
-                    if item == prev_item:
-                        # Keep iterating and storing values until a new value is encountered.
-                        if block_valid:
-                            for i, col in enumerate(cols):
-                                if col >= 3:
-                                    prev_vals[i].append(fields[col].strip())
-                                    if len(set(prev_vals[i])) > 1:
-                                        block_valid = False
-                                        break
-                            
-                    else:   
-                        """
-                        When a new value is encountered, write the previous value and the 
-                        corresponding aggregate values into the output file.  This works 
-                        due to the sort on group_col we've applied to the data above.
-                        """
-                        out_list = ['']*24
-                        out_list[0] = str(prev_item)
-                        out_list[1] = str(prev_vals[0][0])
-                        out_list[2] = str(prev_vals[1][0])
-                        
-                        for k, col in enumerate(cols):
-                            if col >= 3 and col < 24:
-                                if len(set(prev_vals[k])) == 1:
-                                    out_list[col] = prev_vals[k][0]
-                                else:
-                                    break
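-                        # k is left over from the loop above: pad this and every lower rank with 'n'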
-                        while k < 23:
-                            out_list[k+1] = 'n' 
-                            k += 1
-                        
-                        j = 0
-                        while True:
-                            try:
-                                out_list.append(str(prev_vals[23+j][0]))
-                                j += 1
-                            except:
-                                break
-                            
-                        if rank_bound == 0:     
-                            print >>fout, '\t'.join(out_list).strip()
-                        else:
-                            if ''.join(out_list[rank_bound:24]) != 'n'*( 24 - rank_bound ):
-                                print >>fout, '\t'.join(out_list).strip()
-                        
-                        block_valid = True
-                        prev_item = item   
-                        prev_vals = [] 
-                        for col in cols:
-                            val_list = []
-                            val_list.append(fields[col].strip())
-                            prev_vals.append(val_list)
-                        
-                else:
-                    # This only occurs once, right at the start of the iteration.
-                    block_valid = True
-                    prev_item = item    #groupby item
-                    for col in cols:    #everything else
-                        val_list = []
-                        val_list.append(fields[col].strip())
-                        prev_vals.append(val_list)
-            
-            except:
-                skipped_lines += 1
-        else:
-            skipped_lines += 1
-            
-    # Handle the last grouped value
-    out_list = ['']*24
-    out_list[0] = str(prev_item)
-    out_list[1] = str(prev_vals[0][0])
-    out_list[2] = str(prev_vals[1][0])
-    
-    for k, col in enumerate(cols):
-        if col >= 3 and col < 24:
-            if len(set(prev_vals[k])) == 1:
-                out_list[col] = prev_vals[k][0]
-            else:
-                break
-    while k < 23:
-        out_list[k+1] = 'n' 
-        k += 1
-    
-    j = 0
-    while True:
-        try:
-            out_list.append(str(prev_vals[23+j][0]))
-            j += 1
-        except:
-            break
-        
-    if rank_bound == 0:     
-        print >>fout, '\t'.join(out_list).strip()
-    else:
-        if ''.join(out_list[rank_bound:24]) != 'n'*( 24 - rank_bound ):
-            print >>fout, '\t'.join(out_list).strip()
-        
-    if skipped_lines > 0:
-        print "Skipped %d invalid lines." % ( skipped_lines )
-    
-if __name__ == "__main__":
-    main()
\ No newline at end of file
--- a/tools/taxonomy/lca.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,100 +0,0 @@
-<tool id="lca1" name="Find lowest diagnostic rank" version="1.0.1">
-  <description></description>
-    <requirements>
-        <requirement type="package">taxonomy</requirement>
-    </requirements>
-  <command interpreter="python">
-    lca.py $input1 $out_file1 $rank_bound
-  </command>
-  <inputs>
-    <param format="taxonomy" name="input1" type="data" label="for taxonomy dataset"/>
-    <param name="rank_bound" label="require the lowest rank to be at least" type="select">
-        <option value="0">No restriction</option>
-        <option value="3">Superkingdom</option>
-        <option value="4">Kingdom</option>
-        <option value="5">Subkingdom</option>
-        <option value="6">Superphylum</option>
-        <option value="7">Phylum</option>
-        <option value="8">Subphylum</option>
-        <option value="9">Superclass</option>
-        <option value="10">Class</option>
-        <option value="11">Subclass</option>
-        <option value="12">Superorder</option>
-        <option value="13">Order</option>
-        <option value="14">Suborder</option>
-        <option value="15">Superfamily</option>
-        <option value="16">Family</option>
-        <option value="17">Subfamily</option>
-        <option value="18">Tribe</option>
-        <option value="19">Subtribe</option>
-        <option value="20">Genus</option>
-        <option value="21">Subgenus</option>
-        <option value="22">Species</option>
-        <option value="23">Subspecies</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="taxonomy" name="out_file1" metadata_source="input1" />
-  </outputs>
-  <tests>
-     <test>
-          <param name="input1" value="lca_input.taxonomy" ftype="taxonomy"/>
-          <param name="rank_bound" value="0" />
-          <output name="out_file1" file="lca_output.taxonomy" ftype="taxonomy"/>
-     </test> 
-     <test>
-          <param name="input1" value="lca_input2.taxonomy" ftype="taxonomy"/>
-          <param name="rank_bound" value="7" />
-          <output name="out_file1" file="lca_output2.taxonomy" ftype="taxonomy"/>
-     </test> 
-     
-     <!--Test case with invalid lines -->
-     <test>
-          <param name="input1" value="lca_input3.taxonomy" ftype="taxonomy"/>
-          <param name="rank_bound" value="10" />
-          <output name="out_file1" file="lca_output3.taxonomy" ftype="taxonomy"/>
-     </test> 
- </tests>
-
- <help>
-
-**What it does**
-
-This tool identifies the lowest taxonomic rank for which a metagenomic sequencing read is diagnostic. It takes datasets produced by the *Fetch Taxonomic Ranks* tool (i.e., the Taxonomy format) as input.
-
--------
-
-**Example**
-
-Suppose you have two reads, **read_1** and **read_2**, with the following taxonomic profiles (scroll sideways to see the entire dataset)::
-  
-    read_1 1 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 genus1 subgenus1 species1 subspecies1
-    read_1 2 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 genus2 subgenus2 species2 subspecies2
-    read_2 3 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum3 subphylum3 superclass3 class3 subclass3 superorder3 order3 suborder3 superfamily3 family3 subfamily3 tribe3 subtribe3 genus3 subgenus3 species3 subspecies3
-    read_2 4 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum4 subphylum4 superclass4 class4 subclass4 superorder4 order4 suborder4 superfamily4 family4 subfamily4 tribe4 subtribe4 genus4 subgenus4 species4 subspecies4
-
-For **read_1** taxonomic labels are consistent until the genus level, where the taxonomy splits into two branches, one ending with *subspecies1* and the other with *subspecies2*. This implies **that the lowest taxonomic rank read_1 can identify is SUBTRIBE**.  Similarly, read_2 is diagnostic up until the **superphylum** level.  As a result the output of this tool will be::
-
-    read_1 2 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 n n n n
-    read_2 3 root superkingdom1 kingdom1 subkingdom1 superphylum1 n       n          n           n      n         n           n      n         n            n       n          n      n         n n n n
-    
-where, **n** means *EMPTY*. 
-
---------
-
-**What's up with the drop down?**
-
-Why do we need the *require the lowest rank to be at least* dropdown?  Let's look at the above example again. Suppose you need to find only those reads that are diagnostic at least at the phylum level. To do this you need to set *require the lowest rank to be at least* to **phylum**. As a result your output will look like this::
-
-    read_1 2 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 n n n n
-
-.. class:: infomark
-    
-Note that **read_2** is now omitted as it matches two phyla (**phylum3** and **phylum4**) and therefore is not diagnostic (but rather cosmopolitan) at the *phylum* level.
-
-
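-Conceptually, for each read the tool keeps the run of ranks (from root down) on which all of the read's taxonomy lines agree, and blanks every rank below the first disagreement with *n*. A toy Python sketch of that reduction, with hypothetical names::
-
-    def lowest_diagnostic(rank_rows):
-        """rank_rows: equal-length rank-name lists for one read, root first."""
-        out = []
-        for names in zip(*rank_rows):            # walk ranks from root downward
-            if len(set(names)) == 1:
-                out.append(names[0])
-            else:                                # first disagreement: blank the rest
-                out.extend(['n'] * (len(rank_rows[0]) - len(out)))
-                break
-        return out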
-
-
-
-</help>
-</tool>
--- a/tools/taxonomy/poisson2test.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,124 +0,0 @@
-#!/usr/local/bin/python
-
-import sys
-from math import *
-from rpy import *
-
-
-if ((len(sys.argv)-1) != 6):
-    print 'wrong number of parameters'
-    print 'usage: inputfile, col1, col2, d-value(not 0), p-val correction method(0 or 1), outputfile'
-    sys.exit()
-    
-try:
-    lines_arr = open(sys.argv[1]).readlines()
-except IOError:
-    print 'cannot open', sys.argv[1]
-    sys.exit()  
- 
-try:
-    i = int(sys.argv[2]) #first column to compare
-    j = int(sys.argv[3]) #second column to compare
-    d = float(sys.argv[4]) #correction factor
-    k = int(sys.argv[5]) #p-val correction method
-    outfile = open(sys.argv[6],'w') # output data
-    
-    if (i>j):
-        print 'column order not correct col1 < col2'
-        print 'usage: inputfile, col1, col2, d-value, p-val correction method'
-        sys.exit()      
-        
-    try:
-        a = 1 / d
-        assert k in [0,1]
-    except ZeroDivisionError:
-        print 'd cannot be 0'
-        print 'usage: inputfile, col1, col2, d-value, p-val correction method'
-        sys.exit()
-    except:
-        print ' p-val correction should be 0 or 1 (0 = "bonferroni", 1 = "fdr")'
-        print 'usage: inputfile, col1, col2, d-value, p-val correction method'
-        sys.exit()
-except ValueError:
-    print 'parameters are not integers'
-    print 'usage: inputfile, col1, col2, d-value, p-val correction method'
-    sys.exit()
-   
-
-fsize = len(lines_arr)
-
-z1 = []
-z2 = []
-pz1 = []
-pz2 = []
-field = []
-
-if d<1: # Z score calculation
-    for line in lines_arr:
-        line.strip()
-        field = line.split('\t')
-        
-        x = int(field[j-1]) #input column 2
-        y = int(field[i-1]) #input column 1
-        if y>x:
-            z1.append(float((y - ((1/d)*x))/sqrt((1/d)*(x + y))))
-            z2.append(float((2*(sqrt(y+(3.0/8))-sqrt((1/d)*(x+(3.0/8)))))/sqrt(1+(1/d)))) # 3.0/8 avoids Python 2 integer division (3/8 == 0)
-        else:
-            tmp_var1 = x
-            x = y
-            y = tmp_var1
-            z1.append(float((y - (d*x))/sqrt(d*(x + y))))
-            z2.append(float((2*(sqrt(y+(3.0/8))-sqrt(d*(x+(3.0/8)))))/sqrt(1+d)))
-            
-else: #d>1 Z score calculation
-    for line in lines_arr:
-        line.strip()
-        field = line.split('\t')
-        x = int(field[i-1]) #input column 1
-        y = int(field[j-1]) #input column 2
-        
-        if y>x:
-            z1.append(float((y - (d*x))/sqrt(d*(x + y))))
-            z2.append(float((2*(sqrt(y+(3.0/8))-sqrt(d*(x+(3.0/8)))))/sqrt(1+d)))
-        else:
-            tmp_var2 = x
-            x = y
-            y = tmp_var2
-            z1.append(float((y - ((1/d)*x))/sqrt((1/d)*(x + y))))
-            z2.append(float((2*(sqrt(y+(3.0/8))-sqrt((1/d)*(x+(3.0/8)))))/sqrt(1+(1/d))))
-        
-  
-   
-
-
-# P-value calculation for z1 and z2
-for p in z1:
-    
-    pz1.append(float(r.pnorm(-abs(float(p)))))
-
-for q in z2:
-    
-    pz2.append(float(r.pnorm(-abs(float(q)))))    
-
-# P-value correction for pz1 and pz2
-
-if k == 0:
-    corrz1 = r.p_adjust(pz1,"bonferroni",fsize)
-    corrz2 = r.p_adjust(pz2,"bonferroni",fsize)
-  
-   
-else:
-  
-    corrz1 = r.p_adjust(pz1,"fdr",fsize)
-    corrz2 = r.p_adjust(pz2,"fdr",fsize)
-    
-
-#printing all columns
-for n in range(fsize):
-    print >> outfile, "%s\t%4.3f\t%4.3f\t%8.6f\t%8.6f\t%8.6f\t%8.6f" %(lines_arr[n].strip(),z1[n],z2[n],pz1[n],pz2[n],corrz1[n],corrz2[n])
-
-
-      
-      
-      
-          
--- a/tools/taxonomy/poisson2test.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,113 +0,0 @@
-<tool id="poisson2test" name="Poisson two-sample test" version="1.0.0">
-  <description></description>
-    <requirements>
-        <requirement type="package">taxonomy</requirement>
-    </requirements>
-  <command interpreter="python">poisson2test.py $input1 $input2 $input3 $input4 $input5 $output1 2>/dev/null </command>
-  <inputs>
-    <param name="input1" format="tabular" type="data" label="Input File"/>
-    <param name="input2" type="integer" size="5" value="2" label="First Column"/>
-    <param name="input3" type="integer" size="5" value="3" label="Second Column"/>
-    <param name="input4" type="float" size="5" value="1" label="D value"/>
-    <param name="input5" type="select" label="correction method">
-        <option value="0">Bonferroni</option>
-        <option value="1">FDR</option>
-    </param> 
-  </inputs>
-  <outputs>
-    <data format="tabular" name="output1" />
-  </outputs> 
-  <tests>
-    <test>
-        <param name="input1" value="poisson2test1.tabular" ftype="tabular"/>
-        <param name="input2" value="2" />
-        <param name="input3" value="3" />
-        <param name="input4" value="0.44" />
-        <param name="input5" value="0" />
-        <output name="output1" file="poisson2test1_out.tabular" />    
-    </test>
-    <test>
-        <param name="input1" value="poisson2test2.tabular" ftype="tabular"/>
-        <param name="input2" value="2" />
-        <param name="input3" value="3" />
-        <param name="input4" value="0.44" />
-        <param name="input5" value="0" />
-        <output name="output1" file="poisson2test2_out.tabular" />    
-    </test>    
-  </tests>
-  <help>
-
-**What it does**
-
-Suppose you have metagenomic samples from two different locations and have classified the reads unique to various taxa. Now you want to test whether the number of reads that fall in a particular taxon in location 1 differs from the number that fall in the same taxon in location 2.
-This utility performs this analysis. It assumes that the data come from a Poisson process and calculates two Z scores (Z1 and Z2) based on the work of Shiue and Bain, 1982 (Z1) and Huffman, 1984 (Z2).
-
------
-
-**Z score formula**
-
-Equation 1:
-
-.. image:: ./static/images/poisson2test_eqn1.png 
-
- 
-Equation 2:
-
-.. image:: ./static/images/poisson2test_eqn2.png
-
-
-X = number of reads falling in a particular taxon in location 1
- 
-Y = number of reads falling in the same taxon in location 2
- 
-d = correction factor that accounts for biases in sample collection, DNA concentration, read numbers etc. between the two locations. 
-
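-In code form (mirroring poisson2test.py above for the d >= 1, y > x branch; a sketch using only the Python standard library)::
-
-    from math import sqrt, erfc
-
-    def poisson_two_sample(x, y, d):
-        """Return (Z1, p1) and (Z2, p2); p-values are P(Z &lt; -|z|), Z standard normal."""
-        z1 = (y - d * x) / sqrt(d * (x + y))
-        z2 = 2 * (sqrt(y + 3.0 / 8) - sqrt(d * (x + 3.0 / 8))) / sqrt(1 + d)
-        pnorm = lambda z: 0.5 * erfc(abs(z) / sqrt(2))   # equals R's pnorm(-abs(z))
-        return (z1, pnorm(z1)), (z2, pnorm(z2))
-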
-The utility also reports the corresponding p-values and corrected p-values (using Bonferroni or False Discovery Rate (FDR) correction). As input it takes a tab-delimited file with three or more columns (taxon/category, read counts in location 1, read counts in location 2), the two columns to compare, a d value, and a correction method: 0 (Bonferroni) or 1 (FDR).
-
------
-
-**Example**
-
-- Input File: phylum, read count in location-1, read count in location-2::
-
-    Annelida            36     2
-    Apicomplexa         17     8
-    Arthropoda        1964   928
-    Ascomycota         436    49
-    Basidiomycota       77    55
-
-- Arguments to be supplied by the user::
-
-    col_i   col_j   d-value    correction-method
-    
-    2       3       0.44       Bonferroni
-
-- Output File: phylum, readcount1, readcount2, z1, z2, p1, p2, corrected p1, corrected p2::
-
-    Annelida            36     2   3.385   4.276  0.000356  0.000010  0.00463  0.00012
-    Apicomplexa         17     8  -0.157  -0.156  0.437707  0.438103  1.00000  1.00000
-    Arthropoda        1964   928  -1.790  -1.777  0.036755  0.037744  0.47782  0.49067
-    Ascomycota         436    49   9.778  11.418  0.000000  0.000000  0.00000  0.00000
-    Basidiomycota       77    55  -2.771  -2.659  0.002792  0.003916  0.03629  0.05091
-
------
-
-**Note**
-
-- Input file should be Tab delimited
-- i &lt; j
-- d cannot be 0
-- k = Bonferroni or FDR
-
------
-
-**References**
-
-- Shiue, W. and Bain, L. (1982). Experiment Size and Power Comparisons for Two-Sample Poisson Tests. Applied Statistics 31, 130-134.
-
-- Huffman, M. D. (1984). An Improved Approximate Two-Sample Poisson Test. Applied Statistics 33, 224-226.
-
-  </help>
-</tool>
-
-
--- a/tools/taxonomy/t2ps_wrapper.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,67 +0,0 @@
-"""
-Wrapper for tree2PS-fast
-Requires ps2pdf (a part of ghostscript package) to be installed
-
-t2ps_wrapper.py <taxonomy file> <output PDF file> <max_tree_level> <font_size> <max_leaves> <count_duplicate_tax_id>
-
-    taxonomy file    - tree file produced by the taxonomy2tree program written by Sergei Kosakovsky Pond
-    output PDF file  - tree image
-    max_tree_level   - integer from 0 to 21; 0 = show all levels
-    font_size        - integer from 2 to 255 (8 is the best)
-    max_leaves       - integer from 0 to infinity (0 = show all)
-    count_duplicate  - 0 (do not count) or 1 (count)
-    
-anton nekrutenko | anton@bx.psu.edu
-tree2PS-fast is written by Sergei Kosakovsky Pond | sergeilkp@mac.com
-"""
-
-import string, sys, tempfile, subprocess
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-
-try:
-    tree_file = sys.argv[1]
-    pdf_file  = sys.argv[2]
-    max_tree_level = sys.argv[3]
-    font_size = sys.argv[4]
-    max_leaves = sys.argv[5]
-    dups = sys.argv[6]
-except:
-    stop_err('Check arguments\n')
-
-newick_file = tempfile.NamedTemporaryFile('w')    
-ps_file = tempfile.NamedTemporaryFile('w')
-
-# Execute taxonomy2tree
-    
-try:
-    t2t_cmd = 'taxonomy2tree %s %s %s /dev/null 1 > /dev/null 2>&1' % ( tree_file, max_tree_level, newick_file.name )
-    retcode = subprocess.call( t2t_cmd, shell=True )
-    if retcode < 0:
-        print >>sys.stderr, "Execution of taxonomy2tree terminated by signal", -retcode
-except OSError, e:
-    print >>sys.stderr, "Execution of taxonomy2tree failed:", e
-
-
-# Execute tree2PS-fast
-    
-try:
-    t2ps_cmd = 'tree2PS-fast %s %s %s %s %s %s' % ( newick_file.name, ps_file.name, max_tree_level, font_size, max_leaves, dups )
-    retcode = subprocess.call( t2ps_cmd, shell=True )
-    if retcode < 0:
-        print >>sys.stderr, "Execution of tree2PS-fast terminated by signal", -retcode
-except OSError, e:
-    print >>sys.stderr, "Execution of tree2PS-fast failed:", e
-    
-# Convert PS to PDF
-
-try:
-    ps2pdf_cmd = 'ps2pdf %s %s' % ( ps_file.name, pdf_file )
-    retcode = subprocess.call( ps2pdf_cmd, shell=True )
-    if retcode < 0:
-        print >>sys.stderr, "Execution of ps2pdf terminated by signal", -retcode
-except OSError, e:
-    print >>sys.stderr, "Execution of ps2pdf failed:", e
--- a/tools/taxonomy/t2ps_wrapper.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,129 +0,0 @@
-<tool id="Draw_phylogram" name="Draw phylogeny" version="1.0.0">
-  <description></description>
-    <requirements>
-        <requirement type="package">taxonomy</requirement>
-    </requirements>
-  <command interpreter="python">t2ps_wrapper.py $input $out_file1 $max_tree_level $font_size $max_leaves 1</command>
-  <inputs>
-    <param format="taxonomy" name="input" type="data" label="Draw phylogram for"></param>
-    <param name="max_tree_level" label="show ranks from root to" type="select" help="Choosing to show the entire tree may produce a very large PDF file that can overwhelm your viewer">
-        <option value="8">Class</option> 
-        <option value="0">Show entire tree</option>
-        <option value="1">Superkingdom</option>
-        <option value="2">Kingdom</option>
-        <option value="3">Subkingdom</option>
-        <option value="4">Superphylum</option>
-        <option value="5">Phylum</option>
-        <option value="6">Subphylum</option>
-        <option value="7">Superclass</option>
-        <option value="9">Subclass</option>
-        <option value="10">Superorder</option>
-        <option value="11">Order</option>
-        <option value="12">Suborder</option>
-        <option value="13">Superfamily</option>
-        <option value="14">Family</option>
-        <option value="15">Subfamily</option>
-        <option value="16">Tribe</option>
-        <option value="17">Subtribe</option>
-        <option value="18">Genus</option>
-        <option value="19">Subgenus</option>
-        <option value="20">Species</option>
-        <option value="21">Subspecies</option>
-    </param>
-    <param name="font_size" type="select" label="select font size">
-        <option value="8">Normal</option>
-        <option value="4">Tiny</option>
-        <option value="12">Large</option>
-    </param>
-    <param name="max_leaves" type="text" size="5" value="0" label="maximum number of leaves" help="set to 0 to show all"/>
-  </inputs>
-  <outputs>
-    <data format="pdf" name="out_file1" />
-  </outputs>
-  <requirements>
-    <requirement type="binary">tree2PS-fast</requirement>
-  </requirements>
-  <help>
-
-**What it does**
-
-Given a taxonomy representation (produced by the *Taxonomy manipulation->Fetch Taxonomic Ranks* tool), this utility produces a graphical representation of a phylogenetic tree in PDF format.
-
---------
-
-**Example 1: Fake data**
-
-Suppose you have the following dataset::
-
-    Species_1 1 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 genus1 subgenus1 species1 subspecies1
-    Species_2 2 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 genus2 subgenus2 species2 subspecies2
-    Species_3 3 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum3 subphylum3 superclass3 class3 subclass3 superorder3 order3 suborder3 superfamily3 family3 subfamily3 tribe3 subtribe3 genus3 subgenus3 species3 subspecies3
-    Species_4 4 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum4 subphylum4 superclass4 class4 subclass4 superorder4 order4 suborder4 superfamily4 family4 subfamily4 tribe4 subtribe4 genus4 subgenus4 species4 subspecies4
-
-Drawing the tree with default parameters (without changing anything in the interface) will produce this tree:
-
-.. image:: ./static/images/t2ps_ideal.png 
-   :width: 500
-
-(for explanation of colors and numbers on the tree scroll to the bottom of this help section)
-
-Here the *Class* rank represents the terminal nodes (leaves) of the tree because it is the default setting of the "*show ranks from root to*" drop-down.  Changing the drop-down to "*Subspecies*" will produce this:
-
-.. image:: ./static/images/t2ps_ideal_ssp.png 
-   :width: 1000
-
---------
-
-**Example 2: Fake data with missing nodes**
-
-Real taxonomic datasets almost always contain empty nodes.  These are represented with "**n**" as shown below::
-
-    Species_1 1 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 genus1 subgenus1 species1 subspecies1
-    Species_2 2 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 genus2 n         species2 subspecies2
-    Species_3 3 root superkingdom1 kingdom1 subkingdom1 superphylum1 n       subphylum3 superclass3 class3 subclass3 superorder3 order3 suborder3 superfamily3 family3 subfamily3 tribe3 subtribe3 genus3 subgenus3 species3 subspecies3
-    Species_4 4 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum4 subphylum4 superclass4 class4 subclass4 superorder4 order4 suborder4 superfamily4 family4 subfamily4 tribe4 subtribe4 genus4 subgenus4 species4 subspecies4
-    
-(here *phylum* for Species_3 and *subgenus* for Species_2 are unassigned)
-
-A full tree for this dataset will look like this:
-
-.. image:: ./static/images/t2ps_missing_nodes.png 
-   :width: 1000
-
-Missing nodes are simply omitted from the tree (there are no gray boxes corresponding to "n"), but the branch length is maintained so that taxa belonging to the same taxonomic rank are always aligned with each other.
-
---------
-
-**Autoscaling the tree**
-
-You can use the "*maximum number of leaves*" option to restrict the tree to a specified number of leaves (external nodes).  Using the following settings on the above dataset (note that *show ranks from root to* is set to *show entire tree* and *maximum number of leaves* is set to *3*):
-
-.. image:: ./static/images/t2ps_autoscale.png 
-
-will produce this tree:
-
-.. image:: ./static/images/t2ps_autoscale_tree.png 
-   :width: 1000
-
-Here the tree is automatically trimmed at the taxonomic rank that leaves only 3 outer nodes.  This is very useful for an initial evaluation of very large trees, where you may want to see only, say, 1,000 outer nodes at once.
-
--------
-
-**Explanation of phylogenetic tree markup** 
-
-Branches of the tree are colored according to the heatmap below.  The "bluer" the branch, the fewer leaves it leads to, and vice versa.
-
-.. image:: ./static/images/t2ps_heatmap.png 
-
-Each node is labeled with its taxonomic name and the number of tree leaves belonging to that taxonomic group:
-
-.. image:: ./static/images/t2ps_node_label.png 
-
-
-
-
-
-  </help>
-</tool>
-
-
--- a/tools/taxonomy/t2t_report.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-<tool id="t2t_report" name="Summarize taxonomy" version="1.0.0">
-    <description></description>
-    <requirements>
-        <requirement type="package">taxonomy</requirement>
-    </requirements>
-    <command>taxonomy2tree $input 0 /dev/null $out_file1 0</command>
-    <inputs>
-        <param format="taxonomy" name="input" type="data" label="Summarize taxonomic representation for"/>
-    </inputs>
-    <outputs>
-        <data format="tabular" name="out_file1" />
-    </outputs>
-  <requirements>
-    <requirement type="binary">taxonomy2tree</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="input" value="taxonomyGI.taxonomy" ftype="taxonomy"/>
-      <output name="out_file1" file="t2t_report.tabular"/>
-    </test>
-  </tests>
-
-    
-<help>
-
-**What it does**
-
-Given a taxonomy representation (produced by the *Taxonomy manipulation->Fetch Taxonomic Ranks* tool), this utility computes a summary of all taxonomic ranks.
-
-------
-
-**Example**
-
-Suppose the *Taxonomy manipulation->Fetch Taxonomic Ranks* generated the following taxonomy representation::
-
-    9916 2     root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Laurasiatheria   n        Ruminantia  n          Bovidae   Bovinae n n Bos  n Bos taurus   n
-    9606 12585 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Primates Haplorrhini Hominoidea Hominidae n       n n Homo n Homo sapiens n
-
-Running this tool will generate the following output::
-    
-    Rank         Rank Name          Count
-    -------------------------------------
-    root         root               2
-    superkingdom Eukaryota          2
-    kingdom      Metazoa            2
-    phylum       Chordata           2
-    subphylum    Craniata           2
-    superclass   Gnathostomata      2
-    class        Mammalia           2
-    superorder   Euarchontoglires   1
-    superorder   Laurasiatheria     1
-    order        Primates           1
-    suborder     Haplorrhini        1
-    suborder     Ruminantia         1
-    superfamily  Hominoidea         1
-    family       Bovidae            1
-    family       Hominidae          1
-    subfamily    Bovinae            1
-    genus        Bos                1
-    genus        Homo               1
-    species      Bos taurus         1
-    species      Homo sapiens       1
-    
-The output is sorted on Rank and then on Rank Name.  
-
-.. class:: warningmark
-
-**Note** that this tool omits the "**n**" entries corresponding to ranks missing from the NCBI taxonomy. In the above example *Homo sapiens* has an order name (Primates) while *Bos taurus* does not.
-
-
-</help>
-</tool>
--- a/tools/tool_conf.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,701 +0,0 @@
-<?xml version="1.0"?>
-<toolbox>
-<label text="Sharp lab tools" id="sharplab" />
-  <section name="Single interval manipulation" id="singleinterval">
-    <tool file="mytools/bedclean.xml"/>
-    <tool file="mytools/bedsort.xml"/>
-    <tool file="mytools/collapseBed.xml" />
-        <tool file="mytools/makewindow.xml" />
-    <tool file="mytools/resize.xml" />
-    <tool file="mytools/random_interval.xml"/>
-    <tool file="mytools/shuffleBed.xml"/>
-    <tool file="mytools/genomeView.xml"/>
-  </section>
-    <section name="Meta-Analysis" id="xuebing">
-    <tool file="mytools/genomeView.xml"/>
-    <tool file="mytools/intersectbed.xml"/>
-    <tool file="mytools/closestBed.xml"/>
-    <tool file="mytools/spatial_proximity.xml"/>
-    <tool file="mytools/bwBinavg.xml"/>
-    <tool file="mytools/metaintv.xml" />
-    <tool file="mytools/metaintv_ext.xml" />
-    <tool file="mytools/alignr.xml" />
-    <tool file="mytools/align2multiple.xml" />
-    <tool file="mytools/align2database.xml" />
-    <tool file="mytools/intersectSig.xml" />
-    <tool file="mytools/bigWigAverageOverBed.xml" />
-    <tool file="mytools/endbias.xml" />
-  </section>
-    <section name="Statistics/Visualization" id="sharpvis">
-    <tool file="mytools/genomeView.xml"/>
-    <tool file="mytools/intervalSize.xml" />
-    <tool file="mytools/intersectSig.xml" />
-    <tool file="mytools/cdf.xml" />
-    <tool file="mytools/binaverage.xml" />
-    <tool file="mytools/alignvis.xml" />
-    <tool file="mytools/plotmatrix.xml" />
-    <tool file="mytools/venn.xml"/>
-  </section>
-  <section name="Text/Format manipulation" id="sharptext">
-     <tool file="mytools/collapseTab.xml" />
-     <tool file="mytools/fastqdump.xml" />
-    <tool file="mytools/bowtie2bed.xml" />
-    <tool file="mytools/sampline.xml" />
-    <tool file="mytools/headtail.xml" />
-    <tool file="mytools/convertEnsembl.xml" />
-    <tool file="mytools/removeDuplicate.xml" />
-    <tool file="mytools/bed_to_bam.xml" />
-     <tool file="mytools/makebigwig.xml" />
-   
-  </section>
-    <section name="Sequence/Motif" id="sharpsequence">
-    <tool file="extract/extract_genomic_dna.xml" />
-    <tool file="mytools/revcompl.xml"  />
-    <tool file="mytools/fastashuffle1.xml"  />
-    <tool file="mytools/fastashuffle2.xml"  />
-    <tool file="mytools/iupac2meme.xml" />
-    <tool file="mytools/seq2meme.xml" />
-    <tool file="mytools/memelogo.xml" />    
-    <tool file="mytools/fastamarkov.xml" />
-    <tool file="mytools/meme.xml"/>
-    <tool file="mytools/dreme.xml"/>    
-    <tool file="mytools/fimo2.xml"/>
-        <tool file="mytools/fimo2bed.xml"/>
-    <tool file="rgenetics/rgWebLogo3.xml" />
-    <tool file="mytools/splicesite.xml" />
-  </section>
-  
-    <section name="Conservation/Other scores" id="score">
-    <tool file="mytools/phastCons.xml"  />
-  </section>
-
-<label text="selected tools" id="selectedtools" />
-  <section name="Get Data" id="sharpgetext">
-    <tool file="data_source/upload.xml"/>
-    <tool file="data_source/ucsc_tablebrowser.xml" />
-    <tool file="data_source/biomart.xml" />
-  </section>
-  <section name="Operate on Genomic Intervals" id="sharpbxops">
-    <tool file="new_operations/intersect.xml" />
-    <tool file="new_operations/subtract.xml" />
-    <tool file="new_operations/merge.xml" />
-    <tool file="new_operations/concat.xml" />
-    
-    <tool file="mytools/closestBed.xml" />
-    <tool file="mytools/flankBed.xml" />
-    <tool file="mytools/shuffleBed.xml" />
-    <tool file="mytools/sortBed.xml" />
-    
-    <tool file="new_operations/basecoverage.xml" />
-    <tool file="new_operations/coverage.xml" />
-    <tool file="new_operations/complement.xml" />
-    <tool file="new_operations/cluster.xml" id="cluster" />
-    <tool file="new_operations/join.xml" />
-    <tool file="new_operations/get_flanks.xml" />
-    <tool file="new_operations/flanking_features.xml" />
-    <tool file="annotation_profiler/annotation_profiler.xml" />
-  </section>  
-    <section name="Unix Tools" id="unix_tools">
-    <tool file="unix_tools/awk_tool.xml" />
-    <tool file="unix_tools/sed_tool.xml" />
-    <tool file="unix_tools/grep_tool.xml" />
-    <tool file="unix_tools/sort_tool.xml" />
-    <tool file="unix_tools/uniq_tool.xml" />
-    <tool file="unix_tools/cut_tool.xml" />
-    <tool file="unix_tools/join_tool.xml" />
-    <tool file="unix_tools/word_list_grep.xml" />
-    <tool file="unix_tools/remove_ending.xml" />
-    <tool file="unix_tools/find_and_replace.xml" />
-  </section>
-  <section name="Lift-Over" id="liftOver">
-    <tool file="extract/liftOver_wrapper.xml" />
-  </section>
-  <section name="Text Manipulation" id="textutil">
-    <tool file="filters/fixedValueColumn.xml" />
-    <tool file="stats/column_maker.xml" />
-    <tool file="filters/catWrapper.xml" />
-    <tool file="filters/cutWrapper.xml" />
-    <tool file="filters/mergeCols.xml" />
-    <tool file="filters/convert_characters.xml" />
-    <tool file="filters/CreateInterval.xml" />
-    <tool file="filters/cutWrapper.xml" />
-    <tool file="filters/changeCase.xml" />
-    <tool file="filters/pasteWrapper.xml" />
-    <tool file="filters/remove_beginning.xml" />
-    <tool file="filters/randomlines.xml" />
-    <tool file="filters/headWrapper.xml" />
-    <tool file="filters/tailWrapper.xml" />
-    <tool file="filters/trimmer.xml" />
-    <tool file="filters/wc_gnu.xml" />
-    <tool file="stats/dna_filtering.xml" />
-    <tool file="new_operations/tables_arithmetic_operations.xml" />
-  </section>
-  <section name="Filter and Sort" id="filter">
-    <tool file="stats/filtering.xml" />
-    <tool file="filters/sorter.xml" />
-    <tool file="filters/grep.xml" />
-    <label text="GFF" id="gff" />
-        <tool file="filters/gff/extract_GFF_Features.xml" />
-        <tool file="filters/gff/gff_filter_by_attribute.xml" />
-        <tool file="filters/gff/gff_filter_by_feature_count.xml" />
-        <tool file="filters/gff/gtf_filter_by_attribute_values_list.xml" />
-  </section>
-  <section name="Join, Subtract and Group" id="group">
-    <tool file="filters/joiner.xml" />
-    <tool file="filters/compare.xml"/>
-    <tool file="new_operations/subtract_query.xml"/>
-    <tool file="stats/grouping.xml" />
-    <tool file="new_operations/column_join.xml" />
-  </section>
-  <section name="Convert Formats" id="convert">
-    <tool file="filters/axt_to_concat_fasta.xml" />
-    <tool file="filters/axt_to_fasta.xml" />
-    <tool file="filters/axt_to_lav.xml" />
-    <tool file="filters/bed2gff.xml" />
-    <tool file="fasta_tools/fasta_to_tabular.xml" />
-    <tool file="filters/gff2bed.xml" />
-    <tool file="filters/lav_to_bed.xml" />
-    <tool file="maf/maf_to_bed.xml" />
-    <tool file="maf/maf_to_interval.xml" />
-    <tool file="maf/maf_to_fasta.xml" />
-    <tool file="fasta_tools/tabular_to_fasta.xml" />
-    <tool file="fastq/fastq_to_fasta.xml" />
-    <tool file="filters/wiggle_to_simple.xml" />
-    <tool file="filters/sff_extractor.xml" />
-    <tool file="filters/gtf2bedgraph.xml" />
-    <tool file="filters/wig_to_bigwig.xml" />
-    <tool file="filters/bed_to_bigbed.xml" />
-    <tool file="samtools/sam_to_bam.xml" />
-    <tool file="samtools/bam_to_sam.xml" />
-    <tool file="mytools/bedToBam.xml" />
-    <tool file="mytools/bamToBed.xml" />
-  </section>
-
-
-
-  <section name="FASTA manipulation" id="fasta_manipulation">
-    <tool file="fasta_tools/fasta_compute_length.xml" />
-    <tool file="fasta_tools/fasta_filter_by_length.xml" />
-    <tool file="fasta_tools/fasta_concatenate_by_species.xml" />
-    <tool file="fasta_tools/fasta_to_tabular.xml" />
-    <tool file="fasta_tools/tabular_to_fasta.xml" />
-    <tool file="fastx_toolkit/fasta_formatter.xml" />
-    <tool file="fastx_toolkit/fasta_nucleotide_changer.xml" />
-    <tool file="fastx_toolkit/fastx_collapser.xml" />
-  </section>
-  <section name="NGS: QC and manipulation" id="NGS_QC">
-   <label text="FastQC: fastq/sam/bam" id="fastqcsambam" />
-    <tool file="rgenetics/rgFastQC.xml" />
-   <label text="Illumina fastq" id="illumina" />
-    <tool file="fastq/fastq_groomer.xml" />
-    <tool file="fastq/fastq_paired_end_splitter.xml" />
-    <tool file="fastq/fastq_paired_end_joiner.xml" />
-    <tool file="fastq/fastq_stats.xml" />
-   <label text="Roche-454 data" id="454" />
-    <tool file="metag_tools/short_reads_figure_score.xml" />
-    <tool file="metag_tools/short_reads_trim_seq.xml" />
-    <tool file="fastq/fastq_combiner.xml" />
-   <label text="AB-SOLiD data" id="solid" />
-    <tool file="next_gen_conversion/solid2fastq.xml" />
-    <tool file="solid_tools/solid_qual_stats.xml" />
-    <tool file="solid_tools/solid_qual_boxplot.xml" />
-   <label text="Generic FASTQ manipulation" id="generic_fastq" />
-    <tool file="fastq/fastq_filter.xml" />
-    <tool file="fastq/fastq_trimmer.xml" />
-    <tool file="fastq/fastq_trimmer_by_quality.xml" />
-    <tool file="fastq/fastq_masker_by_quality.xml" />
-    <tool file="fastq/fastq_paired_end_interlacer.xml" />
-    <tool file="fastq/fastq_paired_end_deinterlacer.xml" />
-    <tool file="fastq/fastq_manipulation.xml" />
-    <tool file="fastq/fastq_to_fasta.xml" />
-    <tool file="fastq/fastq_to_tabular.xml" />
-    <tool file="fastq/tabular_to_fastq.xml" />
-   <label text="FASTX-Toolkit for FASTQ data" id="fastx_toolkit" />
-    <tool file="fastx_toolkit/fastq_quality_converter.xml" />
-    <tool file="fastx_toolkit/fastx_quality_statistics.xml" />
-    <tool file="fastx_toolkit/fastq_quality_boxplot.xml" />
-    <tool file="fastx_toolkit/fastx_nucleotides_distribution.xml" />
-    <tool file="fastx_toolkit/fastq_to_fasta.xml" />
-    <tool file="fastx_toolkit/fastq_quality_filter.xml" />
-    <tool file="fastx_toolkit/fastq_to_fasta.xml" />
-    <tool file="fastx_toolkit/fastx_artifacts_filter.xml" />
-    <tool file="fastx_toolkit/fastx_barcode_splitter.xml" />
-    <tool file="fastx_toolkit/fastx_clipper.xml" />
-    <tool file="fastx_toolkit/fastx_collapser.xml" />
-    <tool file="fastx_toolkit/fastx_renamer.xml" />
-    <tool file="fastx_toolkit/fastx_reverse_complement.xml" />
-     <tool file="fastx_toolkit/fastx_trimmer.xml" />
-  </section>
-  <section name="NGS: Mapping" id="solexa_tools">
-   <tool file="sr_mapping/bowtie_wrapper.xml" />
-  </section>
-  <section name="NGS: SAM Tools" id="samtools">
-   <tool file="samtools/sam_bitwise_flag_filter.xml" />
-   <tool file="samtools/sam2interval.xml" />
-   <tool file="samtools/sam_merge.xml" />
-   <tool file="samtools/sam_pileup.xml" />
-   <tool file="samtools/pileup_parser.xml" />
-   <tool file="samtools/pileup_interval.xml" />
-   <tool file="samtools/samtools_flagstat.xml" />
-  </section>
-  <section name="Functional Enrichment" id="enrichment">
-      <tool file="human_genome_variation/linkToDavid.xml"/>
-  </section>
-
-<label text="other galaxy tools" id="galaxy_tools"/>
-  <section name="Extract Features" id="features">
-    <tool file="filters/ucsc_gene_bed_to_exon_bed.xml" />
-  </section>
-  <section name="Fetch Alignments" id="fetchAlign">
-    <tool file="maf/interval2maf_pairwise.xml" />
-    <tool file="maf/interval2maf.xml" />
-    <tool file="maf/maf_split_by_species.xml"/>
-    <tool file="maf/interval_maf_to_merged_fasta.xml" />
-    <tool file="maf/genebed_maf_to_fasta.xml"/>
-    <tool file="maf/maf_stats.xml"/>
-    <tool file="maf/maf_thread_for_species.xml"/>
-    <tool file="maf/maf_limit_to_species.xml"/>
-    <tool file="maf/maf_limit_size.xml"/>
-    <tool file="maf/maf_by_block_number.xml"/>
-    <tool file="maf/maf_reverse_complement.xml"/>
-    <tool file="maf/maf_filter.xml"/>
-  </section>
-  <section name="Get Genomic Scores" id="scores">
-    <tool file="stats/wiggle_to_simple.xml" />
-    <tool file="stats/aggregate_binned_scores_in_intervals.xml" />
-    <tool file="extract/phastOdds/phastOdds_tool.xml" />
-  </section>
-  <section name="Operate on Genomic Intervals" id="bxops">
-    <tool file="new_operations/intersect.xml" />
-    <tool file="new_operations/subtract.xml" />
-    <tool file="new_operations/merge.xml" />
-    <tool file="new_operations/concat.xml" />
-    <tool file="new_operations/basecoverage.xml" />
-    <tool file="new_operations/coverage.xml" />
-    <tool file="new_operations/complement.xml" />
-    <tool file="new_operations/cluster.xml" id="cluster" />
-    <tool file="new_operations/join.xml" />
-    <tool file="new_operations/get_flanks.xml" />
-    <tool file="new_operations/flanking_features.xml" />
-    <tool file="annotation_profiler/annotation_profiler.xml" />
-  </section>
-  <section name="Statistics" id="stats">
-    <tool file="stats/gsummary.xml" />
-    <tool file="filters/uniq.xml" />
-    <tool file="stats/cor.xml" />
-    <tool file="stats/generate_matrix_for_pca_lda.xml" />
-    <tool file="stats/lda_analy.xml" />
-    <tool file="stats/plot_from_lda.xml" />
-    <tool file="regVariation/t_test_two_samples.xml" />
-    <tool file="regVariation/compute_q_values.xml" />
-    <label text="GFF" id="gff" />
-      <tool file="stats/count_gff_features.xml" />
-  </section>
-  <section name="Wavelet Analysis" id="dwt">
-    <tool file="discreteWavelet/execute_dwt_var_perFeature.xml" />
-    <!--
-    Keep this section/tools commented until all of the tools have functional tests
-    <tool file="discreteWavelet/execute_dwt_IvC_all.xml" />
-    <tool file="discreteWavelet/execute_dwt_cor_aVa_perClass.xml" />
-    <tool file="discreteWavelet/execute_dwt_cor_aVb_all.xml" />
-    <tool file="discreteWavelet/execute_dwt_var_perClass.xml" />
-    -->
- </section>
-  <section name="Graph/Display Data" id="plots">
-    <tool file="plotting/histogram2.xml" />
-    <tool file="plotting/scatterplot.xml" />
-    <tool file="plotting/bar_chart.xml" />
-    <tool file="plotting/xy_plot.xml" />
-    <tool file="plotting/boxplot.xml" />
-    <tool file="visualization/GMAJ.xml" />
-    <tool file="visualization/LAJ.xml" />
-    <tool file="visualization/build_ucsc_custom_track.xml" />
-    <tool file="maf/vcf_to_maf_customtrack.xml" />
-    <tool file="mutation/visualize.xml" />
-  </section>
-  <section name="Regional Variation" id="regVar">
-    <tool file="regVariation/windowSplitter.xml" />
-    <tool file="regVariation/featureCounter.xml" />
-    <tool file="regVariation/quality_filter.xml" />
-    <tool file="regVariation/maf_cpg_filter.xml" />
-    <tool file="regVariation/getIndels_2way.xml" />
-    <tool file="regVariation/getIndels_3way.xml" />
-    <tool file="regVariation/getIndelRates_3way.xml" />
-    <tool file="regVariation/substitutions.xml" />
-    <tool file="regVariation/substitution_rates.xml" />
-    <tool file="regVariation/microsats_alignment_level.xml" />
-    <tool file="regVariation/microsats_mutability.xml" />
-    <tool file="regVariation/delete_overlapping_indels.xml" />
-    <tool file="regVariation/compute_motifs_frequency.xml" />
-    <tool file="regVariation/compute_motif_frequencies_for_all_motifs.xml" />
-    <tool file="regVariation/categorize_elements_satisfying_criteria.xml" />s
-    <tool file="regVariation/draw_stacked_barplots.xml" />
-    <tool file="regVariation/multispecies_MicrosatDataGenerator_interrupted_GALAXY.xml" />
-    <tool file="regVariation/microsatellite_birthdeath.xml" />
-  </section>
-  <section name="Multiple regression" id="multReg">
-    <tool file="regVariation/linear_regression.xml" />
-    <tool file="regVariation/best_regression_subsets.xml" />
-    <tool file="regVariation/rcve.xml" />
-  </section>
-  <section name="Multivariate Analysis" id="multVar">
-    <tool file="multivariate_stats/pca.xml" />
-    <tool file="multivariate_stats/cca.xml" />
-    <tool file="multivariate_stats/kpca.xml" />
-    <tool file="multivariate_stats/kcca.xml" />
-  </section>
- <section name="Evolution" id="hyphy">
-    <tool file="hyphy/hyphy_branch_lengths_wrapper.xml" />
-    <tool file="hyphy/hyphy_nj_tree_wrapper.xml" />
-    <tool file="hyphy/hyphy_dnds_wrapper.xml" />
-    <tool file="evolution/mutate_snp_codon.xml" />
-    <tool file="evolution/codingSnps.xml" />
-    <tool file="evolution/add_scores.xml" />
- </section>
-  <section name="Multiple Alignments" id="clustal">
-    <tool file="rgenetics/rgClustalw.xml" />
-  </section>
- <section name="Metagenomic analyses" id="tax_manipulation">
-    <tool file="taxonomy/gi2taxonomy.xml" />
-    <tool file="taxonomy/t2t_report.xml" />
-    <tool file="taxonomy/t2ps_wrapper.xml" />
-    <tool file="taxonomy/find_diag_hits.xml" />
-    <tool file="taxonomy/lca.xml" />
-    <tool file="taxonomy/poisson2test.xml" />
-  </section>
-  <section name="FASTA manipulation" id="fasta_manipulation">
-    <tool file="fasta_tools/fasta_compute_length.xml" />
-    <tool file="fasta_tools/fasta_filter_by_length.xml" />
-    <tool file="fasta_tools/fasta_concatenate_by_species.xml" />
-    <tool file="fasta_tools/fasta_to_tabular.xml" />
-    <tool file="fasta_tools/tabular_to_fasta.xml" />
-    <tool file="fastx_toolkit/fasta_formatter.xml" />
-    <tool file="fastx_toolkit/fasta_nucleotide_changer.xml" />
-    <tool file="fastx_toolkit/fastx_collapser.xml" />
-  </section>
-  <section name="NCBI BLAST+" id="ncbi_blast_plus_tools">
-   <tool file="ncbi_blast_plus/ncbi_blastn_wrapper.xml" />
-   <tool file="ncbi_blast_plus/ncbi_blastp_wrapper.xml" />
-   <tool file="ncbi_blast_plus/ncbi_blastx_wrapper.xml" />
-   <tool file="ncbi_blast_plus/ncbi_tblastn_wrapper.xml" />
-   <tool file="ncbi_blast_plus/ncbi_tblastx_wrapper.xml" />
-   <tool file="ncbi_blast_plus/blastxml_to_tabular.xml" />
-  </section>
-  <section name="NGS: QC and manipulation" id="NGS_QC">
-   <label text="FastQC: fastq/sam/bam" id="fastqcsambam" />
-    <tool file="rgenetics/rgFastQC.xml" />
-   <label text="Illumina fastq" id="illumina" />
-    <tool file="fastq/fastq_groomer.xml" />
-    <tool file="fastq/fastq_paired_end_splitter.xml" />
-    <tool file="fastq/fastq_paired_end_joiner.xml" />
-    <tool file="fastq/fastq_stats.xml" />
-   <label text="Roche-454 data" id="454" />
-    <tool file="metag_tools/short_reads_figure_score.xml" />
-    <tool file="metag_tools/short_reads_trim_seq.xml" />
-    <tool file="fastq/fastq_combiner.xml" />
-   <label text="AB-SOLiD data" id="solid" />
-    <tool file="next_gen_conversion/solid2fastq.xml" />
-    <tool file="solid_tools/solid_qual_stats.xml" />
-    <tool file="solid_tools/solid_qual_boxplot.xml" />
-   <label text="Generic FASTQ manipulation" id="generic_fastq" />
-    <tool file="fastq/fastq_filter.xml" />
-    <tool file="fastq/fastq_trimmer.xml" />
-    <tool file="fastq/fastq_trimmer_by_quality.xml" />
-    <tool file="fastq/fastq_masker_by_quality.xml" />
-    <tool file="fastq/fastq_paired_end_interlacer.xml" />
-    <tool file="fastq/fastq_paired_end_deinterlacer.xml" />
-    <tool file="fastq/fastq_manipulation.xml" />
-    <tool file="fastq/fastq_to_fasta.xml" />
-    <tool file="fastq/fastq_to_tabular.xml" />
-    <tool file="fastq/tabular_to_fastq.xml" />
-   <label text="FASTX-Toolkit for FASTQ data" id="fastx_toolkit" />
-    <tool file="fastx_toolkit/fastq_quality_converter.xml" />
-    <tool file="fastx_toolkit/fastx_quality_statistics.xml" />
-    <tool file="fastx_toolkit/fastq_quality_boxplot.xml" />
-    <tool file="fastx_toolkit/fastx_nucleotides_distribution.xml" />
-    <tool file="fastx_toolkit/fastq_to_fasta.xml" />
-    <tool file="fastx_toolkit/fastq_quality_filter.xml" />
-    <tool file="fastx_toolkit/fastq_to_fasta.xml" />
-    <tool file="fastx_toolkit/fastx_artifacts_filter.xml" />
-    <tool file="fastx_toolkit/fastx_barcode_splitter.xml" />
-    <tool file="fastx_toolkit/fastx_clipper.xml" />
-    <tool file="fastx_toolkit/fastx_collapser.xml" />
-    <tool file="fastx_toolkit/fastx_renamer.xml" />
-    <tool file="fastx_toolkit/fastx_reverse_complement.xml" />
-     <tool file="fastx_toolkit/fastx_trimmer.xml" />
-  </section>
-  <section name="NGS: Picard (beta)" id="picard_beta">
-    <label text="QC/Metrics for sam/bam" id="qcsambam"/>
-    <tool file="picard/picard_BamIndexStats.xml" />
-    <tool file="picard/rgPicardASMetrics.xml" />
-    <tool file="picard/rgPicardGCBiasMetrics.xml" />
-    <tool file="picard/rgPicardLibComplexity.xml" />
-    <tool file="picard/rgPicardInsertSize.xml" />
-    <tool file="picard/rgPicardHsMetrics.xml" />
-  <label text="bam/sam Cleaning" id="picard-clean" />
-    <tool file="picard/picard_AddOrReplaceReadGroups.xml" />
-    <tool file="picard/picard_ReorderSam.xml" />
-    <tool file="picard/picard_ReplaceSamHeader.xml" />
-    <tool file="picard/rgPicardFixMate.xml" />
-    <tool file="picard/rgPicardMarkDups.xml" />
-  </section>
-  <!--
-  Keep this section commented until it includes tools that
-  will be hosted on test/main.  The velvet wrappers have been
-  included in the distribution but will not be hosted on our
-  public servers for the current time.
-  <section name="NGS: Assembly" id="ngs_assembly">
-        <label text="Velvet" id="velvet"/>
-        <tool file="sr_assembly/velvetg.xml" />
-        <tool file="sr_assembly/velveth.xml" />
-  </section>
-  -->
-  <section name="NGS: Mapping" id="solexa_tools">
-   <tool file="sr_mapping/lastz_wrapper.xml" />
-   <tool file="sr_mapping/lastz_paired_reads_wrapper.xml" />
-   <tool file="sr_mapping/bowtie_wrapper.xml" />
-   <tool file="sr_mapping/bowtie_color_wrapper.xml" />
-   <tool file="sr_mapping/bwa_wrapper.xml" />
-   <tool file="sr_mapping/bwa_color_wrapper.xml" />
-   <tool file="sr_mapping/bfast_wrapper.xml" />
-   <tool file="metag_tools/megablast_wrapper.xml" />
-   <tool file="metag_tools/megablast_xml_parser.xml" />
-   <tool file="sr_mapping/PerM.xml" />
-   <tool file="sr_mapping/srma_wrapper.xml" />
-   <tool file="sr_mapping/mosaik.xml"/>
-  </section>
-  <section name="NGS: Indel Analysis" id="indel_analysis">
-   <tool file="indels/sam_indel_filter.xml" />
-   <tool file="indels/indel_sam2interval.xml" />
-   <tool file="indels/indel_table.xml" />
-   <tool file="indels/indel_analysis.xml" />
-  </section>
-  <section name="NGS: RNA Analysis" id="ngs-rna-tools">
-   <label text="RNA-seq" id="rna_seq" />
-      <tool file="ngs_rna/tophat_wrapper.xml" />
-      <tool file="ngs_rna/tophat_color_wrapper.xml" />
-      <tool file="ngs_rna/cufflinks_wrapper.xml" />
-      <tool file="ngs_rna/cuffcompare_wrapper.xml" />
-      <tool file="ngs_rna/cuffdiff_wrapper.xml" />
-   <label text="De novo Assembly" id="de_novo_assembly "/>
-      <tool file="ngs_rna/trinity_all.xml" />
-   <label text="Filtering" id="filtering" />
-      <tool file="ngs_rna/filter_transcripts_via_tracking.xml" />
-  </section>
-  <section name="NGS: SAM Tools" id="samtools">
-   <tool file="samtools/sam_bitwise_flag_filter.xml" />
-   <tool file="samtools/sam2interval.xml" />
-   <tool file="samtools/sam_to_bam.xml" />
-   <tool file="samtools/bam_to_sam.xml" />
-   <tool file="samtools/sam_merge.xml" />
-   <tool file="samtools/sam_pileup.xml" />
-   <tool file="samtools/pileup_parser.xml" />
-   <tool file="samtools/pileup_interval.xml" />
-   <tool file="samtools/samtools_flagstat.xml" />
-  </section>
-  <section name="NGS: GATK Tools" id="gatk">
-   <label text="Realignment" id="gatk_realignment" />
-     <tool file="gatk/realigner_target_creator.xml" />
-     <tool file="gatk/indel_realigner.xml" />
-   <label text="Base Recalibration" id="gatk_recalibration" />
-     <tool file="gatk/count_covariates.xml" />
-     <tool file="gatk/table_recalibration.xml" />
-     <tool file="gatk/analyze_covariates.xml" />
-   <label text="Genotyping" id="gatk_genotyping" />
-     <tool file="gatk/unified_genotyper.xml" />
-  </section>
-  <section name="NGS: Peak Calling" id="peak_calling">
-   <tool file="peak_calling/macs_wrapper.xml" />
-   <tool file="peak_calling/sicer_wrapper.xml" />
-   <tool file="peak_calling/ccat_wrapper.xml" />
-   <tool file="genetrack/genetrack_indexer.xml" />
-   <tool file="genetrack/genetrack_peak_prediction.xml" />
-  </section>
-  <section name="NGS: Simulation" id="ngs-simulation">
-    <tool file="ngs_simulation/ngs_simulation.xml" />
-  </section>
-  <section name="SNP/WGA: Data; Filters" id="rgdat">
-  <label text="Data: Import and upload" id="rgimport" />
-    <tool file="data_source/upload.xml"/>
-    <tool file="data_source/access_libraries.xml" />
-  <label text="Data: Filter and Clean" id="rgfilter" />
-    <tool file="rgenetics/rgClean.xml"/>
-    <tool file="rgenetics/rgPedSub.xml"/>
-    <tool file="rgenetics/rgLDIndep.xml"/>
-  <label text="Simulate" id="rgsim" />
-    <tool file="rgenetics/rgfakePhe.xml"/>
-    <tool file="rgenetics/rgfakePed.xml"/>
-  </section>
-  <section name="SNP/WGA: QC; LD; Plots" id="rgqcplot">
-  <label text="QC; Eigenstrat" id="rgvisual" />
-    <tool file="rgenetics/rgQC.xml"/>
-    <tool file="rgenetics/rgEigPCA.xml"/>
-  <label text="LD; Manhattan/QQ; GRR" id="rgld" />
-    <tool file="rgenetics/rgHaploView.xml"/>
-    <tool file="rgenetics/rgManQQ.xml"/>
-    <tool file="rgenetics/rgGRR.xml"/>
-  </section>
-  <section name="SNP/WGA: Statistical Models" id="rgmodel">
-    <tool file="rgenetics/rgCaCo.xml"/>
-    <tool file="rgenetics/rgTDT.xml"/>
-    <tool file="rgenetics/rgGLM.xml"/>
-    <tool file="rgenetics/rgManQQ.xml"/>
-  </section>
-  <section name="Human Genome Variation" id="hgv">
-    <tool file="evolution/codingSnps.xml" />
-    <tool file="evolution/add_scores.xml" />
-    <tool file="human_genome_variation/sift.xml" />
-    <tool file="human_genome_variation/linkToGProfile.xml" />
-    <tool file="human_genome_variation/linkToDavid.xml"/>
-    <tool file="human_genome_variation/ctd.xml" />
-    <tool file="human_genome_variation/funDo.xml" />
-    <tool file="human_genome_variation/snpFreq.xml" />
-    <tool file="human_genome_variation/ldtools.xml" />
-    <tool file="human_genome_variation/pass.xml" />
-    <tool file="human_genome_variation/gpass.xml" />
-    <tool file="human_genome_variation/beam.xml" />
-    <tool file="human_genome_variation/lps.xml" />
-    <tool file="human_genome_variation/hilbertvis.xml" />
-    <tool file="human_genome_variation/freebayes.xml" />
-  </section>
-  <section name="Genome Diversity" id="gd">
-    <tool file="genome_diversity/extract_primers.xml" />
-    <tool file="genome_diversity/select_snps.xml" />
-    <tool file="genome_diversity/select_restriction_enzymes.xml" />
-    <tool file="genome_diversity/extract_flanking_dna.xml" />
-  </section>
-  <section name="VCF Tools" id="vcf_tools">
-    <tool file="vcf_tools/intersect.xml" />
-    <tool file="vcf_tools/annotate.xml" />
-    <tool file="vcf_tools/filter.xml" />
-    <tool file="vcf_tools/extract.xml" />
-  </section>
-  <section name="PacBio/Illumina Assembly" id="hybrid">
-    <tool file="ilmn_pacbio/quake.xml"/>
-    <tool file="ilmn_pacbio/quake_pe.xml"/>
-    <tool file="ilmn_pacbio/soap_denovo.xml"/>
-<!--
-    Uncomment this tool when we support the HDF5 format
-    <tool file="ilmn_pacbio/smrtpipe_filter.xml"/>
--->
-    <tool file="ilmn_pacbio/smrtpipe_hybrid.xml"/>
-    <tool file="ilmn_pacbio/assembly_stats.xml"/>
-  </section>
-<!--
-  TODO: uncomment the following EMBOSS section whenever
-  moving to test, but comment it in .sample to eliminate
-  it from buildbot functional tests since these tools
-  rarely change.
--->
-<!--
-  <section name="EMBOSS" id="EMBOSSLite">
-    <tool file="emboss_5/emboss_antigenic.xml" />
-    <tool file="emboss_5/emboss_backtranseq.xml" />
-    <tool file="emboss_5/emboss_banana.xml" />
-    <tool file="emboss_5/emboss_biosed.xml" />
-    <tool file="emboss_5/emboss_btwisted.xml" />
-    <tool file="emboss_5/emboss_cai_custom.xml" />
-    <tool file="emboss_5/emboss_cai.xml" />
-    <tool file="emboss_5/emboss_chaos.xml" />
-    <tool file="emboss_5/emboss_charge.xml" />
-    <tool file="emboss_5/emboss_checktrans.xml" />
-    <tool file="emboss_5/emboss_chips.xml" />
-    <tool file="emboss_5/emboss_cirdna.xml" />
-    <tool file="emboss_5/emboss_codcmp.xml" />
-    <tool file="emboss_5/emboss_coderet.xml" />
-    <tool file="emboss_5/emboss_compseq.xml" />
-    <tool file="emboss_5/emboss_cpgplot.xml" />
-    <tool file="emboss_5/emboss_cpgreport.xml" />
-    <tool file="emboss_5/emboss_cusp.xml" />
-    <tool file="emboss_5/emboss_cutseq.xml" />
-    <tool file="emboss_5/emboss_dan.xml" />
-    <tool file="emboss_5/emboss_degapseq.xml" />
-    <tool file="emboss_5/emboss_descseq.xml" />
-    <tool file="emboss_5/emboss_diffseq.xml" />
-    <tool file="emboss_5/emboss_digest.xml" />
-    <tool file="emboss_5/emboss_dotmatcher.xml" />
-    <tool file="emboss_5/emboss_dotpath.xml" />
-    <tool file="emboss_5/emboss_dottup.xml" />
-    <tool file="emboss_5/emboss_dreg.xml" />
-    <tool file="emboss_5/emboss_einverted.xml" />
-    <tool file="emboss_5/emboss_epestfind.xml" />
-    <tool file="emboss_5/emboss_equicktandem.xml" />
-    <tool file="emboss_5/emboss_est2genome.xml" />
-    <tool file="emboss_5/emboss_etandem.xml" />
-    <tool file="emboss_5/emboss_extractfeat.xml" />
-    <tool file="emboss_5/emboss_extractseq.xml" />
-    <tool file="emboss_5/emboss_freak.xml" />
-    <tool file="emboss_5/emboss_fuzznuc.xml" />
-    <tool file="emboss_5/emboss_fuzzpro.xml" />
-    <tool file="emboss_5/emboss_fuzztran.xml" />
-    <tool file="emboss_5/emboss_garnier.xml" />
-    <tool file="emboss_5/emboss_geecee.xml" />
-    <tool file="emboss_5/emboss_getorf.xml" />
-    <tool file="emboss_5/emboss_helixturnhelix.xml" />
-    <tool file="emboss_5/emboss_hmoment.xml" />
-    <tool file="emboss_5/emboss_iep.xml" />
-    <tool file="emboss_5/emboss_infoseq.xml" />
-    <tool file="emboss_5/emboss_isochore.xml" />
-    <tool file="emboss_5/emboss_lindna.xml" />
-    <tool file="emboss_5/emboss_marscan.xml" />
-    <tool file="emboss_5/emboss_maskfeat.xml" />
-    <tool file="emboss_5/emboss_maskseq.xml" />
-    <tool file="emboss_5/emboss_matcher.xml" />
-    <tool file="emboss_5/emboss_megamerger.xml" />
-    <tool file="emboss_5/emboss_merger.xml" />
-    <tool file="emboss_5/emboss_msbar.xml" />
-    <tool file="emboss_5/emboss_needle.xml" />
-    <tool file="emboss_5/emboss_newcpgreport.xml" />
-    <tool file="emboss_5/emboss_newcpgseek.xml" />
-    <tool file="emboss_5/emboss_newseq.xml" />
-    <tool file="emboss_5/emboss_noreturn.xml" />
-    <tool file="emboss_5/emboss_notseq.xml" />
-    <tool file="emboss_5/emboss_nthseq.xml" />
-    <tool file="emboss_5/emboss_octanol.xml" />
-    <tool file="emboss_5/emboss_oddcomp.xml" />
-    <tool file="emboss_5/emboss_palindrome.xml" />
-    <tool file="emboss_5/emboss_pasteseq.xml" />
-    <tool file="emboss_5/emboss_patmatdb.xml" />
-    <tool file="emboss_5/emboss_pepcoil.xml" />
-    <tool file="emboss_5/emboss_pepinfo.xml" />
-    <tool file="emboss_5/emboss_pepnet.xml" />
-    <tool file="emboss_5/emboss_pepstats.xml" />
-    <tool file="emboss_5/emboss_pepwheel.xml" />
-    <tool file="emboss_5/emboss_pepwindow.xml" />
-    <tool file="emboss_5/emboss_pepwindowall.xml" />
-    <tool file="emboss_5/emboss_plotcon.xml" />
-    <tool file="emboss_5/emboss_plotorf.xml" />
-    <tool file="emboss_5/emboss_polydot.xml" />
-    <tool file="emboss_5/emboss_preg.xml" />
-    <tool file="emboss_5/emboss_prettyplot.xml" />
-    <tool file="emboss_5/emboss_prettyseq.xml" />
-    <tool file="emboss_5/emboss_primersearch.xml" />
-    <tool file="emboss_5/emboss_revseq.xml" />
-    <tool file="emboss_5/emboss_seqmatchall.xml" />
-    <tool file="emboss_5/emboss_seqret.xml" />
-    <tool file="emboss_5/emboss_showfeat.xml" />
-    <tool file="emboss_5/emboss_shuffleseq.xml" />
-    <tool file="emboss_5/emboss_sigcleave.xml" />
-    <tool file="emboss_5/emboss_sirna.xml" />
-    <tool file="emboss_5/emboss_sixpack.xml" />
-    <tool file="emboss_5/emboss_skipseq.xml" />
-    <tool file="emboss_5/emboss_splitter.xml" />
-    <tool file="emboss_5/emboss_supermatcher.xml" />
-    <tool file="emboss_5/emboss_syco.xml" />
-    <tool file="emboss_5/emboss_tcode.xml" />
-    <tool file="emboss_5/emboss_textsearch.xml" />
-    <tool file="emboss_5/emboss_tmap.xml" />
-    <tool file="emboss_5/emboss_tranalign.xml" />
-    <tool file="emboss_5/emboss_transeq.xml" />
-    <tool file="emboss_5/emboss_trimest.xml" />
-    <tool file="emboss_5/emboss_trimseq.xml" />
-    <tool file="emboss_5/emboss_twofeat.xml" />
-    <tool file="emboss_5/emboss_union.xml" />
-    <tool file="emboss_5/emboss_vectorstrip.xml" />
-    <tool file="emboss_5/emboss_water.xml" />
-    <tool file="emboss_5/emboss_wobble.xml" />
-    <tool file="emboss_5/emboss_wordcount.xml" />
-    <tool file="emboss_5/emboss_wordmatch.xml" />
-  </section>
--->
-</toolbox>
Binary file tools/unix_tools/._awk_tool.xml has changed
Binary file tools/unix_tools/._awk_wrapper.sh has changed
Binary file tools/unix_tools/._cut_tool.xml has changed
Binary file tools/unix_tools/._cut_wrapper.sh has changed
Binary file tools/unix_tools/._find_and_replace.pl has changed
Binary file tools/unix_tools/._find_and_replace.xml has changed
Binary file tools/unix_tools/._grep_tool.xml has changed
Binary file tools/unix_tools/._grep_wrapper.sh has changed
Binary file tools/unix_tools/._grep_wrapper_old.sh has changed
Binary file tools/unix_tools/._join_tool.sh has changed
Binary file tools/unix_tools/._join_tool.xml has changed
Binary file tools/unix_tools/._remove_ending.sh has changed
Binary file tools/unix_tools/._remove_ending.xml has changed
Binary file tools/unix_tools/._sed_tool.xml has changed
Binary file tools/unix_tools/._sed_wrapper.sh has changed
Binary file tools/unix_tools/._sort_tool.xml has changed
Binary file tools/unix_tools/._uniq_tool.xml has changed
Binary file tools/unix_tools/._word_list_grep.pl has changed
Binary file tools/unix_tools/._word_list_grep.xml has changed
--- a/tools/unix_tools/awk_tool.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,138 +0,0 @@
-<tool id="cshl_awk_tool" name="awk">
-  <description></description>
-  <command interpreter="sh">awk_wrapper.sh $input $output '$file_data' '$FS' '$OFS'</command>
-  <inputs>
-    <param format="txt" name="input" type="data" label="File to process" />
-
-    <param name="FS" type="select" label="Input field-separator">
-	<option value=",">comma (,)</option>
-	<option value=":">colons (:) </option>
-	<option value=" ">single space</option>
-	<option value=".">dot (.)</option>
-	<option value="-">dash (-)</option>
-	<option value="|">pipe (|)</option>
-	<option value="_">underscore (_)</option>
-	<option selected="True" value="tab">tab</option>
-    </param>
-
-    <param name="OFS" type="select" label="Output field-separator">
-	<option value=",">comma (,)</option>
-	<option value=":">colons (:)</option>
-	<option value=" ">space ( )</option>
-	<option value="-">dash (-)</option>
-	<option value=".">dot (.)</option>
-	<option value="|">pipe (|)</option>
-	<option value="_">underscore (_)</option>
-	<option selected="True" value="tab">tab</option>
-    </param>
-
-
-    <!-- Note: the parameter name MUST BE 'url_paste' -
-         This is a hack in the galaxy library (see ./lib/galaxy/util/__init__.py line 142)
-	 If the name is 'url_paste' the string won't be sanitized, and all the non-alphanumeric characters 
-	 will be passed to the shell script -->
-    <param name="file_data" type="text" area="true" size="5x35" label="AWK Program" help=""> 
-    	<validator type="expression" message="Invalid Program!">value.find('\'')==-1</validator>
-    </param>
-
-  </inputs>
-  <tests>
-	  <test>
-		  <param name="input" value="unix_awk_input1.txt" />
-		  <output name="output" file="unix_awk_output1.txt" />
-		  <param name="FS" value="tab" />
-		  <param name="OFS" value="tab" />
-		  <param name="file_data"  value="$2>0.5 { print $2*9, $1 }" />
-	  </test>
-  </tests>
-  <outputs>
-    <data format="input" name="output" metadata_source="input" />
-  </outputs>
-<help>
-
-**What it does**
-
-This tool runs the unix **awk** command on the selected data file.
-
-.. class:: infomark
-
-**TIP:** This tool uses the **extended regular expression** syntax (not the Perl syntax).
-
-
-**Further reading**
-
-- Awk by Example (http://www.ibm.com/developerworks/linux/library/l-awk1.html)
-- Long AWK tutorial (http://www.grymoire.com/Unix/Awk.html)
-- Learn AWK in 1 hour (http://www.selectorweb.com/awk.html)
-- awk cheat-sheet (http://cbi.med.harvard.edu/people/peshkin/sb302/awk_cheatsheets.pdf)
-- Collection of useful awk one-liners (http://student.northpark.edu/pemente/awk/awk1line.txt)
-
------
-
-**AWK programs**
-
-Most AWK programs consist of **patterns** (i.e. rules that match lines of text) and **actions** (i.e. commands to execute when a pattern matches a line).
-
-The basic form of an AWK program is::
-
-    pattern { action 1; action 2; action 3; }
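-
-For example, assuming a BED-like input where column 2 is the start, column 3 is the end and column 5 is a score, the following one-line program would print the chromosome and the interval length for every line whose score exceeds 100::
-
-    $5 > 100 { print $1, $3-$2 }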
-
-
-
-
-
-**Pattern Examples**
-
-- **$2 == "chr3"**  will match lines whose second column is the string 'chr3'
-- **$5-$4>23**  will match lines in which subtracting the value of the fourth column from the value of the fifth column gives a value larger than 23.
-- **/AG..AG/** will match lines that contain the regular expression **AG..AG** (meaning the characters AG followed by any two characters followed by AG). (This is the way to specify a regular expression on the entire line, similar to GREP.)
-- **$7 ~ /A{4}U/**  will match lines whose seventh column contains 4 consecutive A's followed by a U. (This is the way to specify regular expressions on a specific field.)
-- **10000 &lt; $4 &amp;&amp; $4 &lt; 20000** will match lines whose fourth column value is larger than 10,000 but smaller than 20,000
-- If no pattern is specified, all lines match (meaning the **action** part will be executed on all lines).
-
-
-
-**Action Examples**
-
-- **{ print }** or **{ print $0 }**   will print the entire input line (the line that matched in **pattern**). **$0** is a special marker meaning 'the entire line'.
-- **{ print $1, $4, $5 }** will print only the first, fourth and fifth fields of the input line.
-- **{ print $4, $5-$4 }** will print the fourth column and the difference between the fifth and fourth column. (If the fourth column was start-position in the input file, and the fifth column was end-position - the output file will contain the start-position, and the length).
-- If no action part is specified (not even the curly brackets) - the default action is to print the entire line.
-
-
-
-
-
-
-
-
-
-**AWK's Regular Expression Syntax**
-
-AWK patterns select lines by matching them against regular expressions. A regular expression is a pattern describing a certain amount of text.
-
-- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
-- **^** matches the beginning of a string(but not an internal line).
-- **(** .. **)** groups a particular pattern.
-- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
-
-  - **{n}** The preceding item is matched exactly n times.
-  - **{n,}** The preceding item is matched n or more times.
-  - **{n,m}** The preceding item is matched at least n times but not more than m times. 
-
-- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
-- **.** Matches any single character except a newline.
-- ***** The preceding item will be matched zero or more times.
-- **?** The preceding item is optional and matched at most once.
-- **+** The preceding item will be matched one or more times.
-- **^** has two meanings:
-
-  - matches the beginning of a line or string.
-  - indicates negation in a character class. For example, **[^...]** matches every character except the ones inside the brackets.
-
-- **$** matches the end of a line or string.
-- **\|** Separates alternate possibilities. 
-
-
-**Note**: AWK uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported; use explicit character classes such as **[0-9]** instead of **\\d**.
-
-</help>
-</tool>
--- a/tools/unix_tools/awk_wrapper.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-#!/bin/sh
-
-##
-## Galaxy wrapper for AWK command
-##
-
-##
-## command line arguments:
-##   input_file
-##   output_file
-##   awk-program
-##   input-field-separator
-##   output-field-separator
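-##
-## example invocation (as wired up by awk_tool.xml; program taken from its test case):
-##   sh awk_wrapper.sh input.tab output.tab '$2>0.5 { print $2*9, $1 }' tab tab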
-
-INPUT="$1"
-OUTPUT="$2"
-PROG="$3"
-FS="$4"
-OFS="$5"
-
-shift 5
-
-if [ -z "$OFS" ]; then
-	echo "usage: $0 INPUTFILE OUTPUTFILE AWK-PROGRAM FS OFS" >&2
-	exit 1
-fi
-
-if [ ! -r "$INPUT" ]; then
-	echo "error: input file ($INPUT) not found!" >&2
-	exit 1
-fi
-
-if [ "$FS" == "tab" ]; then
-	FS="\t"
-fi
-if [ "$OFS" == "tab" ]; then
-	OFS="\t"
-fi
-
-# Messages printed to STDOUT will be displayed in the "INFO" field in the galaxy dataset.
-# This way the user can tell what the command was.
-echo "awk" "$PROG"
-
-awk --sandbox -v OFS="$OFS" -v FS="$FS" --re-interval "$PROG" "$INPUT" > "$OUTPUT"
-status=$?
-if [ "$status" -ne 0 ]; then exit "$status"; fi
-
-exit 0
--- a/tools/unix_tools/cut_tool.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-<tool id="cshl_cut_tool" name="cut">
-  <description>columns from files</description>
-  <command interpreter="sh">
-  	cut_wrapper.sh '$complement' '$cutwhat' '$list' '$input' '$output'
-  </command>
-
-  <inputs>
-	<param format="txt" name="input" type="data" label="file to cut" />
-		
-    	<param name="complement" type="select" label="Operation">
-	      <option value="">Keep</option>
-	      <option value="--complement">Discard</option>
-	</param>
-
-    	<param name="cutwhat" type="select" label="Cut by">
-	      <option value="-f">fields</option>
-	      <option value="-c">characters</option>
-	</param>
-
-	<param name="list" type="text" size="20" label="List of Fields/Characters/Bytes" help="These will be kept/discarded (depending on 'operation'). &lt;BR /&gt; Examples: 1,3,4 or 2-5" value = "" />
-  </inputs>
-
-  <tests>
-	  <test>
-		  <param name="input" value="unix_cut_input1.txt" />
-		  <output name="output" file="unix_cut_output1.txt" />
-		  <param name="complement" value="Keep" />
-		  <param name="cutwhat" value="fields" />
-		  <param name="list"  value="1,3,4" />
-	  </test>
-	  <test>
-		  <param name="input" value="unix_cut_input1.txt" />
-		  <output name="output" file="unix_cut_output1.txt" />
-		  <param name="complement" value="Discard" />
-		  <param name="cutwhat" value="fields" />
-		  <param name="list"  value="2" />
-	  </test>
-  </tests>
-
-  <outputs>
-    <data format="input" name="output" metadata_source="input"/>
-  </outputs>
-  <help>
-
-**What it does**
-
-This tool runs the **cut** unix command, which extracts or deletes columns from a file.
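-
-For example, choosing "Keep", "fields" and the list *1,3,4* makes the wrapper effectively run::
-
-    cut -f 1,3,4 < input.txt > output.txt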
-
------
-
-Field List Example:
-
-**1,3,7** - Cut specific fields/characters.
-
-**3-**    - Cut from the third field/character to the end of the line.
-
-**2-5**   - Cut from the second to the fifth field/character.
-
-**-8**    - Cut from the first to the eighth field/character.
-
-
-
-
-Input Example::
-
-    fruit	color	price	weight
-    apple	red	1.4	0.5
-    orange	orange	1.5	0.3
-    banana	yellow	0.9	0.3
-
-
-Output Example ( **Keeping fields 1,3,4** )::
-
-    fruit	price	weight
-    apple	1.4	0.5
-    orange	1.5	0.3
-    banana	0.9	0.3
-
-Output Example ( **Discarding field 2** )::
-
-    fruit	price	weight
-    apple	1.4	0.5
-    orange	1.5	0.3
-    banana	0.9	0.3
-
-Output Example ( **Keeping 3 characters** )::
-
-    fru
-    app
-    ora
-    ban
-
-  </help>
-</tool>
--- a/tools/unix_tools/cut_wrapper.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-#!/bin/sh
-
-##
-## Galaxy wrapper for cut command.
-##
-
-##
-## command line arguments:
-##   complement flag (might be empty string)
-##   what to cut (fields or characters)
-##   cut list (e.g. 1,2,3,4)
-##   input_file
-##   output_file
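-##
-## example invocation (as wired up by cut_tool.xml; keeps fields 1,3,4):
-##   sh cut_wrapper.sh '' -f 1,3,4 input.tab output.tab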
-
-COMPLEMENT="$1"
-CUTWHAT="$2"
-CUTLIST="$3"
-INPUT="$4"
-OUTPUT="$5"
-
-if [ -z "$OUTPUT" ]; then
-	echo "This script should be run from inside galaxy!" >&2
-	exit 1
-fi
-
-if [ ! -r "$INPUT" ]; then
-	echo "error: input file ($INPUT) not found!" >&2
-	exit 1
-fi
-
-# Messages printed to STDOUT will be displayed in the "INFO" field in the galaxy dataset.
-# This way the user can tell what the command was.
-if [ -z "$COMPLEMENT" ]; then
-	echo -n "Extracting " 
-else
-	echo -n "Deleting "
-fi
-
-case $CUTWHAT in
-	-f)	echo -n "field(s) "
-		;;
-		
-	-c)	echo -n "character(s) "
-		;;
-esac
-
-echo "$CUTLIST"
-
-
-cut $COMPLEMENT $CUTWHAT "$CUTLIST" < "$INPUT" > "$OUTPUT"
-
-exit 
--- a/tools/unix_tools/find_and_replace.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,202 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-use Getopt::Std;
-
-sub parse_command_line();
-sub build_regex_string();
-sub usage();
-
-my $input_file ;
-my $output_file;
-my $find_pattern ;
-my $replace_pattern ;
-my $find_complete_words ;
-my $find_pattern_is_regex ;
-my $find_in_specific_column ;
-my $find_case_insensitive ;
-my $replace_global ;
-my $skip_first_line ;
-
-
-##
-## Program Start
-##
-usage() if @ARGV<2;
-parse_command_line();
-my $regex_string = build_regex_string() ;
-
-# Allow first line to pass without filtering?
-if ( $skip_first_line ) {
-	my $line = <$input_file>;
-	print $output_file $line ;
-}
-
-
-##
-## Main loop
-##
-
-## I LOVE PERL (and hate it, at the same time...)
-##
-## So what's going on with the self-compiling perl code?
-##
-## 1. The program gets the find-pattern and the replace-pattern from the user (as strings).
-## 2. If both the find-pattern and replace-pattern are simple strings (not regex), 
-##    it would be possible to pre-compile a regex (with qr//) and use it in a 's///'
-## 3. If the find-pattern is a regex but the replace-pattern is a simple text string (without back-references)
-##    it is still possible to pre-compile the regex and use it in a 's///'
-## However,
-## 4. If the replace-pattern contains back-references, pre-compiling is not possible.
-##    (in perl, you can't precompile a substitute regex).
-##    See these examples:
-##    http://www.perlmonks.org/?node_id=84420
-##    http://stackoverflow.com/questions/125171/passing-a-regex-substitution-as-a-variable-in-perl
-##
-##    The solution:
-##    we build the regex string as valid perl code (in 'build_regex()', stored in $regex_string ),
-##    Then eval() a new perl code that contains the substitution regex as inlined code.
-##    Gotta love perl!
-
-my $perl_program ;
-if ( $find_in_specific_column ) {
-	# Find & replace in specific column
-
-	$perl_program = <<EOF;
-	while ( <STDIN> ) {
-		chomp ;
-		my \@columns = split ;
-
-		#not enough columns in this line - skip it
-		next if ( \@columns < $find_in_specific_column ) ;
-
-		\$columns [ $find_in_specific_column - 1 ] =~ $regex_string ;
-
-		print STDOUT join("\t", \@columns), "\n" ;
-	}
-EOF
-
-} else {
-	# Find & replace the entire line
-	$perl_program = <<EOF;
-		while ( <STDIN> ) {
-			$regex_string ;
-			print STDOUT;
-		}
-EOF
-}
-
-
-# The dynamic perl code reads from STDIN and writes to STDOUT,
-# so connect these handles (if the user didn't specify input / output
-# file names, these might already be STDIN/OUT, and the whole thing is a no-op).
-*STDIN = $input_file ;
-*STDOUT = $output_file ;
-eval $perl_program ;
-
-
-##
-## Program end
-##
-
-
-sub parse_command_line()
-{
-	my %opts ;
-	getopts('grsiwc:o:', \%opts) or die "$0: Invalid option specified\n";
-
-	die "$0: missing Find-Pattern argument\n" if (@ARGV==0); 
-	$find_pattern = $ARGV[0];
-	die "$0: missing Replace-Pattern argument\n" if (@ARGV==1); 
-	$replace_pattern = $ARGV[1];
-
-	$find_complete_words = ( exists $opts{w} ) ;
-	$find_case_insensitive = ( exists $opts{i} ) ;
-	$skip_first_line = ( exists $opts{s} ) ;
-	$find_pattern_is_regex = ( exists $opts{r} ) ;
-	$replace_global = ( exists $opts{g} ) ;
-
-	# Search in specific column ?
-	if ( defined $opts{c} ) {
-		$find_in_specific_column = $opts{c};
-
-		die "$0: invalid column number ($find_in_specific_column).\n"
-			unless $find_in_specific_column =~ /^\d+$/ ;
-			
-		die "$0: invalid column number ($find_in_specific_column).\n"
-			if $find_in_specific_column <= 0; 
-	}
-	else {
-		$find_in_specific_column = 0 ;
-	}
-
-	# Output File specified (instead of STDOUT) ?
-	if ( defined $opts{o} ) {
-		my $filename = $opts{o};
-		open $output_file, ">$filename" or die "$0: Failed to create output file '$filename': $!\n" ;
-	} else {
-		$output_file = *STDOUT ;
-	}
-
-
-	# Input file Specified (instead of STDIN) ?
-	if ( @ARGV>2 ) {
-		my $filename = $ARGV[2];
-		open $input_file, "<$filename" or die "$0: Failed to open input file '$filename': $!\n" ;
-	} else {
-		$input_file = *STDIN;
-	}
-}
-
-sub build_regex_string()
-{
-	my $find_string ;
-	my $replace_string ;
-
-	if ( $find_pattern_is_regex ) {
-		$find_string = $find_pattern ;
-		$replace_string = $replace_pattern ;
-	} else {
-		$find_string = quotemeta $find_pattern ;
-		$replace_string = quotemeta $replace_pattern;
-	}
-
-	if ( $find_complete_words ) {
-		$find_string = "\\b($find_string)\\b"; 
-	}
-
-	my $regex_string = "s/$find_string/$replace_string/";
-
-	$regex_string .= "i" if ( $find_case_insensitive );
-	$regex_string .= "g" if ( $replace_global ) ;
-	
-
-	return $regex_string;
-}
-
-sub usage()
-{
-print <<EOF;
-
-Find and Replace
-Copyright (C) 2009 - by A. Gordon ( gordon at cshl dot edu )
-
-Usage: $0 [-o OUTPUT] [-g] [-r] [-w] [-i] [-c N] [-s] FIND-PATTERN REPLACE-PATTERN [INPUT-FILE]
-
-   -g   - Global replace - replace all occurrences in line/column.
-          Default - replace just the first instance.
-   -w   - search for complete words (not partial sub-strings).
-   -i   - case insensitive search.
-   -c N - check only column N, instead of entire line (line split by whitespace).
-   -s   - skip first line (don't replace anything in it).
-   -r   - FIND-PATTERN and REPLACE-PATTERN are Perl regular expressions,
-          usable inside a 's///' statement.
-          By default, they are used as verbatim text strings.
-   -o OUT - specify output file (default = STDOUT).
-   INPUT-FILE - (optional) read from file (default = from STDIN).
-
-
-EOF
-
-	exit;
-}
--- a/tools/unix_tools/find_and_replace.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,154 +0,0 @@
-<tool id="cshl_find_and_replace" name="Find and Replace">
-  <description>text</description>
-  <command interpreter="perl">
-	find_and_replace.pl
-	#if $searchwhere.choice == "column":
-		-c $searchwhere.column
-	#end if
-	-o $output 
-	$caseinsensitive 
-	$wholewords 
-	$skip_first_line
-	$is_regex
-	'$url_paste'
-	'$file_data'
-	'$input'
-  </command>
-  <inputs>
-    <param format="txt" name="input" type="data" label="File to process" />
-
-    <!-- Note: the parameter ane MUST BE 'url_paste' -
-         This is a hack in the galaxy library (see ./lib/galaxy/util/__init__.py line 142)
-	 If the name is 'url_paste' the string won't be sanitized, and all the non-alphanumeric characters 
-	 will be passed to the shell script -->
-	 <param name="url_paste" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > 
-    		<validator type="expression" message="Invalid Program!">value.find('\'')==-1</validator>
-	</param>
-
-	 <param name="file_data" type="text" size="20" label="Replace with" help="Use simple text, or &amp; (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." >
-    		<validator type="expression" message="Invalid Program!">value.find('\'')==-1</validator>
-	</param>
-
-	<param name="is_regex" type="boolean" checked="false" truevalue="-r" falsevalue="" label="Find-Pattern is a regular expression" 
-		help="see help section for details." />
-
-	<param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Case-Insensitive search" 
-		help="" />
-
-	<param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue="" label="find whole-words" 
-		help="ignore partial matches (e.g. 'apple' will not match 'snapple') " />
-
-	<param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Ignore first line" 
-		help="Select this option if the first line contains column headers. Text in the line will not be replaced. " />
-
-	<conditional name="searchwhere">
-		<param name="choice" type="select" label="Replace text in">
-			<option value="line" selected="true">entire line</option>
-			<option value="column">specific column</option>
-		</param>
-
-		<when value="line">
-		</when>
-
-		<when value="column">
-    			<param name="column" label="in column" type="data_column" data_ref="input" accept_default="true" />
-		</when>
-	</conditional>
-  </inputs>
-
-  <outputs>
-    <data format="input" name="output" metadata_source="input" />
-  </outputs>
-
-<help>
-
-**What it does**
-
-This tool finds &amp; replaces text in an input dataset.
-
-.. class:: infomark
-
-The **pattern to find** can be a simple text string, or a perl **regular expression** string (depending on *pattern is a regex* check-box).
-
-.. class:: infomark
-
-When using regular expressions, the **replace pattern** can contain back-references ( e.g. \\1 )
-
-.. class:: infomark
-
-This tool uses Perl regular expression syntax.
-
------
-
-**Examples of *regular-expression* Find Patterns**
-
-- **HELLO**     The word 'HELLO' (case sensitive).
-- **AG.T**      The letters A,G followed by any single character, followed by the letter T.
-- **A{4,}**     Four or more consecutive A's.
-- **chr2[012]\\t**       The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
-- **hsa-mir-([^ ]+)**        The text 'hsa-mir-' followed by one-or-more non-space characters. When using parentheses, the matched content of the parentheses can be accessed with **\\1** in the **replace** pattern.
-
-
-**Examples of Replace Patterns**
-
-- **WORLD**  The word 'WORLD' will be placed wherever the find pattern was found.
-- **FOO-&amp;-BAR**  Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **&amp;** (ampersand) represents the matched find pattern.
-- **\\1**   The text which matched the first parenthesis in the Find Pattern.
-
-
------
-
-**Example 1**
-
-**Find Pattern:** HELLO
-**Replace Pattern:** WORLD
-**Regular Expression:** no
-**Replace what:** entire line
-
-Every time the word HELLO is found, it will be replaced with the word WORLD. 
-
------
-
-**Example 2**
-
-**Find Pattern:** ^chr 
-**Replace Pattern:** (empty)
-**Regular Expression:** yes
-**Replace what:** column 11
-
-If column 11 (of every line) begins with the letters 'chr', those letters will be removed. Effectively, it will turn "chr4" into "4" and "chrXHet" into "XHet".
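-
-.. class:: infomark
-
-A rough shell equivalent of this example (a sketch only; it ignores the column restriction and assumes perl is available on the command line)::
-
-    perl -pe 's/^chr//' input.txt > output.txt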
-
-
------
-
-**Perl's Regular Expression Syntax**
-
-The Find &amp; Replace tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern descibing a certain amount of text. 
-
-- **( ) { } [ ] . * ? + \\ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
-- **^** matches the beginning of a string (but not an internal line).
-- **(** .. **)** groups a particular pattern.
-- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
-
-  - **{n}** The preceding item is matched exactly n times.
-  - **{n,}** The preceding item is matched n or more times. 
-  - **{n,m}** The preceding item is matched at least n times but not more than m times. 
-
-- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
-- **.** Matches any single character except a newline.
-- ***** The preceding item will be matched zero or more times.
-- **?** The preceding item is optional and matched at most once.
-- **+** The preceding item will be matched one or more times.
-- **^** has two meanings:
-  - matches the beginning of a line or string. 
-  - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
-- **$** matches the end of a line or string.
-- **\\|** Separates alternate possibilities. 
-- **\\d** matches a single digit
-- **\\w** matches a single letter or digit or an underscore.
-- **\\s** matches a single whitespace character (space or tab).
-
-
-</help>
-
-</tool>
--- a/tools/unix_tools/grep_tool.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,130 +0,0 @@
-<tool id="cshl_grep_tool" name="grep">
-  <description></description>
-  <command interpreter="sh">grep_wrapper.sh $input $output '$url_paste' $color -A $lines_after -B $lines_before $invert $case_sensitive</command>
-  <inputs>
-    <param format="txt" name="input" type="data" label="Select lines from" />
-
-    <param name="invert" type="select" label="that">
-      <option value="">Match</option>
-      <option value="-v">Don't Match</option>
-    </param>
-
-    <!-- Note: the parameter ane MUST BE 'url_paste' -
-         This is a hack in the galaxy library (see ./lib/galaxy/util/__init__.py line 142)
-	 If the name is 'url_paste' the string won't be sanitized, and all the non-alphanumeric characters 
-	 will be passed to the shell script -->
-    <param name="url_paste" type="text" size="40" label="Regular Expression" help=""> 
-    	<validator type="expression" message="Invalid Program!">value.find('\'')==-1</validator>
-    </param>
-
-    <param name="case_sensitive" type="select"  label="Match type"> 
-      <option value="-i">case insensitive</option>
-      <option value="">case sensitive</option>
-    </param>
-
-    <param name="lines_before" type="integer"  label="Show lines preceding the matched line" help="(same as grep -B, leave it at zero unless you know what you're doing)" value="0" /> 
-    <param name="lines_after" type="integer"  label="Show lines trailing the matched line" help="(same as grep -A, leave it at zero unless you know what you're doing)" value="0" /> 
-
-    <param name="color" type="select"  label="Output"> 
-      <option value="NOCOLOR">text file (for further processing)</option>
-      <option value="COLOR">Highlighted HTML (for easier viewing)</option>
-    </param>
-
-  </inputs>
-  <tests>
-	  <test>
-		  <!-- grep a FASTA file for sequences with specific motif -->
-		  <param name="input" value="unix_grep_input1.txt" />
-		  <output name="output" file="unix_grep_output1.txt" />
-		  <param name="case_sensitive" value="case sensitive" />
-		  <param name="invert" value="" />
-		  <param name="url_paste" value="AA.{2}GT" />
-		  <param name="lines_before" value="1" />
-		  <param name="lines_after" value="0" />
-		  <param name="color" value="NOCOLOR" />
-	  </test>
-	  <test>
-		  <!-- grep a FASTA file for sequences with specific motif -
-		 	show highlighed output -->
-		  <param name="input" value="unix_grep_input1.txt" />
-		  <output name="output" file="unix_grep_output2.html" />
-		  <param name="case_sensitive" value="case sensitive" />
-		  <param name="invert" value="" />
-		  <param name="url_paste" value="AA.{2}GT" />
-		  <param name="lines_before" value="0" />
-		  <param name="lines_after" value="0" />
-		  <param name="color" value="COLOR" />
-	  </test>
-  </tests>
-  <outputs>
-	  <data format="input" name="output" metadata_source="input" >
-		<change_format>
-			<when input="color" value="COLOR" format="HTML" />
-		</change_format>
- 	  </data>
-  </outputs>
-<help>
-
-**What it does**
-
-This tool runs the unix **grep** command on the selected data file.
-
-.. class:: infomark
-
-**TIP:** This tool uses the **perl** regular expression syntax (same as running 'grep -P'). This is **NOT** the POSIX or POSIX-extended syntax (unlike the awk/sed tools).
-
-
-**Further reading**
-
-- Wikipedia's Regular Expression page (http://en.wikipedia.org/wiki/Regular_expression)
-- Regular Expressions cheat-sheet (PDF) (http://www.addedbytes.com/cheat-sheets/download/regular-expressions-cheat-sheet-v2.pdf)
-- Grep Tutorial (http://www.panix.com/~elflord/unix/grep.html)
-
------
-
-**Grep Examples**
-
-- **AGC.AAT** would match lines with AGC followed by any character, followed by AAT (e.g. **AGCQAAT**, **AGCPAAT**, **AGCwAAT**)
-- **C{2,5}AGC** would match lines with 2 to 5 consecutive Cs followed by AGC
-- **TTT.{4,10}AAA** would match lines with 3 Ts, followed by 4 to 10 characters (any characters), followed by 3 As.
-- **^chr([0-9A-Za-z])+** would match lines that begin with chromosome names, such as lines in a BED format file.
-- **(ACGT){1,5}** would match at least 1 "ACGT" and at most 5 "ACGT" consecutively.
-- **hsa|mmu** would match lines containing "hsa" or "mmu" (or both).
- 
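-At a shell prompt the same searches would look like this (a sketch, assuming a grep build with Perl-regex support)::
-
-    grep -P 'C{2,5}AGC' input.txt       # lines with 2 to 5 Cs then AGC
-    grep -P -v 'hsa|mmu' input.txt      # inverted match
-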
------
-
-**Regular Expression Syntax**
-
-This tool selects lines containing (or not containing) a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text. 
-
-- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
-- **^** matches the beginning of a string (but not an internal line).
-- **\\d** matches a digit, same as [0-9].
-- **\\D** matches a non-digit.
-- **\\s** matches a whitespace character.
-- **\\S** matches anything BUT a whitespace.
-- **\\t** matches a tab.
-- **\\w** matches an alphanumeric character ( A to Z, 0 to 9 and underscore )
-- **\\W** matches anything but an alphanumeric character.
-- **(** .. **)** groups a particular pattern.
-- **\\Z** matches the end of a string (but not an internal line).
-- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
-
-  - **{n}** The preceding item is matched exactly n times.
-  - **{n,}** The preceding item is matched n or more times. 
-  - **{n,m}** The preceding item is matched at least n times but not more than m times. 
-
-- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
-- **.** Matches any single character except a newline.
-- ***** The preceding item will be matched zero or more times.
-- **?** The preceding item is optional and matched at most once.
-- **+** The preceding item will be matched one or more times.
-- **^** has two meanings:
-  - matches the beginning of a line or string. 
-  - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
-- **$** matches the end of a line or string.
-- **\|** Separates alternate possibilities. 
-
-
-</help>
-</tool>
--- a/tools/unix_tools/grep_wrapper.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-#!/bin/sh
-
-##
-## Galaxy wrapper for GREP command.
-##
-
-##
-## command line arguments:
-##   input_file
-##   output_file
-##   regex
-##   COLOR or NOCOLOR
-##   [other parameters passed on to grep]
-
-INPUT="$1"
-OUTPUT="$2"
-REGEX="$3"
-COLOR="$4"
-
-shift 4
-
-if [ -z "$COLOR" ]; then
-	echo usage: $0 INPUTFILE OUTPUTFILE REGEX COLOR\|NOCOLOR [other grep parameters] >&2
-	exit 1
-fi
-
-if [ ! -r "$INPUT" ]; then
-	echo "error: input file ($INPUT) not found!" >&2
-	exit 1
-fi
-
-# Messages printed to STDOUT will be displayed in the "INFO" field in the galaxy dataset.
-# This way the user can tell what command was run.
-echo "grep" "$@" "$REGEX"
-
-if [ "$COLOR" == "COLOR" ]; then
-	#
-	# What the heck is going on here???
-	# 1. "GREP_COLORS" is an environment variable, telling GREP which ANSI colors to use.
-	# 2. "--colors=always" tells grep to actually use colors (according to the GREP_COLORS variable)
-	# 3. first sed command translates the ANSI color to a <FONT> tag with blue color (and a <B> tag, too)
-	# 4. second sed command translates the no-color ANSI command to a </FONT> tag (and a </B> tag, too)
-	# 5. htmlize_pre scripts takes a text input and wraps it in <HTML><BODY><PRE> tags, making it a fixed-font HTML file.
-
-	GREP_COLORS="ms=31" grep --color=always "$@" -- "$REGEX" "$INPUT" | \
-		grep -v "^\[36m\[K--\[m\[K$" | \
-		sed -r 's/\[[0123456789;]+m\[K?/<font color="blue"><b>/g' | \
-		sed -r 's/\[m\[K?/<\/b><\/font>/g' | \
-		htmlize_pre.sh > "$OUTPUT"
-
-
-	if (( $? ));  then exit; fi
-
-elif [ "$COLOR" == "NOCOLOR" ]; then
-	grep "$@" -- "$REGEX" "$INPUT" | grep -v "^--$" > "$OUTPUT"
-	if (( $? ));  then exit; fi
-else
-	echo Error: third parameter must be "COLOR" or "NOCOLOR" >&2
-	exit 1
-fi
-
-exit 0
--- a/tools/unix_tools/grep_wrapper_old.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-#!/bin/sh
-
-##
-## Galaxy wrapper for GREP command.
-##
-
-##
-## command line arguments:
-##   input_file
-##   output_file
-##   regex
-##   COLOR or NOCOLOR
-##   [other parameters passed on to grep]
-
-INPUT="$1"
-OUTPUT="$2"
-REGEX="$3"
-COLOR="$4"
-
-shift 4
-
-if [ -z "$COLOR" ]; then
-	echo usage: $0 INPUTFILE OUTPUTFILE REGEX COLOR\|NOCOLOR [other grep parameters] >&2
-	exit 1
-fi
-
-if [ ! -r "$INPUT" ]; then
-	echo "error: input file ($INPUT) not found!" >&2
-	exit 1
-fi
-
-# Messages printed to STDOUT will be displayed in the "INFO" field in the galaxy dataset.
-# This way the user can tell what command was run.
-echo "grep" "$@" "$REGEX"
-
-if [ "$COLOR" == "COLOR" ]; then
-	#
-	# What the heck is going on here???
-	# 1. "GREP_COLORS" is an environment variable, telling GREP which ANSI colors to use.
-	# 2. "--colors=always" tells grep to actually use colors (according to the GREP_COLORS variable)
-	# 3. first sed command translates the ANSI color to a <FONT> tag with blue color (and a <B> tag, too)
-	# 4. second sed command translates the no-color ANSI command to a </FONT> tag (and a </B> tag, too)
-	# 5. htmlize_pre scripts takes a text input and wraps it in <HTML><BODY><PRE> tags, making it a fixed-font HTML file.
-
-	GREP_COLORS="ms=31" grep --color=always -P "$@" -- "$REGEX" "$INPUT" | \
-		grep -v "^\[36m\[K--\[m\[K$" | \
-		sed -r 's/\[[0123456789;]+m\[K?/<font color="blue"><b>/g' | \
-		sed -r 's/\[m\[K?/<\/b><\/font>/g' | \
-		htmlize_pre.sh > "$OUTPUT"
-
-
-	if (( $? ));  then exit; fi
-
-elif [ "$COLOR" == "NOCOLOR" ]; then
-	grep -P "$@" -- "$REGEX" "$INPUT" | grep -v "^--$" > "$OUTPUT"
-	if (( $? ));  then exit; fi
-else
-	echo Error: third parameter must be "COLOR" or "NOCOLOR" >&2
-	exit 1
-fi
-
-exit 0
--- a/tools/unix_tools/join_tool.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-#!/bin/sh
-
-#
-# NOTE:
-#  This is a wrapper for GNU's join under galaxy
-#  not meant to be used from the command line (if you're using the command line, simply run 'join' directly...)
-#
-# All parameters must be supplied.
-# the join_tool.xml file takes care of that.
-
-JOINTYPE="$1"
-OUTPUT_FORMAT="$2"
-EMPTY_STRING="$3"
-DELIMITER="$4"
-IGNORE_CASE="$5"
-
-INPUT1="$6"
-COLUMN1="$7"
-INPUT2="$8"
-COLUMN2="$9"
-OUTPUT="${10}"
-
-if [ "$OUTPUT" == "" ]; then	
-	echo "This script is part of galaxy. Don't run it manually.\n" >&2
-	exit 1;
-fi
-
-#This a TAB hack for galaxy (which can't transfer a "\t" as a parameter)
-[ "$DELIMITER" == "tab" ] && DELIMITER="	"
-
-#Remove spaces from the output format (if the user entered any)
-OUTPUT_FORMAT=${OUTPUT_FORMAT// /}
-[ "$OUTPUT_FORMAT" != "" ] && OUTPUT_FORMAT="-o $OUTPUT_FORMAT"
-
-echo join $OUTPUT_FORMAT -t "$DELIMITER" -e "$EMPTY_STRING" $IGNORE_CASE $JOINTYPE -1 "$COLUMN1" -2 "$COLUMN2" 
-#echo join $OUTPUT_FORMAT -t "$DELIMITER" -e "$EMPTY_STRING" $IGNORE_CASE $JOINTYPE -1 "$COLUMN1" -2 "$COLUMN2" "$INPUT1" "$INPUT2" \> "$OUTPUT" 
-join $OUTPUT_FORMAT -t "$DELIMITER" -e "$EMPTY_STRING" $IGNORE_CASE $JOINTYPE -1 "$COLUMN1" -2 "$COLUMN2" "$INPUT1" "$INPUT2" > "$OUTPUT" || exit 1
--- a/tools/unix_tools/join_tool.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-<tool id="cshl_join_tool" name="join">
-  <description>two files</description>
-  <command interpreter="sh">join_tool.sh "$jointype" "$output_format" 
-  				"$empty_string_filler" "$delimiter"
-				"$ignore_case"
-				"$input1" "$column1"
-				"$input2" "$column2"
-				"$output"
-  </command>
-  
-  <inputs>
-	<param format="txt" name="input1" type="data" label="1st file" />
-	<param name="column1" label="Column to use from 1st file" type="data_column" data_ref="input1" accept_default="true" />
-
-	<param format="txt" name="input2" type="data" label="2nd File" />
-	<param name="column2" label="Column to use from 2nd file" type="data_column" data_ref="input2" accept_default="true" />
-
-	<param name="jointype" type="select" label="Output lines appearing in">
-	      <option value=" ">BOTH 1st &amp; 2nd file.</option>
-	      <option value="-v 1">1st but not in 2nd file. [-v 1]</option>
-	      <option value="-v 2">2nd but not in 1st file. [-v 2]</option>
-	      <option value="-a 1">both 1st &amp; 2nd file, plus unpairable lines from 1st file. [-a 1]</option>
-	      <option value="-a 2">both 1st &amp; 2nd file, plus unpairable lines from 2st file. [-a 2]</option>
-	      <option value="-a 1 -a 2">All Lines [-a 1 -a 2]</option>
-	</param>
-
-	    <param name="delimiter" type="select" label="field-separator [-t]">
-		<option value=",">comma (,)</option>
-		<option value=":">colons (:) </option>
-		<option value=" ">single space</option>
-		<option value=".">dot (.)</option>
-		<option value="-">dash (-)</option>
-		<option value="|">pipe (|)</option>
-		<option value="_">underscore (_)</option>
-		<option selected="True" value="tab">tab</option>
-	    </param>
-
-	<param name="ignore_case" type="select" label="Case sensitivity">
-	      <option value="">Case sensitive</option>
-	      <option value="-i">Case INsensitive [-i]</option>
-	</param>
-
-	<param name="empty_string_filler" type="text" size="20" label="String replacement for empty fields [-e EMPTY]" help="Leave empty unless you know what you're doing. Use this when specifing output format" /> 
-
-	<param name="output_format" type="text" size="30" label="Output line format [-o FORMAT]" help="Leave empty unless you know what you're doing. Example: 1.1,2.1,2.1" /> 
-
-  </inputs>
-  <outputs>
-    <data name="output" format="input" metadata_source="input1" />
-  </outputs>
-  
-<help>
-</help>
-</tool>
--- a/tools/unix_tools/remove_ending.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-#!/bin/sh
-
-# Version 0.1 ,  15aug08
-# Written by Assaf Gordon (gordon@cshl.edu)
-#
-
-LINES="$1"
-INFILE="$2"
-OUTFILE="$3"
-
-if [ "$LINES" == "" ]; then
-	cat >&2 <<EOF 
-Remove Ending Lines
-
-Usage: $0 LINES [INFILE] [OUTFILE]
-
-   LINES - number of lines to remove from the end of the file
-   [INFILE] - input file (if not specified - defaults to STDIN)
-   [OUTFILE]- output file (if not specified - defaults to STDOUT)
-
-Input Example:
-
-#Chr	Start	End
-chr1	10	15
-chr1	40	20
-chr1	21	14
-total   3 chromosomes
-
-Removing 1 line (the last line) produces:
-
-#Chr	Start	End
-chr1	10	15
-chr1	40	20
-chr1	21	14
-
-Usage Example:
-   
-   \$ $0 1 < my_input_file.txt > my_output_file.txt
-
-EOF
-	
-	exit 1
-fi
-
-#Validate line argument - remove non-digits characters
-LINES=${LINES//[^[:digit:]]/}
-
-#Make sure the LINES string isn't empty
-#(after the substitution above, it will either contain digits or be empty)
-if [ -z "$LINES" ]; then
-	echo "Error: bad line value (must be numeric)" >&2
-	exit 1
-fi
-
-# Use default (stdin/out) values if infile / outfile not specified
-[ -z "$INFILE" ] && INFILE="/dev/stdin"
-[ -z "$OUTFILE" ] && OUTFILE="/dev/stdout"
-
-#Make sure the input file (if specified) exists.
-if [ ! -r "$INFILE" ]; then
-	echo "Error: input file ($INFILE) not found!" >&2
-	exit 1
-fi
-
-
-# The "gunzip -f" trick allows
-# piping a file (gzip or plain text, real file name or "/dev/stdin") to sed 
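-# The sed program keeps a sliding window of LINES+1 lines in the pattern
-# space: once past the first LINES lines it prints the oldest line (P),
-# reads in the next (N), and drops the printed one (D), so the last
-# LINES lines are never printed.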
-gunzip -f <"$INFILE" | sed -n -e :a -e "1,${LINES}!{P;N;D;};N;ba" > "$OUTFILE"
-
--- a/tools/unix_tools/remove_ending.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-<tool id="Remove ending" name="Remove ending">
-  <description>of a file</description>
-  <command interpreter="sh">remove_ending.sh $num_lines $input $out_file1</command>
-  <inputs>
-    <param name="num_lines" size="5" type="integer" value="1" label="Remove last" help="lines"/>
-    <param format="txt" name="input" type="data" label="from"/>
-  </inputs>
-  <tests>
-	  <test>
-		  <param name="input" value="remove_ending_input1.txt" />
-		  <output name="out_file1" file="remove_ending_output1.txt" />
-		  <param name="num_lines" value="2" />
-	  </test>
-  </tests>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <help>
-
-**What it does**
-
-This tool removes a specified number of lines from the end of a dataset.
-
------
-
-**Example**
-
-Input File::
-
-    chr7  56632  56652   D17003_CTCF_R6  310  +
-    chr7  56736  56756   D17003_CTCF_R7  354  +
-    chr7  56761  56781   D17003_CTCF_R4  220  +
-    chr7  56772  56792   D17003_CTCF_R7  372  +
-    chr7  56775  56795   D17003_CTCF_R4  207  +
-
-After removing the last 2 lines the dataset will look like this::
-
-    chr7  56632  56652   D17003_CTCF_R6  310  +
-    chr7  56736  56756   D17003_CTCF_R7  354  +
-    chr7  56761  56781   D17003_CTCF_R4  220  +
-
-</help>
-</tool>
--- a/tools/unix_tools/sed_tool.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-<tool id="cshl_sed_tool" name="sed">
-  <description></description>
-  <!-- NOTE
-  	  'sandbox' is a patched SED program,
-	  which blocks executing shell commands and file reading/writing.
-
-	  Hopefully, it is safe enough to allow users to execute their own SED commands
-	  -->
-  <command interpreter="sh">sed_wrapper.sh $silent $input $output '$url_paste'</command>
-  <inputs>
-    <param format="txt" name="input" type="data" label="File to process" />
-
-    <!-- Note: the parameter ane MUST BE 'url_paste' -
-         This is a hack in the galaxy library (see ./lib/galaxy/util/__init__.py line 142)
-	 If the name is 'url_paste' the string won't be sanitized, and all the non-alphanumeric characters 
-	 will be passed to the shell script -->
-    <param name="url_paste" type="text" area="true" size="5x35" label="SED Program" help=""> 
-    	<validator type="expression" message="Invalid Program!">value.find('\'')==-1</validator>
-    </param>
-
-    <param name="silent" type="select"  label="operation mode" help="(Same as 'sed -n', leave at 'normal' unless you know what you're doing)" > 
-      <option value="">normal</option>
-      <option value="-n">silent</option>
-    </param>
-
-  </inputs>
-  <outputs>
-    <data format="input" name="output" metadata_source="input" />
-  </outputs>
-<help>
-
-**What it does**
-
-This tool runs the unix **sed** command on the selected data file.
-
-.. class:: infomark
-
-**TIP:** This tool uses the **extended regular** expression syntax (same as running 'sed -r').
-
-
-
-**Further reading**
-
-- Short sed tutorial (http://www.linuxhowtos.org/System/sed_tutorial.htm)
-- Long sed tutorial (http://www.grymoire.com/Unix/Sed.html)
-- sed faq with good examples (http://sed.sourceforge.net/sedfaq.html)
-- sed cheat-sheet (http://www.catonmat.net/download/sed.stream.editor.cheat.sheet.pdf)
-- Collection of useful sed one-liners (http://student.northpark.edu/pemente/sed/sed1line.txt)
-
------
-
-**Sed commands**
-
-The most useful sed command is **s** (substitute).
-
-**Examples**
-
-- **s/hsa//**  will remove the first instance of 'hsa' in every line.
-- **s/hsa//g**  will remove all instances (because of the **g**) of 'hsa' in every line.
-- **s/A{4,}/--&amp;--/g**  will find sequences of 4 or more consecutive A's, and once found, will surround them with two dashes on each side. The **&amp;** marker is a place holder for 'whatever matched the regular expression'.
-- **s/hsa-mir-([^ ]+)/short name: \\1 full name: &amp;/**  will find strings such as 'hsa-mir-43a' (the regular expression is 'hsa-mir-' followed by non-space characters) and will replace them with a string such as 'short name: 43a full name: hsa-mir-43a'.  The **\\1** marker is a place holder for 'whatever matched the first parenthesis' (similar to perl's **$1**).
-
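-At a shell prompt, the first two examples would run as (a sketch, assuming GNU sed)::
-
-    sed -r 's/hsa//' input.txt > output.txt
-    sed -r 's/hsa//g' input.txt > output.txt
-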
-
-**sed's Regular Expression Syntax**
-
-Sed operates on lines matching the given pattern. A Regular Expression is a pattern describing a certain amount of text. 
-
-- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
-- **^** matches the beginning of a string (but not an internal line).
-- **(** .. **)** groups a particular pattern.
-- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
-
-  - **{n}** The preceding item is matched exactly n times.
-  - **{n,}** The preceding item is matched n or more times. 
-  - **{n,m}** The preceding item is matched at least n times but not more than m times. 
-
-- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
-- **.** Matches any single character except a newline.
-- ***** The preceding item will be matched zero or more times.
-- **?** The preceding item is optional and matched at most once.
-- **+** The preceding item will be matched one or more times.
-- **^** has two meanings:
-  - matches the beginning of a line or string. 
-  - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
-- **$** matches the end of a line or string.
-- **\|** Separates alternate possibilities. 
-
-
-**Note**: SED uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported.
-
-</help>
-</tool>
--- a/tools/unix_tools/sed_wrapper.sh	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-#!/bin/sh
-
-##
-## Galaxy wrapper for SED command
-##
-
-##
-## command line arguments:
-##   input_file
-##   output_file
-##   sed-program
-##   [other parameters passed on to sed]
-
-INPUT="$1"
-OUTPUT="$2"
-PROG="$3"
-
-shift 3
-
-if [ -z "$PROG" ]; then
-	echo usage: $0 INPUTFILE OUTPUTFILE SED-PROGRAM [other sed parameters] >&2
-	exit 1
-fi
-
-if [ ! -r "$INPUT" ]; then
-	echo "error: input file ($INPUT) not found!" >&2
-	exit 1
-fi
-
-# Messages printed to STDOUT will be displayed in the "INFO" field in the galaxy dataset.
-# This way the user can tell what command was run.
-echo "sed" "$@" "$PROG"
-
-sed -r --sandbox "$@" "$PROG" "$INPUT" > "$OUTPUT"
-if (( $? ));  then exit; fi
-
-exit 0
--- a/tools/unix_tools/sort_tool.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-<tool id="cshl_sort_tool" name="Sort">
-  <!-- 
-   	note 1:
-	  the 'version' sort (or natural order sort)
-	  requires GNU Coreutils 7.1 or later
-
-	note 2:
-	  for greater efficiency, sort buffer size is very large.
-	  If your Galaxy server doesn't have so much memory (or the
-	  sorts you use don't require it) - you can decrease the memory size.
-	  (argument is "-S 2G")
-  -->
-  <command>sort -S 2G $unique 
-      #for $key in $sortkeys
-       '-k ${key.column},${key.column}${key.order}${key.style}'
-      #end for
-  	$input > $out_file1
-  </command>
-
-  <inputs>
-	<param format="txt" name="input" type="data" label="Sort Query" />
-		
-	<param name="unique" type="select" label="Output only unique values?">
-		<option value="">No</option>
-		<option value="-u">Yes</option>
-	</param>
-
-	<repeat name="sortkeys" title="sort key">
-	    <param name="column" label="on column" type="data_column" data_ref="input" accept_default="true" />
-	    <param name="order" type="select" display="radio" label="in">
-	      <option value="r">Descending order</option>
-	      <option value="">Ascending order</option>
-	    </param>
-	    <param name="style" type="select" display="radio" label="Flavor">
-	      <option value="n">Fast numeric sort ([-n])</option>
-	      <option value="g">General numeric sort ( scientific notation [-g])</option>
-	      <option value="V">Natural/Version sort ([-V]) </option>
-	      <option value="">Alphabetical sort</option>
-	    </param>
-	</repeat>
-  </inputs>
-  <tests>
-	  <test>
-		  <!-- Sort Descending numerical order,
-		       with scientific notation -->
-		  <param name="input" value="unix_sort_input1.txt" />
-		  <output name="output" file="unix_sort_output1.txt" />
-		  <param name="unique" value="No" />
-		  <param name="column" value="2" />
-		  <param name="order"  value="r" />
-		  <param name="style"  value="g" />
-	  </test>
-	  <test>
-		  <!-- Sort Ascending numerical order,
-		  with scientific notation - outputting unique values only 
-
-		  The catch:
-		  	chr15 appears twice, with the same value (0.0314 and 3.14e-2).
-			In the output, it should appear only once because of the unique flag
-		  -->
-		  <param name="input" value="unix_sort_input1.txt" />
-		  <output name="output" file="unix_sort_output2.txt" />
-		  <param name="unique" value="Yes" />
-		  <param name="column" value="2" />
-		  <param name="order"  value="" />
-		  <param name="style"  value="g" />
-	  </test>
-	  <test>
-		  <!-- Sort Ascending 'natural' order -->
-		  <param name="input" value="unix_sort_input1.txt" />
-		  <output name="output" file="unix_sort_output3.txt" />
-		  <param name="unique" value="No" />
-		  <param name="column" value="1" />
-		  <param name="order"  value="" />
-		  <param name="style"  value="V" />
-	  </test>
-  </tests>
-  <outputs>
-    <data format="input" name="out_file1" metadata_source="input"/>
-  </outputs>
-  <help>
-
-**What it does**
-
-This tool runs the unix **sort** command on the selected data file.
-
------
-
-**Sorting Styles**
-
-* **Fast Numeric**: sort by numeric values. Handles integer values (e.g. 43, 134) and decimal-point values (e.g. 3.14). *Does not* handle scientific notation (e.g. -2.32e2).
-* **General Numeric**: sort by numeric values. Handles all numeric notations (including scientific notation). Slower than *fast numeric*, so use only when necessary.
-* **Natural Sort**: Sort in 'natural' order (natural to humans, not to computers). See example below.
-* **Alphabetical sort**: Sort in strict alphabetical order. See example below.
-
-
-
-
-**Sorting Examples**
-
-Given the following list::
-
-    chr4
-    chr13
-    chr1
-    chr10
-    chr20
-    chr2
-
-**Alphabetical sort** would produce the following sorted list::
-
-    chr1
-    chr10
-    chr13
-    chr2
-    chr20
-    chr4
-
-**Natural Sort** would produce the following sorted list::
-
-    chr1
-    chr2
-    chr4
-    chr10
-    chr13
-    chr20
-
-
-.. class:: infomark
-
-If you're planning to use the file with another tool that expects sorted files (such as *join*), you should use the **Alphabetical sort**, not the **Natural Sort**. Natural sort order is easier for humans, but is unnatural for computer programs.
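-
-For reference, the underlying commands look like this (a sketch, assuming GNU coreutils sort)::
-
-    sort -k2,2g input.txt        # general numeric sort on column 2, ascending
-    sort -k1,1V input.txt        # natural/version sort on column 1
-    sort -u -k2,2n input.txt     # fast numeric sort, unique values only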
-
-  </help>
-</tool>
--- a/tools/unix_tools/uniq_tool.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-<tool id="cshl_uniq_tool" name="uniq">
-  <command>
-  	uniq -f $skipfields $count $repeated $ignorecase $uniqueonly $input $output
-  </command>
-
-  <inputs>
-	<param format="txt" name="input" type="data" label="file to scan for unique values" />
-		
-	<param name="count" type="boolean" label="count [-c]" help="prefix lines by the number of occurrences" truevalue="-c" falsevalue="" />
-
-	<param name="repeated" type="boolean" label="repeated [-d]" help="only print duplicate lines" truevalue="-d" falsevalue="" />
-
-	<param name="ignorecase" type="boolean" label="ignore case [-i]" help="ignore differences in case when comparing" truevalue="-i" falsevalue="" />
-
-	<param name="uniqueonly" type="boolean" label="unique only [-u]" help="only print unique lines" truevalue="-u" falsevalue="" />
-
-	<param name="skipfields" type="integer" label="skip fields [-f]" help="avoind comparing the first N fields. (use zero to start from the first field)" size="2" value="0" />
-  </inputs>
-
-  <outputs>
-    <data format="input" name="output" metadata_source="input"/>
-  </outputs>
-  <help>
-  </help>
-</tool>
--- a/tools/unix_tools/word_list_grep.pl	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,182 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-use Getopt::Std;
-
-sub parse_command_line();
-sub load_word_list();
-sub compile_regex(@);
-sub usage();
-
-my $word_list_file;
-my $input_file ;
-my $output_file;
-my $find_complete_words ;
-my $find_inverse; 
-my $find_in_specific_column ;
-my $find_case_insensitive ;
-my $skip_first_line ;
-
-
-##
-## Program Start
-##
-usage() if @ARGV==0;
-parse_command_line();
-
-my @words = load_word_list();
-
-my $regex = compile_regex(@words);
-
-# Allow first line to pass without filtering?
-if ( $skip_first_line ) {
-	my $line = <$input_file>;
-	print $output_file $line ;
-}
-
-
-##
-## Main loop
-##
-while ( <$input_file> ) {
-	my $target = $_;
-
-
-	# If searching in a specific column (and not in the entire line)
-	# extract the content of that one column
-	if ( $find_in_specific_column ) {
-		my @columns = split ;
-
-		#not enough columns in this line - skip it
-		next if ( @columns < $find_in_specific_column ) ;
-
-		$target = $columns [ $find_in_specific_column - 1 ] ;
-	}
-
-	# Match ?
-	if ( ($target =~ $regex) ^ ($find_inverse) ) {
-		print $output_file $_ ;
-	}
-}
-
-close $input_file;
-close $output_file;
-
-##
-## Program end
-##
-
-
-sub parse_command_line()
-{
-	my %opts ;
-	getopts('siwvc:o:', \%opts) or die "$0: Invalid option specified\n";
-
-	die "$0: missing word-list file name\n" if (@ARGV==0); 
-
-	$word_list_file = $ARGV[0];
-	die "$0: Word-list file '$word_list_file' not found\n" unless -e $word_list_file ;
-
-	$find_complete_words = ( exists $opts{w} ) ;
-	$find_inverse = ( exists $opts{v} ) ;
-	$find_case_insensitive = ( exists $opts{i} ) ;
-	$skip_first_line = ( exists $opts{s} ) ;
-
-
-	# Search in specific column ?
-	if ( defined $opts{c} ) {
-		$find_in_specific_column = $opts{c};
-
-		die "$0: invalid column number ($find_in_specific_column).\n"
-			unless $find_in_specific_column =~ /^\d+$/ ;
-			
-		die "$0: invalid column number ($find_in_specific_column).\n"
-			if $find_in_specific_column <= 0; 
-	}
-	else {
-		$find_in_specific_column = 0 ;
-	}
-
-
-	# Output File specified (instead of STDOUT) ?
-	if ( defined $opts{o} ) {
-		my $filename = $opts{o};
-		open $output_file, ">$filename" or die "$0: Failed to create output file '$filename': $!\n" ;
-	} else {
-		$output_file = *STDOUT ;
-	}
-
-
-
-	# Input file Specified (instead of STDIN) ?
-	if ( @ARGV>1 ) {
-		my $filename = $ARGV[1];
-		open $input_file, "<$filename" or die "$0: Failed to open input file '$filename': $!\n" ;
-	} else {
-		$input_file = *STDIN;
-	}
-}
-
-sub load_word_list()
-{
-	open WORDLIST, "<$word_list_file" or die "$0: Failed to open word-list file '$word_list_file'\n" ;
-	my @words ;
-	while ( <WORDLIST> ) {
-		chomp ;
-		s/^\s+//;
-		s/\s+$//;
-		next if length==0;
-		push @words,quotemeta $_;
-	}
-	close WORDLIST;
-
-	die "$0: Error: word-list file '$word_list_file' is empty!\n" 
-       		unless @words;
-
-	return @words;	
-}
-
-sub compile_regex(@)
-{
-	my @words = @_;
-
-	my $regex_string = join ( '|', @words ) ;
-	if ( $find_complete_words ) {
-		$regex_string = "\\b($regex_string)\\b"; 
-	}
-	my $regex;
-
-	if ( $find_case_insensitive ) {
-		$regex = qr/$regex_string/i ;
-	} else {
-		$regex = qr/$regex_string/;
-	}
-
-	return $regex;
-}
-
-sub usage()
-{
-print <<EOF;
-
-Word-List Grep
-Copyright (C) 2009 - by A. Gordon ( gordon at cshl dot edu )
-
-Usage: $0 [-o OUTPUT] [-s] [-w] [-i] [-c N] [-v] WORD-LIST-FILE [INPUT-FILE]
-
-   -s   - do not filter first line - always output the first line from the input file.
-   -w   - search for complete words (not partial sub-strings).
-   -i   - case insensitive search.
-   -v   - inverse - output lines NOT matching the word list.
-   -c N - check only column N, instead of entire line (line split by whitespace).
-   -o OUT - specify output file (default = STDOUT).
-   WORD-LIST-FILE - file containing one word per line. These will be used
-          for the search. 
-   INPUT-FILE - (optional) read from file (default = from STDIN).
-
-
-
-EOF
-
-	exit;
-}
--- a/tools/unix_tools/word_list_grep.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-<tool id="cshl_word_list_grep" name="Select lines">
-<description>by word list</description>
-<command interpreter="perl">
-	word_list_grep.pl 
-	#if $searchwhere.choice == "column":
-		-c $searchwhere.column
-	#end if
-	-o $output 
-	$inverse 
-	$caseinsensitive 
-	$wholewords 
-	$skip_first_line
-	$wordlist 
-	$input
-</command>
-
-<inputs>
-	<param name="input" format="txt" type="data" label="input file" />
-	<param name="wordlist" format="txt" type="data" label="word list file" />
-
-
-	<param name="inverse" type="boolean" checked="false" truevalue="-v" falsevalue="" label="Inverse filter" 
-		help="Report lines NOT matching the word list" />
-
-	<param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Case-Insensitive search" 
-		help="" />
-
-	<param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue="" label="find whole-words" 
-		help="ignore partial matches (e.g. 'apple' will not match 'snapple') " />
-
-	<param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Ignore first line" 
-		help="Select this option if the first line contains column headers. First line will not be filtered. " />
-
-	<conditional name="searchwhere">
-		<param name="choice" type="select" label="Search words in">
-			<option value="line" selected="true">entire line</option>
-			<option value="column">specific column</option>
-		</param>
-
-		<when value="line">
-		</when>
-
-		<when value="column">
-    			<param name="column" label="in column" type="data_column" data_ref="input" accept_default="true" />
-		</when>
-	</conditional>
-
-</inputs>
-
-<outputs>
-	<data name="output" format="input" metadata_source="input" />
-</outputs>
-
-<help>
-**What it does**
-
-This tool selects lines that match words from a word list.
-
---------
-
-**Example**
-
-Input file (UCSC's rmsk track from dm3)::
-
-    585	787	66	241	11	chrXHet	2860	3009	-201103	-	DNAREP1_DM	LINE	Penelope	0	594	435	1
-    585	1383	78	220	0	chrXHet	3012	3320	-200792	-	DNAREP1_DM	LINE	Penelope	-217	377	2	1
-    585	244	103	0	0	chrXHet	3737	3776	-200336	-	DNAREP1_DM	LINE	Penelope	-555	39	1	1
-    585	2270	83	144	0	chrXHet	7907	8426	-195686	+	DNAREP1_DM	LINE	Penelope	1	594	0	1
-    585	617	189	73	68	chrXHet	10466	10671	-193441	+	DNAREP1_DM	LINE	Penelope	368	573	-21	1
-    586	1122	71	185	0	chrXHet	173138	173322	-30790	-	PROTOP	DNA	P	-4033	447	230	1
-    ...
-    ...
-
-
-Word list file::
-
-  STALKER
-  PROTOP
-
- 
-
-Output lines (searching in column 11)::
-
-    586	1122	71	185	0	chrXHet	173138	173322	-30790	        -	PROTOP	DNA	P	-4033	447	230	1
-    586	228	162	0	0	chrXHet	181026	181063	-23049	        +	STALKER4_I	LTR	Gypsy	9	45	-6485	1
-    585	245	105	26	0	chr3R	41609	41647	-27863406	+	PROTOP_B	DNA	P	507	545	-608	4
-    586	238	91	0	0	chr3R	140224	140257	-27764796	-	PROTOP_B	DNA	P	-617	536	504	4
-    ...
-    ...
-
-( With **find whole-words** not selected, *PROTOP* matched *PROTOP_B*, *STALKER* matched *STALKER4_I* )
-
-
-
-
-Output lines (searching in column 11, and whole-words only)::
-
-    586	670	90	38	57	chrXHet	168356	168462	-35650	-	PROTOP	DNA	P	-459	4021	3918	1
-    586	413	139	70	0	chrXHet	168462	168548	-35564	-	PROTOP	DNA	P	-3406	1074	983	1
-    586	1122	71	185	0	chrXHet	173138	173322	-30790	-	PROTOP	DNA	P	-4033	447	230	1
-    ...
-    ...
-
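-.. class:: infomark
-
-A rough shell equivalent for the whole-line search (a sketch only; the Perl script above adds column-restricted search)::
-
-    grep -w -F -f wordlist.txt input.txt
-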
-</help>
-
-</tool>
--- a/tools/validation/fix_errors.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Fix errors in a dataset.
-For now, only removing erroneous lines is supported.
-
-usage: %prog input errorsfile output
-    -x, --ext: dataset extension (type)
-    -m, --methods=N: comma separated list of repair methods
-"""
-
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-from galaxy import util
-
-def main():
-    options, args = doc_optparse.parse( __doc__ )
-    methods = []
-    try:
-        if options.methods: methods = options.methods.split(",")
-    except:
-        pass
-    
-    ext = options.ext
-
-    in_file = open(args[0], "r")
-    error_file = open(args[1], "r")
-    out_file = open(args[2], "w")
-
-    # string_to_object errors
-    error_list = util.string_to_object(error_file.read())
-    # index by error type and then by line number
-    error_lines = {}
-    error_types = {}
-    for error in error_list:
-        if error.linenum:
-            if error.linenum in error_lines:
-                error_lines[error.linenum].append(error)
-            else:
-                error_lines[error.linenum] = [error]
-        error_type = error.__class__.__name__
-        if error_type in error_types:
-            error_types[error_type].append(error)
-        else:
-            error_types[error_type] = [error]
-
-    linenum = 0
-    for line in in_file:
-        linenum += 1
-        # write the line unless the "lines" repair method marks it for removal
-        if "lines" in methods:
-            if linenum in error_lines:
-                line = None
-            # other processing here?
-        if line:
-            out_file.write(line)
-    
-if __name__ == "__main__":
-    main()
--- a/tools/validation/fix_errors.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-<?xml version="1.0"?>
-<tool name="Fix errors" id="fix_errors" hidden="true">
-	
-	<description>in data validation</description>
-	
-	<command interpreter="python">
-		fix_errors.py $input $errorsfile $output -x $ext --methods=$methods
-	</command>
-	
-	<inputs>
-		<param name="errorsfile" type="text" />
-	        <param type="data" name="input" />
-		<param name="ext" type="text" />
-		<param name="methods" type="text" />
-	</inputs>
-	
-	<code file="fix_errors_code.py"/>
-	
-	<outputs>
-		<data name="output" format="input" metadata="input" />
-	</outputs>
-
-</tool>
\ No newline at end of file
--- a/tools/validation/fix_errors_code.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-# runs after the job (and after the default post-filter)
-
-import pkg_resources
-pkg_resources.require( "bx-python" )
-
-from galaxy import datatypes, jobs, util
-# needed to reference ParseError types, is this bad?
-from bx.tabular.io import *
-from bx.intervals.io import *
-import sys, tempfile, os
-
-def validate(incoming):
-    """Validator"""
-    #raise Exception, 'not quite right'
-    pass
-
-def exec_before_job( app, inp_data, out_data, param_dict, tool=None):
-    """Build a temp file with errors in it"""
-    errors = []
-    for name, data in inp_data.items():
-        validation_errors = data.validation_errors
-        for error in validation_errors:
-            # build dummy class
-            try:
-                temp = eval(error.err_type)()
-            except:
-                temp = object()
-            # stuff attributes
-            temp.__dict__ = util.string_to_object( error.attributes )
-            errors.append(temp)
-    # There *should* only be 1 input, so we assume there is and continue
-    # base64 pickle
-    errors_str = util.object_to_string( errors )
-    # write
-    database_tmp = "./database/tmp" # globally visible path
-    error_file = tempfile.NamedTemporaryFile(mode="w", dir=database_tmp, suffix=".b64")
-    error_file_name = error_file.name
-    error_file.close()
-    error_file = open(error_file_name, "w")
-    error_file.write(errors_str)
-    error_file.close()
-    param_dict["errorsfile"] = error_file_name
-    
-    
-def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
-    # in a perfect world, changes to param_dict would persist
-    # for now, unlink from tool
-    # os.unlink(param_dict["errorsfile"])
-    pass
--- a/tools/validation/validate.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Validate a dataset based on the extension and metadata passed in on the
-command line.  Outputs a base64-pickled representation of the exceptions.
-
-usage: %prog input output
-    -m, --metadata=N: base64 pickled metadata
-    -x, --ext=N: extension as understood by galaxy
-"""
-
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-from galaxy import model
-from fileinput import FileInput
-from galaxy import util
-
-def main():
-    options, args = doc_optparse.parse( __doc__ )
-
-    try:
-        extension = options.ext
-    except:
-        doc_optparse.exception()
-
-    # create datatype
-    data = model.Dataset( extension=extension, id=int( args[0] ) )
-    data.file_path = "/home/ian/trunk/database/files/"
-    
-    if options.metadata:
-        data.metadata = util.string_to_object( options.metadata )
-
-    errors = data.datatype.validate( data )
-    print util.object_to_string(errors)
-
-if __name__ == "__main__":
-    main()
--- a/tools/vcf_tools/annotate.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,163 +0,0 @@
-#!/usr/bin/python
-
-import os.path
-import sys
-import optparse
-
-import vcfClass
-from vcfClass import *
-
-import tools
-from tools import *
-
-# Check that the reference and alternate in the dbsnp vcf file match those
-# from the input vcf file.
-def checkRefAlt(vcfRef, vcfAlt, dbsnpRef, dbsnpAlt, ref, position, annotation):
-  text = "WARNING: ref and alt alleles differ between vcf and " + annotation + " " + ref + ":" + str(position) + " vcf: " + \
-         vcfRef + "/" + vcfAlt + ", dbsnp: " + dbsnpRef + "/" + dbsnpAlt
-
-  allelesAgree = True
-  if vcfRef.lower() != dbsnpRef.lower():
-    if vcfRef.lower() != dbsnpAlt.lower():
-      #print >> sys.stderr, text
-      allelesAgree = False
-  else:
-    if vcfAlt.lower() != dbsnpAlt.lower():
-      #print >> sys.stderr, text
-      allelesAgree = False
-
-  return allelesAgree
-
-# Intersect two vcf files.  It is assumed that the two files are
-# sorted by genomic coordinates and the reference sequences are
-# in the same order.
-def annotateVcf(v, d, outputFile, annotation):
-  success1 = v.getRecord()
-  success2 = d.getRecord()
-  currentReferenceSequence = v.referenceSequence
-
-# Finish when the end of the first file has been reached.
-  while success1:
-
-# If the end of the dbsnp vcf file is reached, write out the
-# remaining records from the vcf file.
-    if not success2:
-      outputFile.write(v.record)
-      success1 = v.getRecord()
-
-    if v.referenceSequence == d.referenceSequence and v.referenceSequence == currentReferenceSequence:
-      if v.position == d.position:
-        allelesAgree = checkRefAlt(v.ref, v.alt, d.ref, d.alt, v.referenceSequence, v.position, annotation)
-        if annotation == "dbsnp": v.rsid = d.getDbsnpInfo()
-        elif annotation == "hapmap":
-          if allelesAgree: v.info += ";HM3"
-          else: v.info += ";HM3A"
-        record = v.buildRecord(False)
-        outputFile.write(record)
-
-        success1 = v.getRecord()
-        success2 = d.getRecord()
-      elif d.position > v.position: success1 = v.parseVcf(d.referenceSequence, d.position, True, outputFile)
-      elif v.position > d.position: success2 = d.parseVcf(v.referenceSequence, v.position, False, None)
-    else:
-      if v.referenceSequence == currentReferenceSequence: success1 = v.parseVcf(d.referenceSequence, d.position, True, outputFile)
-      elif d.referenceSequence == currentReferenceSequence: success2 = d.parseVcf(v.referenceSequence, v.position, False, None)
-
-# If the last record for a reference sequence is the same for both vcf
-# files, they will both have referenceSequences different from the
-# current reference sequence.  Change the reference sequence to reflect
-# this and proceed.
-      else:
-        if v.referenceSequence != d.referenceSequence:
-          print >> sys.stderr, "ERROR: Reference sequences for both files are unexpectedly different."
-          print >> sys.stderr, "Check that both files contain records for the following reference sequences:"
-          print >> sys.stderr, "\t", v.referenceSequence, " and ", d.referenceSequence
-          exit(1)
-      currentReferenceSequence = v.referenceSequence
-
-def main():
-
-# Parse the command line options
-  usage = "Usage: vcfPytools.py annotate [options]"
-  parser = optparse.OptionParser(usage = usage)
-  parser.add_option("-i", "--in",
-                    action="store", type="string",
-                    dest="vcfFile", help="input vcf files")
-  parser.add_option("-d", "--dbsnp",
-                    action="store", type="string",
-                    dest="dbsnpFile", help="input dbsnp vcf file")
-  parser.add_option("-m", "--hapmap",
-                    action="store", type="string",
-                    dest="hapmapFile", help="input hapmap vcf file")
-  parser.add_option("-o", "--out",
-                    action="store", type="string",
-                    dest="output", help="output vcf file")
-
-  (options, args) = parser.parse_args()
-
-# Check that a single  vcf file is given.
-  if options.vcfFile == None:
-    parser.print_help()
-    print >> sys.stderr, "\nInput vcf file (--in, -i) is required for dbsnp annotation."
-    exit(1)
-
-# Check that either a hapmap or a dbsnp vcf file is included.
-  if options.dbsnpFile == None and options.hapmapFile == None:
-    parser.print_help()
-    print >> sys.stderr, "\ndbSNP or hapmap vcf file is required (--dbsnp, -d, --hapmap, -h)."
-    exit(1)
-  elif options.dbsnpFile != None and options.hapmapFile != None:
-    parser.print_help()
-    print >> sys.stderr, "\ndbSNP or hapmap vcf file is required, not both (--dbsnp, -d, --hapmap, -h)."
-    exit(1)
-
-# Set the output file to stdout if no output file was specified.
-  outputFile, writeOut = setOutput(options.output) # tools.py
-
-  v = vcf() # Define vcf object.
-  d = vcf() # Define dbsnp/hapmap vcf object.
-  if options.dbsnpFile:
-    d.dbsnpVcf = True
-    annotationFile = options.dbsnpFile
-    annotation = "dbsnp"
-  elif options.hapmapFile:
-    d.hapmapVcf = True
-    annotationFile = options.hapmapFile
-    annotation = "hapmap"
-
-# Open the vcf files.
-  v.openVcf(options.vcfFile)
-  d.openVcf(annotationFile)
-
-# Read in the header information.
-  v.parseHeader(options.vcfFile, writeOut)
-  d.parseHeader(annotationFile, writeOut)
-
-# Add an extra line to the vcf header to indicate the file used for
-# performing dbsnp annotation.
-  taskDescriptor = "##vcfPytools=annotated vcf file with "
-  if options.dbsnpFile: taskDescriptor += "dbSNP file " + options.dbsnpFile
-  elif options.hapmapFile:
-    taskDescriptor += "hapmap file " + options.hapmapFile
-    v.infoHeaderString["HM3"] = "##INFO=<ID=HM3,Number=0,Type=Flag,Description=\"Hapmap3.2 membership determined from file " + \
-                                options.hapmapFile + "\">"
-    v.infoHeaderString["HM3A"] = "##INFO=<ID=HM3A,Number=0,Type=Flag,Description=\"Hapmap3.2 membership (with different alleles)" + \
-                                 ", determined from file " + options.hapmapFile + "\">"
-  writeHeader(outputFile, v, False, taskDescriptor) # tools.py
-
-# Annotate the vcf file.
-  annotateVcf(v, d, outputFile, annotation)
-
-# Check that the input files had the same list of reference sequences.
-# If not, it is possible that there were some problems.
-  checkReferenceSequenceLists(v.referenceSequenceList, d.referenceSequenceList) # tools.py
-
-# Close the vcf files.
-  v.closeVcf(options.vcfFile)
-  d.closeVcf(annotationFile)
-
-# End the program.
-  return 0
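
The annotateVcf routine above is a two-pointer walk over two coordinate-sorted record streams. As a minimal sketch of the same idea, assuming both streams are sorted by position within a single reference sequence and yield (position, record) pairs (the stream and helper names here are illustrative, not part of vcfPytools):

    def walk_matches(stream_a, stream_b):
        # Advance whichever stream is behind; yield the pairs whose
        # positions coincide, which is where annotation happens.
        a = next(stream_a, None)
        b = next(stream_b, None)
        while a is not None and b is not None:
            if a[0] == b[0]:
                yield a[1], b[1]
                a = next(stream_a, None)
                b = next(stream_b, None)
            elif a[0] < b[0]:
                a = next(stream_a, None)
            else:
                b = next(stream_b, None)

As in the real code, the walk stops as soon as either stream is exhausted, since no further matches are possible.
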
--- a/tools/vcf_tools/annotate.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-<tool id="vcf_annotate" name="Annotate" version="1.0.0">
-  <description>a VCF file (dbSNP, hapmap)</description>
-  <command interpreter="python">
-    vcfPytools.py
-      annotate 
-      --in=$input1
-      #if $annotation_options.annotate == "dbsnp"
-      --dbsnp=$input2
-      #elif $annotation_options.annotate == "hapmap"
-      --hapmap=$input2
-      #end if
-      --out=$output1
-  </command>
-  <inputs>
-    <param name="input1" label="VCF file to annotate" type="data" format="vcf" />
-    <conditional name="annotation_options">
-      <param name="annotate" type="select" label="annotation source">
-        <option value="dbsnp">dbSNP vcf file</option>
-        <option value="hapmap">hapmap vcf file</option>
-      </param>
-      <when value="dbsnp">
-        <param name="input2" label="dbSNP vcf file" type="data" format="vcf" help="This option will annotate the vcf file with dbSNP rsid values.  The input dbSNP file must also be in vcf v4.0 format.  Only dbSNP entries with VC=SNP are included."/>
-      </when>
-      <when value="hapmap">
-        <param name="input2" label="hapmap vcf file" type="data" format="vcf" help="This option will annotate the vcf file info string to include HM3 if the record is included hapmap.  If the ref/alt values do not match the hapmap file, the info string will be populated with HM3A."/>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="vcf" name="output1" label="${tool.name} ${on_string}" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="test.small.vcf" ftype="vcf" />
-      <param name="annotate" value="dbsnp" />
-      <param name="input2" value="dbsnp.small.vcf" ftype="vcf" />
-      <output name="output" file="test_annotated_dbsnp.vcf" lines_diff="6" ftype="vcf" />
-    </test>
-    <test>
-      <param name="input1" value="test.small.vcf" ftype="vcf" />
-      <param name="annotate" value="hapmap" />
-      <param name="input2" value="hapmap.small.vcf" ftype="vcf" />
-      <output name="output" file="test_annotated_hapmap.vcf" lines_diff="6" ftype="vcf" />
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool uses vcfPytools_' annotate command to annotate a VCF file
-
-.. _vcfPytools: https://github.com/AlistairNWard/vcfPytools
-
-Currently, either a hapmap or a dbsnp file should be provided, not both.
-
-dbSNP option will annotate the VCF file with dbSNP rsid values.  The input dbSNP file must also be in VCF v4.0 format.  Only dbSNP entries with VC=SNP are included.
-
-hapmap option will annotate the VCF file info string to include HM3 if the record is included in hapmap.  If the ref/alt values do not match the hapmap file, the info string will be populated with HM3A.
-
-
-  </help>
-</tool>
\ No newline at end of file
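
The help text above describes the two hapmap membership flags. As a hedged one-line sketch of the flag choice (hapmap_flag is a hypothetical helper; the real logic lives in annotateVcf in annotate.py):

    def hapmap_flag(info, alleles_agree):
        # HM3 when ref/alt agree with the hapmap record, HM3A otherwise.
        return info + (";HM3" if alleles_agree else ";HM3A")
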
--- a/tools/vcf_tools/bedClass.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-#!/usr/bin/python
-
-import os.path
-import sys
-
-class bed:
-  def __init__(self):
-    self.numberTargets = 0
-    self.referenceSequences = {}
-    self.referenceSequenceList = []
-
-  def openBed(self, filename):
-    if filename == "stdin": self.filehandle = sys.stdin
-    else:
-      try: self.filehandle = open(filename,"r")
-      except IOError:
-        print >> sys.stderr, "Failed to find file: ",filename
-        exit(1)
-
-# Get a bed record.
-  def getRecord(self):
-    self.record = self.filehandle.readline()
-    if not self.record: return False
-
-    self.numberTargets = self.numberTargets + 1
-    self.ref = ""
-    self.start = 0
-    self.end = 0
-
-# bed file should be 0-based, half-open, so the start coordinate
-# must be that in the bed file plus one.
-    entries = self.record.rstrip("\n").split("\t")
-    self.referenceSequence = entries[0]
-
-# Add the reference sequence to the dictionary.  If it didn't previously
-# exist, append the reference sequence to the end of the list as well.
-# This ensures that the order in which the reference sequences appeared
-# in the file can be preserved.
-    if self.referenceSequence not in self.referenceSequences:
-      self.referenceSequences[self.referenceSequence] = True
-      self.referenceSequenceList.append(self.referenceSequence)
-
-    try: self.start = int(entries[1]) + 1
-    except ValueError:
-      text = "start position is not an integer"
-      self.generalError(text, "start", entries[1])
-
-    try: self.end = int(entries[2])
-    except ValueError:
-      text = "end position is not an integer"
-      self.generalError(text, "end", entries[2])
-
-# Check that the record is a valid interval.
-    if self.end - self.start < 0:
-      print >> sys.stderr, "Invalid target interval:\n\t", self.record
-      exit(1)
-
-    return True
-
-# Parse through the bed file until the correct reference sequence is
-# encountered and the end position is greater than or equal to that requested.
-  def parseBed(self, referenceSequence, position):
-    success = True
-    if self.referenceSequence != referenceSequence:
-      while self.referenceSequence != referenceSequence and success: success = self.getRecord()
-
-    while self.referenceSequence == referenceSequence and self.end < position and success: success = self.getRecord()
-
-    return success
-
-# Close the bed file.
-  def closeBed(self, filename):
-    self.filehandle.close()
-
-# Define error messages for different handled errors.
-  def generalError(self, text, field, fieldValue):
-    print >> sys.stderr, "\nError encountered when attempting to read:"
-    if field != "": print >> sys.stderr, "\t", field, ":             ", fieldValue
-    print >> sys.stderr,  "\n", text
-    exit(1)
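
getRecord above converts BED's 0-based, half-open coordinates into the 1-based, inclusive convention used elsewhere in these tools. A standalone sketch of that conversion (bed_to_one_based is an illustrative name, not part of the module):

    def bed_to_one_based(start_field, end_field):
        # BED start is 0-based and end is exclusive, so only the start
        # moves by one; a zero-length BED interval becomes invalid.
        start = int(start_field) + 1
        end = int(end_field)
        if end < start:
            raise ValueError("invalid target interval")
        return start, end

For example, the BED fields "99" and "100" describe the single base at 1-based position 100.
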
--- a/tools/vcf_tools/extract.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,155 +0,0 @@
-#!/usr/bin/python
-
-import os.path
-import sys
-import optparse
-
-import vcfClass
-from vcfClass import *
-
-import tools
-from tools import *
-
-def main():
-
-# Parse the command line options
-  usage = "Usage: vcfPytools.py extract [options]"
-  parser = optparse.OptionParser(usage = usage)
-  parser.add_option("-i", "--in",
-                    action="store", type="string",
-                    dest="vcfFile", help="input vcf file (stdin for piped vcf)")
-  parser.add_option("-o", "--out",
-                    action="store", type="string",
-                    dest="output", help="output vcf file")
-  parser.add_option("-s", "--reference-sequence",
-                    action="store", type="string",
-                    dest="referenceSequence", help="extract records from this reference sequence")
-  parser.add_option("-r", "--region",
-                    action="store", type="string",
-                    dest="region", help="extract records from this region")
-  parser.add_option("-q", "--keep-quality",
-                    action="append", type="string", nargs=2,
-                    dest="keepQuality", help="keep records containing this quality")
-  parser.add_option("-k", "--keep-info",
-                    action="append", type="string",
-                    dest="infoKeep", help="keep records containing this info field")
-  parser.add_option("-d", "--discard-info",
-                    action="append", type="string",
-                    dest="infoDiscard", help="discard records containing this info field")
-  parser.add_option("-p", "--pass-filter",
-                    action="store_true", default=False,
-                    dest="passFilter", help="discard records whose filter field is not PASS")
-
-  (options, args) = parser.parse_args()
-
-# Check that a vcf file is given.
-  if options.vcfFile == None:
-    parser.print_help()
-    print >> sys.stderr, "\nInput vcf file (--in, -i) is required."
-    exit(1)
-
-# Check that a reference sequence or region is specified unless records
-# are being selected on info fields, quality or the filter status, and
-# that a reference sequence and a region are not both supplied.
-  if not options.infoKeep and not options.infoDiscard and not options.passFilter and not options.keepQuality:
-    if not options.referenceSequence and not options.region:
-      parser.print_help()
-      print >> sys.stderr, "\nA region (--region, -r) or reference sequence (--reference-sequence, -s) must be supplied"
-      print >> sys.stderr, "if not extracting records based on info strings."
-      exit(1)
-  if options.referenceSequence and options.region:
-    parser.print_help()
-    print >> sys.stderr, "\nEither a region (--region, -r) or reference sequence (--reference-sequence, -s) can be supplied, but not both."
-    exit(1)
-
-# If a region was supplied, check the format.
-  if options.region:
-    if options.region.find(":") == -1 or options.region.find("..") == -1:
-      print >> sys.stderr, "\nIncorrect format for region string.  Required: ref:start..end."
-      exit(1)
-    regionList = options.region.split(":",1)
-    referenceSequence = regionList[0]
-    try: start = int(regionList[1].split("..")[0])
-    except ValueError:
-      print >> sys.stderr, "region start coordinate is not an integer"
-      exit(1)
-    try: end = int(regionList[1].split("..")[1])
-    except ValueError:
-      print >> sys.stderr, "region end coordinate is not an integer"
-      exit(1)
-
-# Ensure that discard-info and keep-info haven't both been defined.
-  if options.infoKeep and options.infoDiscard:
-    print >> sys.stderr, "Cannot specify fields to keep and discard simultaneously."
-    exit(1)
-
-# If the --keep-quality argument is used, check that a value and a logical
-# argument are supplied and that the logical argument is valid.
-
-  if options.keepQuality:
-    for value, logic in options.keepQuality:
-      if logic not in ("eq", "le", "lt", "ge", "gt"):
-        print >> sys.stderr, "Error with --keep-quality (-q) argument.  Must take the following form:"
-        print >> sys.stderr, "\npython vcfPytools extract --in <VCF> --keep-quality <value> <logic>"
-        print >> sys.stderr, "\nwhere logic is one of: eq, le, lt, ge or gt"
-        exit(1)
-      try: qualityValue = float(value)
-      except ValueError:
-        print >> sys.stderr, "Error with --keep-quality (-q) argument."
-        print >> sys.stderr, "Quality value must be an integer or float value."
-        exit(1)
-      qualityLogic = logic
-
-# Set the output file to stdout if no output file was specified.
-  outputFile, writeOut = setOutput(options.output)
-
-  v = vcf() # Define vcf object.
-
-# Set process info to True if info strings need to be parsed.
-  if options.infoKeep or options.infoDiscard: v.processInfo = True
-
-# Open the file.
-  v.openVcf(options.vcfFile)
-
-# Read in the header information.
-  v.parseHeader(options.vcfFile, writeOut)
-  taskDescriptor = "##vcfPytools=extract data"
-  writeHeader(outputFile, v, False, taskDescriptor) # tools.py
-
-# Read through all the entries and write out records in the correct
-# reference sequence.
-  while v.getRecord():
-    writeRecord = True
-    if options.referenceSequence and v.referenceSequence != options.referenceSequence: writeRecord = False
-    elif options.region:
-      if v.referenceSequence != referenceSequence: writeRecord = False
-      elif v.position < start or v.position > end: writeRecord = False
-
-# Only consider these fields if the record is contained within the
-# specified region.
-    if options.infoKeep and writeRecord:
-      writeRecord = False
-      for tag in options.infoKeep:
-        if v.infoTags.has_key(tag):
-          writeRecord = True
-          break
-    if options.infoDiscard and writeRecord:
-      for tag in options.infoDiscard:
-        if v.infoTags.has_key(tag): writeRecord = False
-    if options.passFilter and v.filters != "PASS" and writeRecord: writeRecord = False
-    if options.keepQuality:
-      if qualityLogic == "eq" and v.quality != qualityValue: writeRecord = False
-      if qualityLogic == "le" and v.quality > qualityValue: writeRecord = False
-      if qualityLogic == "lt" and v.quality >= qualityValue: writeRecord = False
-      if qualityLogic == "ge" and v.quality < qualityValue: writeRecord = False
-      if qualityLogic == "gt" and v.quality <= qualityValue: writeRecord = False
-
-    if writeRecord: outputFile.write(v.record)
-
-# Close the file.
-  v.closeVcf(options.vcfFile)
-
-# Terminate the program cleanly.
-  return 0
-
-if __name__ == "__main__":
-  main()
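
The --region option above expects the ref:start..end form. A minimal parser for that form, assuming 1-based inclusive coordinates (parse_region is an illustrative name, not the module's actual code path):

    def parse_region(region):
        # Split "20:80000..100000" into ("20", 80000, 100000);
        # raises ValueError on anything malformed.
        ref, colon, span = region.partition(":")
        start_text, dots, end_text = span.partition("..")
        if not ref or colon != ":" or dots != "..":
            raise ValueError("expected ref:start..end, got " + region)
        return ref, int(start_text), int(end_text)
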
--- a/tools/vcf_tools/extract.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,111 +0,0 @@
-<tool id="vcf_extract" name="Extract" version="1.0.0">
-  <description>records from a specified region</description>
-  <command interpreter="python">
-    vcfPytools.py
-      extract 
-      --in=$input1
-      --out=$output1
-      #if $reference_sequence.value.strip()
-        --reference-sequence=$reference_sequence
-      #end if
-      #if $region.value.strip()
-        --region=$region
-      #end if
-      #if $keep_quality.value.strip()
-        --keep-quality=$keep_quality
-      #end if
-      #if $keep_info.value.strip()
-      --keep-info=$keep_info
-      #end if
-      #if $discard_info.value.strip()
-        --discard-info=$discard_info
-      #end if
-      $pass_filter
-  </command>
-  <inputs>
-    <param name="input1" label="VCF file" type="data" format="vcf" />
-    <param name="reference_sequence" label="Extract records from this reference sequence" type="text" value='' />
-    <param name="region" label="Extract records from this region" type="text" value='' help="The format of the region is ref:start..end, where the start and end coordinates are 1-based"/>
-    <param name="keep_quality" label="Keep records containing this quality" type="text" value='' help="This requires two arguments: the quality value and a logical operator (eq - equals, le - less than or equal to, lt - less than, ge - greater than or equal to , gt - greater than) to determine which records to keep.  For example: '90 ge' will retain all records that have a quality of 90 or greater"/>
-    <param name="keep_info" label="Keep records containing this info field" type="text" value='' />
-    <param name="discard_info" label="Discard records containing this info field" type="text" value='' />
-    <param name="pass_filter" label="Discard records whose filter field is not PASS" type="boolean" truevalue="--pass-filter" falsevalue="" checked="False"/>
-  </inputs>
-  <tests>
-    <test>
-      <param name="input1" value="test_filter_quality_9_DP_2000_lt.vcf" ftype="vcf" />
-      <param name="reference_sequence" value='' />
-      <param name="region" value='' />
-      <param name="keep_quality" value='' />
-      <param name="keep_info" value='' />
-      <param name="discard_info" value='' />
-      <param name="pass_filter" value='true' />
-      <output name="output" file="test_extract_pass_filter_quality_9_DP_2000_lt.vcf" lines_diff="6" ftype="vcf" />
-    </test>
-    <test>
-      <param name="input1" value="test.small.vcf" ftype="vcf" />
-      <param name="reference_sequence" value='' />
-      <param name="region" value='20:80000..100000' />
-      <param name="keep_quality" value='' />
-      <param name="keep_info" value='' />
-      <param name="discard_info" value='' />
-      <param name="pass_filter" value='false' />      
-      <output name="output" file="test_extract_region_80000_100000.vcf" ftype="vcf" />
-    </test>
-    <test>
-      <param name="input1" value="test.small.vcf" ftype="vcf" />
-      <param name="reference_sequence" value='' />
-      <param name="region" value='' />
-      <param name="keep_quality" value='90 ge' />
-      <param name="keep_info" value='' />
-      <param name="discard_info" value='' />
-      <param name="pass_filter" value='false' />      
-      <output name="output" file="test_extract_quality_90_ge.vcf" ftype="vcf" />
-    </test>
-    <test>
-      <param name="input1" value="test.small.vcf" ftype="vcf" />
-      <param name="reference_sequence" value='' />
-      <param name="region" value='' />
-      <param name="keep_quality" value='' />
-      <param name="keep_info" value='TV' />
-      <param name="discard_info" value='' />
-      <param name="pass_filter" value='false' />      
-      <output name="output" file="test_extract_keep_info_TV.vcf" ftype="vcf" />
-    </test>
-    <test>
-      <param name="input1" value="test.small.vcf" ftype="vcf" />
-      <param name="reference_sequence" value='' />
-      <param name="region" value='' />
-      <param name="keep_quality" value='' />
-      <param name="keep_info" value='' />
-      <param name="discard_info" value='TV' />
-      <param name="pass_filter" value='false' />      
-      <output name="output" file="test_extract_discard_info_TV.vcf" ftype="vcf" />
-    </test>
-  </tests>
-  <outputs>
-    <data format="vcf" name="output1" label="${tool.name} from ${on_string}" />
-  </outputs>
-  <help>
-
-**What it does**
-
-This tool uses vcfPytools_' extract command to extract records from a specified region of a VCF file
-
-.. _vcfPytools: https://github.com/AlistairNWard/vcfPytools
-
-Option **Extract records from this reference sequence** outputs all records from the specified reference sequence from the input vcf file into the output vcf file.
-
-Option **Extract records from this region** outputs all records from the specified region from the input vcf file into the output vcf file.  The format of the region is ref:start..end, where the start and end coordinates are 1-based.
-
-Option **Keep records containing this quality** allows only records with specified quality values to be retained.  This requires two arguments: the quality value and a logical operator (eq - equals, le - less than or equal to, lt - less than, ge - greater than or equal to, gt - greater than) to determine which records to keep.  For example: **90 ge** will retain all records that have a quality of 90 or greater.
-
-Option **Keep records containing this info field** allows all records to be removed unless they contain this value in the info field.
-
-Option **Discard records containing this info field** ensures that all records containing this value in the info field will not be included in the output file.  It cannot be used in conjunction with **Keep records containing this info field**.
-
-Option **Discard records whose filter field is not PASS** will only output records that have the filter field populated with PASS.  All filtered records or records that haven't undergone filtering will be discarded.
-
-
-  </help>
-</tool>
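
The eq/le/lt/ge/gt codes described in the help above map directly onto comparison operators. One way to sketch that mapping, assuming the record quality and cutoff are already floats (QUALITY_TESTS and keep_by_quality are illustrative names):

    import operator

    QUALITY_TESTS = {
        "eq": operator.eq,
        "le": operator.le,
        "lt": operator.lt,
        "ge": operator.ge,
        "gt": operator.gt,
    }

    def keep_by_quality(quality, cutoff, logic):
        # e.g. keep_by_quality(95.0, 90.0, "ge") -> True, so the
        # record would be written out.
        return QUALITY_TESTS[logic](quality, cutoff)
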
--- a/tools/vcf_tools/filter.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,162 +0,0 @@
-#!/usr/bin/python
-
-import os.path
-import sys
-import optparse
-
-import vcfClass
-from vcfClass import *
-
-import tools
-from tools import *
-
-def filterFail(text, file):
-  print >> sys.stderr, text
-  if file != None: os.remove(file)
-  exit(1)
-
-def main():
-
-# Parse the command line options
-  usage = "Usage: vcfPytools.py filter [options]"
-  parser = optparse.OptionParser(usage = usage)
-  parser.add_option("-i", "--in",
-                    action="store", type="string",
-                    dest="vcfFile", help="input vcf file")
-  parser.add_option("-o", "--out",
-                    action="store", type="string",
-                    dest="output", help="output vcf file")
-  parser.add_option("-q", "--quality",
-                    action="store", type="int",
-                    dest="quality", help="filter out SNPs with qualities lower than selected value")
-  parser.add_option("-n", "--info",
-                    action="append", type="string", nargs=3,
-                    dest="infoFilters", help="filter based on entries in the info string")
-  parser.add_option("-r", "--remove-genotypes",
-                    action="store_true", default=False,
-                    dest="removeGeno", help="remove the genotype strings from the vcf file")
-  parser.add_option("-m", "--mark-as-pass",
-                    action="store_true", default=False,
-                    dest="markPass", help="Mark all records as having passed filters")
-
-  (options, args) = parser.parse_args()
-
-# Check that a single vcf file is given.
-  if options.vcfFile == None:
-    parser.print_help()
-    print >> sys.stderr, "\nInput vcf file (-i, --input) is required for vcf filtering."
-    exit(1)
-
-# The --mark-as-pass option can only be used if no actual filters
-# have been specified.
-  if options.markPass and options.infoFilters:
-    print >> sys.stderr, "--mark-as-pass cannot be used in conjunction with filters."
-    exit(1)
-
-# Set the output file to stdout if no output file was specified.
-  outputFile, writeOut = setOutput(options.output) # tools.py
-
-  v = vcf() # Define vcf object.
-
-# Open the vcf file.
-  v.openVcf(options.vcfFile)
-
-# Read in the header information.
-  v.parseHeader(options.vcfFile, writeOut)
-  taskDescriptor = "##vcfPytools="
-  if options.infoFilters:
-    taskDescriptor += "filtered using the following filters: "
-    for filter, value, logic in options.infoFilters: taskDescriptor += str(filter) + str(value) + ","
-    taskDescriptor = taskDescriptor.rstrip(",")
-  if options.markPass: taskDescriptor += "marked all records as PASS"
-    
-  writeHeader(outputFile, v, options.removeGeno, taskDescriptor)
-
-# Check that specified filters from the info field are either integers or floats.
-  if options.infoFilters:
-    v.processInfo = True # Process the info string
-    filters = {}
-    filterValues = {}
-    filterLogic = {}
-    for filter, value, logic in options.infoFilters:
-      filterName = str(filter) + str(value)
-      if "-" in filter or "-" in value or "-" in logic:
-        print >> sys.stderr, "\n--info (-n) requires three arguments, for example:"
-        print >> sys.stderr, "\t--info DP 5 lt: filter records with DP less than (lt) 5.\n"
-        print >> sys.stderr, "allowed logic arguments:\n\tgt: greater than\n\tlt: less than."
-        print >> sys.stderr, "\nError in:", filter
-        exit(1)
-      if logic != "gt" and logic != "lt":
-        print >> sys.stderr, "\nfilter logic not recognised."
-        print >> sys.stderr, "allowed logic arguments:\n\tgt: greater than\n\tlt: less than."
-        print >> sys.stderr, "\nError in:", filter
-        exit(1)
-      if v.infoHeaderTags.has_key(filter):
-        if v.infoHeaderTags[filter][1].lower() == "integer":
-          try:
-            filters[filterName] = filter
-            filterValues[filterName] = int(value)
-            filterLogic[filterName] = logic
-          except ValueError:
-            text = "Filter " + filter + " requires an integer entry, not \"" + value + "\""
-            filterFail(text, options.output)
-
-        if v.infoHeaderTags[filter][1].lower() == "float":
-          try:
-            filters[filterName] = filter
-            filterValues[filterName] = float(value)
-            filterLogic[filterName] = logic
-            #filters[filterName] = float(value)
-            #filterLogic[filterName] = logic
-          except ValueError:
-            text = "Filter " + filter + " requires an float entry, not " + str(type(value))
-            filterFail(text, options.output)
-
-      else:
-        text = "Filter " + filter + " has no explanation in the header.  Unknown type for the entry."
-        filterFail(text, options.output)
-
-# Parse the vcf file and check if any of the filters are failed.  If
-# so, build up a string of failed filters.
-  while v.getRecord():
-    filterString = ""
-
-# Mark the record as "PASS" if --mark-as-pass was applied.
-    if options.markPass: v.filters = "PASS"
-
-# Check for quality filtering.
-    if options.quality != None:
-      if v.quality < options.quality:
-        filterString = filterString + ";" + "Q" + str(options.quality) if filterString != "" else "Q" + str(options.quality)
-
-# Check for filtering on info string filters.
-    if options.infoFilters:
-      for filterName, filter in filters.iteritems():
-        value = filterValues[filterName]
-        logic = filterLogic[filterName]
-        if v.infoTags.has_key(filter):
-          if type(value) == int:
-            if logic == "lt" and int(v.infoTags[filter]) < value:
-              filterString = filterString + ";" + filter + str(value) if filterString != "" else filter + str(value)
-            if logic == "gt" and int(v.infoTags[filter]) > value:
-              filterString = filterString + ";" + filter + str(value) if filterString != "" else filter + str(value)
-          elif type(value) == float:
-            if logic == "lt" and float(v.infoTags[filter]) < value:
-              filterString = filterString + ";" + filter + str(value) if filterString != "" else filter + str(value)
-            if logic == "gt" and float(v.infoTags[filter]) > value:
-              filterString = filterString + ";" + filter + str(value) if filterString != "" else filter + str(value)
-
-    filterString = "PASS" if filterString == "" else filterString
-    v.filters = filterString
-    record = v.buildRecord(options.removeGeno)
-    outputFile.write(record)
-
-# Close the vcf files.
-  v.closeVcf(options.vcfFile)
-
-# Terminate the program.
-  return 0
-
-if __name__ == "__main__":
-  main()
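
The loop above accumulates failed filter names into a semicolon-separated string, defaulting to PASS. The same bookkeeping is simpler to see with a list (build_filter_string is an illustrative name, not the module's code):

    def build_filter_string(failed_filters):
        # failed_filters is a list such as ["Q9", "DP2000"]; an empty
        # list means the record passed everything.
        return ";".join(failed_filters) if failed_filters else "PASS"
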
--- a/tools/vcf_tools/filter.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-<tool id="vcf_filter" name="Filter" version="1.0.0">
-  <description>a VCF file</description>
-  <command interpreter="python">
-    vcfPytools.py
-      filter 
-      --in=$input1
-      --out=$output1
-      --quality=$quality
-      #for $i in $info_filter:
-        --info ${i.info}
-      #end for
-      $remove_genotypes
-      $mark_as_pass
-  </command>
-  <inputs>
-    <param name="input1" label="VCF file" type="data" format="vcf" />
-    <param name="quality" label="Filter by quality" type="integer" value='' help="Filter out SNPs with qualities lower than selected value" />
-    <repeat name="info_filter" title="Filter based on entries in the info string">
-      <param name="info" label="Filter" type="text" value='' help='This option takes three values: the info string tag, the cutoff value and whether to filter out those records with less than (lt) or greater than (gt) this value.  For example: DP 10 lt ' />
-    </repeat>
-    <param name="remove_genotypes" label="Remove the genotype strings" type="boolean" truevalue="--remove-genotypes" falsevalue="" checked="False" />
-    <param name="mark_as_pass" label="Mark all records as having passed filters" type="boolean" truevalue="--mark-as-pass" falsevalue="" checked="False" />
-  </inputs>
-  <tests>
-    <test>
-      <param name="input1" value="test.small.vcf" ftype="vcf" />
-      <param name="quality" value="9" />
-      <param name="info" value="NS 360 gt"/>
-      <param name="remove_genotypes" value="" />
-      <param name="mark_as_pass" value="" />
-      <output name="output" file="test_filter_quality_9_NS_360_gt.vcf" lines_diff="6" ftype="vcf" />
-    </test>
-    <test>
-      <param name="input1" value="test.small.vcf" ftype="vcf" />
-      <param name="quality" value="9" />
-      <param name="info" value="DP 2000 lt"/>
-      <param name="remove_genotypes" value="" />
-      <param name="mark_as_pass" value="" />
-      <output name="output" file="test_filter_quality_9_DP_2000_lt.vcf" lines_diff="6" ftype="vcf" />
-    </test>
-  </tests>
-  <outputs>
-    <data format="vcf" name="output1" label="${tool.name} ${on_string}" />
-  </outputs>
-  <help>
-
-**What it does**
-
-This tool uses vcfPytools_' filter command
-
-.. _vcfPytools: https://github.com/AlistairNWard/vcfPytools
-
-Quality option will check the variant quality for each record and if it is below the defined value, the filter field will be populated with the filter entry Q[value].
-
-Any value in the info string can be used for filtering by using the 'Filter by info' option.  This option takes three values: the info string tag, the cutoff value and whether to filter out those records with less than (lt) or greater than (gt) this value.  For example:
-
-  DP 10 lt 
-
-would filter out all variants with a depth (DP) less than 10 and the filter field would be populated with DP10.
-
-This option can be defined as many times as required.
-
-  </help>
-</tool>
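
A hedged sketch of how a 'DP 10 lt' triple from the help above classifies a record, assuming the INFO column has already been parsed into a dictionary (fails_info_filter is an illustrative name):

    def fails_info_filter(info_tags, tag, cutoff, logic):
        # A record fails when the tag is present and the comparison
        # holds, e.g. fails_info_filter({"DP": "8"}, "DP", 10.0, "lt")
        # is True, so the filter field would gain the entry DP10.
        if tag not in info_tags:
            return False
        value = float(info_tags[tag])
        return value < cutoff if logic == "lt" else value > cutoff
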
--- a/tools/vcf_tools/intersect.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,181 +0,0 @@
-#!/usr/bin/python
-
-import os.path
-import sys
-import optparse
-
-import bedClass
-from bedClass import *
-
-import vcfClass
-from vcfClass import *
-
-import tools
-from tools import *
-
-# Intersect two vcf files.  It is assumed that the two files are
-# sorted by genomic coordinates and the reference sequences are
-# in the same order.
-def intersectVcf(v1, v2, priority, outputFile):
-  success1 = v1.getRecord()
-  success2 = v2.getRecord()
-  currentReferenceSequence = v1.referenceSequence
-
-# As soon as the end of either file is reached, there can be no
-# more intersecting SNPs, so terminate.
-  while success1 and success2:
-    if v1.referenceSequence == v2.referenceSequence and v1.referenceSequence == currentReferenceSequence:
-      if v1.position == v2.position:
-        writeVcfRecord(priority, v1, v2, outputFile)
-        success1 = v1.getRecord()
-        success2 = v2.getRecord()
-      elif v2.position > v1.position: success1 = v1.parseVcf(v2.referenceSequence, v2.position, False, None)
-      elif v1.position > v2.position: success2 = v2.parseVcf(v1.referenceSequence, v1.position, False, None)
-    else:
-      if v1.referenceSequence == currentReferenceSequence: success1 = v1.parseVcf(v2.referenceSequence, v2.position, False, None)
-      elif v2.referenceSequence == currentReferenceSequence: success2 = v2.parseVcf(v1.referenceSequence, v1.position, False, None)
-
-# If the last record for a reference sequence is the same for both vcf
-# files, they will both have referenceSequences different from the
-# current reference sequence.  Change the reference sequence to reflect
-# this and proceed.
-      else:
-        if v1.referenceSequence != v2.referenceSequence:
-          print >> sys.stderr, "ERROR: Reference sequences for both files are unexpectedly different."
-          print >> sys.stderr, "Check that both files contain records for the following reference sequences:"
-          print >> sys.stderr, "\t", v1.referenceSequence, " and ", v2.referenceSequence
-          exit(1)
-      currentReferenceSequence = v1.referenceSequence
-
-# Intersect a vcf file and a bed file.  It is assumed that the 
-# two files are sorted by genomic coordinates and the reference
-# sequences are in the same order.
-def intersectVcfBed(v, b, outputFile):
-  successb = b.getRecord()
-  successv = v.getRecord()
-  currentReferenceSequence = v.referenceSequence
-
-# As soon as the end of the first file is reached, there are no
-# more intersections and the program can terminate.
-  while successv:
-    if v.referenceSequence == b.referenceSequence:
-      if v.position < b.start: successv = v.parseVcf(b.referenceSequence, b.start, False, None)
-      elif v.position > b.end: successb = b.parseBed(v.referenceSequence, v.position)
-      else:
-        outputFile.write(v.record)
-        successv = v.getRecord()
-    else:
-      if v.referenceSequence == currentReferenceSequence: successv = v.parseVcf(b.referenceSequence, b.start, False, None)
-      if b.referenceSequence == currentReferenceSequence: successb = b.parseBed(v.referenceSequence, v.position)
-      currentReferenceSequence = v.referenceSequence
-
-def main():
-
-# Parse the command line options
-  usage = "Usage: vcfPytools.py intersect [options]"
-  parser = optparse.OptionParser(usage = usage)
-  parser.add_option("-i", "--in",
-                    action="append", type="string",
-                    dest="vcfFiles", help="input vcf files")
-  parser.add_option("-b", "--bed",
-                    action="store", type="string",
-                    dest="bedFile", help="input bed file")
-  parser.add_option("-o", "--out",
-                    action="store", type="string",
-                    dest="output", help="output vcf file")
-  parser.add_option("-f", "--priority-file",
-                    action="store", type="string",
-                    dest="priorityFile", help="output records from this vcf file")
-
-  (options, args) = parser.parse_args()
-
-# Check that one or two vcf files are given.
-  if options.vcfFiles == None:
-    parser.print_help()
-    print >> sys.stderr, "\nAt least one vcf file (--in, -i) is required for performing intersection."
-    exit(1)
-  elif len(options.vcfFiles) > 2:
-    parser.print_help()
-    print >> sys.stderr, "\nAt most, two vcf files (--in, -i) can be submitted for performing intersection."
-    exit(1)
-  elif len(options.vcfFiles) == 1 and not options.bedFile:
-    parser.print_help()
-    print >> sys.stderr, "\nIf only one vcf file (--in, -i) is specified, a bed file is also required for performing intersection."
-    exit(1)
-
-# Set the output file to stdout if no output file was specified.
-  outputFile, writeOut = setOutput(options.output) # tools.py
-
-# If intersecting with a bed file, call the bed intersection routine.
-  if options.bedFile:
-    v = vcf() # Define vcf object.
-    b = bed() # Define bed object.
-
-# Open the files.
-    v.openVcf(options.vcfFiles[0])
-    b.openBed(options.bedFile)
-
-# Read in the header information.
-    v.parseHeader(options.vcfFiles[0], writeOut)
-    taskDescriptor = "##vcfPytools=intersect " + options.vcfFiles[0] + ", " + options.bedFile
-    writeHeader(outputFile, v, False, taskDescriptor) # tools.py
-
-# Intersect the vcf file with the bed file.
-    intersectVcfBed(v, b, outputFile)
-
-# Check that the input files had the same list of reference sequences.
-# If not, it is possible that there were some problems.
-    checkReferenceSequenceLists(v.referenceSequenceList, b.referenceSequenceList) # tools.py
-
-# Close the files.
-    v.closeVcf(options.vcfFiles[0])
-    b.closeBed(options.bedFile)
-
-  else:
-    priority = setVcfPriority(options.priorityFile, options.vcfFiles)
-    v1 = vcf() # Define vcf object.
-    v2 = vcf() # Define vcf object.
-
-# Open the vcf files.
-    v1.openVcf(options.vcfFiles[0])
-    v2.openVcf(options.vcfFiles[1])
-
-# Read in the header information.
-    v1.parseHeader(options.vcfFiles[0], writeOut)
-    v2.parseHeader(options.vcfFiles[1], writeOut)
-    if priority == 3:
-      v3 = vcf() # Generate a new vcf object that will contain the header information of the new file.
-      mergeHeaders(v1, v2, v3) # tools.py
-      v1.processInfo = True
-      v2.processInfo = True
-    else: checkDataSets(v1, v2)
-
-# Check that the header for the two files contain the same samples.
-    if v1.samplesList != v2.samplesList:
-      print >> sys.stderr, "vcf files contain different samples (or sample order)."
-      exit(1)
-    else:
-      taskDescriptor = "##vcfPytools=intersect " + v1.filename + ", " + v2.filename
-      if priority == 3: writeHeader(outputFile, v3, False, taskDescriptor)
-      elif (priority == 2 and v2.hasHeader) or not v1.hasHeader: writeHeader(outputFile, v2, False, taskDescriptor) # tools.py
-      else: writeHeader(outputFile, v1, False, taskDescriptor) # tools.py
-
-# Intersect the two vcf files.
-    intersectVcf(v1, v2, priority, outputFile)
-
-# Check that the input files had the same list of reference sequences.
-# If not, it is possible that there were some problems.
-    checkReferenceSequenceLists(v1.referenceSequenceList, v2.referenceSequenceList) # tools.py
-
-# Close the vcf files.
-    v1.closeVcf(options.vcfFiles[0])
-    v2.closeVcf(options.vcfFiles[1])
-
-# End the program.
-  return 0
-
-if __name__ == "__main__":
-  main()
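
intersectVcfBed above sweeps a sorted record stream against sorted target intervals. The same sweep as a generator, assuming a single reference sequence, 1-based inclusive intervals and non-overlapping targets (the names are illustrative, not part of vcfPytools):

    def overlap_records(vcf_records, bed_intervals):
        # vcf_records yields (position, line); bed_intervals yields
        # (start, end).  Intervals ending before the current position
        # can never match again and are discarded.
        interval = next(bed_intervals, None)
        for position, line in vcf_records:
            while interval is not None and interval[1] < position:
                interval = next(bed_intervals, None)
            if interval is None:
                break
            if interval[0] <= position:
                yield line
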
--- a/tools/vcf_tools/intersect.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-<tool id="vcf_intersect" name="Intersect" version="1.0.0">
-  <description>Generate the intersection of two VCF files</description>
-  <command interpreter="python">
-    vcfPytools.py
-      intersect 
-      --in=$input1
-      #if $format_type.format == "vcf"
-      --in=$input2
-      #elif $format_type.format == "bed"
-      --bed=$input2
-      #end if
-      #if $priority_file.value == "first_file"
-      --priority-file=$input1
-      #elif $priority_file.value == "second_file"
-      --priority-file=$input2
-      #end if
-      --out=$output1
-  </command>
-  <inputs>
-    <param name="input1" label="First VCF file" type="data" format="vcf" />
-    <conditional name="format_type">
-      <param name="format" type="select" label="intersect with file of format">
-        <option value="vcf">VCF</option>
-        <option value="bed">BED</option>
-      </param>
-      <when value="vcf">
-        <param name="input2" label="second VCF file" type="data" format="vcf"/>
-      </when>
-      <when value="bed">
-        <param name="input2" label="second BED file" type="data" format="bed"/>
-      </when>
-    </conditional>
-    <param name="priority_file" type="select" label="Priority file" help="If the priority file argument is set (this must be equal to one of the input vcf files), then the record written to the output will come from this file.  If this argument is not set, the record with the highest quality is written out.">
-      <option value="none">None</option>
-      <option value="first_file">First file</option>
-      <option value="second_file">Second file</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="vcf" name="output1" label="${tool.name} on ${on_string}" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="1.vcf" ftype="vcf" />
-      <param name="format" value="vcf" />
-      <param name="input2" value="2.vcf" ftype="vcf" />
-      <param name="priority_file" value="none" />
-      <output name="output" file="1_2_intersect_priority_0.vcf" lines_diff="2" ftype="vcf" />
-    </test>
-    <test>
-      <param name="input1" value="1.vcf" ftype="vcf" />
-      <param name="format" value="vcf" />
-      <param name="input2" value="2.vcf" ftype="vcf" />
-      <param name="priority_file" value="first_file" />
-      <output name="output" file="1_2_intersect_priority_1.vcf" lines_diff="2" ftype="vcf" />
-    </test>
-    <test>
-      <param name="input1" value="1.vcf" ftype="vcf" />
-      <param name="format" value="vcf" />
-      <param name="input2" value="2.vcf" ftype="vcf" />
-      <param name="priority_file" value="second_file" />
-      <output name="output" file="1_2_intersect_priority_2.vcf" lines_diff="2" ftype="vcf" />
-    </test>
-  </tests>
-  <help>
-
-**What it does**
-
-This tool uses vcfPytools_' intersect command to generate the intersection of two VCF files
-
-.. _vcfPytools: https://github.com/AlistairNWard/vcfPytools
-
-Two input files are required and the intersection of these two files is generated and sent to the output.  Both files must be sorted by genomic coordinate, with the reference sequences appearing in the same order, to function correctly.
-
-The intersection can be calculated on two VCF files or a VCF and a BED file.
-
-If the priority file argument is set (this must be equal to one of the input VCF files), then the record written to the output will come from this file.  If this argument is not set, the record with the highest quality is written out.
-
-  </help>
-</tool>
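
The priority rule in the help above reduces to a small selection function, assuming record objects that carry a numeric quality attribute (choose_record is an illustrative name; the real logic is writeVcfRecord in tools.py):

    def choose_record(record1, record2, priority):
        # priority 1 or 2 forces the corresponding file; otherwise the
        # higher quality record wins, with ties going to the first file.
        if priority == 1:
            return record1
        if priority == 2:
            return record2
        return record1 if record1.quality >= record2.quality else record2
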
--- a/tools/vcf_tools/tools.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,188 +0,0 @@
-#!/usr/bin/python
-
-import os.path
-import sys
-import vcfPytools
-from vcfPytools import __version__
-
-# Determine whether to output to a file or stdout.
-def setOutput(output):
-  if output == None:
-    outputFile = sys.stdout
-    writeOut = False
-  else:
-    output = os.path.abspath(output)
-    outputFile = open(output, 'w')
-    writeOut = True
-
-  return outputFile, writeOut
-
-# Determine which file has priority for writing out records.
-def setVcfPriority(priorityFile, vcfFiles):
-  if priorityFile == None: priority = 0
-  elif priorityFile == vcfFiles[0]: priority = 1
-  elif priorityFile == vcfFiles[1]: priority = 2
-  elif priorityFile.lower() == "merge": priority = 3
-  else:
-    print >> sys.stderr, "vcf file give priority must be one of the two input vcf files or merge."
-    exit(1)
-
-  return priority
-
-# If the union or intersection of two vcf files is being performed
-# and the output vcf file is to contain the information from both
-# files, the headers need to be merged to ensure that all info and
-# format entries have an explanation.
-def mergeHeaders(v1, v2, v3):
-
-# If either file does not have a header, terminate the program.
-# In order to merge the headers, the different fields must be
-# checked to ensure the files are compatible.
-  if not v1.hasHeader or not v2.hasHeader:
-    print >> sys.stderr, "Both vcf files must have a header in order to merge data sets."
-    exit(1)
-
-  v3.infoHeaderTags = v1.infoHeaderTags.copy()
-  v3.formatHeaderTags = v1.formatHeaderTags.copy()
-  v3.numberDataSets = v1.numberDataSets
-  v3.includedDataSets = v1.includedDataSets.copy()
-  v3.headerText = v1.headerText
-  v3.headerTitles = v1.headerTitles
-  v3.infoHeaderString = v1.infoHeaderString.copy()
-  v3.formatHeaderString = v1.formatHeaderString.copy()
-
-# Merge the info field descriptions.
-  for tag in v2.infoHeaderTags:
-    if v1.infoHeaderTags.has_key(tag):
-      if v1.infoHeaderTags[tag][0] != v2.infoHeaderTags[tag][0] or \
-         v1.infoHeaderTags[tag][1] != v2.infoHeaderTags[tag][1]:
-        print v1.infoHeaderTags[tag][0]
-        print v1.infoHeaderTags[tag][1]
-        print v1.infoHeaderTags[tag][2]
-        print >> sys.stderr, "Input vcf files have different definitions for " + tag + " field."
-        exit(1)
-    else: v3.infoHeaderTags[tag] = v2.infoHeaderTags[tag]
-
-# Merge the format field descriptions.
-  for tag in v2.formatHeaderTags:
-    if v1.formatHeaderTags.has_key(tag):
-      if v1.formatHeaderTags[tag][0] != v2.formatHeaderTags[tag][0] or \
-         v1.formatHeaderTags[tag][1] != v2.formatHeaderTags[tag][1]:
-        print >> sys.stderr, "Input vcf files have different definitions for " + tag + " field."
-        exit(1)
-    else: v3.formatHeaderTags[tag] = v2.formatHeaderTags[tag]
-
-# Now check to see if the vcf files contain information from multiple
-# data sets themselves and create an ordered list in which the data
-# will appear in the file.  For instance, if the first file has
-# already got two sets of data and is being intersected with a file
-# with one set of data, the order of data in the new vcf file will be
-# the two sets from the first file followed by the second, e.g.
-# AB=3/2/4, where the 3 and 2 are from the first file and the 4 is the
-# value of AB from the second vcf.  The header will have a ##FILE for
-# each of the three files, so the origin of the data can be recovered.
-  if v1.numberDataSets == 0:
-    v3.includedDataSets[v3.numberDataSets + 1] = v1.filename
-    v3.numberDataSets += 1
-  if v2.numberDataSets == 0:
-    v3.includedDataSets[v3.numberDataSets + 1] = v2.filename
-    v3.numberDataSets += 1
-  else:
-    for i in range(1, v2.numberDataSets + 1):
-      v3.includedDataSets[v3.numberDataSets + 1] = v2.includedDataSets[i]
-      v3.numberDataSets += 1
-
-# If either of the input files contain multiple data sets (e.g. multiple
-# vcf files have undergone intersection or union calculations and all
-# information has been retained) and the priority isn't set to 'merge',
-# terminate the program.  This is to ensure that the origin of the data
-# doesn't get confused.
-def checkDataSets(v1, v2):
-  if v1.numberDataSets + v2.numberDataSets != 0:
-    print >> sys.stderr, "\nERROR:"
-    print >> sys.stderr, "input vcf file(s) contain data sets from multiple vcf files."
-    print >> sys.stderr, "Further intersection or union operations must include --priority-file merge"
-    print >> sys.stderr, "Other tools may be incompatible with this format."
-    exit(1)
-
-# Write the header to file.
-def writeHeader (outputFile, v, removeGenotypes, taskDescriptor):
-  if not v.hasHeader: 
-    v.headerText = "##fileformat=VCFv4.0\n##source=vcfPytools " + __version__ + "\n"
-    v.headerTitles = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n"
-  if v.headerText != "": outputFile.write(v.headerText)
-  print >> outputFile, taskDescriptor
-  for tag in v.infoHeaderString: print >> outputFile, v.infoHeaderString[tag]
-  for tag in v.formatHeaderString: print >> outputFile, v.formatHeaderString[tag]
-
-# Write out a list of files indicating which data set belongs to which file.
-  if v.numberDataSets != 0:
-    for i in range(1, v.numberDataSets + 1):
-      print >> outputFile, "##FILE=<ID=" + str(i) + ",\"" + v.includedDataSets[i] + "\">"
-
-  if removeGenotypes:
-    line = v.headerTitles.rstrip("\n").split("\t")
-    newHeaderTitles = line[0]
-    for i in range(1,8):
-      newHeaderTitles = newHeaderTitles + "\t" + line[i]
-    newHeaderTitles = newHeaderTitles + "\n"
-    outputFile.write( newHeaderTitles )
-  else:
-    outputFile.write( v.headerTitles )
-
-# Check that the two reference sequence lists are identical.
-# If there are a different number or order, the results may
-# not be as expected.
-def checkReferenceSequenceLists(list1, list2):
-  errorMessage = False
-  if len(list1) != len(list2):
-    print >> sys.stderr, "WARNING: Input files contain a different number of reference sequences."
-    errorMessage = True
-  elif list1 != list2:
-    print >> sys.stderr, "WARNING: Input files contain different or differently ordered reference sequences."
-    errorMessage = True
-  if errorMessage:
-    print >> sys.stderr, "Results may not be as expected."
-    print >> sys.stderr, "Ensure that input files have the same reference sequences in the same order."
-    print >> sys.stderr, "Reference sequence lists observed were:\n\t", list1, "\n\t", list2
-
-# Write out a vcf record to file.  The record written depends on the
-# value of 'priority' and could therefore be the record from either
-# of the vcf files, or a combination of them.
-
-def writeVcfRecord(priority, v1, v2, outputFile):
-  if priority == 0:
-    if v1.quality >= v2.quality: outputFile.write(v1.record)
-    else: outputFile.write(v2.record)
-  elif priority == 1: outputFile.write(v1.record)
-  elif priority == 2: outputFile.write(v2.record)
-  elif priority == 3:
-
-# Define the missing entry values (depends on the number of data sets
-# in the file).
-    info = ""
-    missingEntry1 = missingEntry2 = "."
-    for i in range(1, v1.numberDataSets): missingEntry1 += "/."
-    for i in range(1, v2.numberDataSets): missingEntry2 += "/."
-    secondList = v2.infoTags.copy()
-
-# Build up the info field.
-    for tag in v1.infoTags:
-      if secondList.has_key(tag):
-        if v1.infoHeaderTags[tag][1].lower() != "flag": info += tag + "=" + v1.infoTags[tag] + "/" + v2.infoTags[tag] + ";"
-        del secondList[tag]
-      else: 
-        if v1.infoHeaderTags[tag][1].lower() != "flag": info += tag + "=" + v1.infoTags[tag] + "/" + missingEntry2 + ";"
-
-# Now include the info tags that are not populated in the first vcf file.
-    for tag in secondList:
-      if v2.infoHeaderTags[tag][1].lower() != "flag": info += tag + "=" + missingEntry1 + "/" + v2.infoTags[tag] + ";"
-
-# Build the complete record.
-    info = info.rstrip(";")
-    record = v1.referenceSequence + "\t" + str(v1.position) + "\t" + v1.rsid + "\t" + v1.ref + "\t" + \
-             v1.alt + "/" + v2.alt + "\t" + str(v1.quality) + "/" + str(v2.quality) + "\t.\t" + info
-    print >> outputFile, record
-  else:
-    print >> sys.stderr, "Unknown file priority."
-    exit(1)
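
writeVcfRecord's 'merge' branch above slash-joins info values and pads a missing tag with one '.' per data set. A sketch of that padding for non-flag tags, given plain tag dictionaries and the data-set counts (merge_info_tags is an illustrative name; flag tags are omitted for brevity):

    def merge_info_tags(tags1, tags2, sets1, sets2):
        # A file that is not itself merged counts as one data set.
        missing1 = "/".join(["."] * max(sets1, 1))
        missing2 = "/".join(["."] * max(sets2, 1))
        merged = {}
        for tag in set(tags1) | set(tags2):
            merged[tag] = tags1.get(tag, missing1) + "/" + tags2.get(tag, missing2)
        return merged
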
--- a/tools/vcf_tools/vcfClass.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,422 +0,0 @@
-#!/usr/bin/python
-
-import os.path
-import sys
-import re
-
-class vcf:
-  def __init__(self):
-
-# Header info.
-    self.filename = ""
-    self.hasHeader = True
-    self.headerText = ""
-    self.headerTitles = ""
-    #self.headerInfoText = ""
-    #self.headerFormatText = ""
-
-# Store the info and format tags as well as the lines that describe
-# them in a dictionary.
-    self.numberDataSets = 0 
-    self.includedDataSets = {}
-    self.infoHeaderTags = {}
-    self.infoHeaderString = {}
-    self.formatHeaderTags = {}
-    self.formatHeaderString = {}
-
-# Genotype information.
-    self.genotypes = False
-    self.infoField = {}
-
-# Reference sequence information.
-    self.referenceSequences = {}
-    self.referenceSequenceList = []
-    self.referenceSequence = ""
-
-# Record information.
-    self.position = -1
-    self.samplesList = []
-
-# Determine which fields to process.
-    self.processInfo = False
-    self.processGenotypes = False
-    self.dbsnpVcf = False
-    self.hapmapVcf = False
-
-# Open a vcf file.
-  def openVcf(self, filename):
-    if filename == "stdin":
-      self.filehandle = sys.stdin
-      self.filename = "stdin"
-    else:
-      try: self.filehandle = open(filename,"r")
-      except IOError:
-        print >> sys.stderr, "Failed to find file: ",filename
-        exit(1)
-      self.filename = os.path.abspath(filename)
-
-# Parse the vcf header.
-  def parseHeader(self, filename, writeOut):
-    while self.getHeaderLine(filename, writeOut):
-      continue
-
-# Determine the type of information in the header line.
-  def getHeaderLine(self, filename, writeOut):
-    self.headerLine = self.filehandle.readline().rstrip("\n")
-    if self.headerLine.startswith("##INFO"): success = self.headerInfo(writeOut, "info")
-    elif self.headerLine.startswith("##FORMAT"): success = self.headerInfo(writeOut, "format")
-    elif self.headerLine.startswith("##FILE"): success = self.headerFiles(writeOut)
-    elif self.headerLine.startswith("##"): success = self.headerAdditional()
-    elif self.headerLine.startswith("#"): success = self.headerTitleString(filename, writeOut)
-    else: success = self.noHeader(filename, writeOut)
-
-    return success
-
-# Read information on an info field from the header line.
-  def headerInfo(self, writeOut, lineType):
-    tag = self.headerLine.split("=",1)
-    tagID = (tag[1].split("ID=",1))[1].split(",",1)
-
-# Check if this info field has already been defined.
-    if (lineType == "info" and self.infoHeaderTags.has_key(tagID[0])) or (lineType == "format" and self.formatHeaderTags.has_key(tagID[0])):
-      print >> sys.stderr, "Info tag \"", tagID[0], "\" is defined multiple times in the header."
-      exit(1)
-
-# Determine the number of entries, entry type and description.
-    tagNumber = (tagID[1].split("Number=",1))[1].split(",",1)
-    tagType = (tagNumber[1].split("Type=",1))[1].split(",",1)
-    try: tagDescription = ( ( (tagType[1].split("Description=\"",1))[1] ).split("\">") )[0]
-    except IndexError: tagDescription = ""
-    tagID = tagID[0]; tagNumber = tagNumber[0]; tagType = tagType[0]
-
-# Check that the number of fields associated with the tag is either
-# an integer or a '.' to indicate variable number of entries.
-    if tagNumber == ".": tagNumber = "variable"
-    else:
-      try: tagNumber = int(tagNumber)
-      except ValueError:
-        print >> sys.stderr, "\nError parsing header.  Problem with info tag:", tagID
-        print >> sys.stderr, "Number of fields associated with this tag is not an integer or '.'"
-        exit(1)
-
-    if lineType == "info":
-      self.infoHeaderTags[tagID] = tagNumber, tagType, tagDescription
-      self.infoHeaderString[tagID] = self.headerLine
-    if lineType == "format":
-      self.formatHeaderTags[tagID] = tagNumber, tagType, tagDescription
-      self.formatHeaderString[tagID] = self.headerLine
-
-    return True
-
-# Check to see if the records contain information from multiple different
-# sources.  If vcfPytools has been used to find the intersection or union
-# of two vcf files, the records may have been merged to keep all the
-# information available.  If this is the case, there will be a ##FILE line
-# for each set of information in the file.  The order of these files needs
-# to be maintained.
-  def headerFiles(self, writeOut):
-    fileID = (self.headerLine.split("ID=",1))[1].split(",",1)
-    filename = fileID[1].split("\"",2)[1]
-    try: fileID = int(fileID[0])
-    except ValueError:
-      print >> sys.stderr, "File ID in ##FILE entry must be an integer."
-      print >> sys.stderr, self.headerLine
-      exit(1)
-    if self.includedDataSets.has_key(fileID):
-      print >> sys.stderr, "\nERROR: file " + self.filename
-      print >> sys.stderr, "Multiple files in the ##FILE list have identical ID values."
-      exit(1)
-    self.includedDataSets[fileID] = filename
-
-# Set the number of files with information in this vcf file.
-    if fileID > self.numberDataSets: self.numberDataSets = fileID
-
-    return True
-
-# Read additional information contained in the header.
-  def headerAdditional(self):
-    self.headerText += self.headerLine + "\n"
-
-    return True
-
-# Read in the column titles to check that all standard fields
-# are present and read in all the samples.
-  def headerTitleString(self, filename, writeOut):
-    self.headerTitles = self.headerLine + "\n"
-
-# Strip the end of line character from the last infoFields entry.
-    infoFields = self.headerLine.split("\t")
-    if len(infoFields) > 8:
-      self.samplesList = infoFields[9:]
-      self.genotypes = True
-    elif len(infoFields) == 8:
-      if writeOut: print >> sys.stdout, "No samples present in the header.  No genotype information available."
-    else:
-      print >> sys.stderr, "Not all vcf standard fields are available."
-      print >> sys.stderr, self.headerLine
-      exit(1)
-
-    return False
-
-# If there is no header in the vcf file, close and reopen the
-# file so that the first line is available for parsing as a
-# vcf record.
-  def noHeader(self, filename, writeOut):
-    if writeOut: print >> sys.stdout, "No header lines present in", filename
-    self.hasHeader = False
-    self.closeVcf(filename)
-    self.openVcf(filename)
-
-    return False
-
-# Check that info fields exist.
-  def checkInfoFields(self, tag):
-    if self.infoHeaderTags.has_key(tag) == False:
-      print >> sys.stderr, "Info tag \"", tag, "\" does not exist in the header."
-      exit(1)
-
-# Get the next line of information from the vcf file.
-  def getRecord(self):
-    self.record = self.filehandle.readline()
-    if not self.record: return False
-
-# Set up and execute a regular expression match.
-    recordRe = re.compile(r"^(\S+)\t(\d+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)(\n|\t.+)$")
-    recordMatch = recordRe.match(self.record)
-    if recordMatch is None:
-      print >> sys.stderr, "Unable to resolve vcf record.\n"
-      print >> sys.stderr, self.record
-      exit(1)
-
-    self.referenceSequence = recordMatch.group(1)
-    try: self.position = int(recordMatch.group(2))
-    except ValueError:
-      text = "variant position is not an integer"
-      self.generalError(text, "", None)
-    self.rsid       = recordMatch.group(3)
-    self.ref        = recordMatch.group(4)
-    self.alt        = recordMatch.group(5)
-    self.quality    = recordMatch.group(6)
-    self.filters    = recordMatch.group(7)
-    self.info       = recordMatch.group(8)
-    self.genotypeString = recordMatch.group(9)
-    self.infoTags   = {}
-
-# Check that the quality is an integer or a float.  If not, set the quality
-# to zero.
-    try: self.quality = float(self.quality)
-    except ValueError: self.quality = float(0.)
-
-# If recordMatch.group(9) is not the end of line character, there is
-# genotype information with this record.
-    if self.genotypeString != "\n": self.hasGenotypes = True
-    else: self.hasGenotypes = False
-
-# Add the reference sequence to the dictionary.  If it didn't previously
-# exist append the reference sequence to the end of the list as well. 
-# This ensures that the order in which the reference sequences appeared
-# in the header can be preserved.
-    if self.referenceSequence not in self.referenceSequences:
-      self.referenceSequences[self.referenceSequence] = True
-      self.referenceSequenceList.append(self.referenceSequence)
-
-# Check for multiple alternate alleles.
-    self.alternateAlleles = self.alt.split(",")
-    self.numberAlternateAlleles = len(self.alternateAlleles)
-
-# If required, process the info and genotypes.
-    if self.processInfo: self.processInfoFields()
-    if self.processGenotypes and self.hasGenotypes: self.processGenotypeFields()
-
-    return True
-
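-# A minimal sketch (editor's example, using a typical vcf data line) of how
-# the expression above carves up a record:
-#
-#   import re
-#   line = "20\t14370\trs6054257\tG\tA\t29\tPASS\tDP=14\tGT:GQ\t0|0:48\n"
-#   m = re.match(r"^(\S+)\t(\d+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)(\n|\t.+)$", line)
-#   m.group(1)   # '20'     (reference sequence)
-#   m.group(2)   # '14370'  (position)
-#   m.group(8)   # 'DP=14'  (info string)
-#   m.group(9)   # '\tGT:GQ\t0|0:48'  (genotype columns; '$' stops before the
-#                # trailing newline, so hasGenotypes is set to True)
-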
-# Process the info string.
-  def processInfoFields(self):
-
-# First break the info string into its constituent elements.
-    infoEntries = self.info.split(";")
-
-# As long as some info fields exist, place them into a dictionary.
-    for entry in infoEntries:
-      infoEntry = entry.split("=")
-
-# If the entry is a flag, there will be no equals and the length of
-# infoEntry will be 1.  In this case, set the dictionary entry to the
-# whole entry.  If the vcf file has undergone a union or intersection
-# operation and contains the information from multiple files, this may
-# be a '/' separated list of flags and so cannot be set to a Boolean value
-# yet.
-      if len(infoEntry) == 1: self.infoTags[infoEntry[0]] = infoEntry[0]
-      elif len(infoEntry) > 1: self.infoTags[infoEntry[0]] = infoEntry[1]
-
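-# For instance (editor's sketch), the info string "DP=14;AF=0.5;DB" yields:
-#
-#   infoTags == {'DP': '14', 'AF': '0.5', 'DB': 'DB'}
-#
-# where the flag DB maps to its own name rather than to True, for the
-# multi-file reason described above.
-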
-# Process the genotype formats and values.
-  def processGenotypeFields(self):
-    genotypeEntries = self.genotypeString.split("\t")
-    self.genotypeFormatString = genotypeEntries[1]
-    self.genotypes = list(genotypeEntries[2:])
-    self.genotypeFormats = {}
-    self.genotypeFields = {}
-    self.genotypeFormats = self.genotypeFormatString.split(":")
-
-# Check that the number of genotype fields is equal to the number of samples
-    if len(self.samplesList) != len(self.genotypes):
-      text = "The number of genotypes is different to the number of samples"
-      self.generalError(text, "", "")
-
-# Add the genotype information to a dictionary.
-    for i in range( len(self.samplesList) ):
-      genotypeInfo = self.genotypes[i].split(":")
-      self.genotypeFields[ self.samplesList[i] ] = {}
-
-# Check that there are as many fields as in the format field.  If not, this must
-# be because the information is not known.  In this case, it is permitted that
-# the genotype information is either . or ./.
-      if genotypeInfo[0] == "./." or genotypeInfo[0] == "." and len(self.genotypeFormats) != len(genotypeInfo): 
-        self.genotypeFields[ self.samplesList[i] ] = "."
-      else:
-        if len(self.genotypeFormats) != len(genotypeInfo):
-          text = "The number of genotype fields is different to the number specified in the format string"
-          self.generalError(text, "sample", self.samplesList[i])
-
-        for j in range( len(self.genotypeFormats) ): self.genotypeFields[ self.samplesList[i] ][ self.genotypeFormats[j] ] = genotypeInfo[j]
-
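-# Worked example (editor's sketch): with the format string "GT:GQ:DP" and a
-# sample entry "0/1:48:8" for sample NA001, the loop above produces:
-#
-#   genotypeFields['NA001'] == {'GT': '0/1', 'GQ': '48', 'DP': '8'}
-#
-# whereas a bare "." or "./." entry collapses to genotypeFields['NA001'] == ".".
-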
-# Parse through the vcf file until the correct reference sequence is
-# encountered and the position is greater than or equal to that requested.
-  def parseVcf(self, referenceSequence, position, writeOut, outputFile):
-    success = True
-    if self.referenceSequence != referenceSequence:
-      while self.referenceSequence != referenceSequence and success:
-        if writeOut: outputFile.write(self.record)
-        success = self.getRecord()
-
-    while self.referenceSequence == referenceSequence and self.position < position and success:
-      if writeOut: outputFile.write(self.record)
-      success = self.getRecord()
-
-    return success
-
-# Get the information for a specific info tag.  Also check that it contains
-# the correct number and type of entries.
-  def getInfo(self, tag):
-    result = []
-
-# Check if the tag exists in the header information.  If so,
-# determine the number and type of entries associated with this
-# tag.
-    if self.infoHeaderTags.has_key(tag):
-      infoNumber = self.infoHeaderTags[tag][0]
-      infoType = self.infoHeaderTags[tag][1]
-      numberValues = infoNumber
-
-# First check that the tag exists in the information string.  Then split
-# the entry on commas.  For flag entries, do not perform the split.
-      if self.infoTags.has_key(tag):
-        if numberValues == 0 and infoType == "Flag": result = True
-        elif numberValues != 0 and infoType == "Flag":
-          print >> sys.stderr, "ERROR"
-          exit(1)
-        else:
-          fields = self.infoTags[tag].split(",")
-          if len(fields) != numberValues:
-            text = "Unexpected number of entries"
-            self.generalError(text, "information tag", tag)
-
-          for i in range(infoNumber):
-            try: result.append(fields[i])
-            except IndexError:
-              text = "Insufficient values. Expected: " + self.infoHeaderTags[tag][0]
-              self.generalError(text, "tag:", tag)
-      else: numberValues = 0
-
-    else:
-      text = "information field does not have a definition in the header"
-      self.generalError(text, "tag", tag)
-
-    return numberValues, infoType, result
-
-# Get the genotype information.
-  def getGenotypeInfo(self, sample, tag):
-    result = []
-    if self.formatHeaderTags.has_key(tag):
-      infoNumber = self.formatHeaderTags[tag][0]
-      infoType = self.formatHeaderTags[tag][1]
-      numberValues = infoNumber
-
-      if self.genotypeFields[sample] == "." and len(self.genotypeFields[sample]) == 1:
-        numberValues = 0
-        result = "."
-      else:
-        if self.genotypeFields[sample].has_key(tag):
-          if tag == "GT":
-            if len(self.genotypeFields[sample][tag]) != 3 and len(self.genotypeFields[sample][tag]) != 1:
-              text = "Unexected number of characters in genotype (GT) field"
-              self.generalError(text, "sample", sample)
-
-# If a diploid call, check whether or not the genotype is phased.
-            elif len(self.genotypeFields[sample][tag]) == 3:
-              self.phased = True if self.genotypeFields[sample][tag][1] == "|" else False
-              result.append( self.genotypeFields[sample][tag][0] )
-              result.append( self.genotypeFields[sample][tag][2] )
-# If a haploid call, a single allele is listed.
-            elif len(self.genotypeFields[sample][tag]) == 1:
-              result.append( self.genotypeFields[sample][tag][0] )
-          else:
-            fields = self.genotypeFields[sample][tag].split(",")
-            if len(fields) != numberValues:
-              text = "Unexpected number of characters in " + tag + " field"
-              self.generalError(text, "sample", sample)
-
-            for i in range(infoNumber): result.append(fields[i])
-    else:
-      text = "genotype field does not have a definition in the header"
-      self.generalError(text, "tag", tag)
-
-    return numberValues, result
-
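-# To illustrate the GT handling above (editor's sketch): a diploid entry
-# "0|1" has three characters, its middle character "|" marks the call as
-# phased, and result becomes ['0', '1']; an unphased "0/1" yields the same
-# alleles with phased set to False; a one-character entry such as "1" is a
-# haploid call and contributes a single allele.
-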
-# Parse the dbsnp entry.  If the entry conforms to the required variant type,
-# return the dbsnp rsid value, otherwise ".".
-  def getDbsnpInfo(self):
-
-# First check that the variant class (VC) is listed as SNP.
-    vc = self.info.split("VC=",1)
-    if vc[1].find(";") != -1: snp = vc[1].split(";",1) 
-    else:
-      snp = []
-      snp.append(vc[1])
-
-    if snp[0].lower() == "snp": rsid = self.rsid
-    else: rsid = "."
-
-    return rsid
-
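-# For example (editor's sketch), an info string containing "VC=SNP" returns
-# this record's rsid, while "VC=INDEL" or a record with no VC entry at all
-# returns ".".
-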
-# Build a new vcf record.
-  def buildRecord(self, removeGenotypes):
-    record = self.referenceSequence + "\t" + \
-                str(self.position) + "\t" + \
-                self.rsid + "\t" + \
-                self.ref + "\t" + \
-                self.alt + "\t" + \
-                str(self.quality) + "\t" + \
-                self.filters + "\t" + \
-                self.info
-
-    if self.hasGenotypes and not removeGenotypes: record += self.genotypeString
-
-    record += "\n"
-
-    return record
-
-# Close the vcf file.
-  def closeVcf(self, filename):
-    self.filehandle.close()
-
-# Define error messages for different handled errors.
-  def generalError(self, text, field, fieldValue):
-    print >> sys.stderr, "\nError encountered when attempting to read:"
-    print >> sys.stderr, "\treference sequence :\t", self.referenceSequence
-    print >> sys.stderr, "\tposition :\t\t", self.position
-    if field != "": print >> sys.stderr, "\t", field, ":\t", fieldValue
-    print >> sys.stderr,  "\n", text
-    exit(1)
--- a/tools/vcf_tools/vcfPytools.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-#!/usr/bin/python
-
-import os.path
-import sys
-
-__author__ = "alistair ward"
-__version__ = "version 0.26"
-__date__ = "february 2011"
-
-def main():
-  usage = "Usage: vcfPytools.py [tool] [options]\n\n" + \
-          "Available tools:\n" + \
-          "  annotate:\n\tAnnotate the vcf file with membership in other vcf files.\n" + \
-          "  extract:\n\tExtract vcf records from a region.\n" + \
-          "  filter:\n\tFilter the vcf file.\n" + \
-          "  intersect:\n\tGenerate the intersection of two vcf files.\n" + \
-          "  merge:\n\tMerge a list of vcf files.\n" + \
-          "  multi:\n\tFind the intersections and unique fractions of multiple vcf files.\n" + \
-          "  sort:\n\tSort a vcf file.\n" + \
-          "  stats:\n\tGenerate statistics from a vcf file.\n" + \
-          "  union:\n\tGenerate the union of two vcf files.\n" + \
-          "  unique:\n\tGenerate the unique fraction from two vcf files.\n" + \
-          "  validate:\n\tValidate the input vcf file.\n\n" + \
-          "vcfPytools.py [tool] --help for information on a specific tool."
-
-# Determine the requested tool.
-
-  if len(sys.argv) > 1:
-    tool = sys.argv[1]
-  else:
-    print >> sys.stderr, usage
-    exit(1)
-
-  if tool == "annotate":
-    import annotate
-    success = annotate.main()
-  elif tool == "extract":
-    import extract
-    success = extract.main()
-  elif tool == "filter":
-    import filter
-    success = filter.main()
-  elif tool == "intersect":
-    import intersect
-    success = intersect.main()
-  elif tool == "multi":
-    import multi
-    success = multi.main()
-  elif tool == "merge":
-    import merge
-    success = merge.main()
-  elif tool == "sort":
-    import sort
-    success = sort.main()
-  elif tool == "stats":
-    import stats
-    success = stats.main()
-  elif tool == "union":
-    import union
-    success = union.main()
-  elif tool == "unique":
-    import unique
-    success = unique.main()
-  elif tool == "test":
-    import test
-    success = test.main()
-  elif tool == "validate":
-    import validate
-    success = validate.main()
-  elif tool == "--help" or tool == "-h" or tool == "?":
-    print >> sys.stderr, usage
-  else:
-    print >> sys.stderr, "Unknown tool: ",tool
-    print >> sys.stderr, "\n", usage
-    exit(1)
-
-# If the tool completed successfully, exit with a zero status.
-
-  if success == 0: exit(0)
-
-if __name__ == "__main__":
-  main()
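-
-# Typical invocations (editor's note; the options accepted by each tool are
-# printed by its own help):
-#
-#   python vcfPytools.py stats --help
-#   python vcfPytools.py validate --help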
--- a/tools/visualization/GMAJ.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Script that creates a zip file for use by GMAJ
-"""
-import sys, zipfile
-
-def __main__():
-    #create a new zip file
-    out_file  = zipfile.ZipFile( sys.argv[1], "w" )
-    #add info files
-    out_file.write( sys.argv[3], "input.gmaj" ) #THIS FILE MUST BE ADDED FIRST
-    out_file.write( sys.argv[2], "input.maf" )
-    
-    #add annotation files
-    for line in open( sys.argv[4] ):
-        try:
-            out_file.write( *[ field.strip() for field in line.split( "=", 1 ) ] )
-        except Exception:
-            #skip malformed lines or annotation files that cannot be read
-            continue
-    out_file.close()
-
-if __name__ == "__main__": __main__()
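-
-# Editor's sketch of the expected annotation list (hypothetical paths): each
-# line maps a source file to its name inside the zip, mirroring the
-# filenames_file configfile in GMAJ.xml, and is stored via write(src, arcname):
-#
-#   /galaxy/datasets/dataset_101.dat = 0.exons.bed
-#   /galaxy/datasets/dataset_102.dat = 0.underlays.bed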
--- a/tools/visualization/GMAJ.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,200 +0,0 @@
-<tool id="gmaj_1" name="GMAJ" version="2.0.1">
-<description>Multiple Alignment Viewer</description>
-  <command interpreter="python">GMAJ.py $out_file1 $maf_input $gmaj_file $filenames_file</command>
-  <inputs>
-      <param name="maf_input" type="data" format="maf" label="Alignment File" optional="False">
-        <validator type="metadata" check="species_chromosomes" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/>
-      </param>
-      <param name="refseq" label="Reference Sequence" type="select">
-        <option value="first" selected="true">First sequence in each block</option>
-        <option value="any">Any sequence</option>
-      </param>
-      <repeat name="annotations" title="Annotations">
-        <conditional name="annotation_style">
-          <param name="style" type="select" label="Annotation Style" help="If your data is not in a style similar to what is available from Galaxy (and the UCSC table browser), choose 'Basic'.">
-            <option value="galaxy" selected="true">Galaxy</option>
-            <option value="basic">Basic</option>
-          </param>
-          <when value="galaxy">
-            <param name="species" type="select" label="Species" multiple="False">
-              <options>
-                <filter type="data_meta" ref="maf_input" key="species" />
-              </options>
-            </param>
-            <param name="exons_file" type="data" format="bed,gff" label="Exons File" optional="True"/>
-            <param name="highlights_file" type="data" format="bed,gff" label="Highlights File" optional="True"/>
-            <param name="underlays_file" type="data" format="bed,gff" label="Underlays File" optional="True"/>
-            <param name="repeats_file" type="data" format="bed,gff" label="Repeats File" optional="True"/>
-            <param name="links_file" type="data" format="bed,gff" label="Links File" optional="True"/>
-          </when>
-          <when value="basic">
-            <param name="seq_name" label="Full Sequence Name" value="" type="text">
-              <validator type="empty_field" message="You must supply the sequence name"/>
-            </param>
-            <param name="exons_file" type="data" format="bed,gff" label="Exons File" optional="True"/>
-            <param name="highlights_file" type="data" format="bed,gff" label="Highlights File" optional="True"/>
-            <param name="underlays_file" type="data" format="bed,gff" label="Underlays File" optional="True"/>
-            <param name="repeats_file" type="data" format="bed,gff" label="Repeats File" optional="True"/>
-            <param name="links_file" type="data" format="bed,gff" label="Links File" optional="True"/>
-            <param name="offset" label="Offset" value="0" type="integer"/>
-          </when>
-        </conditional>
-      </repeat>
-      <param name="nowarn" type="drill_down" display="checkbox" hierarchy="recurse" multiple="true" label="Choose Warnings to Suppress" separator=" " help="These do not affect behavior, only suppress warning messages.">
-        <options>    
-          <option name="All" value="all">
-            <option name="MAF File" value="maf">
-              <option name="Invalid MAF version (maf_version)" value="maf_version"/>
-              <option name="Skipping unsupported paragraph (maf_paragraph)" value="maf_paragraph"/>
-              <option name="Unrecognized character found in alignment (bad_char_all)" value="bad_char_all"/>
-              <option name="Skipping all reconstruction scores: no species specified (recon_noseq)" value="recon_noseq"/>
-              <option name="Skipping reconstruction scores in blocks with missing row (recon_missing)" value="recon_missing"/>
-              <option name="The first row in some blocks is not the specified reference sequence (refseq_not_first)" value="refseq_not_first"/>
-              <option name="Skipping extra MAF File (unused_maf)" value="unused_maf"/>
-            </option>
-            <option name="Annotation Files" value="annotations">
-              <option name="Semantic Assumptions" value="semantics">
-                <option name="BED Format" value = "bed">
-                  <option name="BED12 blocks are exons (bed_blocks)" value="bed_blocks"/>
-                  <option name="BED thickstart/thickend designate CDS (bed_thick)" value="bed_thick"/>
-                  <option name="BED name is gene name when loading exons from BED12 (bed_name)" value="bed_name"/>
-                  <option name="BED name is gene name when loading exons from exon BED (bed_name_full)" value="bed_name_full"/>
-                  <option name="BED name's prefix is gene name when loading exons from exon BED (bed_name_prefix)" value="bed_name_prefix"/>
-                </option>
-                <option name="GFF group is gene name (gff_group)" value="gff_group"/>
-              </option>
-              <option name="Skipped Items" value="skipped">
-                <option name="Skipping lines in unrecognized format (annot_format)" value="annot_format"/>
-                <option name="Skipping lines with no gene name when loading exons (gene_missing)" value="gene_missing"/>
-                <option name="Skipping lone CDS start/stop codons when strand is unknown (ambiguous_codon)" value="ambiguous_codon"/>
-                <option name="Skipping lines with invalid repeat types (unrec_repeat)" value="unrec_repeat"/>
-                <option name="Using 'Other' for missing or incomplete repeat types (repeat_type_missing)" value="repeat_type_missing"/>
-                <option name="Ignoring invalid strand fields (bad_strand)" value="bad_strand"/>
-                <option name="Ignoring invalid score fields (bad_score)" value="bad_score"/>
-                <option name="Ignoring invalid color fields (color_format)" value="color_format"/>
-                <option name="Ignoring malformed URLs (bad_url)" value="bad_url"/>
-                <option name="Score shading is not yet supported (score_shading)" value="score_shading"/>
-              </option>
-              <option name="Red Flags" value="red">
-                <option name="Assuming that annotations in file ___ are for species ___ (seqname_fix_all)" value="seqname_fix_all"/>
-                <option name="BED start or end &lt; 0 (bed_coord)" value="bed_coord"/>
-                <option name="GFF start or end &lt; 1 (gff_coord)" value="gff_coord"/>
-                <option name="Missing item name for URL substitution (url_subst)" value="url_subst"/>
-              </option>
-            </option>
-            <option name="Miscellaneous" value="miscellaneous">
-              <option name="No refseq specified; assuming 'first' (default_refseq)" value="default_refseq"/>
-              <option name="One or more bundle entries are not used in parameters file(unused_entry)" value="unused_entry"/>
-              <option name="Skipping blocks for export where reference sequence is hidden or all gaps (export_skip)" value="export_skip"/>
-              <option name="Possible parse error: token ends with an escaped quote (escaped_quote)" value="escaped_quote"/>
-              <option name="Draggable panel dividers will not be sticky (no_sticky)" value="no_sticky"/>
-              <option name="Selecting a large block may be very slow (big_block)" value="big_block"/>
-            </option>
-          </option>
-        </options>
-      </param>
-  </inputs>
-  <configfiles>
-    <configfile name="gmaj_file">#:gmaj
-
-title = "Galaxy: $maf_input.name"
-alignfile = input.maf
-refseq = $refseq
-tabext = .bed .gff .gtf
-#if $nowarn.value:
-nowarn = $nowarn
-#end if
-
-#set $seq_count = 0
-#for $annotation_count, $annotation in $enumerate( $annotations ):
-#if $annotation.annotation_style.style == "galaxy":
-#set $species_chromosomes = {}
-#if $maf_input.dataset.metadata.species_chromosomes:
-#for $line in open( $maf_input.dataset.metadata.species_chromosomes.file_name ):
-#set $fields = $line.split( "\t" )
-#if $fields:
-#set $spec = $fields.pop( 0 )
-#set $species_chromosomes[spec] = $fields
-#end if
-#end for
-#end if
-#if $species_chromosomes and $annotation.annotation_style['species'].value in $species_chromosomes and $species_chromosomes[$annotation.annotation_style['species'].value]:
-#set $seq_names = [ "%s.%s" % ( $annotation.annotation_style['species'].value, $chrom ) for $chrom in $species_chromosomes[$annotation.annotation_style['species'].value]]
-#else:
-#set $seq_names = [$annotation.annotation_style['species']]
-#end if
-#else:
-#set $seq_names = [$annotation.annotation_style['seq_name']]
-#end if
-#for $seq_name in $seq_names:
-seq ${seq_count}:
-seqname = $seq_name
-#if $annotation.annotation_style['exons_file'].dataset:
-exons = ${annotation_count}.exons.${annotation.annotation_style['exons_file'].extension}
-#end if
-#if $annotation.annotation_style['repeats_file'].dataset:
-repeats = ${annotation_count}.repeats.${annotation.annotation_style['repeats_file'].extension}
-#end if
-#if $annotation.annotation_style['links_file'].dataset:
-links = ${annotation_count}.links.${annotation.annotation_style['links_file'].extension}
-#end if
-#if $annotation.annotation_style['underlays_file'].dataset:
-underlays = ${annotation_count}.underlays.${annotation.annotation_style['underlays_file'].extension}
-#end if
-#if $annotation.annotation_style['highlights_file'].dataset:
-highlights = ${annotation_count}.highlights.${annotation.annotation_style['highlights_file'].extension}
-#end if
-#if $annotation.annotation_style.style == "basic":
-offset = $annotation.annotation_style['offset']
-#end if
-
-#set $seq_count = $seq_count + 1
-#end for
-#end for
-</configfile>
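-    <!-- Editor's sketch (hypothetical values): for a single Galaxy-style
-         annotation on species hg18, chromosome chr1, with an exons file, the
-         gmaj_file template above renders roughly as:
-
-           #:gmaj
-
-           title = "Galaxy: my.maf"
-           alignfile = input.maf
-           refseq = first
-           tabext = .bed .gff .gtf
-
-           seq 0:
-           seqname = hg18.chr1
-           exons = 0.exons.bed
-    -->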
-    <configfile name="filenames_file">
-#for $annotation_count, $annotation in $enumerate( $annotations ):
-#if $annotation.annotation_style['exons_file'].dataset:
-$annotation.annotation_style['exons_file'] = ${annotation_count}.exons.${annotation.annotation_style['exons_file'].extension}
-#end if
-#if $annotation.annotation_style['repeats_file'].dataset:
-$annotation.annotation_style['repeats_file'] = ${annotation_count}.repeats.${annotation.annotation_style['repeats_file'].extension}
-#end if
-#if $annotation.annotation_style['links_file'].dataset:
-$annotation.annotation_style['links_file'] = ${annotation_count}.links.${annotation.annotation_style['links_file'].extension}
-#end if
-#if $annotation.annotation_style['underlays_file'].dataset:
-$annotation.annotation_style['underlays_file'] = ${annotation_count}.underlays.${annotation.annotation_style['underlays_file'].extension}
-#end if
-#if $annotation.annotation_style['highlights_file'].dataset:
-$annotation.annotation_style['highlights_file'] = ${annotation_count}.highlights.${annotation.annotation_style['highlights_file'].extension}
-#end if
-#end for
-</configfile>
-  </configfiles>
-  <outputs>
-    <data name="out_file1" format="gmaj.zip"/>
-  </outputs>
-<help>
-.. class:: infomark
-
-**Reference Sequence:**
-The default option, &quot;First sequence in each block&quot;, is the correct choice for the vast majority of MAF alignments.  The alternative, &quot;Any sequence&quot;, will allow you to flip the blocks to view them with any of the MAF sequences as the reference, but this is only appropriate if the file was generated by a sequence-symmetric alignment program such as TBA_.  Using &quot;Any sequence&quot; with an ordinary MAF will **not** give the same results as if that alignment had been run with a different reference sequence.
-
-.. class:: infomark
-
-**Annotation Style:**
-The default style, &quot;Galaxy&quot;, specifies one set of annotations for each species in the MAF file; it assumes that if you have, say, exons for several chromosomes of one species, they are all together in one file. The other style, &quot;Basic&quot;, is more flexible but cumbersome: a separate set of files is specified for each sequence (e.g. chromosome), and you must fill in the full sequence name as it appears in the MAF. The Basic style also allows you to provide a display offset that GMAJ will add to all of the position labels for that sequence.  With either style, specifying more than one set of annotations for the same sequence will result in an error message from GMAJ.
-
-----
-
-**What it does**
-
-GMAJ is an interactive viewer for MAF alignments, with support for optional annotation data.  In addition to browsing the alignments, you can select and export them according to a variety of criteria and send the output back to your Galaxy history.
-
-For detailed information on GMAJ, click here_.
-
-.. _here: /static/gmaj/docs/gmaj_readme.html
-.. _TBA: http://www.bx.psu.edu/miller_lab/
-  </help>
-</tool>
--- a/tools/visualization/LAJ.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Copies LAV file over to new file for use with LAJ
-"""
-import sys, shutil
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-shutil.copyfile(sys.argv[1],sys.argv[2])
--- a/tools/visualization/LAJ.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-<tool id="laj_1" name="LAJ">
-<description>Pairwise Alignment Viewer</description>
-  <command interpreter="python">LAJ.py $maf_input $out_file1</command>
-  <inputs>
-      <param name="maf_input" type="data" format="lav" label="Alignment File" optional="False"/>
-      <param name="seq_file1" type="data" format="fasta" label="First Sequence File" optional="True"/>
-      <param name="seq_file2" type="data" format="fasta" label="Second Sequence File" optional="True"/>
-      <param name="exonfile" type="data" format="txt" label="Exon File" optional="True"/>
-      <param name="repeatfile" type="data" format="txt" label="Repeat File" optional="True"/>
-      <param name="annotationfile" type="data" format="txt" label="Annotation File" optional="True"/>
-      <param name="underlayfile" type="data" format="txt" label="Underlay File" optional="True"/>
-      <param name="highlightfile" type="data" format="txt" label="Highlight File" optional="True"/>
-  </inputs>
-  <outputs>
-    <data name="out_file1" format="laj"/>
-  </outputs>
-<help>
-You can use this tool to view a set of LAV alignments.  You may include FASTA formatted sequences for both species.
-
-For detailed information on LAJ, click here_.
-
-.. _here: http://globin.cse.psu.edu/dist/laj/
-
-LAJ is a tool for viewing and manipulating the output from pairwise alignment programs such as blastz. It can display interactive dotplot, pip, and text representations of the alignments, a diagram showing the locations of exons and repeats, and annotation links to other web sites containing additional information about particular regions.
-
-.. class:: infomark
-
-**Note:** If you save output from the applet, you will need to manually refresh your history. 
-
-  </help>
-  <code file="LAJ_code.py"/>
-</tool>
\ No newline at end of file
--- a/tools/visualization/LAJ_code.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-#post processing, add sequence and additional annotation info if available
-from urllib import urlencode
-from galaxy.datatypes.images import create_applet_tag_peek
-
-def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
-    primary_data = out_data.items()[0][1]
-    
-    #default params for LAJ type
-    params = {
-    "alignfile1": "display?id=%s" % primary_data.id,
-    "buttonlabel": "Launch LAJ",
-    "title": "LAJ in Galaxy",
-    "posturl": "history_add_to?%s" % urlencode( { 'history_id': primary_data.history_id, 'ext': 'lav', 'name': 'LAJ Output', 'info': 'Added by LAJ', 'dbkey': primary_data.dbkey } )
-    }
-    for name,data in inp_data.items():
-        if name == "maf_input":
-            params["alignfile1"] = "display?id=%s" % data.id
-        elif name == "seq_file1" and data.state == data.states.OK and data.has_data():
-            params["file1seq1"] = "display?id=%s" % data.id
-        elif name == "seq_file2" and data.state == data.states.OK and data.has_data():
-            params["file1seq2"] = "display?id=%s" % data.id
-        elif name == "exonfile" and data.state == data.states.OK and data.has_data():
-            params["exonfile"] = "display?id=%s" % data.id
-        elif name == "repeatfile" and data.state == data.states.OK and data.has_data():
-            params["repeatfile"] = "display?id=%s" % data.id
-        elif name == "annotationfile" and data.state == data.states.OK and data.has_data():
-            params["annotationfile"] = "display?id=%s" % data.id
-        elif name == "underlayfile" and data.state == data.states.OK and data.has_data():
-            params["underlayfile"] = "display?id=%s" % data.id
-        elif name == "highlightfile" and data.state == data.states.OK and data.has_data():
-            params["highlightfile"] = "display?id=%s" % data.id
-    
-    if "file1seq1" not in params and "file1seq2" not in params:
-        params["noseq"] = "true"
-    
-    class_name = "edu.psu.cse.bio.laj.LajApplet.class"
-    archive = "/static/laj/laj.jar"
-    primary_data.peek = create_applet_tag_peek( class_name, archive, params )
-    app.model.context.add( primary_data )
-    app.model.context.flush()
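-
-#For example (editor's sketch), with only the required LAV input (dataset id
-#42) and no optional sequence or annotation files, params reduces to:
-#
-#    { "alignfile1": "display?id=42",
-#      "buttonlabel": "Launch LAJ",
-#      "title": "LAJ in Galaxy",
-#      "posturl": "history_add_to?...",
-#      "noseq": "true" }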
--- a/tools/visualization/build_ucsc_custom_track.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-#!/usr/bin/env python
-"""
-Build a UCSC genome browser custom track file
-"""
-
-import sys
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit(1)
-
-args = sys.argv[1:]
-
-out_fname = args.pop(0)
-out = open( out_fname, "w" )
-
-num_tracks = 0
-skipped_lines = 0
-first_invalid_line = 0
-while args:
-    # Suck in one dataset worth of arguments
-    in_fname = args.pop(0)
-    type = args.pop(0)
-    colspec = args.pop(0)
-    name = args.pop(0)
-    description = args.pop(0)
-    color = args.pop(0).replace( '-', ',' )
-    visibility = args.pop(0)
-    # Do the work
-    if type == "wig":
-        print >> out, '''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s''' \
-                      % ( name, description, color, visibility )
-        for i, line in enumerate( file( in_fname ) ):
-            print >> out, line,
-        print >> out
-    elif type == "bed":
-        print >> out, '''track name="%s" description="%s" color=%s visibility=%s''' \
-                      % ( name, description, color, visibility )
-        for i, line in enumerate( file( in_fname ) ):
-            print >> out, line,
-        print >> out
-    else:
-        # Assume type is interval (don't pass this script anything else!)
-        try:
-            c, s, e, st = [ int( x ) - 1 for x in colspec.split( "," ) ]
-        except ValueError:
-            try:
-                c, s, e = [ int( x ) - 1 for x in colspec.split( "," )[:3] ]
-                st = -1    #strand column is absent
-            except ValueError:
-                stop_err( "Columns in interval file invalid for UCSC custom track." )
-        
-        print >> out, '''track name="%s" description="%s" color=%s visibility=%s''' \
-                      % ( name, description, color, visibility )
-        i = 0
-        for i, line in enumerate( file( in_fname ) ):
-            line = line.rstrip( '\r\n' )
-            if line and not line.startswith( '#' ):
-                fields = line.split( "\t" )
-                if st >= 0:
-                    #strand column is present
-                    try:
-                        print >> out, "%s\t%s\t%s\t%d\t0\t%s" % ( fields[c], fields[s], fields[e], i, fields[st] )
-                    except IndexError:
-                        skipped_lines += 1
-                        if not first_invalid_line:
-                            first_invalid_line = i+1
-                else:
-                    try:
-                        print >> out, "%s\t%s\t%s" % ( fields[c], fields[s], fields[e] )
-                    except IndexError:
-                        skipped_lines += 1
-                        if not first_invalid_line:
-                            first_invalid_line = i+1
-        print >> out
-    num_tracks += 1
-    
-out.close()
-
-print "Generated a custom track containing %d subtracks." % num_tracks
-if skipped_lines:
-    print "Skipped %d invalid lines starting at #%d" % ( skipped_lines, first_invalid_line )
-
-
-
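-# Worked example (editor's sketch): for an interval dataset with colspec
-# "1,2,3,6", the 1-based columns become 0-based indices c=0, s=1, e=2, st=5,
-# and the first data line "chr1 100 200 feat 0 +" (tab-separated) is written
-# as:
-#
-#   chr1  100  200  0  0  +   (tab-separated)
-#
-# (name replaced by the line index, score fixed at 0, strand preserved).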
--- a/tools/visualization/build_ucsc_custom_track.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-<tool id="build_ucsc_custom_track_1" name="Build custom track">
-  <description>for UCSC genome browser</description>    
-  <command interpreter="python">
-    build_ucsc_custom_track.py 
-      "$out_file1"
-      #for $t in $tracks
-        "${t.input.file_name}"
-        "${t.input.ext}"
-        #if $t.input.ext == "interval"
-          ${t.input.metadata.chromCol},${t.input.metadata.startCol},${t.input.metadata.endCol},${t.input.metadata.strandCol}
-        #else
-          "NA"
-        #end if
-        "${t.name}"
-        "${t.description}"
-        "${t.color}"
-        "${t.visibility}"
-      #end for
-  </command>
-  <inputs>
-    <repeat name="tracks" title="Track">
-      <param name="input" type="data" format="interval,wig" label="Dataset"/>
-      <param name="name" type="text" size="15" value="User Track">
-        <validator type="length" max="15"/>
-      </param>
-      <param name="description" type="text" value="User Supplied Track (from Galaxy)">
-        <validator type="length" max="60"/>
-      </param>
-  		<param label="Color" name="color" type="select">
-  			<option selected="yes" value="0-0-0">Black</option>
-  			<option value="255-0-0">Red</option>
-  			<option value="0-255-0">Green</option>
-  			<option value="0-0-255">Blue</option>
-  			<option value="255-0-255">Magenta</option>
-  			<option value="0-255-255">Cyan</option>
-  			<option value="255-215-0">Gold</option>
-  			<option value="160-32-240">Purple</option>
-  			<option value="255-140-0">Orange</option>
-  			<option value="255-20-147">Pink</option>
-  			<option value="92-51-23">Dark Chocolate</option>
-  			<option value="85-107-47">Olive green</option>
-  		</param>
-  		<param label="Visibility" name="visibility" type="select">
-  			<option selected="yes" value="1">Dense</option>
-  			<option value="2">Full</option>
-  			<option value="3">Pack</option>
-  			<option value="4">Squish</option>
-  			<option value="0">Hide</option>
-  		</param>
-    </repeat>
-  </inputs>
-	<outputs>
-  	<data format="customtrack" name="out_file1" />
-	</outputs>
-    <tests>
-        <!--TODO: add a 2nd test here that includes 2 tracks -->
-        <test>
-  		    <param name="input" value="customTrack1.bed" />
-  		    <param name="name" value="User Track" />
-  		    <param name="description" value="User Supplied Track (from Galaxy)" />
-  		    <param name="color" value="0-0-0" />
-  		    <param name="visibility" value="1" />
-  		    <output name="out_file1" file="build_ucsc_custom_track_out1.customtrack" />
-        </test>
-	</tests>
-<help>
-
-.. class:: infomark
-
-This tool allows you to build custom tracks using datasets in your history for the UCSC genome browser. You can view these custom tracks on the UCSC genome browser by clicking on **display at UCSC main/test** link in the history panel of the output dataset.
-
------
-
-.. class:: warningmark
-
-Please note that this tool requires **all input datasets (tracks) to have the same genome build**. The tool throws an error when this requirement is not met; you may then have to choose a valid dataset or remove invalid tracks.
-
-</help>
-
-<code file="build_ucsc_custom_track_code.py" />
-  
-</tool>
\ No newline at end of file
--- a/tools/visualization/build_ucsc_custom_track_code.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-# runs after the job (and after the default post-filter)
-
-# Compatibility with older Python versions that lack a built-in set type
-try:
-    set()
-except NameError:
-    from sets import Set as set
-
-def validate_input( trans, error_map, param_values, page_param_map ):
-    dbkeys = set()
-    tracks = param_values['tracks']
-    for track in tracks:
-        if track['input']:
-            dbkeys.add( track['input'].dbkey )
-    if len( dbkeys ) > 1:
-        # FIXME: Should be able to assume error map structure is created
-        if 'tracks' not in error_map:
-            error_map['tracks'] = [ dict() for t in tracks ]
-            for i in range( len( tracks ) ):
-                error_map['tracks'][i]['input'] = \
-                    "All datasets must belong to same genomic build"
\ No newline at end of file