changeset 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children cdcb0ce84a1b
files tools/.DS_Store tools/._.DS_Store tools/._mytools tools/._tool_conf.xml tools/annotation_profiler/annotation_profiler.xml tools/annotation_profiler/annotation_profiler_for_interval.py tools/bedtools/._bedToBam.xml tools/bedtools/bedToBam.xml tools/data_destination/epigraph.xml tools/data_destination/epigraph_test.xml tools/data_source/access_libraries.xml tools/data_source/bed_convert.xml tools/data_source/biomart.xml tools/data_source/biomart_test.xml tools/data_source/bx_browser.xml tools/data_source/cbi_rice_mart.xml tools/data_source/data_source.py tools/data_source/echo.py tools/data_source/echo.xml tools/data_source/encode_db.xml tools/data_source/epigraph_import.xml tools/data_source/epigraph_import_test.xml tools/data_source/eupathdb.xml tools/data_source/fetch.py tools/data_source/fly_modencode.xml tools/data_source/flymine.xml tools/data_source/flymine_test.xml tools/data_source/genbank.py tools/data_source/genbank.xml tools/data_source/gramene_mart.xml tools/data_source/hapmapmart.xml tools/data_source/hbvar.xml tools/data_source/hbvar_filter.py tools/data_source/import.py tools/data_source/import.xml tools/data_source/metabolicmine.xml tools/data_source/microbial_import.py tools/data_source/microbial_import.xml tools/data_source/microbial_import_code.py tools/data_source/modmine.xml tools/data_source/ratmine.xml tools/data_source/ucsc_archaea.xml tools/data_source/ucsc_filter.py tools/data_source/ucsc_proxy.py tools/data_source/ucsc_proxy.xml tools/data_source/ucsc_tablebrowser.xml tools/data_source/ucsc_tablebrowser_archaea.xml tools/data_source/ucsc_tablebrowser_test.xml tools/data_source/ucsc_testproxy.xml tools/data_source/upload.py tools/data_source/upload.xml tools/data_source/worm_modencode.xml tools/data_source/wormbase.xml tools/data_source/wormbase_test.xml tools/data_source/yeastmine.xml tools/discreteWavelet/execute_dwt_IvC_all.pl tools/discreteWavelet/execute_dwt_IvC_all.xml tools/discreteWavelet/execute_dwt_cor_aVa_perClass.pl 
tools/discreteWavelet/execute_dwt_cor_aVa_perClass.xml tools/discreteWavelet/execute_dwt_cor_aVb_all.pl tools/discreteWavelet/execute_dwt_cor_aVb_all.xml tools/discreteWavelet/execute_dwt_var_perClass.pl tools/discreteWavelet/execute_dwt_var_perClass.xml tools/discreteWavelet/execute_dwt_var_perFeature.pl tools/discreteWavelet/execute_dwt_var_perFeature.xml tools/emboss_5/emboss_antigenic.xml tools/emboss_5/emboss_backtranseq.xml tools/emboss_5/emboss_banana.pl tools/emboss_5/emboss_banana.xml tools/emboss_5/emboss_biosed.xml tools/emboss_5/emboss_btwisted.xml tools/emboss_5/emboss_cai.xml tools/emboss_5/emboss_cai_custom.xml tools/emboss_5/emboss_chaos.xml tools/emboss_5/emboss_charge.xml tools/emboss_5/emboss_checktrans.xml tools/emboss_5/emboss_chips.xml tools/emboss_5/emboss_cirdna.xml tools/emboss_5/emboss_codcmp.xml tools/emboss_5/emboss_coderet.xml tools/emboss_5/emboss_compseq.xml tools/emboss_5/emboss_cpgplot.xml tools/emboss_5/emboss_cpgplot_wrapper.pl tools/emboss_5/emboss_cpgreport.xml tools/emboss_5/emboss_cusp.xml tools/emboss_5/emboss_cutseq.xml tools/emboss_5/emboss_dan.xml tools/emboss_5/emboss_degapseq.xml tools/emboss_5/emboss_descseq.xml tools/emboss_5/emboss_diffseq.xml tools/emboss_5/emboss_digest.xml tools/emboss_5/emboss_dotmatcher.xml tools/emboss_5/emboss_dotpath.xml tools/emboss_5/emboss_dottup.xml tools/emboss_5/emboss_dreg.xml tools/emboss_5/emboss_einverted.xml tools/emboss_5/emboss_epestfind.xml tools/emboss_5/emboss_equicktandem.xml tools/emboss_5/emboss_est2genome.xml tools/emboss_5/emboss_etandem.xml tools/emboss_5/emboss_extractfeat.xml tools/emboss_5/emboss_extractseq.xml tools/emboss_5/emboss_format_corrector.py tools/emboss_5/emboss_freak.xml tools/emboss_5/emboss_fuzznuc.xml tools/emboss_5/emboss_fuzzpro.xml tools/emboss_5/emboss_fuzztran.xml tools/emboss_5/emboss_garnier.xml tools/emboss_5/emboss_geecee.xml tools/emboss_5/emboss_getorf.xml tools/emboss_5/emboss_helixturnhelix.xml tools/emboss_5/emboss_hmoment.xml 
tools/emboss_5/emboss_iep.xml tools/emboss_5/emboss_infoseq.xml tools/emboss_5/emboss_infoseq_wrapper.pl tools/emboss_5/emboss_isochore.xml tools/emboss_5/emboss_lindna.xml tools/emboss_5/emboss_marscan.xml tools/emboss_5/emboss_maskfeat.xml tools/emboss_5/emboss_maskseq.xml tools/emboss_5/emboss_matcher.xml tools/emboss_5/emboss_megamerger.xml tools/emboss_5/emboss_merger.xml tools/emboss_5/emboss_msbar.xml tools/emboss_5/emboss_multiple_outputfile_wrapper.pl tools/emboss_5/emboss_needle.xml tools/emboss_5/emboss_newcpgreport.xml tools/emboss_5/emboss_newcpgseek.xml tools/emboss_5/emboss_newseq.xml tools/emboss_5/emboss_noreturn.xml tools/emboss_5/emboss_notseq.xml tools/emboss_5/emboss_nthseq.xml tools/emboss_5/emboss_octanol.xml tools/emboss_5/emboss_oddcomp.xml tools/emboss_5/emboss_palindrome.xml tools/emboss_5/emboss_pasteseq.xml tools/emboss_5/emboss_patmatdb.xml tools/emboss_5/emboss_pepcoil.xml tools/emboss_5/emboss_pepinfo.xml tools/emboss_5/emboss_pepnet.xml tools/emboss_5/emboss_pepstats.xml tools/emboss_5/emboss_pepwheel.xml tools/emboss_5/emboss_pepwindow.xml tools/emboss_5/emboss_pepwindowall.xml tools/emboss_5/emboss_plotcon.xml tools/emboss_5/emboss_plotorf.xml tools/emboss_5/emboss_polydot.xml tools/emboss_5/emboss_preg.xml tools/emboss_5/emboss_prettyplot.xml tools/emboss_5/emboss_prettyseq.xml tools/emboss_5/emboss_primersearch.xml tools/emboss_5/emboss_revseq.xml tools/emboss_5/emboss_seqmatchall.xml tools/emboss_5/emboss_seqret.xml tools/emboss_5/emboss_showfeat.xml tools/emboss_5/emboss_shuffleseq.xml tools/emboss_5/emboss_sigcleave.xml tools/emboss_5/emboss_single_outputfile_wrapper.pl tools/emboss_5/emboss_sirna.xml tools/emboss_5/emboss_sixpack.xml tools/emboss_5/emboss_skipseq.xml tools/emboss_5/emboss_splitter.xml tools/emboss_5/emboss_supermatcher.xml tools/emboss_5/emboss_syco.xml tools/emboss_5/emboss_tcode.xml tools/emboss_5/emboss_textsearch.xml tools/emboss_5/emboss_tmap.xml tools/emboss_5/emboss_tranalign.xml 
tools/emboss_5/emboss_transeq.xml tools/emboss_5/emboss_trimest.xml tools/emboss_5/emboss_trimseq.xml tools/emboss_5/emboss_twofeat.xml tools/emboss_5/emboss_union.xml tools/emboss_5/emboss_vectorstrip.xml tools/emboss_5/emboss_water.xml tools/emboss_5/emboss_wobble.xml tools/emboss_5/emboss_wordcount.xml tools/emboss_5/emboss_wordmatch.xml tools/encode/gencode_partition.xml tools/encode/random_intervals.xml tools/encode/random_intervals_no_bits.py tools/encode/split_by_partitions.py tools/evolution/add_scores.xml tools/evolution/codingSnps.pl tools/evolution/codingSnps.xml tools/evolution/codingSnps_filter.py tools/evolution/mutate_snp_codon.py tools/evolution/mutate_snp_codon.xml tools/extract/extract_genomic_dna.py tools/extract/extract_genomic_dna.xml tools/extract/liftOver_wrapper.py tools/extract/liftOver_wrapper.xml tools/extract/phastOdds/get_scores_galaxy.py tools/extract/phastOdds/phastOdds_tool.xml tools/fasta_tools/fasta_compute_length.py tools/fasta_tools/fasta_compute_length.xml tools/fasta_tools/fasta_concatenate_by_species.py tools/fasta_tools/fasta_concatenate_by_species.xml tools/fasta_tools/fasta_filter_by_length.py tools/fasta_tools/fasta_filter_by_length.xml tools/fasta_tools/fasta_to_tabular.py tools/fasta_tools/fasta_to_tabular.xml tools/fasta_tools/tabular_to_fasta.py tools/fasta_tools/tabular_to_fasta.xml tools/fastq/fastq_combiner.py tools/fastq/fastq_combiner.xml tools/fastq/fastq_filter.py tools/fastq/fastq_filter.xml tools/fastq/fastq_groomer.py tools/fastq/fastq_groomer.xml tools/fastq/fastq_manipulation.py tools/fastq/fastq_manipulation.xml tools/fastq/fastq_masker_by_quality.py tools/fastq/fastq_masker_by_quality.xml tools/fastq/fastq_paired_end_deinterlacer.py tools/fastq/fastq_paired_end_deinterlacer.xml tools/fastq/fastq_paired_end_interlacer.py tools/fastq/fastq_paired_end_interlacer.xml tools/fastq/fastq_paired_end_joiner.py tools/fastq/fastq_paired_end_joiner.xml tools/fastq/fastq_paired_end_splitter.py 
tools/fastq/fastq_paired_end_splitter.xml tools/fastq/fastq_stats.py tools/fastq/fastq_stats.xml tools/fastq/fastq_to_fasta.py tools/fastq/fastq_to_fasta.xml tools/fastq/fastq_to_tabular.py tools/fastq/fastq_to_tabular.xml tools/fastq/fastq_trimmer.py tools/fastq/fastq_trimmer.xml tools/fastq/fastq_trimmer_by_quality.py tools/fastq/fastq_trimmer_by_quality.xml tools/fastq/tabular_to_fastq.py tools/fastq/tabular_to_fastq.xml tools/fastx_toolkit/fasta_clipping_histogram.xml tools/fastx_toolkit/fasta_formatter.xml tools/fastx_toolkit/fasta_nucleotide_changer.xml tools/fastx_toolkit/fastq_quality_boxplot.xml tools/fastx_toolkit/fastq_quality_converter.xml tools/fastx_toolkit/fastq_quality_filter.xml tools/fastx_toolkit/fastq_to_fasta.xml tools/fastx_toolkit/fastx_artifacts_filter.xml tools/fastx_toolkit/fastx_barcode_splitter.xml tools/fastx_toolkit/fastx_barcode_splitter_galaxy_wrapper.sh tools/fastx_toolkit/fastx_clipper.xml tools/fastx_toolkit/fastx_collapser.xml tools/fastx_toolkit/fastx_nucleotides_distribution.xml tools/fastx_toolkit/fastx_quality_statistics.xml tools/fastx_toolkit/fastx_renamer.xml tools/fastx_toolkit/fastx_reverse_complement.xml tools/fastx_toolkit/fastx_trimmer.xml tools/filters/CreateInterval.pl tools/filters/CreateInterval.xml tools/filters/axt_to_concat_fasta.py tools/filters/axt_to_concat_fasta.xml tools/filters/axt_to_fasta.py tools/filters/axt_to_fasta.xml tools/filters/axt_to_lav.py tools/filters/axt_to_lav.xml tools/filters/axt_to_lav_code.py tools/filters/bed2gff.xml tools/filters/bed_to_bigbed.xml tools/filters/bed_to_gff_converter.py tools/filters/catWrapper.py tools/filters/catWrapper.xml tools/filters/changeCase.pl tools/filters/changeCase.xml tools/filters/commWrapper.pl tools/filters/commWrapper.xml tools/filters/compare.xml tools/filters/condense_characters.pl tools/filters/condense_characters.xml tools/filters/convert_characters.pl tools/filters/convert_characters.py tools/filters/convert_characters.xml 
tools/filters/cutWrapper.pl tools/filters/cutWrapper.xml tools/filters/fileGrep.xml tools/filters/fixedValueColumn.pl tools/filters/fixedValueColumn.xml tools/filters/gff/extract_GFF_Features.py tools/filters/gff/extract_GFF_Features.xml tools/filters/gff/gff_filter_by_attribute.py tools/filters/gff/gff_filter_by_attribute.xml tools/filters/gff/gff_filter_by_feature_count.py tools/filters/gff/gff_filter_by_feature_count.xml tools/filters/gff/gtf_filter_by_attribute_values_list.py tools/filters/gff/gtf_filter_by_attribute_values_list.xml tools/filters/gff2bed.xml tools/filters/gff_to_bed_converter.py tools/filters/grep.py tools/filters/grep.xml tools/filters/gtf2bedgraph.xml tools/filters/gtf_to_bedgraph_converter.py tools/filters/headWrapper.pl tools/filters/headWrapper.xml tools/filters/join.py tools/filters/joinWrapper.pl tools/filters/joinWrapper.py tools/filters/joiner.xml tools/filters/joiner2.xml tools/filters/lav_to_bed.py tools/filters/lav_to_bed.xml tools/filters/lav_to_bed_code.py tools/filters/mergeCols.py tools/filters/mergeCols.xml tools/filters/pasteWrapper.pl tools/filters/pasteWrapper.xml tools/filters/randomlines.py tools/filters/randomlines.xml tools/filters/remove_beginning.pl tools/filters/remove_beginning.xml tools/filters/sff_extract.py tools/filters/sff_extractor.xml tools/filters/sorter.py tools/filters/sorter.xml tools/filters/tailWrapper.pl tools/filters/tailWrapper.xml tools/filters/trimmer.py tools/filters/trimmer.xml tools/filters/ucsc_gene_bed_to_exon_bed.py tools/filters/ucsc_gene_bed_to_exon_bed.xml tools/filters/ucsc_gene_bed_to_intron_bed.py tools/filters/ucsc_gene_bed_to_intron_bed.xml tools/filters/ucsc_gene_table_to_intervals.py tools/filters/ucsc_gene_table_to_intervals.xml tools/filters/uniq.py tools/filters/uniq.xml tools/filters/wc_gnu.xml tools/filters/wig_to_bigwig.xml tools/filters/wiggle_to_simple.py tools/filters/wiggle_to_simple.xml tools/galaxy-loc.tar.gz tools/gatk/analyze_covariates.xml 
tools/gatk/count_covariates.xml tools/gatk/gatk_wrapper.py tools/gatk/indel_realigner.xml tools/gatk/realigner_target_creator.xml tools/gatk/table_recalibration.xml tools/gatk/unified_genotyper.xml tools/genetrack/genetrack_indexer.py tools/genetrack/genetrack_indexer.xml tools/genetrack/genetrack_peak_prediction.py tools/genetrack/genetrack_peak_prediction.xml tools/genome_diversity/cdblib.py tools/genome_diversity/extract_flanking_dna.py tools/genome_diversity/extract_flanking_dna.xml tools/genome_diversity/extract_primers.py tools/genome_diversity/extract_primers.xml tools/genome_diversity/genome_diversity.py tools/genome_diversity/select_restriction_enzymes.py tools/genome_diversity/select_restriction_enzymes.xml tools/genome_diversity/select_snps.py tools/genome_diversity/select_snps.xml tools/human_genome_variation/BEAM2_wrapper.sh tools/human_genome_variation/beam.xml tools/human_genome_variation/ctd.pl tools/human_genome_variation/ctd.xml tools/human_genome_variation/disease_ontology_gene_fuzzy_selector.pl tools/human_genome_variation/freebayes.xml tools/human_genome_variation/funDo.xml tools/human_genome_variation/gpass.pl tools/human_genome_variation/gpass.xml tools/human_genome_variation/hilbertvis.sh tools/human_genome_variation/hilbertvis.xml tools/human_genome_variation/ldtools.xml tools/human_genome_variation/ldtools_wrapper.sh tools/human_genome_variation/linkToDavid.pl tools/human_genome_variation/linkToDavid.xml tools/human_genome_variation/linkToGProfile.pl tools/human_genome_variation/linkToGProfile.xml tools/human_genome_variation/lped_to_geno.pl tools/human_genome_variation/lps.xml tools/human_genome_variation/lps_tool_wrapper.sh tools/human_genome_variation/mergeSnps.pl tools/human_genome_variation/pagetag.py tools/human_genome_variation/pass.xml tools/human_genome_variation/pass_wrapper.sh tools/human_genome_variation/senatag.py tools/human_genome_variation/sift.xml tools/human_genome_variation/sift_variants_wrapper.sh 
tools/human_genome_variation/snpFreq.xml tools/human_genome_variation/snpFreq2.pl tools/hyphy/hyphy_branch_lengths_wrapper.py tools/hyphy/hyphy_branch_lengths_wrapper.xml tools/hyphy/hyphy_dnds_wrapper.py tools/hyphy/hyphy_dnds_wrapper.xml tools/hyphy/hyphy_nj_tree_wrapper.py tools/hyphy/hyphy_nj_tree_wrapper.xml tools/ilmn_pacbio/abyss.xml tools/ilmn_pacbio/assembly_stats.py tools/ilmn_pacbio/assembly_stats.xml tools/ilmn_pacbio/cov_model.py tools/ilmn_pacbio/quake.py tools/ilmn_pacbio/quake.xml tools/ilmn_pacbio/quake_pe.xml tools/ilmn_pacbio/quake_wrapper.py tools/ilmn_pacbio/smrtpipe.py tools/ilmn_pacbio/smrtpipe_filter.xml tools/ilmn_pacbio/smrtpipe_galaxy.py tools/ilmn_pacbio/smrtpipe_hybrid.xml tools/ilmn_pacbio/soap_denovo.xml tools/indels/indel_analysis.py tools/indels/indel_analysis.xml tools/indels/indel_sam2interval.py tools/indels/indel_sam2interval.xml tools/indels/indel_table.py tools/indels/indel_table.xml tools/indels/sam_indel_filter.py tools/indels/sam_indel_filter.xml tools/maf/genebed_maf_to_fasta.xml tools/maf/interval2maf.py tools/maf/interval2maf.xml tools/maf/interval2maf_pairwise.xml tools/maf/interval_maf_to_merged_fasta.py tools/maf/interval_maf_to_merged_fasta.xml tools/maf/maf_by_block_number.py tools/maf/maf_by_block_number.xml tools/maf/maf_filter.py tools/maf/maf_filter.xml tools/maf/maf_limit_size.py tools/maf/maf_limit_size.xml tools/maf/maf_limit_to_species.py tools/maf/maf_limit_to_species.xml tools/maf/maf_reverse_complement.py tools/maf/maf_reverse_complement.xml tools/maf/maf_split_by_species.py tools/maf/maf_split_by_species.xml tools/maf/maf_stats.py tools/maf/maf_stats.xml tools/maf/maf_thread_for_species.py tools/maf/maf_thread_for_species.xml tools/maf/maf_to_bed.py tools/maf/maf_to_bed.xml tools/maf/maf_to_bed_code.py tools/maf/maf_to_fasta.xml tools/maf/maf_to_fasta_concat.py tools/maf/maf_to_fasta_multiple_sets.py tools/maf/maf_to_interval.py tools/maf/maf_to_interval.xml tools/maf/vcf_to_maf_customtrack.py 
tools/maf/vcf_to_maf_customtrack.xml tools/meme/._meme.xml tools/meme/fimo.xml tools/meme/fimo_wrapper.py tools/meme/meme.xml tools/metag_tools/blat_coverage_report.py tools/metag_tools/blat_coverage_report.xml tools/metag_tools/blat_mapping.py tools/metag_tools/blat_mapping.xml tools/metag_tools/blat_wrapper.py tools/metag_tools/blat_wrapper.xml tools/metag_tools/convert_SOLiD_color2nuc.py tools/metag_tools/convert_SOLiD_color2nuc.xml tools/metag_tools/fastqsolexa_to_fasta_qual.py tools/metag_tools/fastqsolexa_to_fasta_qual.xml tools/metag_tools/mapping_to_ucsc.py tools/metag_tools/mapping_to_ucsc.xml tools/metag_tools/megablast_wrapper.py tools/metag_tools/megablast_wrapper.xml tools/metag_tools/megablast_xml_parser.py tools/metag_tools/megablast_xml_parser.xml tools/metag_tools/rmap_wrapper.py tools/metag_tools/rmap_wrapper.xml tools/metag_tools/rmapq_wrapper.py tools/metag_tools/rmapq_wrapper.xml tools/metag_tools/short_reads_figure_high_quality_length.py tools/metag_tools/short_reads_figure_high_quality_length.xml tools/metag_tools/short_reads_figure_score.py tools/metag_tools/short_reads_figure_score.xml tools/metag_tools/short_reads_trim_seq.py tools/metag_tools/short_reads_trim_seq.xml tools/metag_tools/shrimp_color_wrapper.py tools/metag_tools/shrimp_color_wrapper.xml tools/metag_tools/shrimp_wrapper.py tools/metag_tools/shrimp_wrapper.xml tools/metag_tools/split_paired_reads.py tools/metag_tools/split_paired_reads.xml tools/multivariate_stats/cca.py tools/multivariate_stats/cca.xml tools/multivariate_stats/kcca.py tools/multivariate_stats/kcca.xml tools/multivariate_stats/kpca.py tools/multivariate_stats/kpca.xml tools/multivariate_stats/pca.py tools/multivariate_stats/pca.xml tools/mutation/visualize.py tools/mutation/visualize.xml tools/mytools/.DS_Store tools/mytools/._.DS_Store tools/mytools/._StartGenometriCorr.xml tools/mytools/._Start_GenometriCorr.R tools/mytools/._align2database.py tools/mytools/._align2database.xml 
tools/mytools/._align2multiple.xml tools/mytools/._alignr.py tools/mytools/._alignr.xml tools/mytools/._alignvis.xml tools/mytools/._altschulEriksonDinuclShuffle.py tools/mytools/._bed_to_bam.xml tools/mytools/._bedclean.xml tools/mytools/._bedsort.xml tools/mytools/._bigWigAverageOverBed.xml tools/mytools/._binaverage.xml tools/mytools/._bowtie2bed.pl tools/mytools/._bowtie2bed.xml tools/mytools/._bwBinavg.xml tools/mytools/._cdf.r tools/mytools/._cdf.xml tools/mytools/._closestBed.xml tools/mytools/._collapseBed.py tools/mytools/._collapseBed.xml tools/mytools/._collapseTab.xml tools/mytools/._convertEnsembl.xml tools/mytools/._dreme.xml tools/mytools/._endbias.xml tools/mytools/._fastamarkov.xml tools/mytools/._fastashuffle1.xml tools/mytools/._fastashuffle2.xml tools/mytools/._fastqdump.xml tools/mytools/._fimo2-old.xml tools/mytools/._fimo2.xml tools/mytools/._fimo2bed.py tools/mytools/._fimo2bed.xml tools/mytools/._genomeView.xml tools/mytools/._genomeview-old2.r tools/mytools/._genomeview.r tools/mytools/._genomeview_notused tools/mytools/._headtail.xml tools/mytools/._intersectSig.xml tools/mytools/._intersectbed.xml tools/mytools/._intervalSize.xml tools/mytools/._iupac2meme.xml tools/mytools/._makebigwig.sh tools/mytools/._makebigwig.sh-old tools/mytools/._makebigwig.xml tools/mytools/._makewindow.xml tools/mytools/._meme.xml tools/mytools/._memelogo.xml tools/mytools/._metaintv.xml tools/mytools/._metaintv_ext.xml tools/mytools/._phastCons.xml tools/mytools/._plotmatrix.xml tools/mytools/._r_wrapper.sh tools/mytools/._r_wrapper_old.sh tools/mytools/._random_interval.py tools/mytools/._random_interval.xml tools/mytools/._removeDuplicate.xml tools/mytools/._resize.xml tools/mytools/._revcompl.py tools/mytools/._revcompl.xml tools/mytools/._sampline.py tools/mytools/._seq2meme.py tools/mytools/._seq2meme.xml tools/mytools/._seqshuffle.py tools/mytools/._shuffleBed.py tools/mytools/._shuffleBed.xml tools/mytools/._shuffleSequenceUsingAltschulErikson.txt 
tools/mytools/._spatial_proximity.xml tools/mytools/._splicesite.xml tools/mytools/._splicesitescore tools/mytools/._stats.txt tools/mytools/._venn.xml tools/mytools/.sorted.bed tools/mytools/AATAAA.motif tools/mytools/StartGenometriCorr.xml tools/mytools/Start_GenometriCorr.R tools/mytools/align2database.py tools/mytools/align2database.xml tools/mytools/align2multiple.xml tools/mytools/alignr.py tools/mytools/alignr.xml tools/mytools/alignvis.py tools/mytools/alignvis.r tools/mytools/alignvis.xml tools/mytools/altschulEriksonDinuclShuffle.py tools/mytools/bedClean.py tools/mytools/bed_to_bam.xml tools/mytools/bedclean.xml tools/mytools/bedsort.xml tools/mytools/bigWigAverageOverBed.xml tools/mytools/binaverage.xml tools/mytools/binnedAverage.py tools/mytools/bowtie2bed.pl tools/mytools/bowtie2bed.xml tools/mytools/bwBinavg.xml tools/mytools/cdf-old-not-used/._cdf.xml tools/mytools/cdf-old-not-used/._cdf2-old.xml tools/mytools/cdf-old-not-used/cdf.py tools/mytools/cdf-old-not-used/cdf.xml tools/mytools/cdf-old-not-used/cdf2-old.xml tools/mytools/cdf-old-not-used/cdf2.py tools/mytools/cdf.r tools/mytools/cdf.xml tools/mytools/closestBed.py tools/mytools/closestBed.xml tools/mytools/collapseBed.py tools/mytools/collapseBed.xml tools/mytools/collapseBed2.py tools/mytools/collapseTab.py tools/mytools/collapseTab.xml tools/mytools/convertEnsembl.py tools/mytools/convertEnsembl.xml tools/mytools/dreme.xml tools/mytools/dreme_out/dreme.html tools/mytools/dreme_out/dreme.txt tools/mytools/dreme_out/dreme.xml tools/mytools/endbias.py tools/mytools/endbias.xml tools/mytools/fasta-dinucleotide-shuffle.py tools/mytools/fastamarkov.xml tools/mytools/fastashuffle1.xml tools/mytools/fastashuffle2.xml tools/mytools/fastqdump.xml tools/mytools/fimo2-old.xml tools/mytools/fimo2.xml tools/mytools/fimo2bed.py tools/mytools/fimo2bed.xml tools/mytools/fimo_out/cisml.css tools/mytools/fimo_out/cisml.xml tools/mytools/fimo_out/fimo-to-html.xsl tools/mytools/fimo_out/fimo.gff 
tools/mytools/fimo_out/fimo.html tools/mytools/fimo_out/fimo.txt tools/mytools/fimo_out/fimo.wig tools/mytools/fimo_out/fimo.xml tools/mytools/genomeView.xml tools/mytools/genomeview-old2.r tools/mytools/genomeview.r tools/mytools/genomeview_notused tools/mytools/getGenomicScore.py tools/mytools/headtail.xml tools/mytools/intersectSig.py tools/mytools/intersectSig.xml tools/mytools/intersectbed.xml tools/mytools/intervalOverlap.py tools/mytools/intervalSize.py tools/mytools/intervalSize.xml tools/mytools/iupac2meme.xml tools/mytools/makebigwig.sh tools/mytools/makebigwig.sh-old tools/mytools/makebigwig.xml tools/mytools/makewindow.py tools/mytools/makewindow.xml tools/mytools/meme.xml tools/mytools/memelogo.xml tools/mytools/metaintv.py tools/mytools/metaintv.xml tools/mytools/metaintv2.py tools/mytools/metaintv3.py tools/mytools/metaintv_ext.py tools/mytools/metaintv_ext.xml tools/mytools/phastCons.xml tools/mytools/plotmatrix.py tools/mytools/plotmatrix.xml tools/mytools/ptb-3t3 tools/mytools/ptb-ptb tools/mytools/r_wrapper.sh tools/mytools/r_wrapper_old.sh tools/mytools/random_interval.py tools/mytools/random_interval.xml tools/mytools/removeDuplicate.xml tools/mytools/resize.py tools/mytools/resize.xml tools/mytools/revcompl.py tools/mytools/revcompl.xml tools/mytools/sampline.py tools/mytools/sampline.xml tools/mytools/seq2meme.py tools/mytools/seq2meme.xml tools/mytools/seqshuffle.py tools/mytools/sequence.py tools/mytools/shuffleBed.py tools/mytools/shuffleBed.xml tools/mytools/shuffleSequenceUsingAltschulErikson.txt tools/mytools/spatial_proximity.py tools/mytools/spatial_proximity.xml tools/mytools/splicesite.xml tools/mytools/splicesitescore/._me2x5 tools/mytools/splicesitescore/._score3.pl tools/mytools/splicesitescore/._score5.pl tools/mytools/splicesitescore/._splicemodels tools/mytools/splicesitescore/._test3 tools/mytools/splicesitescore/._test3.fa tools/mytools/splicesitescore/._test5 tools/mytools/splicesitescore/._test5.fa 
tools/mytools/splicesitescore/me2x5 tools/mytools/splicesitescore/score3.pl tools/mytools/splicesitescore/score5.pl tools/mytools/splicesitescore/splicemodels/._hashseq.m tools/mytools/splicesitescore/splicemodels/._hashseq.m~ tools/mytools/splicesitescore/splicemodels/._me1s0acc1 tools/mytools/splicesitescore/splicemodels/._me1s0acc2 tools/mytools/splicesitescore/splicemodels/._me1s0acc3 tools/mytools/splicesitescore/splicemodels/._me1s0acc4 tools/mytools/splicesitescore/splicemodels/._me1s0acc5 tools/mytools/splicesitescore/splicemodels/._me1s0acc6 tools/mytools/splicesitescore/splicemodels/._me1s0acc7 tools/mytools/splicesitescore/splicemodels/._me1s0acc8 tools/mytools/splicesitescore/splicemodels/._me1s0acc9 tools/mytools/splicesitescore/splicemodels/._me2s0 tools/mytools/splicesitescore/splicemodels/._me2s0acc1 tools/mytools/splicesitescore/splicemodels/._me2s0acc2 tools/mytools/splicesitescore/splicemodels/._me2s0acc3 tools/mytools/splicesitescore/splicemodels/._me2s0acc4 tools/mytools/splicesitescore/splicemodels/._me2s0acc5 tools/mytools/splicesitescore/splicemodels/._me2s0acc6 tools/mytools/splicesitescore/splicemodels/._me2s0acc7 tools/mytools/splicesitescore/splicemodels/._me2s0acc8 tools/mytools/splicesitescore/splicemodels/._me2s0acc9 tools/mytools/splicesitescore/splicemodels/._me2x3acc1 tools/mytools/splicesitescore/splicemodels/._me2x3acc2 tools/mytools/splicesitescore/splicemodels/._me2x3acc3 tools/mytools/splicesitescore/splicemodels/._me2x3acc4 tools/mytools/splicesitescore/splicemodels/._me2x3acc5 tools/mytools/splicesitescore/splicemodels/._me2x3acc6 tools/mytools/splicesitescore/splicemodels/._me2x3acc7 tools/mytools/splicesitescore/splicemodels/._me2x3acc8 tools/mytools/splicesitescore/splicemodels/._me2x3acc9 tools/mytools/splicesitescore/splicemodels/._me2x5 tools/mytools/splicesitescore/splicemodels/._splice5sequences tools/mytools/splicesitescore/splicemodels/hashseq.m tools/mytools/splicesitescore/splicemodels/hashseq.m~ 
tools/mytools/splicesitescore/splicemodels/me1s0acc1 tools/mytools/splicesitescore/splicemodels/me1s0acc2 tools/mytools/splicesitescore/splicemodels/me1s0acc3 tools/mytools/splicesitescore/splicemodels/me1s0acc4 tools/mytools/splicesitescore/splicemodels/me1s0acc5 tools/mytools/splicesitescore/splicemodels/me1s0acc6 tools/mytools/splicesitescore/splicemodels/me1s0acc7 tools/mytools/splicesitescore/splicemodels/me1s0acc8 tools/mytools/splicesitescore/splicemodels/me1s0acc9 tools/mytools/splicesitescore/splicemodels/me2s0 tools/mytools/splicesitescore/splicemodels/me2s0acc1 tools/mytools/splicesitescore/splicemodels/me2s0acc2 tools/mytools/splicesitescore/splicemodels/me2s0acc3 tools/mytools/splicesitescore/splicemodels/me2s0acc4 tools/mytools/splicesitescore/splicemodels/me2s0acc5 tools/mytools/splicesitescore/splicemodels/me2s0acc6 tools/mytools/splicesitescore/splicemodels/me2s0acc7 tools/mytools/splicesitescore/splicemodels/me2s0acc8 tools/mytools/splicesitescore/splicemodels/me2s0acc9 tools/mytools/splicesitescore/splicemodels/me2x3acc1 tools/mytools/splicesitescore/splicemodels/me2x3acc2 tools/mytools/splicesitescore/splicemodels/me2x3acc3 tools/mytools/splicesitescore/splicemodels/me2x3acc4 tools/mytools/splicesitescore/splicemodels/me2x3acc5 tools/mytools/splicesitescore/splicemodels/me2x3acc6 tools/mytools/splicesitescore/splicemodels/me2x3acc7 tools/mytools/splicesitescore/splicemodels/me2x3acc8 tools/mytools/splicesitescore/splicemodels/me2x3acc9 tools/mytools/splicesitescore/splicemodels/me2x5 tools/mytools/splicesitescore/splicemodels/splice5sequences tools/mytools/splicesitescore/test3 tools/mytools/splicesitescore/test3.fa tools/mytools/splicesitescore/test5 tools/mytools/splicesitescore/test5.fa tools/mytools/stats.txt tools/mytools/venn.xml tools/ncbi_blast_plus/blastxml_to_tabular.py tools/ncbi_blast_plus/blastxml_to_tabular.xml tools/ncbi_blast_plus/hide_stderr.py tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml 
tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml tools/new_operations/basecoverage.xml tools/new_operations/cluster.xml tools/new_operations/column_join.py tools/new_operations/column_join.xml tools/new_operations/complement.xml tools/new_operations/concat.xml tools/new_operations/coverage.xml tools/new_operations/flanking_features.py tools/new_operations/flanking_features.xml tools/new_operations/get_flanks.py tools/new_operations/get_flanks.xml tools/new_operations/gops_basecoverage.py tools/new_operations/gops_cluster.py tools/new_operations/gops_complement.py tools/new_operations/gops_concat.py tools/new_operations/gops_coverage.py tools/new_operations/gops_intersect.py tools/new_operations/gops_join.py tools/new_operations/gops_merge.py tools/new_operations/gops_subtract.py tools/new_operations/intersect.xml tools/new_operations/join.xml tools/new_operations/merge.xml tools/new_operations/operation_filter.py tools/new_operations/subtract.xml tools/new_operations/subtract_query.py tools/new_operations/subtract_query.xml tools/new_operations/tables_arithmetic_operations.pl tools/new_operations/tables_arithmetic_operations.xml tools/next_gen_conversion/bwa_solid2fastq_modified.pl tools/next_gen_conversion/fastq_conversions.py tools/next_gen_conversion/fastq_conversions.xml tools/next_gen_conversion/fastq_gen_conv.py tools/next_gen_conversion/fastq_gen_conv.xml tools/next_gen_conversion/solid2fastq.py tools/next_gen_conversion/solid2fastq.xml tools/next_gen_conversion/solid_to_fastq.py tools/next_gen_conversion/solid_to_fastq.xml tools/ngs_rna/cuffcompare_wrapper.py tools/ngs_rna/cuffcompare_wrapper.xml tools/ngs_rna/cuffdiff_wrapper.py tools/ngs_rna/cuffdiff_wrapper.xml tools/ngs_rna/cufflinks_wrapper.py tools/ngs_rna/cufflinks_wrapper.xml tools/ngs_rna/filter_transcripts_via_tracking.py 
tools/ngs_rna/filter_transcripts_via_tracking.xml tools/ngs_rna/tophat_color_wrapper.xml tools/ngs_rna/tophat_wrapper.py tools/ngs_rna/tophat_wrapper.xml tools/ngs_rna/trinity_all.xml tools/ngs_simulation/ngs_simulation.py tools/ngs_simulation/ngs_simulation.xml tools/peak_calling/ccat_2_wrapper.xml tools/peak_calling/ccat_wrapper.py tools/peak_calling/ccat_wrapper.xml tools/peak_calling/macs_wrapper.py tools/peak_calling/macs_wrapper.xml tools/peak_calling/sicer_wrapper.py tools/peak_calling/sicer_wrapper.xml tools/picard/picard_AddOrReplaceReadGroups.xml tools/picard/picard_BamIndexStats.xml tools/picard/picard_MarkDuplicates.xml tools/picard/picard_ReorderSam.xml tools/picard/picard_ReplaceSamHeader.xml tools/picard/picard_wrapper.py tools/picard/rgPicardASMetrics.xml tools/picard/rgPicardFixMate.xml tools/picard/rgPicardGCBiasMetrics.xml tools/picard/rgPicardHsMetrics.xml tools/picard/rgPicardInsertSize.xml tools/picard/rgPicardLibComplexity.xml tools/picard/rgPicardMarkDups.xml tools/plotting/bar_chart.py tools/plotting/bar_chart.xml tools/plotting/boxplot.xml tools/plotting/histogram.py tools/plotting/histogram2.xml tools/plotting/plot_filter.py tools/plotting/plotter.py tools/plotting/r_wrapper.sh tools/plotting/scatterplot.py tools/plotting/scatterplot.xml tools/plotting/xy_plot.xml tools/regVariation/best_regression_subsets.py tools/regVariation/best_regression_subsets.xml tools/regVariation/categorize_elements_satisfying_criteria.pl tools/regVariation/categorize_elements_satisfying_criteria.xml tools/regVariation/compute_motif_frequencies_for_all_motifs.pl tools/regVariation/compute_motif_frequencies_for_all_motifs.xml tools/regVariation/compute_motifs_frequency.pl tools/regVariation/compute_motifs_frequency.xml tools/regVariation/compute_q_values.pl tools/regVariation/compute_q_values.xml tools/regVariation/delete_overlapping_indels.pl tools/regVariation/delete_overlapping_indels.xml tools/regVariation/draw_stacked_barplots.pl 
tools/regVariation/draw_stacked_barplots.xml tools/regVariation/featureCounter.py tools/regVariation/featureCounter.xml tools/regVariation/getIndelRates_3way.py tools/regVariation/getIndelRates_3way.xml tools/regVariation/getIndels.py tools/regVariation/getIndels_2way.xml tools/regVariation/getIndels_3way.xml tools/regVariation/linear_regression.py tools/regVariation/linear_regression.xml tools/regVariation/maf_cpg_filter.py tools/regVariation/maf_cpg_filter.xml tools/regVariation/microsatellite_birthdeath.pl tools/regVariation/microsatellite_birthdeath.xml tools/regVariation/microsats_alignment_level.py tools/regVariation/microsats_alignment_level.xml tools/regVariation/microsats_mutability.py tools/regVariation/microsats_mutability.xml tools/regVariation/multispecies_MicrosatDataGenerator_interrupted_GALAXY.pl tools/regVariation/multispecies_MicrosatDataGenerator_interrupted_GALAXY.xml tools/regVariation/parseMAF_smallIndels.pl tools/regVariation/quality_filter.py tools/regVariation/quality_filter.xml tools/regVariation/qv_to_bqv.py tools/regVariation/qv_to_bqv.xml tools/regVariation/rcve.py tools/regVariation/rcve.xml tools/regVariation/substitution_rates.py tools/regVariation/substitution_rates.xml tools/regVariation/substitutions.py tools/regVariation/substitutions.xml tools/regVariation/t_test_two_samples.pl tools/regVariation/t_test_two_samples.xml tools/regVariation/windowSplitter.py tools/regVariation/windowSplitter.xml tools/rgenetics/listFiles.py tools/rgenetics/plinkbinJZ.py tools/rgenetics/plinkbinJZ.pyc tools/rgenetics/rgCaCo.py tools/rgenetics/rgCaCo.xml tools/rgenetics/rgClean.py tools/rgenetics/rgClean.xml tools/rgenetics/rgClustalw.py tools/rgenetics/rgClustalw.xml tools/rgenetics/rgEigPCA.py tools/rgenetics/rgEigPCA.xml tools/rgenetics/rgFastQC.py tools/rgenetics/rgFastQC.xml tools/rgenetics/rgGLM.py tools/rgenetics/rgGLM.xml tools/rgenetics/rgGLM_code.py tools/rgenetics/rgGRR.py tools/rgenetics/rgGRR.xml tools/rgenetics/rgGTOOL.py 
tools/rgenetics/rgGTOOL.xml tools/rgenetics/rgHaploView.py tools/rgenetics/rgHaploView.xml tools/rgenetics/rgLDIndep.py tools/rgenetics/rgLDIndep.xml tools/rgenetics/rgLDIndep_code.py tools/rgenetics/rgManQQ.py tools/rgenetics/rgManQQ.xml tools/rgenetics/rgManQQ_code.py tools/rgenetics/rgPedSub.py tools/rgenetics/rgPedSub.xml tools/rgenetics/rgQC.py tools/rgenetics/rgQC.xml tools/rgenetics/rgQQ.py tools/rgenetics/rgQQ.xml tools/rgenetics/rgQQ_code.py tools/rgenetics/rgRegion.py tools/rgenetics/rgRegion.xml tools/rgenetics/rgTDT.py tools/rgenetics/rgTDT.xml tools/rgenetics/rgWebLogo3.py tools/rgenetics/rgWebLogo3.xml tools/rgenetics/rgfakePed.py tools/rgenetics/rgfakePed.xml tools/rgenetics/rgfakePhe.py tools/rgenetics/rgfakePhe.xml tools/rgenetics/rgtest.sh tools/rgenetics/rgtest_one_tool.sh tools/rgenetics/rgutils.py tools/rgenetics/rgutils.pyc tools/rgenetics/test tools/rgenetics/test.eps tools/rgenetics/test.pdf tools/rgenetics/test.png tools/samtools/bam_to_sam.py tools/samtools/bam_to_sam.xml tools/samtools/pileup_interval.py tools/samtools/pileup_interval.xml tools/samtools/pileup_parser.pl tools/samtools/pileup_parser.xml tools/samtools/sam2interval.py tools/samtools/sam2interval.xml tools/samtools/sam_bitwise_flag_filter.py tools/samtools/sam_bitwise_flag_filter.xml tools/samtools/sam_merge.py tools/samtools/sam_merge.xml tools/samtools/sam_merge_code.py tools/samtools/sam_pileup.py tools/samtools/sam_pileup.xml tools/samtools/sam_to_bam.py tools/samtools/sam_to_bam.xml tools/samtools/samtools_flagstat.xml tools/solid_tools/maq_cs_wrapper.py tools/solid_tools/maq_cs_wrapper.xml tools/solid_tools/maq_cs_wrapper_code.py tools/solid_tools/qualsolid_boxplot_graph.sh tools/solid_tools/solid_qual_boxplot.xml tools/solid_tools/solid_qual_stats.py tools/solid_tools/solid_qual_stats.xml tools/sr_assembly/velvetg.xml tools/sr_assembly/velvetg_wrapper.py tools/sr_assembly/velveth.xml tools/sr_assembly/velveth_wrapper.py tools/sr_mapping/PerM.xml 
tools/sr_mapping/bfast_wrapper.py tools/sr_mapping/bfast_wrapper.xml tools/sr_mapping/bowtie_color_wrapper.xml tools/sr_mapping/bowtie_wrapper.py tools/sr_mapping/bowtie_wrapper.xml tools/sr_mapping/bwa_color_wrapper.xml tools/sr_mapping/bwa_wrapper.py tools/sr_mapping/bwa_wrapper.xml tools/sr_mapping/fastq_statistics.xml tools/sr_mapping/lastz_paired_reads_wrapper.py tools/sr_mapping/lastz_paired_reads_wrapper.xml tools/sr_mapping/lastz_wrapper.py tools/sr_mapping/lastz_wrapper.xml tools/sr_mapping/mosaik.xml tools/sr_mapping/srma_wrapper.py tools/sr_mapping/srma_wrapper.xml tools/stats/aggregate_binned_scores_in_intervals.xml tools/stats/aggregate_scores_in_intervals.py tools/stats/column_maker.py tools/stats/column_maker.xml tools/stats/cor.py tools/stats/cor.xml tools/stats/correlation.pl tools/stats/correlation.xml tools/stats/count_gff_features.py tools/stats/count_gff_features.xml tools/stats/dna_filtering.py tools/stats/dna_filtering.xml tools/stats/filtering.py tools/stats/filtering.xml tools/stats/generate_matrix_for_pca_lda.pl tools/stats/generate_matrix_for_pca_lda.xml tools/stats/grouping.py tools/stats/grouping.xml tools/stats/gsummary.py tools/stats/gsummary.xml tools/stats/gsummary.xml.groups tools/stats/lda_analy.xml tools/stats/plot_from_lda.xml tools/stats/r_wrapper.sh tools/stats/wiggle_to_simple.py tools/stats/wiggle_to_simple.xml tools/taxonomy/find_diag_hits.py tools/taxonomy/find_diag_hits.xml tools/taxonomy/gi2taxonomy.py tools/taxonomy/gi2taxonomy.xml tools/taxonomy/lca.py tools/taxonomy/lca.xml tools/taxonomy/poisson2test.py tools/taxonomy/poisson2test.xml tools/taxonomy/t2ps_wrapper.py tools/taxonomy/t2ps_wrapper.xml tools/taxonomy/t2t_report.xml tools/tool_conf.xml tools/unix_tools/._awk_tool.xml tools/unix_tools/._awk_wrapper.sh tools/unix_tools/._cut_tool.xml tools/unix_tools/._cut_wrapper.sh tools/unix_tools/._find_and_replace.pl tools/unix_tools/._find_and_replace.xml tools/unix_tools/._grep_tool.xml 
tools/unix_tools/._grep_wrapper.sh tools/unix_tools/._grep_wrapper_old.sh tools/unix_tools/._join_tool.sh tools/unix_tools/._join_tool.xml tools/unix_tools/._remove_ending.sh tools/unix_tools/._remove_ending.xml tools/unix_tools/._sed_tool.xml tools/unix_tools/._sed_wrapper.sh tools/unix_tools/._sort_tool.xml tools/unix_tools/._uniq_tool.xml tools/unix_tools/._word_list_grep.pl tools/unix_tools/._word_list_grep.xml tools/unix_tools/awk_tool.xml tools/unix_tools/awk_wrapper.sh tools/unix_tools/cut_tool.xml tools/unix_tools/cut_wrapper.sh tools/unix_tools/find_and_replace.pl tools/unix_tools/find_and_replace.xml tools/unix_tools/grep_tool.xml tools/unix_tools/grep_wrapper.sh tools/unix_tools/grep_wrapper_old.sh tools/unix_tools/join_tool.sh tools/unix_tools/join_tool.xml tools/unix_tools/remove_ending.sh tools/unix_tools/remove_ending.xml tools/unix_tools/sed_tool.xml tools/unix_tools/sed_wrapper.sh tools/unix_tools/sort_tool.xml tools/unix_tools/uniq_tool.xml tools/unix_tools/word_list_grep.pl tools/unix_tools/word_list_grep.xml tools/validation/fix_errors.py tools/validation/fix_errors.xml tools/validation/fix_errors_code.py tools/validation/validate.py tools/vcf_tools/annotate.py tools/vcf_tools/annotate.xml tools/vcf_tools/bedClass.py tools/vcf_tools/extract.py tools/vcf_tools/extract.xml tools/vcf_tools/filter.py tools/vcf_tools/filter.xml tools/vcf_tools/intersect.py tools/vcf_tools/intersect.xml tools/vcf_tools/tools.py tools/vcf_tools/vcfClass.py tools/vcf_tools/vcfPytools.py tools/visualization/GMAJ.py tools/visualization/GMAJ.xml tools/visualization/LAJ.py tools/visualization/LAJ.xml tools/visualization/LAJ_code.py tools/visualization/build_ucsc_custom_track.py tools/visualization/build_ucsc_custom_track.xml tools/visualization/build_ucsc_custom_track_code.py
diffstat 1094 files changed, 450165 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file tools/.DS_Store has changed
Binary file tools/._.DS_Store has changed
Binary file tools/._mytools has changed
Binary file tools/._tool_conf.xml has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/annotation_profiler/annotation_profiler.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,136 @@
+<tool id="Annotation_Profiler_0" name="Profile Annotations" version="1.0.0">
+  <description>for a set of genomic intervals</description>
+  <command interpreter="python">annotation_profiler_for_interval.py -i $input1 -c ${input1.metadata.chromCol} -s ${input1.metadata.startCol} -e ${input1.metadata.endCol} -o $out_file1 $keep_empty -p ${GALAXY_DATA_INDEX_DIR}/annotation_profiler/$dbkey $summary -b 3 -t $table_names</command>
+  <inputs>
+    <param format="interval" name="input1" type="data" label="Choose Intervals">
+      <validator type="dataset_metadata_in_file" filename="annotation_profiler_valid_builds.txt" metadata_name="dbkey" metadata_column="0" message="Profiling is not currently available for this species."/>
+    </param>
+    <param name="keep_empty" type="select" label="Keep Region/Table Pairs with 0 Coverage">
+      <option value="-k">Keep</option>
+      <option value="" selected="true">Discard</option>
+    </param>
+    <param name="summary" type="select" label="Output per Region/Summary">
+      <option value="-S">Summary</option>
+      <option value="" selected="true">Per Region</option>
+    </param>
+    <param name="table_names" type="drill_down" display="checkbox" hierarchy="recurse" multiple="true" label="Choose Tables to Use" help="Selecting no tables will result in using all tables." from_file="annotation_profiler_options.xml"/>
+   </inputs>
+   <outputs>
+     <data format="input" name="out_file1">
+       <change_format>
+         <when input="summary" value="-S" format="tabular" />
+       </change_format>
+     </data>
+   </outputs>
+   <tests>
+     <test>
+       <param name="input1" value="4.bed" dbkey="hg18"/>
+       <param name="keep_empty" value=""/>
+       <param name="summary" value=""/>
+       <param name="table_names" value="acembly,affyGnf1h,knownAlt,knownGene,mrna,multiz17way,multiz28way,refGene,snp126"/>
+       <output name="out_file1" file="annotation_profiler_1.out" />
+     </test>
+     <test>
+       <param name="input1" value="3.bed" dbkey="hg18"/>
+       <param name="keep_empty" value=""/>
+       <param name="summary" value="Summary"/>
+       <param name="table_names" value="acembly,affyGnf1h,knownAlt,knownGene,mrna,multiz17way,multiz28way,refGene,snp126"/>
+       <output name="out_file1" file="annotation_profiler_2.out" />
+     </test>
+   </tests>
+   <help>
+**What it does**
+
+Takes an input set of intervals and for each interval determines the base coverage of the interval by a set of features (tables) available from UCSC. Genomic regions from the input feature data have been merged by overlap / direct adjacency (e.g. a table having ranges of: 1-10, 6-12, 12-20 and 25-28 results in two merged ranges of: 1-20 and 25-28).
+
+By default, this tool will check the coverage of your intervals against all available features; you may, however, choose to select only those tables that you want to include. Selecting a section heading will effectively cause all of its children to be selected.
+
+You may alternatively choose to receive a summary across all of the intervals that you provide.
+
+-----
+
+**Example**
+
+Using the interval below and selecting several tables::
+
+ chr1 4558 14764 uc001aab.1 0 -
+
+results in::
+
+ chr1 4558 14764 uc001aab.1 0 - snp126Exceptions 151 142
+ chr1 4558 14764 uc001aab.1 0 - genomicSuperDups 10206 1
+ chr1 4558 14764 uc001aab.1 0 - chainOryLat1 3718 1
+ chr1 4558 14764 uc001aab.1 0 - multiz28way 10206 1
+ chr1 4558 14764 uc001aab.1 0 - affyHuEx1 3553 32
+ chr1 4558 14764 uc001aab.1 0 - netXenTro2 3050 1
+ chr1 4558 14764 uc001aab.1 0 - intronEst 10206 1
+ chr1 4558 14764 uc001aab.1 0 - xenoMrna 10203 1
+ chr1 4558 14764 uc001aab.1 0 - ctgPos 10206 1
+ chr1 4558 14764 uc001aab.1 0 - clonePos 10206 1
+ chr1 4558 14764 uc001aab.1 0 - chainStrPur2Link 1323 29
+ chr1 4558 14764 uc001aab.1 0 - affyTxnPhase3HeLaNuclear 9011 8
+ chr1 4558 14764 uc001aab.1 0 - snp126orthoPanTro2RheMac2 61 58
+ chr1 4558 14764 uc001aab.1 0 - snp126 205 192
+ chr1 4558 14764 uc001aab.1 0 - chainEquCab1 10206 1
+ chr1 4558 14764 uc001aab.1 0 - netGalGal3 3686 1
+ chr1 4558 14764 uc001aab.1 0 - phastCons28wayPlacMammal 10172 3
+
+Where::
+
+ The first added column is the table name.
+ The second added column is the number of bases covered by the table.
+ The third added column is the number of regions from the table that are covered by the interval.
+
+Alternatively, requesting a summary, using the intervals below and selecting several tables::
+
+ chr1 4558 14764 uc001aab.1 0 -
+ chr1 4558 19346 uc001aac.1 0 -
+
+results in::
+
+ #tableName tableSize tableRegionCount allIntervalCount allIntervalSize allCoverage allTableRegionsOverlaped allIntervalsOverlapingTable nrIntervalCount nrIntervalSize nrCoverage nrTableRegionsOverlaped nrIntervalsOverlapingTable
+ snp126Exceptions 133601 92469 2 24994 388 359 2 1 14788 237 217 1
+ genomicSuperDups 12268847 657 2 24994 24994 2 2 1 14788 14788 1 1
+ chainOryLat1 70337730 2542 2 24994 7436 2 2 1 14788 3718 1 1
+ affyHuEx1 15703901 112274 2 24994 7846 70 2 1 14788 4293 38 1
+ netXenTro2 111440392 1877 2 24994 6100 2 2 1 14788 3050 1 1
+ snp126orthoPanTro2RheMac2 700436 690674 2 24994 124 118 2 1 14788 63 60 1
+ intronEst 135796064 2332 2 24994 24994 2 2 1 14788 14788 1 1
+ xenoMrna 129031327 1586 2 24994 20406 2 2 1 14788 10203 1 1
+ snp126 956976 838091 2 24994 498 461 2 1 14788 293 269 1
+ clonePos 224999719 39 2 24994 24994 2 2 1 14788 14788 1 1
+ chainStrPur2Link 7948016 119841 2 24994 2646 58 2 1 14788 1323 29 1
+ affyTxnPhase3HeLaNuclear 136797870 140244 2 24994 22601 17 2 1 14788 13590 9 1
+ multiz28way 225928588 38 2 24994 24994 2 2 1 14788 14788 1 1
+ ctgPos 224999719 39 2 24994 24994 2 2 1 14788 14788 1 1
+ chainEquCab1 246306414 141 2 24994 24994 2 2 1 14788 14788 1 1
+ netGalGal3 203351973 461 2 24994 7372 2 2 1 14788 3686 1 1
+ phastCons28wayPlacMammal 221017670 22803 2 24994 24926 6 2 1 14788 14754 3 1
+
+Where::
+ 
+ tableName is the name of the table
+ tableChromosomeCoverage is the number of positions existing in the table for only the chromosomes that were referenced by the interval file
+ tableChromosomeCount is the number of regions existing in the table for only the chromosomes that were referenced by the interval file
+ tableRegionCoverage is the number of positions existing in the table between the minimal and maximal bounding regions that were referenced by the interval file
+ tableRegionCount is the number of regions existing in the table between the minimal and maximal bounding regions that were referenced by the interval file
+ 
+ allIntervalCount is the number of provided intervals
+ allIntervalSize is the sum of the lengths of the provided interval file
+ allCoverage is the sum of the coverage for each provided interval
+ allTableRegionsOverlapped is the sum of the number of regions of the table (non-unique) that were overlapped for each interval
+ allIntervalsOverlappingTable is the number of provided intervals which overlap the table
+ 
+ nrIntervalCount is the number of non-redundant intervals
+ nrIntervalSize is the sum of the lengths of non-redundant intervals
+ nrCoverage is the sum of the coverage of non-redundant intervals
+ nrTableRegionsOverlapped is the number of regions of the table (unique) that were overlapped by the non-redundant intervals
+ nrIntervalsOverlappingTable is the number of non-redundant intervals which overlap the table
+ 
+
+.. class:: infomark
+
+**TIP:** non-redundant (nr) refers to the set of intervals that remains after the intervals provided have been merged to resolve overlaps
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/annotation_profiler/annotation_profiler_for_interval.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,360 @@
+#!/usr/bin/env python
+#Dan Blankenberg
+#For a set of intervals, this tool returns the same set of intervals 
+#with 2 additional fields: the name of a Table/Feature and the number of
+#bases covered. The original intervals are repeated for each Table/Feature.
+
+import sys, struct, optparse, os, random
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "bx-python" )
+import bx.intervals.io
+import bx.bitset
+try:
+    import psyco
+    psyco.full()
+except:
+    pass
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+class CachedRangesInFile:
+    DEFAULT_STRUCT_FORMAT = '<I'
+    def __init__( self, filename, profiler_info ):
+        self.file_size = os.stat( filename ).st_size
+        self.file = open( filename, 'rb' )
+        self.filename = filename
+        self.fmt = profiler_info.get( 'profiler_struct_format', self.DEFAULT_STRUCT_FORMAT )
+        self.fmt_size = int( profiler_info.get( 'profiler_struct_size', struct.calcsize( self.fmt ) ) )
+        self.length = int( self.file_size / self.fmt_size / 2 )
+        self._cached_ranges = [ None for i in xrange( self.length ) ]
+    def __getitem__( self, i ):
+        if self._cached_ranges[i] is not None:
+            return self._cached_ranges[i]
+        if i < 0: i = self.length + i
+        offset = i * self.fmt_size * 2
+        self.file.seek( offset )
+        try:
+            start = struct.unpack( self.fmt, self.file.read( self.fmt_size ) )[0]
+            end = struct.unpack( self.fmt, self.file.read( self.fmt_size ) )[0]
+        except Exception, e:
+            raise IndexError, e
+        self._cached_ranges[i] = ( start, end )
+        return start, end
+    def __len__( self ):
+        return self.length
+
+class RegionCoverage:
+    def __init__( self, filename_base, profiler_info ):
+        try:
+            self._coverage = CachedRangesInFile( "%s.covered" % filename_base, profiler_info )
+        except Exception, e:
+            #print "Error loading coverage file %s: %s" % ( "%s.covered" % filename_base, e )
+            self._coverage = []
+        try: 
+            self._total_coverage = int( open( "%s.total_coverage" % filename_base ).read() )
+        except Exception, e:
+            #print "Error loading total coverage file %s: %s" % ( "%s.total_coverage" % filename_base, e )
+            self._total_coverage = 0
+    def get_start_index( self, start ):
+        #binary search: returns index of range closest to start
+        if start > self._coverage[-1][1]:
+            return len( self._coverage ) - 1
+        i = 0
+        j = len( self._coverage) - 1
+        while i < j:
+            k = ( i + j ) / 2
+            if start <= self._coverage[k][1]:
+                j = k
+            else:
+                i = k + 1
+        return i
+    def get_coverage( self, start, end ):
+        return self.get_coverage_regions_overlap( start, end )[0]
+    def get_coverage_regions_overlap( self, start, end ):
+        return self.get_coverage_regions_index_overlap( start, end )[0:2]
+    def get_coverage_regions_index_overlap( self, start, end ):
+        if len( self._coverage ) < 1 or start > self._coverage[-1][1] or end < self._coverage[0][0]:
+            return 0, 0, 0
+        if self._total_coverage and start <= self._coverage[0][0] and end >= self._coverage[-1][1]:
+            return self._total_coverage, len( self._coverage ), 0
+        coverage = 0
+        region_count = 0
+        start_index = self.get_start_index( start )
+        for i in xrange( start_index, len( self._coverage ) ):
+            c_start, c_end = self._coverage[i]
+            if c_start > end:
+                break
+            if c_start <= end and c_end >= start:
+                coverage += min( end, c_end ) - max( start, c_start )
+                region_count += 1
+        return coverage, region_count, start_index
+
+class CachedCoverageReader:
+    def __init__( self, base_file_path, buffer = 10, table_names = None, profiler_info = None ):
+        self._base_file_path = base_file_path
+        self._buffer = buffer #number of chromosomes to keep in memory at a time
+        self._coverage = {}
+        if table_names is None: table_names = [ table_dir for table_dir in os.listdir( self._base_file_path ) if os.path.isdir( os.path.join( self._base_file_path, table_dir ) ) ]
+        for tablename in table_names: self._coverage[tablename] = {}
+        if profiler_info is None: profiler_info = {}
+        self._profiler_info = profiler_info
+    def iter_table_coverage_by_region( self, chrom, start, end ):
+        for tablename, coverage, regions in self.iter_table_coverage_regions_by_region( chrom, start, end ):
+            yield tablename, coverage
+    def iter_table_coverage_regions_by_region( self, chrom, start, end ):
+        for tablename, coverage, regions, index in self.iter_table_coverage_regions_index_by_region( chrom, start, end ):
+            yield tablename, coverage, regions
+    def iter_table_coverage_regions_index_by_region( self, chrom, start, end ):
+        for tablename, chromosomes in self._coverage.iteritems():
+            if chrom not in chromosomes:
+                if len( chromosomes ) >= self._buffer:
+                    #randomly remove one chromosome from this table
+                    del chromosomes[ chromosomes.keys().pop( random.randint( 0, self._buffer - 1 ) ) ]
+                chromosomes[chrom] = RegionCoverage( os.path.join ( self._base_file_path, tablename, chrom ), self._profiler_info )
+            coverage, regions, index = chromosomes[chrom].get_coverage_regions_index_overlap( start, end )
+            yield tablename, coverage, regions, index
+
+class TableCoverageSummary:
+    def __init__( self, coverage_reader, chrom_lengths ):
+        self.coverage_reader = coverage_reader
+        self.chrom_lengths = chrom_lengths
+        self.chromosome_coverage = {} #dict of bitset by chromosome holding user's collapsed input intervals
+        self.total_interval_size = 0 #total size of user's input intervals
+        self.total_interval_count = 0 #total number of user's input intervals
+        self.table_coverage = {} #dict of total coverage by user's input intervals by table
+        self.table_chromosome_size = {} #dict of dict of table:chrom containing total coverage of table for a chrom
+        self.table_chromosome_count = {} #dict of dict of table:chrom containing total number of coverage ranges of table for a chrom
+        self.table_regions_overlaped_count = {} #total number of table regions overlaping user's input intervals (non unique)
+        self.interval_table_overlap_count = {} #total number of user input intervals which overlap table
+        self.region_size_errors = {} #dictionary of lists of invalid ranges by chromosome
+    def add_region( self, chrom, start, end ):
+        chrom_length = self.chrom_lengths.get( chrom )
+        region_start = min( start, chrom_length )
+        region_end = min( end, chrom_length )
+        region_length = region_end - region_start
+        
+        if region_length < 1 or region_start != start or region_end != end:
+            if chrom not in self.region_size_errors:
+                self.region_size_errors[chrom] = []
+            self.region_size_errors[chrom].append( ( start, end ) )
+            if region_length < 1: return
+        
+        self.total_interval_size += region_length
+        self.total_interval_count += 1
+        if chrom not in self.chromosome_coverage:
+            self.chromosome_coverage[chrom] = bx.bitset.BitSet( chrom_length )
+        
+        self.chromosome_coverage[chrom].set_range( region_start, region_length )
+        for table_name, coverage, regions in self.coverage_reader.iter_table_coverage_regions_by_region( chrom, region_start, region_end ):
+            if table_name not in self.table_coverage:
+                self.table_coverage[table_name] = 0
+                self.table_chromosome_size[table_name] = {}
+                self.table_regions_overlaped_count[table_name] = 0
+                self.interval_table_overlap_count[table_name] = 0
+                self.table_chromosome_count[table_name] = {}
+            if chrom not in self.table_chromosome_size[table_name]:
+                self.table_chromosome_size[table_name][chrom] = self.coverage_reader._coverage[table_name][chrom]._total_coverage
+                self.table_chromosome_count[table_name][chrom] = len( self.coverage_reader._coverage[table_name][chrom]._coverage )
+            self.table_coverage[table_name] += coverage
+            if coverage:
+                self.interval_table_overlap_count[table_name] += 1
+            self.table_regions_overlaped_count[table_name] += regions
+    def iter_table_coverage( self ):
+        def get_nr_coverage():
+            #returns non-redundant coverage, where user's input intervals have been collapse to resolve overlaps
+            table_coverage = {} #dictionary of tables containing number of table bases overlaped by nr intervals
+            interval_table_overlap_count = {} #dictionary of tables containing number of nr intervals overlaping table
+            table_regions_overlap_count = {} #dictionary of tables containing number of regions overlaped (unique)
+            interval_count = 0 #total number of nr intervals
+            interval_size = 0 #holds total size of nr intervals
+            region_start_end = {} #holds absolute start,end for each user input chromosome
+            for chrom, chromosome_bitset in self.chromosome_coverage.iteritems():
+                #loop through user's collapsed input intervals
+                end = 0
+                last_end_index = {}
+                interval_size += chromosome_bitset.count_range()
+                while True:
+                    if end >= chromosome_bitset.size: break
+                    start = chromosome_bitset.next_set( end )
+                    if start >= chromosome_bitset.size: break
+                    end = chromosome_bitset.next_clear( start )
+                    interval_count += 1
+                    if chrom not in region_start_end:
+                        region_start_end[chrom] = [start, end]
+                    else:
+                        region_start_end[chrom][1] = end
+                    for table_name, coverage, region_count, start_index in self.coverage_reader.iter_table_coverage_regions_index_by_region( chrom, start, end ):
+                        if table_name not in table_coverage:
+                            table_coverage[table_name] = 0
+                            interval_table_overlap_count[table_name] = 0
+                            table_regions_overlap_count[table_name] = 0
+                        table_coverage[table_name] += coverage
+                        if coverage:
+                            interval_table_overlap_count[table_name] += 1
+                            table_regions_overlap_count[table_name] += region_count
+                            if table_name in last_end_index and last_end_index[table_name] == start_index:
+                                table_regions_overlap_count[table_name] -= 1
+                            last_end_index[table_name] = start_index + region_count - 1
+            table_region_coverage = {} #total coverage for tables by bounding nr interval region
+            table_region_count = {} #total number for tables by bounding nr interval region
+            for chrom, start_end in region_start_end.items():
+                for table_name, coverage, region_count in self.coverage_reader.iter_table_coverage_regions_by_region( chrom, start_end[0], start_end[1] ):
+                    if table_name not in table_region_coverage:
+                        table_region_coverage[table_name] = 0
+                        table_region_count[table_name] = 0
+                    table_region_coverage[table_name] += coverage
+                    table_region_count[table_name] += region_count
+            return table_region_coverage, table_region_count, interval_count, interval_size, table_coverage, table_regions_overlap_count, interval_table_overlap_count
+        table_region_coverage, table_region_count, nr_interval_count, nr_interval_size, nr_table_coverage, nr_table_regions_overlap_count, nr_interval_table_overlap_count = get_nr_coverage()
+        for table_name in self.table_coverage:
+            #TODO: determine a type of statistic, then calculate and report here
+            yield table_name, sum( self.table_chromosome_size.get( table_name, {} ).values() ), sum( self.table_chromosome_count.get( table_name, {} ).values() ), table_region_coverage.get( table_name, 0 ), table_region_count.get( table_name, 0 ), self.total_interval_count, self.total_interval_size,  self.table_coverage[table_name], self.table_regions_overlaped_count.get( table_name, 0), self.interval_table_overlap_count.get( table_name, 0 ), nr_interval_count, nr_interval_size, nr_table_coverage[table_name], nr_table_regions_overlap_count.get( table_name, 0 ), nr_interval_table_overlap_count.get( table_name, 0 )
+
+def profile_per_interval( interval_filename, chrom_col, start_col, end_col, out_filename, keep_empty, coverage_reader ):
+    out = open( out_filename, 'wb' )
+    for region in bx.intervals.io.NiceReaderWrapper( open( interval_filename, 'rb' ), chrom_col = chrom_col, start_col = start_col, end_col = end_col, fix_strand = True, return_header = False, return_comments = False ):
+        for table_name, coverage, region_count in coverage_reader.iter_table_coverage_regions_by_region( region.chrom, region.start, region.end ):
+            if keep_empty or coverage:
+                #only output regions that have atleast 1 base covered unless empty are requested
+                out.write( "%s\t%s\t%s\t%s\n" % ( "\t".join( region.fields ), table_name, coverage, region_count ) )
+    out.close()
+
+def profile_summary( interval_filename, chrom_col, start_col, end_col, out_filename, keep_empty, coverage_reader, chrom_lengths ):
+    out = open( out_filename, 'wb' )
+    table_coverage_summary = TableCoverageSummary( coverage_reader, chrom_lengths )
+    for region in bx.intervals.io.NiceReaderWrapper( open( interval_filename, 'rb' ), chrom_col = chrom_col, start_col = start_col, end_col = end_col, fix_strand = True, return_header = False, return_comments = False ):
+        table_coverage_summary.add_region( region.chrom, region.start, region.end )
+    
+    out.write( "#tableName\ttableChromosomeCoverage\ttableChromosomeCount\ttableRegionCoverage\ttableRegionCount\tallIntervalCount\tallIntervalSize\tallCoverage\tallTableRegionsOverlaped\tallIntervalsOverlapingTable\tnrIntervalCount\tnrIntervalSize\tnrCoverage\tnrTableRegionsOverlaped\tnrIntervalsOverlapingTable\n" )
+    for table_name, table_chromosome_size, table_chromosome_count, table_region_coverage, table_region_count, total_interval_count, total_interval_size, total_coverage, table_regions_overlaped_count, interval_region_overlap_count, nr_interval_count, nr_interval_size, nr_coverage, nr_table_regions_overlaped_count, nr_interval_table_overlap_count in table_coverage_summary.iter_table_coverage():
+        if keep_empty or total_coverage:
+            #only output tables that have atleast 1 base covered unless empty are requested
+            out.write( "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ( table_name, table_chromosome_size, table_chromosome_count, table_region_coverage, table_region_count, total_interval_count, total_interval_size, total_coverage, table_regions_overlaped_count, interval_region_overlap_count, nr_interval_count, nr_interval_size, nr_coverage, nr_table_regions_overlaped_count, nr_interval_table_overlap_count ) )
+    out.close()
+    
+    #report chrom size errors as needed:
+    if table_coverage_summary.region_size_errors:
+        print "Regions provided extended beyond known chromosome lengths, and have been truncated as necessary, for the following intervals:"
+        for chrom, regions in table_coverage_summary.region_size_errors.items():
+            if len( regions ) > 3:
+                extra_region_info = ", ... "
+            else:
+                extra_region_info = ""
+            print "%s has max length of %s, exceeded by %s%s." % ( chrom, chrom_lengths.get( chrom ), ", ".join( map( str, regions[:3] ) ), extra_region_info )
+
+class ChromosomeLengths:
+    def __init__( self, profiler_info ):
+        self.chroms = {}
+        self.default_bitset_size = int( profiler_info.get( 'bitset_size', bx.bitset.MAX ) )
+        chroms = profiler_info.get( 'chromosomes', None )
+        if chroms:
+            for chrom in chroms.split( ',' ):
+                for fields in chrom.rsplit( '=', 1 ):
+                    if len( fields ) == 2:
+                        self.chroms[ fields[0] ] = int( fields[1] )
+                    else:
+                        self.chroms[ fields[0] ] = self.default_bitset_size
+    def get( self, name ):
+        return self.chroms.get( name, self.default_bitset_size )
+
+def parse_profiler_info( filename ):
+    profiler_info = {}
+    try:
+        for line in open( filename ):
+            fields = line.rstrip( '\n\r' ).split( '\t', 1 )
+            if len( fields ) == 2:
+                if fields[0] in profiler_info:
+                    if not isinstance( profiler_info[ fields[0] ], list ):
+                        profiler_info[ fields[0] ] = [ profiler_info[ fields[0] ] ]
+                    profiler_info[ fields[0] ].append( fields[1] )
+                else:
+                    profiler_info[ fields[0] ] = fields[1]
+    except:
+        pass #likely missing file
+    return profiler_info
+
+def __main__():
+    parser = optparse.OptionParser()
+    parser.add_option(
+        '-k','--keep_empty',
+        action="store_true",
+        dest='keep_empty',
+        default=False,
+        help='Keep tables with 0 coverage'
+    )
+    parser.add_option(
+        '-b','--buffer',
+        dest='buffer',
+        type='int',default=10,
+        help='Number of Chromosomes to keep buffered'
+    )
+    parser.add_option(
+        '-c','--chrom_col',
+        dest='chrom_col',
+        type='int',default=1,
+        help='Chromosome column'
+    )
+    parser.add_option(
+        '-s','--start_col',
+        dest='start_col',
+        type='int',default=2,
+        help='Start Column'
+    )
+    parser.add_option(
+        '-e','--end_col',
+        dest='end_col',
+        type='int',default=3,
+        help='End Column'
+    )
+    parser.add_option(
+        '-p','--path',
+        dest='path',
+        type='str',default='/galaxy/data/annotation_profiler/hg18',
+        help='Path to profiled data for this organism'
+    )
+    parser.add_option(
+        '-t','--table_names',
+        dest='table_names',
+        type='str',default='None',
+        help='Table names requested'
+    )
+    parser.add_option(
+        '-i','--input',
+        dest='interval_filename',
+        type='str',
+        help='Input Interval File'
+    )
+    parser.add_option(
+        '-o','--output',
+        dest='out_filename',
+        type='str',
+        help='Input Interval File'
+    )
+    parser.add_option(
+        '-S','--summary',
+        action="store_true",
+        dest='summary',
+        default=False,
+        help='Display Summary Results'
+    )
+    
+    options, args = parser.parse_args()
+    
+    assert os.path.isdir( options.path ), IOError( "Configuration error: Table directory is missing (%s)" % options.path )
+    
+    #get profiler_info
+    profiler_info = parse_profiler_info( os.path.join( options.path, 'profiler_info.txt' ) )
+    
+    table_names = options.table_names.split( "," )
+    if table_names == ['None']: table_names = None
+    coverage_reader = CachedCoverageReader( options.path, buffer = options.buffer, table_names = table_names, profiler_info = profiler_info )
+    
+    if options.summary:
+        profile_summary( options.interval_filename, options.chrom_col - 1, options.start_col - 1, options.end_col -1, options.out_filename, options.keep_empty, coverage_reader, ChromosomeLengths( profiler_info ) )
+    else:
+        profile_per_interval( options.interval_filename, options.chrom_col - 1, options.start_col - 1, options.end_col -1, options.out_filename, options.keep_empty, coverage_reader )
+    
+    #print out data version info
+    print 'Data version (%s:%s:%s)' % ( profiler_info.get( 'dbkey', 'unknown' ), profiler_info.get( 'profiler_hash', 'unknown' ), profiler_info.get( 'dump_time', 'unknown' ) )
+
+if __name__ == "__main__": __main__()
Binary file tools/bedtools/._bedToBam.xml has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/bedtools/bedToBam.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,29 @@
+<tool id="bedToBam" name="bedToBam">
+  <description>convert BED or GFF or VCF to BAM</description>
+  <command>bedToBam -i $input -g $genome $bed12 $mapq $ubam > $outfile </command>
+  <inputs>
+    <param name="input" format="bed,gff,vcf" type="data" label="Input file (BED,GFF,VCF)" help="BED files must be at least BED4 to be amenable to BAM (needs name field)"/>
+    <param name="genome" type="select" label="Select genome">
+     <option value="/Users/xuebing/tools/BEDTools-Version-2.13.3/genomes/mouse.mm9.genome" selected="true">mm9</option>
+     <option value="/Users/xuebing/tools/BEDTools-Version-2.13.3/genomes/mouse.mm8.genome">mm8</option>
+     <option value="/Users/xuebing/tools/BEDTools-Version-2.13.3/genomes/mouse.hg18.genome">hg18</option>
+     <option value="/Users/xuebing/tools/BEDTools-Version-2.13.3/genomes/mouse.hg19.genome">hg19</option>
+    </param>
+    <param name="mapq" size="10" type="integer" value="255" label="Set the mapping quality for the BAM records"/>
+    <param name="bed12" label="The BED file is in BED12 format" help="The BAM CIGAR string will reflect BED blocks" type="boolean" truevalue="-bed12" falsevalue="" checked="False"/>
+    <param name="ubam" label="Write uncompressed BAM output" help="Default is to write compressed BAM" type="boolean" truevalue="-ubam" falsevalue="" checked="False"/>
+  </inputs>
+  <outputs>
+    <data format="bam" name="outfile" />
+  </outputs>
+  <help>
+
+**What it does**
+
+Program: bedToBam (v2.13.3)
+Author:  Aaron Quinlan (aaronquinlan@gmail.com)
+Summary: Converts feature records to BAM format.
+
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_destination/epigraph.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,41 @@
+<?xml version="1.0"?>
+<tool name="Perform genome analysis" id="epigraph_export">
+    <description> and prediction with EpiGRAPH</description>
+    <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params>
+    <inputs>
+        <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH">
+            <validator type="unspecified_build" />
+        </param>
+        <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/DataImport.jsp" />
+        <param name="DATA_URL" type="baseurl" value="/datasets" />
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" />
+    </inputs>
+    <outputs/>
+    <help>
+
+.. class:: warningmark
+
+After clicking the **Execute** button, you will be redirected to the EpiGRAPH website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance.
+
+-----
+
+.. class:: infomark
+
+**What it does**
+
+This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods.
+
+-----
+
+.. class:: infomark
+
+**EpiGRAPH outline**
+
+The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties.
+
+.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/
+
+    </help>
+</tool>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_destination/epigraph_test.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+<tool name="Perform genome analysis" id="epigraph_test_export">
+    <description> and prediction with EpiGRAPH Test</description>
+    <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params>
+    <inputs>
+        <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH">
+            <validator type="unspecified_build" />
+        </param>
+        <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/DataImport.jsp" />
+        <param name="DATA_URL" type="baseurl" value="/datasets" />
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" />
+    </inputs>
+    <outputs/>
+    <help>
+
+.. class:: warningmark
+
+After clicking the **Execute** button, you will be redirected to the EpiGRAPH test website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance.
+
+-----
+
+.. class:: infomark
+
+**What it does**
+
+This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods.
+
+-----
+
+.. class:: infomark
+
+**EpiGRAPH outline**
+
+The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties.
+
+.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/
+
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/access_libraries.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tool name="Access Libraries" id="library_access1">
+    <description>stored locally</description>
+    <inputs action="/library/index" method="get" target="_parent">
+        <param name="default_action" type="hidden" value="import_to_histories" />
+    </inputs>
+    <uihints minwidth="800"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/bed_convert.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,14 @@
+<tool id="BED File Converter1" name="BED File Converter">
+  <description>creates a bed or xbed file from a text query</description>
+  <command>noop</command>
+  <inputs>
+    <display>creates a bed or xbed file containing user assigned input of $input</display>
+    <param format="tabular" name="input" type="data" />
+    <param name="chrom" size="4" type="text" value="all" />
+  </inputs>
+  <outputs>
+    <data format="bed" name="out_file1" />
+  </outputs>
+  <help>User specifies delimiter, header information, and column assignments and the file will be converted to BED or xBED.
+</help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/biomart.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,42 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+
+    TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile
+    everything including and beyond the first '&' is truncated from URL.  They said they'll let us know when this is fixed at their end.
+-->
+<tool name="BioMart" id="biomart" tool_type="data_source" version="1.0.1">
+	<description>Central server</description>
+	<command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+	<inputs action="http://www.biomart.org/biomart/martview" check_values="false" method="get" target="_top">
+		<display>go to BioMart Central $GALAXY_URL</display>
+		<param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL" remote_name="URL" missing="">
+            <append_param separator="&amp;" first_separator="?" join="=">
+                <value name="_export" missing="1" />
+                <value name="GALAXY_URL" missing="0" />
+            </append_param>
+        </request_param>
+        <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" >
+            <value_translation>
+                <value galaxy_value="tabular" remote_value="TSV" />
+            </value_translation>
+        </request_param>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
+        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" />
+        <request_param galaxy_name="organism" remote_name="organism" missing="" />
+        <request_param galaxy_name="table" remote_name="table" missing="" />
+        <request_param galaxy_name="description" remote_name="description" missing="" />
+        <request_param galaxy_name="name" remote_name="name" missing="Biomart query" />
+        <request_param galaxy_name="info" remote_name="info" missing="" />
+    </request_param_translation>
+	<uihints minwidth="800"/>
+	<outputs>
+		<data name="output" format="tabular" />
+	</outputs>
+	<options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/biomart_test.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,42 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+
+    TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile
+    everything including and beyond the first '&' is truncated from URL.  They said they'll let us know when this is fixed at their end.
+-->
+<tool name="BioMart" id="biomart_test" tool_type="data_source" version="1.0.1">
+	<description>Test server</description>
+	<command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+	<inputs action="http://test.biomart.org/biomart/martview" check_values="false" method="get" target="_top">
+		<display>go to BioMart Central $GALAXY_URL</display>
+		<param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" />
+	</inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL" remote_name="URL" missing="">
+            <append_param separator="&amp;" first_separator="?" join="=">
+                <value name="_export" missing="1" />
+                <value name="GALAXY_URL" missing="0" />
+            </append_param>
+        </request_param>
+        <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" >
+            <value_translation>
+                <value galaxy_value="tabular" remote_value="TSV" />
+            </value_translation>
+        </request_param>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
+        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" />
+        <request_param galaxy_name="organism" remote_name="organism" missing="" />
+        <request_param galaxy_name="table" remote_name="table" missing="" />
+        <request_param galaxy_name="description" remote_name="description" missing="" />
+        <request_param galaxy_name="name" remote_name="name" missing="Biomart test query" />
+        <request_param galaxy_name="info" remote_name="info" missing="" />
+    </request_param_translation>
+	<uihints minwidth="800"/>		
+	<outputs>
+		<data name="output" format="tabular" />
+	</outputs>
+	<options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/bx_browser.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,41 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+-->
+<tool name="BX main" id="bx_browser" tool_type="data_source">
+    <description>browser</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://main.genome-browser.bx.psu.edu/cgi-bin/hgTables" check_values="false" method="get">
+        <display>go to BX Browser $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
+        <param name="tool_id" type="hidden" value="bx_browser" />
+        <param name="sendToGalaxy" type="hidden" value="1" />
+        <param name="hgta_compressType" type="hidden" value="none" />
+        <param name="hgta_outputType" type="hidden" value="bed" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
+        <request_param galaxy_name="organism" remote_name="org" missing="unknown species" />
+        <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" />
+        <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" />
+        <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="tabular" >
+            <value_translation>
+                <value galaxy_value="tabular" remote_value="primaryTable" />
+                <value galaxy_value="tabular" remote_value="selectedFields" />
+                <value galaxy_value="wig" remote_value="wigData" />
+                <value galaxy_value="interval" remote_value="tab" />
+                <value galaxy_value="html" remote_value="hyperlinks" />
+                <value galaxy_value="fasta" remote_value="sequence" />
+            </value_translation>
+        </request_param>
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="tabular" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/cbi_rice_mart.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,39 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+-->
+<tool name="CBI Rice Mart" id="cbi_rice_mart" tool_type="data_source" version="1.0.1">
+    <description>rice mart</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://ricemart.cbi.edu.cn/biomart/martview/" check_values="false" method="get" target="_top">
+        <display>go to RMap rice mart $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL" remote_name="URL" missing="">
+            <append_param separator="&amp;" first_separator="?" join="=">
+                <value name="_export" missing="1" />
+                <value name="GALAXY_URL" missing="0" />
+            </append_param>
+        </request_param>
+        <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" >
+            <value_translation>
+                <value galaxy_value="tabular" remote_value="TSV" />
+            </value_translation>
+        </request_param>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
+        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" />
+        <request_param galaxy_name="organism" remote_name="organism" missing="" />
+        <request_param galaxy_name="table" remote_name="table" missing="" />
+        <request_param galaxy_name="description" remote_name="description" missing="" />
+        <request_param galaxy_name="name" remote_name="name" missing="Rice mart query" />
+        <request_param galaxy_name="info" remote_name="info" missing="" />
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="tabular" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/data_source.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+# Retrieves data from external data source applications and stores in a dataset file.
+# Data source application parameters are temporarily stored in the dataset file.
+import socket, urllib, sys, os
+from galaxy import eggs #eggs needs to be imported so that galaxy.util can find docutils egg...
+from galaxy.util.json import from_json_string, to_json_string
+import galaxy.model # need to import model before sniff to resolve a circular import dependency
+from galaxy.datatypes import sniff
+from galaxy.datatypes.registry import Registry
+from galaxy.jobs import TOOL_PROVIDED_JOB_METADATA_FILE
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def stop_err( msg ):
+    sys.stderr.write( msg )
+    sys.exit()
+
+GALAXY_PARAM_PREFIX = 'GALAXY'
+GALAXY_ROOT_DIR = os.path.realpath( os.path.join( os.path.split( os.path.realpath( __file__ ) )[0], '..', '..' ) )
+GALAXY_DATATYPES_CONF_FILE = os.path.join( GALAXY_ROOT_DIR, 'datatypes_conf.xml' )
+
+def load_input_parameters( filename, erase_file = True ):
+    datasource_params = {}
+    try:
+        json_params = from_json_string( open( filename, 'r' ).read() )
+        datasource_params = json_params.get( 'param_dict' )
+    except:
+        json_params = None
+        for line in open( filename, 'r' ):
+            try:
+                line = line.strip()
+                fields = line.split( '\t' )
+                datasource_params[ fields[0] ] = fields[1]
+            except:
+                continue
+    if erase_file:
+        open( filename, 'w' ).close() #open file for writing, then close, removes params from file
+    return json_params, datasource_params
+
+def __main__():
+    filename = sys.argv[1]
+    try:
+        max_file_size = int( sys.argv[2] )
+    except:
+        max_file_size = 0
+    
+    job_params, params = load_input_parameters( filename )
+    if job_params is None: #using an older tabular file
+        enhanced_handling = False
+        job_params = dict( param_dict = params )
+        job_params[ 'output_data' ] =  [ dict( out_data_name = 'output',
+                                               ext = 'data',
+                                               file_name = filename,
+                                               extra_files_path = None ) ]
+        job_params[ 'job_config' ] = dict( GALAXY_ROOT_DIR=GALAXY_ROOT_DIR, GALAXY_DATATYPES_CONF_FILE=GALAXY_DATATYPES_CONF_FILE, TOOL_PROVIDED_JOB_METADATA_FILE = TOOL_PROVIDED_JOB_METADATA_FILE )
+    else:
+        enhanced_handling = True
+        json_file = open( job_params[ 'job_config' ][ 'TOOL_PROVIDED_JOB_METADATA_FILE' ], 'w' ) #specially named file for output junk to pass onto set metadata
+    
+    datatypes_registry = Registry( root_dir = job_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = job_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )
+    
+    URL = params.get( 'URL', None ) #using exactly URL indicates that only one dataset is being downloaded
+    URL_method = params.get( 'URL_method', None )
+    
+    # The Python support for fetching resources from the web is layered. urllib uses the httplib
+    # library, which in turn uses the socket library.  As of Python 2.3 you can specify how long
+    # a socket should wait for a response before timing out. By default the socket module has no
+    # timeout and can hang. Currently, the socket timeout is not exposed at the httplib or urllib2
+    # levels. However, you can set the default timeout ( in seconds ) globally for all sockets by
+    # doing the following.
+    socket.setdefaulttimeout( 600 )
+    
+    for data_dict in job_params[ 'output_data' ]:
+        cur_filename =  data_dict.get( 'file_name', filename )
+        cur_URL =  params.get( '%s|%s|URL' % ( GALAXY_PARAM_PREFIX, data_dict[ 'out_data_name' ] ), URL )
+        if not cur_URL:
+            open( cur_filename, 'w' ).write( "" )
+            stop_err( 'The remote data source application has not sent back a URL parameter in the request.' )
+        
+        # The following calls to urllib.urlopen() will use the above default timeout
+        try:
+            if not URL_method or URL_method == 'get':
+                page = urllib.urlopen( cur_URL )
+            elif URL_method == 'post':
+                page = urllib.urlopen( cur_URL, urllib.urlencode( params ) )
+        except Exception, e:
+            stop_err( 'The remote data source application may be off line, please try again later. Error: %s' % str( e ) )
+        if max_file_size:
+            file_size = int( page.info().get( 'Content-Length', 0 ) )
+            if file_size > max_file_size:
+                stop_err( 'The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.' % ( file_size, max_file_size ) )
+        #do sniff stream for multi_byte
+        try:
+            cur_filename, is_multi_byte = sniff.stream_to_open_named_file( page, os.open( cur_filename, os.O_WRONLY | os.O_CREAT ), cur_filename )
+        except Exception, e:
+            stop_err( 'Unable to fetch %s:\n%s' % ( cur_URL, e ) )
+        
+        #here import checks that upload tool performs
+        if enhanced_handling:
+            try:
+                ext = sniff.handle_uploaded_dataset_file( filename, datatypes_registry, ext = data_dict[ 'ext' ], is_multi_byte = is_multi_byte )
+            except Exception, e:
+                stop_err( str( e ) )
+            info = dict( type = 'dataset',
+                         dataset_id = data_dict[ 'dataset_id' ],
+                         ext = ext)
+            
+            json_file.write( "%s\n" % to_json_string( info ) )
+    
+if __name__ == "__main__": __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/echo.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,14 @@
+#!/usr/bin/env python
+
+"""
+Script that just echoes the command line.
+"""
+
+import sys
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+print '-' * 20, "<br>"
+for elem in sys.argv:
+    print elem, "<br>"
+print '-' * 20, "<br>"
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/echo.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+
+<tool name="Echo" id="echo1">
+
+	<description>
+		echoes parameters  
+	</description>
+	
+	<command interpreter="python">echo.py $input $database $output </command>
+
+	<inputs>
+		<param format="tabular" name="input" type="data" label="Input stuff"/>
+        <param type="select" name="database" label="Database">
+            <option value="alignseq.loc">Human (hg18)</option>
+            <option value="faseq.loc">Fly (dm3)</option>
+        </param>
+	</inputs>
+
+	<outputs>
+		<data format="input" name="output" label="Blat on ${database.value_label}" />
+	</outputs>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/encode_db.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+
+<tool name="EncodeDB" id="encode_db1">
+
+	<description>
+		at NHGRI 
+	</description>
+
+	<command interpreter="python">
+		fetch.py "$url" $output
+	</command>
+
+	<inputs action="http://research.nhgri.nih.gov/projects/ENCODEdb/cgi-bin/power_query.cgi" target="_top"> 
+<!--	<inputs action="http://localhost:9000/prepared"> -->
+		<display>go to EncodeDB $GALAXY_URL</display>
+		<param name="GALAXY_URL" type="baseurl" value="/async/encode_db1" />
+	</inputs>
+	
+  <uihints minwidth="800"/>
+  
+  <outputs>
+    <data format="bed" name="output" />
+  </outputs>
+	
+	<options sanitize="False" refresh="True"/>
+
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/epigraph_import.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,30 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+-->
+<tool name="EpiGRAPH" id="epigraph_import" tool_type="data_source">
+    <description> server</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/Login.jsp" check_values="false" method="get">
+        <display>go to EpiGRAPH server $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" />
+        <request_param galaxy_name="organism" remote_name="organism" missing="" />
+        <request_param galaxy_name="table" remote_name="table" missing="" />
+        <request_param galaxy_name="description" remote_name="description" missing="" />
+        <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" />
+        <request_param galaxy_name="info" remote_name="INFO" missing="" />
+        <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" />
+    </request_param_translation>
+    <uihints minwidth="800"/>  
+    <outputs>
+        <data name="output" format="txt" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/epigraph_import_test.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,30 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+-->
+<tool name="EpiGRAPH" id="epigraph_import_test" tool_type="data_source">
+    <description> test server</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get">
+        <display>go to EpiGRAPH server $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import_test" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" />
+        <request_param galaxy_name="organism" remote_name="organism" missing="" />
+        <request_param galaxy_name="table" remote_name="table" missing="" />
+        <request_param galaxy_name="description" remote_name="description" missing="" />
+        <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" />
+        <request_param galaxy_name="info" remote_name="INFO" missing="" />
+        <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" />
+    </request_param_translation>
+    <uihints minwidth="800"/>  
+    <outputs>
+        <data name="output" format="txt" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/eupathdb.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,13 @@
+<tool name="EuPathDB" id="eupathdb" tool_type="data_source" url_method="post">
+    <description>server</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://eupathdb.org/eupathdb/queries_tools.jsp" check_values="false" method="get"> 
+        <display>go to EuPathDB server $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=eupathdb" />
+    </inputs>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="tabular" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/fetch.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+
+"""
+Script that fetches data from a URL and writes it to an output file.
+"""
+
+import sys, os, urllib
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+BUFFER = 1048576
+
+url      = sys.argv[1]
+out_name = sys.argv[2]
+
+out = open(out_name, 'wt')
+try:
+    page = urllib.urlopen(url)
+    while 1:
+        data = page.read(BUFFER)
+        if not data:
+            break
+        out.write(data)
+except Exception, e:
+    print 'Error getting the data -> %s' % e
+out.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/fly_modencode.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<tool name="modENCODE fly" id="modENCODEfly" tool_type="data_source">
+    <description>server</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://modencode.oicr.on.ca/fgb2/gbrowse/fly" check_values="false" target="_top"> 
+        <display>go to modENCODE fly server $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=modENCODEfly" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="dm2" >
+            <value_translation>
+                <value galaxy_value="dm2" remote_value="fly" />
+            </value_translation>
+        </request_param>
+        <request_param galaxy_name="URL" remote_name="URL" missing="">
+            <append_param separator="&amp;" first_separator="?" join="=">
+                <value name="d" missing="" />
+                <value name="dbkey" missing="dm2" />
+                <value name="q" missing="" />
+                <value name="s" missing="" />
+                <value name="t" missing="" />
+            </append_param>
+        </request_param>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
+        <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" />
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="txt" label="${tool.name} on $getVar( 'q', 'unknown position' )"/>
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/flymine.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,35 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+-->
+<tool name="Flymine" id="flymine" tool_type="data_source">
+    <description>server</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://www.flymine.org" check_values="false" method="get"> 
+        <display>go to Flymine server $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> <!-- "?" denotes an unspecified genome build -->
+        <request_param galaxy_name="organism" remote_name="organism" missing="" />
+        <request_param galaxy_name="table" remote_name="table" missing="" />
+        <request_param galaxy_name="description" remote_name="description" missing="" />
+        <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" />
+        <request_param galaxy_name="info" remote_name="info" missing="" />
+        <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" >
+            <value_translation>
+                <value galaxy_value="auto" remote_value="txt" /> <!-- intermine currently always provides 'txt', make this auto detect -->
+            </value_translation>
+        </request_param>
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="txt" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/flymine_test.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,31 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+-->
+<tool name="Flymine test" id="flymine_test" tool_type="data_source">
+    <description>server</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://preview.flymine.org/preview/begin.do" check_values="false" method="get"> 
+        <display>go to Flymine server $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> <!-- NOTE(review): posts back with tool_id=flymine, not flymine_test; confirm this is intentional -->
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
+        <request_param galaxy_name="organism" remote_name="organism" missing="" />
+        <request_param galaxy_name="table" remote_name="table" missing="" />
+        <request_param galaxy_name="description" remote_name="description" missing="" />
+        <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" />
+        <request_param galaxy_name="info" remote_name="info" missing="" />
+        <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" />
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="txt" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/genbank.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+from Bio import GenBank
+import sys, os, textwrap
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def make_fasta(rec):
+    '''Return (head, body) strings: a FASTA-style header and an 80-column wrapped sequence built from a GenBank record.'''
+    gi   = rec.annotations.get('gi','') # annotation fields may be absent; default to ''
+    org  = rec.annotations.get('organism','')
+    date = rec.annotations.get('date','')
+    head = '>gi:%s, id:%s, org:%s, date:%s\n' % (gi, rec.id, org, date)
+    body = '\n'.join(textwrap.wrap(rec.seq.data, width=80)) # wrap sequence at 80 chars per line
+    return head, body
+    
+if __name__ == '__main__':
+    
+    mode  = sys.argv[1] # 'nucleotide' or 'protein' (see genbank.xml)
+    text  = sys.argv[2] # GI number(s), accession, or free-text query
+    output_file = sys.argv[3]
+
+    print 'Searching for %s <br>' % text
+    
+    # check if inputs are all numbers
+    try:
+        gi_list = text.split()
+        tmp = map(int, gi_list) # raises ValueError if any token is not numeric
+    except ValueError:
+        gi_list = GenBank.search_for(text, max_ids=10) # fall back to free-text search, capped at 10 hits
+    
+    fp = open(output_file, 'wt')
+    record_parser = GenBank.FeatureParser()
+    ncbi_dict = GenBank.NCBIDictionary(mode, 'genbank', parser = record_parser) # dict-like GI -> parsed record fetched from NCBI
+    for gid in gi_list:
+        res = ncbi_dict[gid]
+        head, body =  make_fasta(res)
+        fp.write(head+body+'\n')
+        print head
+    fp.close()
+
+   
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/genbank.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,25 @@
+<tool id="genbank" name="Connect to Genbank">
+<!--  <description>queries genbank</description> -->
+  <command interpreter="python">genbank.py $mode "$text" $output</command> <!-- argv order: mode ('nucleotide'|'protein'), query text, output path -->
+  <inputs>
+    <param name="mode" type="select">
+      <option value="nucleotide">nucleotide database</option>
+      <option value="protein">proteins database</option>
+      <label>Get sequences from the</label>
+    </param>
+    <param name="text" size="40" type="text" value="6273291">
+      <label>with accession ID</label>
+    </param>   
+  </inputs>
+  <outputs>
+    <data format="fasta" name="output" />
+  </outputs>
+  <help>
+At the moment this tool allows the following simple searches:
+
+- by GI: **51594135**
+- by accession: **CF622840**
+- using text: **human hbb1** (this feature is experimental)
+  </help>
+
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/gramene_mart.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,42 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+
+    TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile
+    everything including and beyond the first '&' is truncated from URL.  They said they'll let us know when this is fixed at their end.
+-->
+<tool name="GrameneMart" id="gramenemart" tool_type="data_source" version="1.0.1">
+    <description> Central server</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://www.gramene.org/biomart/martview" check_values="false" method="get" target="_top">
+        <display>go to GrameneMart Central $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL" remote_name="URL" missing="">
+            <append_param separator="&amp;" first_separator="?" join="=">
+                <value name="_export" missing="1" /> <!-- presumably switches BioMart into export mode (see TODO above); confirm -->
+                <value name="GALAXY_URL" missing="0" />
+            </append_param>
+        </request_param>
+        <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular">
+            <value_translation>
+                <value galaxy_value="tabular" remote_value="TSV" />
+            </value_translation> 
+        </request_param>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
+        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" />
+        <request_param galaxy_name="organism" remote_name="organism" missing="" />
+        <request_param galaxy_name="table" remote_name="table" missing="" />
+        <request_param galaxy_name="description" remote_name="description" missing="" />
+        <request_param galaxy_name="name" remote_name="name" missing="Biomart query" />
+        <request_param galaxy_name="info" remote_name="info" missing="" />
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="tabular" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/hapmapmart.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,46 @@
+<?xml version="1.0"?>
+<!--
+    hacked from biomart.xml - testing hapmap biomart - problem is going to be converting these to lped/pbed
+    the data returned will be in all sorts of different shapes - and the sample ids need to be obtained separately
+    to create reliable pedigrees. eesh...
+
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+
+    TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile
+    everything including and beyond the first '&' is truncated from URL.  They said they'll let us know when this is fixed at their end.
+-->
+<tool name="HapMapMart" id="hapmapmart" tool_type="data_source" version="0.0.01">
+	<description>HapMap Biomart</description>
+	<command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+	<inputs action="http://hapmap.ncbi.nlm.nih.gov/biomart/martview" check_values="false" method="get" target="_top">
+		<display>go to HapMap BioMart $GALAXY_URL</display>
+		<param name="GALAXY_URL" type="baseurl" value="/tool_runner/hapmapmart" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL" remote_name="URL" missing="">
+            <append_param separator="&amp;" first_separator="?" join="=">
+                <value name="_export" missing="1" />
+                <value name="GALAXY_URL" missing="0" />
+            </append_param>
+        </request_param>
+        <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" >
+            <value_translation>
+                <value galaxy_value="tabular" remote_value="TSV" />
+            </value_translation>
+        </request_param>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
+        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="hg18" />
+        <request_param galaxy_name="organism" remote_name="organism" missing="human" />
+        <request_param galaxy_name="table" remote_name="table" missing="" />
+        <request_param galaxy_name="description" remote_name="description" missing="" />
+        <request_param galaxy_name="name" remote_name="name" missing="HapMap query" />
+        <request_param galaxy_name="info" remote_name="info" missing="" />
+    </request_param_translation>
+	<uihints minwidth="800"/>
+	<outputs>
+		<data name="output" format="tabular" />
+	</outputs>
+	<options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/hbvar.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<tool name="HbVar" id="hbvar">
+	
+	<description>Human Hemoglobin Variants and Thalassemias</description>
+	
+	<command/>
+	
+	<inputs action="http://globin.bx.psu.edu/cgi-bin/hbvar/query_vars3" check_values="false" method="get" target="_top">
+		<display>go to HbVar database $GALAXY_URL $tool_id</display>
+		<param name="GALAXY_URL" type="baseurl" value="/tool_runner/hbvar" />
+		<param name="tool_id" type="hidden" value = "hbvar"/>
+	</inputs>
+	
+	<uihints minwidth="800"/>
+	
+	<code file="hbvar_filter.py"/>
+	
+	<outputs>
+		<data name="output" format="txt" />
+	</outputs>
+	
+	<options sanitize="False" refresh="True"/>
+	
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/hbvar_filter.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,79 @@
+#TODO: Set dbkey to proper UCSC build, if known
+import urllib
+
+from galaxy import datatypes, config
+import tempfile, shutil
+
+def exec_before_job( app, inp_data, out_data, param_dict, tool=None):
+    """Pre-job hook: name the output dataset and choose its datatype from the request."""
+    data_name = param_dict.get( 'name', 'HbVar query' )
+    data_type = param_dict.get( 'type', 'txt' )
+    if data_type == 'txt': data_type='interval' #All data is TSV, assume interval
+    name, data = out_data.items()[0] # single output dataset
+    data = app.datatypes_registry.change_datatype(data, data_type)
+    data.name = data_name
+    out_data[name] = data
+
+def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
+    """Post-job hook: download the HbVar results and normalize the dataset.
+
+    Fetches the URL returned by the datasource (size-capped), then, when the
+    datatype is interval and metadata is complete, rewrites the file with
+    tab-joined fields; otherwise falls back to the tabular datatype.
+    """
+    URL = param_dict.get( 'URL', None )
+    # Validate before appending query args: concatenating onto None would raise
+    # a TypeError here and mask this clearer error message.
+    if not URL:
+        raise Exception('Datasource has not sent back a URL parameter')
+    URL = URL + '&_export=1&GALAXY_URL=0'
+
+    CHUNK_SIZE = 2**20 # 1Mb 
+    MAX_SIZE   = CHUNK_SIZE * 100 # refuse downloads larger than ~100Mb
+
+    try:
+        page = urllib.urlopen(URL)
+    except Exception, exc:
+        raise Exception('Problems connecting to %s (%s)' % (URL, exc) )
+
+    name, data = out_data.items()[0]
+
+    fp = open(data.file_name, 'wb')
+    size = 0
+    while 1:
+        chunk = page.read(CHUNK_SIZE)
+        if not chunk:
+            break
+        if size > MAX_SIZE:
+            raise Exception('----- maximum datasize exceeded ---')
+        size += len(chunk)
+        fp.write(chunk)
+    page.close() # release the HTTP connection
+    fp.close()
+    #Set meta data, format file to be valid interval type
+    if isinstance(data.datatype, datatypes.interval.Interval):
+        data.set_meta(first_line_is_header=True)
+        #check for missing meta data, if all there, comment first line and process file
+        if not data.missing_meta():
+            line_ctr = -1
+            # NamedTemporaryFile deletes its file on close; it is used here only to
+            # obtain a unique path, which is then reopened as a plain file.
+            temp = tempfile.NamedTemporaryFile('w')
+            temp_filename = temp.name
+            temp.close()
+            temp = open(temp_filename,'w')
+            chromCol = int(data.metadata.chromCol) - 1
+            startCol = int(data.metadata.startCol) - 1
+            strandCol = int(data.metadata.strandCol) - 1
+            for line in open(data.file_name, 'r'):
+                line_ctr += 1
+                fields = line.strip().split('\t')
+                temp.write("%s\n" % '\t'.join(fields))
+            temp.close()
+            shutil.move(temp_filename,data.file_name)
+        else:
+            data = app.datatypes_registry.change_datatype(data, 'tabular')
+    data.set_size()
+    data.set_peek()
+    app.model.context.add( data )
+    app.model.context.flush()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/import.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+
+"""
+Script that imports locally stored data as a new dataset for the user
+Usage: import id outputfile
+"""
+import sys, os
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+BUFFER = 1048576 # copy in 1Mb chunks
+
+dataid   = sys.argv[1]
+out_name = sys.argv[2]
+
+
+id2name = { # dataset uid (tool form value) -> filename under database/import
+    'eryth'         : 'ErythPreCRMmm3_cusTrk.txt',
+    'cishg16'       : 'ReglRegHBBhg16CusTrk.txt',
+    'cishg17'       : 'ReglRegHBBhg17CusTrk.txt',
+    'exons'         : 'ExonsKnownGenes_mm3.txt',
+    'krhg16'        : 'known_regulatory_hg16.bed',
+    'krhg17'        : 'known_regulatory_hg17.bed',
+    'tARhg16mmc'    : 'hg16.mouse.t_AR.cold.bed',
+    'tARhg16mmm'    : 'hg16.mouse.t_AR.medium.bed',
+    'tARhg16mmh'    : 'hg16.mouse.t_AR.hot.bed',
+    'tARhg16rnc'    : 'hg16.rat.t_AR.cold.bed',
+    'tARhg16rnm'    : 'hg16.rat.t_AR.medium.bed',
+    'tARhg16rnh'    : 'hg16.rat.t_AR.hot.bed',
+    'phastConsHg16' : 'phastConsMost_hg16.bed',
+    'omimhg16'      : 'omimDisorders_hg16.tab',
+    'omimhg17'      : 'omimDisorders_hg17.tab',
+
+}
+
+fname = id2name.get(dataid, '')
+if not fname:
+    print 'Importing invalid data %s' % dataid
+    sys.exit() # exits 0; the message above is the only indication of failure
+else:
+    print 'Imported %s' % fname
+
+# this path is hardcoded (relative to the process working directory)
+inp_name = os.path.join('database', 'import', fname)
+
+try:
+    inp = open(inp_name, 'rt')
+except:
+    print 'Could not find file %s' % inp_name
+    sys.exit()
+
+out = open(out_name, 'wt')
+
+while 1:
+    data = inp.read(BUFFER)
+    if not data:
+        break
+    out.write(data)
+
+inp.close()
+out.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/import.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,27 @@
+<tool id="Featured datasets4" name="Featured datasets">
+  <description>(PSU prepared queries)</description>
+  <command interpreter="python">import.py $data $output</command>
+  <inputs>
+	<display>$data</display>		
+	<param name="data" type="select" display="radio">
+      <option value="eryth">Erythroid predicted cis-regulatory modules</option>
+      <option value="exons">Exons of protein-coding genes in the mouse genome, assembly mm3</option>
+      <option value="cishg16">Known cis-regulatory modules in the human HBB gene complex (hg16)</option> <!-- value must exactly match a key in import.py id2name; a trailing space here broke the lookup -->
+      <option value="cishg17">Known cis-regulatory modules in the human HBB gene complex (hg17)</option>
+      <option value="krhg16">Known regulatory regions (hg16)</option>
+      <option value="krhg17">Known regulatory regions (hg17)</option>
+      <option value="tARhg16mmc">Human (hg16) evolutionary cold region (vs mouse)</option>
+      <option value="tARhg16mmm">Human (hg16) evolutionary medium region (vs mouse)</option>
+      <option value="tARhg16mmh">Human (hg16) evolutionary hot region (vs mouse)</option>
+      <option value="tARhg16rnc">Human (hg16) evolutionary cold region (vs rat)</option>
+      <option value="tARhg16rnm">Human (hg16) evolutionary medium region (vs rat)</option>
+      <option value="tARhg16rnh">Human (hg16) evolutionary hot region (vs rat)</option>
+      <option value="phastConsHg16">phastCons hg16 (stringent, top ~5%) from UCSC</option>
+      <option value="omimhg16">OMIM disorders (hg16)</option>
+      <option value="omimhg17">OMIM disorders (hg17)</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data format="bed" name="output" />
+  </outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/metabolicmine.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+<tool name="metabolicMine" id="metabolicmine" tool_type="data_source">
+    <description>server</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://www.metabolicmine.org/beta/begin.do" check_values="false" method="get"> 
+        <display>go to metabolicMine server $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=metabolicmine" /> <!-- defines $GALAXY_URL used in the display line; matches the other data_source tools -->
+    </inputs>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="txt" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/microbial_import.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+
+"""
+Script that imports locally stored data as a new dataset for the user
+Usage: import id outputfile
+"""
+import sys, os
+from shutil import copyfile
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+BUFFER = 1048576 # NOTE(review): unused in this script
+
+uids = sys.argv[1].split(",") # comma-separated dataset uids; unselected form params arrive as 'None'
+out_file1 = sys.argv[2]
+
+#remove NONE from uids
+have_none = True
+while have_none:
+    try:
+        uids.remove('None')
+    except:
+        have_none = False # list.remove raises ValueError once no 'None' entries remain
+
+
+#create dictionary keyed by uid of tuples of (displayName,filePath,build) for all files
+available_files = {}
+try:
+    filename = sys.argv[-1] # last arg: path to microbial_data.loc (see microbial_import.xml command line)
+    for i, line in enumerate( file( filename ) ):
+        if not line or line[0:1] == "#" : continue
+        fields = line.split('\t')
+        try:
+            info_type = fields.pop(0)
+            
+            if info_type.upper()=="DATA":
+                uid = fields.pop(0)
+                org_num = fields.pop(0)
+                chr_acc = fields.pop(0)
+                feature = fields.pop(0)
+                filetype = fields.pop(0)
+                path = fields.pop(0).replace("\r","").replace("\n","")
+                
+                file_type = filetype
+                build = org_num
+                description = uid
+            else:
+                continue
+        except:
+            continue
+
+        available_files[uid]=(description,path,build,file_type,chr_acc)
+except:
+    print >>sys.stderr, "It appears that the configuration file for this tool is missing."
+
+#create list of tuples of (displayName,FileName,build) for desired files
+desired_files = []
+for uid in uids:
+    try:
+        desired_files.append(available_files[uid])
+    except:
+        continue
+
+#copy first file to contents of given output file
+file1_copied = False
+while not file1_copied:
+    try:
+        first_file = desired_files.pop(0)
+    except:
+        print >>sys.stderr, "There were no valid files requested."
+        sys.exit() # exits 0; error is reported on stderr only
+    file1_desc, file1_path, file1_build, file1_type,file1_chr_acc = first_file
+    try:
+        copyfile(file1_path,out_file1)
+        print "#File1\t"+file1_desc+"\t"+file1_chr_acc+"\t"+file1_build+"\t"+file1_type
+        file1_copied = True
+    except:
+        print >>sys.stderr, "The file specified is missing."
+        continue
+        #print >>sys.stderr, "The file specified is missing."
+    
+
+#Tell post-process filter where remaining files reside
+for extra_output in desired_files:
+    file_desc, file_path, file_build, file_type,file_chr_acc = extra_output
+    print "#NewFile\t"+file_desc+"\t"+file_chr_acc+"\t"+file_build+"\t"+file_path+"\t"+file_type
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/microbial_import.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,115 @@
+<tool id="microbial_import1" name="Get Microbial Data">
+  <command interpreter="python">microbial_import.py $CDS,$tRNA,$rRNA,$sequence,$GeneMark,$GeneMarkHMM,$Glimmer3 $output ${GALAXY_DATA_INDEX_DIR}/microbial_data.loc</command>
+  <inputs>
+      <param name="kingdom" type="select" label="Select the Desired Kingdom">
+        <options from_file="microbial_data.loc" startswith="ORG">
+          <column name="name" index="3"/>
+          <column name="value" index="3"/>
+          <filter type="unique_value" name="unique" column="3"/>
+        </options>
+      </param>
+      <param name="org" type="select" label="Select the Desired Organism">
+        <options from_file="microbial_data.loc" startswith="ORG">
+          <column name="name" index="2"/>
+          <column name="value" index="1"/>
+          <filter type="param_value" ref="kingdom" name="kingdom" column="3"/>
+          <filter type="sort_by" column="2"/>
+        </options>
+      </param>
+      <param name="CDS" type="select" label="Select Desired Coding Sequences" display="checkboxes" multiple="True">
+        <options from_file="microbial_data.loc" startswith="DATA">
+          <column name="name" index="3"/>
+          <column name="value" index="1"/>
+          <column name="feature" index="4"/>
+          <filter type="param_value" ref="org" name="kingdom" column="2"/>
+          <filter type="static_value" name="feature" value="CDS" column="4"/>
+        </options>
+      </param>
+      <param name="tRNA" type="select" label="Select Desired tRNA" display="checkboxes" multiple="True">
+        <options from_file="microbial_data.loc" startswith="DATA">
+          <column name="name" index="3"/>
+          <column name="value" index="1"/>
+          <column name="feature" index="4"/>
+          <filter type="param_value" ref="org" name="kingdom" column="2"/>
+          <filter type="static_value" name="feature" value="tRNA" column="4"/>
+        </options>
+      </param>
+      <param name="rRNA" type="select" label="Select Desired rRNA" display="checkboxes" multiple="True">
+        <options from_file="microbial_data.loc" startswith="DATA">
+          <column name="name" index="3"/>
+          <column name="value" index="1"/>
+          <column name="feature" index="4"/>
+          <filter type="param_value" ref="org" name="kingdom" column="2"/>
+          <filter type="static_value" name="feature" value="rRNA" column="4"/>
+        </options>
+      </param>
+      <param name="sequence" type="select" label="Select Desired DNA Sequences" display="checkboxes" multiple="True">
+        <options from_file="microbial_data.loc" startswith="DATA">
+          <column name="name" index="3"/>
+          <column name="value" index="1"/>
+          <column name="feature" index="4"/>
+          <filter type="param_value" ref="org" name="kingdom" column="2"/>
+          <filter type="static_value" name="feature" value="sequence" column="4"/>
+        </options>
+      </param>
+      <param name="GeneMark" type="select" label="Select Desired GeneMark Annotations" display="checkboxes" multiple="True">
+        <options from_file="microbial_data.loc" startswith="DATA">
+          <column name="name" index="3"/>
+          <column name="value" index="1"/>
+          <column name="feature" index="4"/>
+          <filter type="param_value" ref="org" name="kingdom" column="2"/>
+          <filter type="static_value" name="feature" value="GeneMark" column="4"/>
+        </options>
+      </param>
+      <param name="GeneMarkHMM" type="select" label="Select Desired GeneMarkHMM Annotations" display="checkboxes" multiple="True">
+        <options from_file="microbial_data.loc" startswith="DATA">
+          <column name="name" index="3"/>
+          <column name="value" index="1"/>
+          <column name="feature" index="4"/>
+          <filter type="param_value" ref="org" name="kingdom" column="2"/>
+          <filter type="static_value" name="feature" value="GeneMarkHMM" column="4"/>
+        </options>
+      </param>
+      <param name="Glimmer3" type="select" label="Select Desired Glimmer3 Annotations" display="checkboxes" multiple="True">
+        <options from_file="microbial_data.loc" startswith="DATA">
+          <column name="name" index="3"/>
+          <column name="value" index="1"/>
+          <column name="feature" index="4"/>
+          <filter type="param_value" ref="org" name="kingdom" column="2"/>
+          <filter type="static_value" name="feature" value="Glimmer3" column="4"/>
+        </options>
+      </param>
+  </inputs>
+  <outputs>
+    <data format="bed" name="output"/>
+  </outputs>
+  <code file="microbial_import_code.py"/>
+  <help>
+
+This tool will allow you to obtain various genomic datasets for any completed Microbial Genome Project as listed at NCBI_.
+
+.. _NCBI: http://www.ncbi.nlm.nih.gov/genomes/lproks.cgi?view=1
+
+Current datasets available include
+  1. CDS
+  2. tRNA
+  3. rRNA
+  4. FASTA Sequences
+  5. GeneMark Annotations
+  6. GeneMarkHMM Annotations
+  7. Glimmer3 Annotations
+
+-----
+
+Organisms in **bold** are available at the UCSC Browser.
+
+-----
+
+.. class:: infomark
+
+**Note:** Having trouble locating your organism?  Click here_ for a list of available species and their location.
+
+.. _here: http://wiki.g2.bx.psu.edu/Main/Data%20Libraries/Microbes
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/microbial_import_code.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,154 @@
+
+def load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' ):
+    # FIXME: this function is duplicated in the DynamicOptions class.  It is used here only to
+    # set data.name in exec_after_process(). 
+    microbe_info= {}
+    orgs = {}
+    
+    filename = "%s/microbial_data.loc" % GALAXY_DATA_INDEX_DIR
+    for i, line in enumerate( open( filename ) ):
+        line = line.rstrip( '\r\n' )
+        if line and not line.startswith( '#' ):
+            fields = line.split( sep )
+            #read each line, if not enough fields, go to next line
+            try:
+                info_type = fields.pop(0)
+                if info_type.upper() == "ORG":
+                    #ORG     12521   Clostridium perfringens SM101   bacteria        Firmicutes      CP000312,CP000313,CP000314,CP000315     http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=genomeprj&cmd=Retrieve&dopt=Overview&list_uids=12521
+                    org_num = fields.pop(0)
+                    name = fields.pop(0)
+                    kingdom = fields.pop(0)
+                    group = fields.pop(0)
+                    chromosomes = fields.pop(0)
+                    info_url = fields.pop(0)
+                    link_site = fields.pop(0)
+                    if org_num not in orgs:
+                        orgs[ org_num ] = {}
+                        orgs[ org_num ][ 'chrs' ] = {}
+                    orgs[ org_num ][ 'name' ] = name
+                    orgs[ org_num ][ 'kingdom' ] = kingdom
+                    orgs[ org_num ][ 'group' ] = group
+                    orgs[ org_num ][ 'chromosomes' ] = chromosomes
+                    orgs[ org_num ][ 'info_url' ] = info_url
+                    orgs[ org_num ][ 'link_site' ] = link_site
+                elif info_type.upper() == "CHR":
+                    #CHR     12521   CP000315        Clostridium perfringens phage phiSM101, complete genome 38092   110684521       CP000315.1
+                    org_num = fields.pop(0)
+                    chr_acc = fields.pop(0)
+                    name = fields.pop(0)
+                    length = fields.pop(0)
+                    gi = fields.pop(0)
+                    gb = fields.pop(0)
+                    info_url = fields.pop(0)
+                    chr = {}
+                    chr[ 'name' ] = name
+                    chr[ 'length' ] = length
+                    chr[ 'gi' ] = gi
+                    chr[ 'gb' ] = gb
+                    chr[ 'info_url' ] = info_url
+                    if org_num not in orgs:
+                        orgs[ org_num ] = {}
+                        orgs[ org_num ][ 'chrs' ] = {}
+                    orgs[ org_num ][ 'chrs' ][ chr_acc ] = chr
+                elif info_type.upper() == "DATA":
+                    #DATA    12521_12521_CDS 12521   CP000315        CDS     bed     /home/djb396/alignments/playground/bacteria/12521/CP000315.CDS.bed
+                    uid = fields.pop(0)
+                    org_num = fields.pop(0)
+                    chr_acc = fields.pop(0)
+                    feature = fields.pop(0)
+                    filetype = fields.pop(0)
+                    path = fields.pop(0)
+                    data = {}
+                    data[ 'filetype' ] = filetype
+                    data[ 'path' ] = path
+                    data[ 'feature' ] = feature
+
+                    if org_num not in orgs:
+                        orgs[ org_num ] = {}
+                        orgs[ org_num ][ 'chrs' ] = {}
+                    if 'data' not in orgs[ org_num ][ 'chrs' ][ chr_acc ]:
+                        orgs[ org_num ][ 'chrs' ][ chr_acc ][ 'data' ] = {}
+                    orgs[ org_num ][ 'chrs' ][ chr_acc ][ 'data' ][ uid ] = data
+                else: continue
+            except: continue
+    for org_num in orgs:
+        org = orgs[ org_num ]
+        if org[ 'kingdom' ] not in microbe_info:
+            microbe_info[ org[ 'kingdom' ] ] = {}
+        if org_num not in microbe_info[ org[ 'kingdom' ] ]:
+            microbe_info[ org[ 'kingdom' ] ][org_num] = org
+    return microbe_info
+
+#post processing, set build for data and add additional data to history
+from galaxy import datatypes, config, jobs, tools
+from shutil import copyfile
+
+def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
+    base_dataset = out_data.items()[0][1]
+    history = base_dataset.history
+    if history == None:
+        print "unknown history!"
+        return
+    kingdom = param_dict.get( 'kingdom', None )
+    #group = param_dict.get( 'group', None )
+    org = param_dict.get( 'org', None )
+    
+    #if not (kingdom or group or org):
+    if not (kingdom or org):
+        print "Parameters are not available."
+    #workflow passes galaxy.tools.parameters.basic.UnvalidatedValue instead of values
+    if isinstance( kingdom, tools.parameters.basic.UnvalidatedValue ):
+        kingdom = kingdom.value
+    if isinstance( org, tools.parameters.basic.UnvalidatedValue ):
+        org = org.value
+    
+    GALAXY_DATA_INDEX_DIR = app.config.tool_data_path
+    microbe_info = load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' )
+    new_stdout = ""
+    split_stdout = stdout.split("\n")
+    basic_name = ""
+    for line in split_stdout:
+        fields = line.split("\t")
+        if fields[0] == "#File1":
+            description = fields[1]
+            chr = fields[2]
+            dbkey = fields[3]
+            file_type = fields[4]
+            name, data = out_data.items()[0]
+            data.set_size()
+            basic_name = data.name
+            data.name = data.name + " (" + microbe_info[kingdom][org]['chrs'][chr]['data'][description]['feature'] +" for " + microbe_info[kingdom][org]['name'] + ":" + chr + ")"
+            data.dbkey = dbkey
+            data.info = data.name
+            data = app.datatypes_registry.change_datatype( data, file_type )
+            data.init_meta()
+            data.set_peek()
+            app.model.context.add( data )
+            app.model.context.flush()
+        elif fields[0] == "#NewFile":
+            description = fields[1]
+            chr = fields[2]
+            dbkey = fields[3]
+            filepath = fields[4]
+            file_type = fields[5]
+            newdata = app.model.HistoryDatasetAssociation( create_dataset = True, sa_session = app.model.context ) #This import should become a library
+            newdata.set_size()
+            newdata.extension = file_type
+            newdata.name = basic_name + " (" + microbe_info[kingdom][org]['chrs'][chr]['data'][description]['feature'] +" for "+microbe_info[kingdom][org]['name']+":"+chr + ")"
+            app.model.context.add( newdata )
+            app.model.context.flush()
+            app.security_agent.copy_dataset_permissions( base_dataset.dataset, newdata.dataset )
+            history.add_dataset( newdata )
+            app.model.context.add( history )
+            app.model.context.flush()
+            try:
+                copyfile(filepath,newdata.file_name)
+                newdata.info = newdata.name
+                newdata.state = jobs.JOB_OK
+            except:
+                newdata.info = "The requested file is missing from the system."
+                newdata.state = jobs.JOB_ERROR
+            newdata.dbkey = dbkey
+            newdata.init_meta()
+            newdata.set_peek()
+            app.model.context.flush()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/modmine.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+-->
+<tool name="modENCODE modMine" id="modmine" tool_type="data_source">
+    <description>server</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://intermine.modencode.org/" check_values="false" method="get"> 
+        <display>go to modENCODE modMine server $GALAXY_URL</display>
+    </inputs>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="txt" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/ratmine.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+-->
+<tool name="Ratmine" id="ratmine" tool_type="data_source">
+    <description>server</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://ratmine.mcw.edu/ratmine/begin.do" check_values="false" method="get"> 
+        <display>go to Ratmine server $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=ratmine" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
+        <request_param galaxy_name="organism" remote_name="organism" missing="" />
+        <request_param galaxy_name="table" remote_name="table" missing="" />
+        <request_param galaxy_name="description" remote_name="description" missing="" />
+        <request_param galaxy_name="name" remote_name="name" missing="Ratmine query" />
+        <request_param galaxy_name="info" remote_name="info" missing="" />
+        <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" >
+            <value_translation>
+                <value galaxy_value="auto" remote_value="txt" /> <!-- intermine currently always provides 'txt', make this auto detect -->
+            </value_translation>
+        </request_param>
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="txt" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/ucsc_archaea.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<tool name="UCSC Archaea" id="ucsc_proxy">
+	
+	<description>table browser</description>
+	
+	<command interpreter="python">
+		ucsc_proxy.py $param_file $output
+	</command>
+	
+	<inputs action="/ucsc_proxy/index" check_values="false">
+		<display>go to UCSC $init $hgta_outputType</display>
+		<param type="hidden" name="init" value="3"/>
+		<param type="hidden" name="hgta_outputType" value="bed"/>
+	</inputs>
+	
+	<code file="ucsc_filter.py"/>
+	
+	<outputs>
+		<data name="output" format="bed" />
+	</outputs>
+
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/ucsc_filter.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,68 @@
+# runs after the job (and after the default post-filter)
+from galaxy import datatypes, jobs
+
+def validate(incoming):
+    """Validator"""
+    #raise Exception, 'not quite right'
+    pass
+
+def exec_before_job( app, inp_data, out_data, param_dict, tool=None):
+    """Sets the name of the data"""
+    outputType = param_dict.get( 'hgta_outputType', None )
+    if isinstance(outputType, list) and len(outputType)>0: outputType = outputType[-1]
+    items = out_data.items()
+    
+    for name, data in items:
+        data.name  = param_dict.get('display', data.name)
+        data.dbkey = param_dict.get('dbkey', '???')
+
+        if outputType == 'wigData':
+            ext = "wig"
+        elif outputType == 'maf':
+            ext = "maf"
+        elif outputType == 'gff':
+            ext = "gff"
+        elif outputType == 'gff3':
+            ext = "gff3"
+        else:
+            if 'hgta_doPrintSelectedFields' in param_dict:
+                ext = "interval"
+            elif 'hgta_doGetBed' in param_dict:
+                ext = "bed"
+            elif 'hgta_doGenomicDna' in param_dict:
+                ext = "fasta"
+            elif 'hgta_doGenePredSequence' in param_dict:
+                ext = "fasta"
+            else:
+                ext = "interval"
+        
+        data = app.datatypes_registry.change_datatype(data, ext)
+        out_data[name] = data
+        
+def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
+    """Verifies the data after the run"""
+    items = out_data.items()
+    for name, data in items:
+        data.set_size()
+        try:            
+            err_msg, err_flag = 'Errors:', False
+            line_count = 0
+            num_lines = len(file(data.file_name).readlines())
+            for line in file(data.file_name):
+                line_count += 1
+                if line and line[0] == '-':
+                    if line_count + 3 == num_lines and not err_flag:
+                        err_flag = True
+                        err_msg = "Warning: It appears that your results have been truncated by UCSC. View the bottom of your result file for details."
+                        break
+                    err_flag = True
+                    err_msg = err_msg +" (line "+str(line_count)+")"+line
+            data.set_peek()
+            if isinstance(data.datatype, datatypes.interval.Interval) and data.missing_meta():
+                data = app.datatypes_registry.change_datatype(data, 'tabular')
+                out_data[name] = data
+            if err_flag:
+                raise Exception(err_msg)
+        except Exception, exc:
+            data.info  = data.info + "\n" + str(exc)
+            data.blurb = "error"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/ucsc_proxy.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+import urllib
+import sys, os
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+CHUNK   = 2**20 # 1Mb 
+MAXSIZE = CHUNK * 100
+if __name__ == '__main__':
+
+    if len(sys.argv) != 3:
+        print 'Usage ucsc.py input_params output_file'
+        sys.exit()
+
+    inp_file = sys.argv[1]
+    out_file = sys.argv[2]
+
+    DEFAULT_URL = "http://genome.ucsc.edu/hgTables?"
+    
+    # this must stay a list to allow multiple selections for the same widget name (checkboxes)
+    params  = []
+    for line in file(inp_file):
+        line = line.strip()
+        if line:
+            parts = line.split('=')
+            if len(parts) == 0:
+                key = ""
+                value = ""
+            elif len(parts) == 1:
+                key = parts[0]
+                value = ""
+            else:
+                key = parts[0]
+                value = parts[1]
+            if key == 'display':
+                print value
+            # get url from params, referred from proxy.py, initialized by the tool xml
+            elif key == 'proxy_url':
+                DEFAULT_URL = value
+            else:
+                params.append( (key, value) )
+    
+    #print params
+    
+    encoded_params = urllib.urlencode(params)
+    url = DEFAULT_URL + encoded_params
+
+    #print url
+
+    page = urllib.urlopen(url)
+
+    fp = open(out_file, 'wt')
+    size = 0
+    while 1:
+        data = page.read(CHUNK)
+        if not data:
+            break
+        if size > MAXSIZE:
+            fp.write('----- maximum datasize exceeded ---\n')
+            break
+        size += len(data)
+        fp.write(data)
+
+    fp.close()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/ucsc_proxy.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<tool name="UCSC Main" id="ucsc_proxy">
+	
+	<description>table browser proxy</description>
+	
+	<command interpreter="python">
+		ucsc_proxy.py $param_file $output
+	</command>
+	
+	<inputs action="/ucsc_proxy/index" check_values="false">
+		<display>go to UCSC $init $hgta_outputType</display>
+		<param type="hidden" name="init" value="1"/>
+		<param type="hidden" name="hgta_outputType" value="bed"/>
+	</inputs>
+	
+	<code file="ucsc_filter.py"/>
+	
+	<outputs>
+		<data name="output" format="bed" />
+	</outputs>
+
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/ucsc_tablebrowser.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,42 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+-->
+<tool name="UCSC Main" id="ucsc_table_direct1" tool_type="data_source">
+    <description>table browser</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://genome.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get">
+        <display>go to UCSC Table Browser $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
+        <param name="tool_id" type="hidden" value="ucsc_table_direct1" />
+        <param name="sendToGalaxy" type="hidden" value="1" />
+        <param name="hgta_compressType" type="hidden" value="none" />
+        <param name="hgta_outputType" type="hidden" value="bed" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
+        <request_param galaxy_name="organism" remote_name="org" missing="unknown species" />
+        <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" />
+        <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" />
+        <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="auto" >
+            <value_translation>
+                <value galaxy_value="auto" remote_value="primaryTable" />
+                <value galaxy_value="auto" remote_value="selectedFields" />
+                <value galaxy_value="wig" remote_value="wigData" />
+                <value galaxy_value="interval" remote_value="tab" />
+                <value galaxy_value="html" remote_value="hyperlinks" />
+                <value galaxy_value="fasta" remote_value="sequence" />
+                <value galaxy_value="gtf" remote_value="gff" />
+            </value_translation>
+        </request_param>
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="tabular" label="${tool.name} on ${organism}: ${table} (#if $description == 'range' then $getVar( 'position', 'unknown position' ) else $description#)"/>
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/ucsc_tablebrowser_archaea.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,42 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+-->
+<tool name="UCSC Archaea" id="ucsc_table_direct_archaea1" tool_type="data_source">
+    <description>table browser</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://archaea.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get">
+        <display>go to UCSC Table Browser $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
+        <param name="tool_id" type="hidden" value="ucsc_table_direct_archaea1" />
+        <param name="sendToGalaxy" type="hidden" value="1" />
+        <param name="hgta_compressType" type="hidden" value="none" />
+        <param name="hgta_outputType" type="hidden" value="bed" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
+        <request_param galaxy_name="organism" remote_name="org" missing="unknown species" />
+        <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" />
+        <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" />
+        <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="auto" >
+            <value_translation>
+                <value galaxy_value="auto" remote_value="primaryTable" />
+                <value galaxy_value="auto" remote_value="selectedFields" />
+                <value galaxy_value="wig" remote_value="wigData" />
+                <value galaxy_value="interval" remote_value="tab" />
+                <value galaxy_value="html" remote_value="hyperlinks" />
+                <value galaxy_value="fasta" remote_value="sequence" />
+                <value galaxy_value="gtf" remote_value="gff" />
+            </value_translation>
+        </request_param>
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="tabular" label="${tool.name} on ${organism}: ${table} (#if $description == 'range' then $getVar( 'position', 'unknown position' ) else $description#)"/>
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/ucsc_tablebrowser_test.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,42 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+-->
+<tool name="UCSC Test" id="ucsc_table_direct_test1" tool_type="data_source">
+    <description>table browser</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://genome-test.cse.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get">
+        <display>go to UCSC Table Browser $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
+        <param name="tool_id" type="hidden" value="ucsc_table_direct_test1" />
+        <param name="sendToGalaxy" type="hidden" value="1" />
+        <param name="hgta_compressType" type="hidden" value="none" />
+        <param name="hgta_outputType" type="hidden" value="bed" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
+        <request_param galaxy_name="organism" remote_name="org" missing="unknown species" />
+        <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" />
+        <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" />
+        <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="auto" >
+            <value_translation>
+                <value galaxy_value="auto" remote_value="primaryTable" />
+                <value galaxy_value="auto" remote_value="selectedFields" />
+                <value galaxy_value="wig" remote_value="wigData" />
+                <value galaxy_value="interval" remote_value="tab" />
+                <value galaxy_value="html" remote_value="hyperlinks" />
+                <value galaxy_value="fasta" remote_value="sequence" />
+                <value galaxy_value="gtf" remote_value="gff" />
+            </value_translation>
+        </request_param>
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="tabular" label="${tool.name} on ${organism}: ${table} (#if $description == 'range' then $getVar( 'position', 'unknown position' ) else $description#)"/>
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/ucsc_testproxy.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<tool name="UCSC Test" id="ucsc_testproxy">
+	
+	<description>table browser proxy</description>
+	
+	<command interpreter="python">
+		ucsc_proxy.py $param_file $output
+	</command>
+	
+	<inputs action="/ucsc_proxy/index" check_values="false">
+		<display>go to UCSC genome-test $init $hgta_outputType</display>
+		<param type="hidden" name="init" value="2"/>
+		<param type="hidden" name="hgta_outputType" value="bed"/>
+	</inputs>
+	
+	<code file="ucsc_filter.py"/>
+	
+	<outputs>
+		<data name="output" format="bed" />
+	</outputs>
+
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/upload.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,394 @@
+#!/usr/bin/env python
+#Processes uploads from the user.
+
+# WARNING: Changes in this tool (particularly as related to parsing) may need
+# to be reflected in galaxy.web.controllers.tool_runner and galaxy.tools
+
+import urllib, sys, os, gzip, tempfile, shutil, re, gzip, zipfile, codecs, binascii
+from galaxy import eggs
+# need to import model before sniff to resolve a circular import dependency
+import galaxy.model
+from galaxy.datatypes.checkers import *
+from galaxy.datatypes import sniff
+from galaxy.datatypes.binary import *
+from galaxy.datatypes.images import Pdf
+from galaxy.datatypes.registry import Registry
+from galaxy import util
+from galaxy.datatypes.util.image_util import *
+from galaxy.util.json import *
+
+try:
+    import Image as PIL
+except ImportError:
+    try:
+        from PIL import Image as PIL
+    except:
+        PIL = None
+
+try:
+    import bz2
+except:
+    bz2 = None
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def stop_err( msg, ret=1 ):
+    sys.stderr.write( msg )
+    sys.exit( ret )
+def file_err( msg, dataset, json_file ):
+    json_file.write( to_json_string( dict( type = 'dataset',
+                                           ext = 'data',
+                                           dataset_id = dataset.dataset_id,
+                                           stderr = msg ) ) + "\n" )
+    # never remove a server-side upload
+    if dataset.type in ( 'server_dir', 'path_paste' ):
+        return
+    try:
+        os.remove( dataset.path )
+    except:
+        pass
+def safe_dict(d):
+    """
+    Recursively clone json structure with UTF-8 dictionary keys
+    http://mellowmachines.com/blog/2009/06/exploding-dictionary-with-unicode-keys-as-python-arguments/
+    """
+    if isinstance(d, dict):
+        return dict([(k.encode('utf-8'), safe_dict(v)) for k,v in d.iteritems()])
+    elif isinstance(d, list):
+        return [safe_dict(x) for x in d]
+    else:
+        return d
+def check_bam( file_path ):
+    return Bam().sniff( file_path )
+def check_sff( file_path ):
+    return Sff().sniff( file_path )
+def check_pdf( file_path ):
+    return Pdf().sniff( file_path )
+def check_bigwig( file_path ):
+    return BigWig().sniff( file_path )
+def check_bigbed( file_path ):
+    return BigBed().sniff( file_path )
+def parse_outputs( args ):
+    rval = {}
+    for arg in args:
+        id, files_path, path = arg.split( ':', 2 )
+        rval[int( id )] = ( path, files_path )
+    return rval
+def add_file( dataset, registry, json_file, output_path ):
+    data_type = None
+    line_count = None
+    converted_path = None
+    stdout = None
+    link_data_only = dataset.get( 'link_data_only', 'copy_files' )
+
+    try:
+        ext = dataset.file_type
+    except AttributeError:
+        file_err( 'Unable to process uploaded file, missing file_type parameter.', dataset, json_file )
+        return
+
+    if dataset.type == 'url':
+        try:
+            temp_name, dataset.is_multi_byte = sniff.stream_to_file( urllib.urlopen( dataset.path ), prefix='url_paste' )
+        except Exception, e:
+            file_err( 'Unable to fetch %s\n%s' % ( dataset.path, str( e ) ), dataset, json_file )
+            return
+        dataset.path = temp_name
+    # See if we have an empty file
+    if not os.path.exists( dataset.path ):
+        file_err( 'Uploaded temporary file (%s) does not exist.' % dataset.path, dataset, json_file )
+        return
+    if not os.path.getsize( dataset.path ) > 0:
+        file_err( 'The uploaded file is empty', dataset, json_file )
+        return
+    if not dataset.type == 'url':
+        # Already set is_multi_byte above if type == 'url'
+        try:
+            dataset.is_multi_byte = util.is_multi_byte( codecs.open( dataset.path, 'r', 'utf-8' ).read( 100 ) )
+        except UnicodeDecodeError, e:
+            dataset.is_multi_byte = False
+    # Is dataset an image?
+    image = check_image( dataset.path )
+    if image:
+        if not PIL:
+            image = None
+        # get_image_ext() returns None if not a supported Image type
+        ext = get_image_ext( dataset.path, image )
+        data_type = ext
+    # Is dataset content multi-byte?
+    elif dataset.is_multi_byte:
+        data_type = 'multi-byte char'
+        ext = sniff.guess_ext( dataset.path, is_multi_byte=True )
+    # Is dataset content supported sniffable binary?
+    elif check_bam( dataset.path ):
+        ext = 'bam'
+        data_type = 'bam'
+    elif check_sff( dataset.path ):
+        ext = 'sff'
+        data_type = 'sff'
+    elif check_pdf( dataset.path ):
+        ext = 'pdf'
+        data_type = 'pdf'
+    elif check_bigwig( dataset.path ):
+        ext = 'bigwig'
+        data_type = 'bigwig'
+    elif check_bigbed( dataset.path ):
+        ext = 'bigbed'
+        data_type = 'bigbed'
+    if not data_type:
+        # See if we have a gzipped file which, if it passes our restrictions, we will uncompress
+        is_gzipped, is_valid = check_gzip( dataset.path )
+        if is_gzipped and not is_valid:
+            file_err( 'The gzipped uploaded file contains inappropriate content', dataset, json_file )
+            return
+        elif is_gzipped and is_valid:
+            if link_data_only == 'copy_files':
+                # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format
+                CHUNK_SIZE = 2**20 # 1Mb   
+                fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False )
+                gzipped_file = gzip.GzipFile( dataset.path, 'rb' )
+                while 1:
+                    try:
+                        chunk = gzipped_file.read( CHUNK_SIZE )
+                    except IOError:
+                        os.close( fd )
+                        os.remove( uncompressed )
+                        file_err( 'Problem decompressing gzipped data', dataset, json_file )
+                        return
+                    if not chunk:
+                        break
+                    os.write( fd, chunk )
+                os.close( fd )
+                gzipped_file.close()
+                # Replace the gzipped file with the decompressed file if it's safe to do so
+                if dataset.type in ( 'server_dir', 'path_paste' ):
+                    dataset.path = uncompressed
+                else:
+                    shutil.move( uncompressed, dataset.path )
+            dataset.name = dataset.name.rstrip( '.gz' )
+            data_type = 'gzip'
+        if not data_type and bz2 is not None:
+            # See if we have a bz2 file, much like gzip
+            is_bzipped, is_valid = check_bz2( dataset.path )
+            if is_bzipped and not is_valid:
+                file_err( 'The gzipped uploaded file contains inappropriate content', dataset, json_file )
+                return
+            elif is_bzipped and is_valid:
+                if link_data_only == 'copy_files':
+                    # We need to uncompress the temp_name file
+                    CHUNK_SIZE = 2**20 # 1Mb   
+                    fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_bunzip2_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False )
+                    bzipped_file = bz2.BZ2File( dataset.path, 'rb' )
+                    while 1:
+                        try:
+                            chunk = bzipped_file.read( CHUNK_SIZE )
+                        except IOError:
+                            os.close( fd )
+                            os.remove( uncompressed )
+                            file_err( 'Problem decompressing bz2 compressed data', dataset, json_file )
+                            return
+                        if not chunk:
+                            break
+                        os.write( fd, chunk )
+                    os.close( fd )
+                    bzipped_file.close()
+                    # Replace the bzipped file with the decompressed file if it's safe to do so
+                    if dataset.type in ( 'server_dir', 'path_paste' ):
+                        dataset.path = uncompressed
+                    else:
+                        shutil.move( uncompressed, dataset.path )
+                dataset.name = dataset.name.rstrip( '.bz2' )
+                data_type = 'bz2'
+        if not data_type:
+            # See if we have a zip archive
+            is_zipped = check_zip( dataset.path )
+            if is_zipped:
+                if link_data_only == 'copy_files':
+                    CHUNK_SIZE = 2**20 # 1Mb
+                    uncompressed = None
+                    uncompressed_name = None
+                    unzipped = False
+                    z = zipfile.ZipFile( dataset.path )
+                    for name in z.namelist():
+                        if name.endswith('/'):
+                            continue
+                        if unzipped:
+                            stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
+                            break
+                        fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False )
+                        if sys.version_info[:2] >= ( 2, 6 ):
+                            zipped_file = z.open( name )
+                            while 1:
+                                try:
+                                    chunk = zipped_file.read( CHUNK_SIZE )
+                                except IOError:
+                                    os.close( fd )
+                                    os.remove( uncompressed )
+                                    file_err( 'Problem decompressing zipped data', dataset, json_file )
+                                    return
+                                if not chunk:
+                                    break
+                                os.write( fd, chunk )
+                            os.close( fd )
+                            zipped_file.close()
+                            uncompressed_name = name
+                            unzipped = True
+                        else:
+                            # python < 2.6 doesn't have a way to read members in chunks(!)
+                            try:
+                                outfile = open( uncompressed, 'wb' )
+                                outfile.write( z.read( name ) )
+                                outfile.close()
+                                uncompressed_name = name
+                                unzipped = True
+                            except IOError:
+                                os.close( fd )
+                                os.remove( uncompressed )
+                                file_err( 'Problem decompressing zipped data', dataset, json_file )
+                                return
+                    z.close()
+                    # Replace the zipped file with the decompressed file if it's safe to do so
+                    if uncompressed is not None:
+                        if dataset.type in ( 'server_dir', 'path_paste' ):
+                            dataset.path = uncompressed
+                        else:
+                            shutil.move( uncompressed, dataset.path )
+                        dataset.name = uncompressed_name
+                data_type = 'zip'
+        if not data_type:
+            if check_binary( dataset.path ):
+                # We have a binary dataset, but it is not Bam, Sff, Pdf, BigWig or BigBed
+                data_type = 'binary'
+                #binary_ok = False
+                parts = dataset.name.split( "." )
+                if len( parts ) > 1:
+                    ext = parts[1].strip().lower()
+                    if ext not in unsniffable_binary_formats:
+                        file_err( 'The uploaded binary file contains inappropriate content', dataset, json_file )
+                        return
+                    elif ext in unsniffable_binary_formats and dataset.file_type != ext:
+                        err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % ( ext.capitalize(), ext )
+                        file_err( err_msg, dataset, json_file )
+                        return
+        if not data_type:
+            # We must have a text file
+            if check_html( dataset.path ):
+                file_err( 'The uploaded file contains inappropriate HTML content', dataset, json_file )
+                return
+        if data_type != 'binary':
+            if link_data_only == 'copy_files':
+                in_place = True
+                if dataset.type in ( 'server_dir', 'path_paste' ) and data_type not in [ 'gzip', 'bz2', 'zip' ]:
+                    in_place = False
+                if dataset.space_to_tab:
+                    line_count, converted_path = sniff.convert_newlines_sep2tabs( dataset.path, in_place=in_place )
+                else:
+                    line_count, converted_path = sniff.convert_newlines( dataset.path, in_place=in_place )
+            if dataset.file_type == 'auto':
+                ext = sniff.guess_ext( dataset.path, registry.sniff_order )
+            else:
+                ext = dataset.file_type
+            data_type = ext
+    # Save job info for the framework
+    if ext == 'auto' and dataset.ext:
+        ext = dataset.ext
+    if ext == 'auto':
+        ext = 'data'
+    datatype = registry.get_datatype_by_extension( ext )
+    if dataset.type in ( 'server_dir', 'path_paste' ) and link_data_only == 'link_to_files':
+        # Never alter a file that will not be copied to Galaxy's local file store.
+        if datatype.dataset_content_needs_grooming( dataset.path ):
+            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
+                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
+            file_err( err_msg, dataset, json_file )
+            return
+    if link_data_only == 'copy_files' and dataset.type in ( 'server_dir', 'path_paste' ) and data_type not in [ 'gzip', 'bz2', 'zip' ]:
+        # Move the dataset to its "real" path
+        if converted_path is not None:
+            shutil.copy( converted_path, output_path )
+            try:
+                os.remove( converted_path )
+            except:
+                pass
+        else:
+            # This should not happen, but it's here just in case
+            shutil.copy( dataset.path, output_path )
+    elif link_data_only == 'copy_files':
+        shutil.move( dataset.path, output_path )
+    # Write the job info
+    stdout = stdout or 'uploaded %s file' % data_type
+    info = dict( type = 'dataset',
+                 dataset_id = dataset.dataset_id,
+                 ext = ext,
+                 stdout = stdout,
+                 name = dataset.name,
+                 line_count = line_count )
+    json_file.write( to_json_string( info ) + "\n" )
+    if link_data_only == 'copy_files' and datatype.dataset_content_needs_grooming( output_path ):
+        # Groom the dataset content if necessary
+        datatype.groom_dataset_content( output_path )
+def add_composite_file( dataset, registry, json_file, output_path, files_path ):
+        if dataset.composite_files:
+            os.mkdir( files_path )
+            for name, value in dataset.composite_files.iteritems():
+                value = util.bunch.Bunch( **value )
+                if dataset.composite_file_paths[ value.name ] is None and not value.optional:
+                    file_err( 'A required composite data file was not provided (%s)' % name, dataset, json_file )
+                    break
+                elif dataset.composite_file_paths[value.name] is not None:
+                    dp = dataset.composite_file_paths[value.name][ 'path' ]
+                    isurl = dp.find('://') <> -1 # todo fixme
+                    if isurl:
+                       try:
+                           temp_name, dataset.is_multi_byte = sniff.stream_to_file( urllib.urlopen( dp ), prefix='url_paste' )
+                       except Exception, e:
+                           file_err( 'Unable to fetch %s\n%s' % ( dp, str( e ) ), dataset, json_file )
+                           return
+                       dataset.path = temp_name
+                       dp = temp_name
+                    if not value.is_binary:
+                        if dataset.composite_file_paths[ value.name ].get( 'space_to_tab', value.space_to_tab ):
+                            sniff.convert_newlines_sep2tabs( dp )
+                        else:
+                            sniff.convert_newlines( dp )
+                    shutil.move( dp, os.path.join( files_path, name ) )
+        # Move the dataset to its "real" path
+        shutil.move( dataset.primary_file, output_path )
+        # Write the job info
+        info = dict( type = 'dataset',
+                     dataset_id = dataset.dataset_id,
+                     stdout = 'uploaded %s file' % dataset.file_type )
+        json_file.write( to_json_string( info ) + "\n" )
+
+def __main__():
+
+    if len( sys.argv ) < 4:
+        print >>sys.stderr, 'usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...'
+        sys.exit( 1 )
+
+    output_paths = parse_outputs( sys.argv[4:] )
+    json_file = open( 'galaxy.json', 'w' )
+
+    registry = Registry( sys.argv[1], sys.argv[2] )
+
+    for line in open( sys.argv[3], 'r' ):
+        dataset = from_json_string( line )
+        dataset = util.bunch.Bunch( **safe_dict( dataset ) )
+        try:
+            output_path = output_paths[int( dataset.dataset_id )][0]
+        except:
+            print >>sys.stderr, 'Output path for dataset %s not found on command line' % dataset.dataset_id
+            sys.exit( 1 )
+        if dataset.type == 'composite':
+            files_path = output_paths[int( dataset.dataset_id )][1]
+            add_composite_file( dataset, registry, json_file, output_path, files_path )
+        else:
+            add_file( dataset, registry, json_file, output_path )
+    # clean up paramfile
+    try:
+        os.remove( sys.argv[3] )
+    except:
+        pass
+
+if __name__ == '__main__':
+    __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/upload.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,213 @@
+<?xml version="1.0"?>
+
+<tool name="Upload File" id="upload1" version="1.1.3">
+  <description>
+    from your computer  
+  </description>
+  <action module="galaxy.tools.actions.upload" class="UploadToolAction"/>
+  <command interpreter="python">
+      upload.py $GALAXY_ROOT_DIR $GALAXY_DATATYPES_CONF_FILE $paramfile
+    #set $outnum = 0
+    #while $varExists('output%i' % $outnum):
+        #set $output = $getVar('output%i' % $outnum)
+        #set $outnum += 1
+        #set $file_name = $output.file_name
+        ## FIXME: This is not future-proof for other uses of external_filename (other than for use by the library upload's "link data" feature)
+        #if $output.dataset.dataset.external_filename:
+            #set $file_name = "None"
+        #end if
+        ${output.dataset.dataset.id}:${output.files_path}:${file_name}
+    #end while
+  </command>
+  <inputs nginx_upload="true">
+    <param name="file_type" type="select" label="File Format" help="Which format? See help below">
+      <options from_parameter="tool.app.datatypes_registry.upload_file_formats" transform_lines="[ &quot;%s%s%s&quot; % ( line, self.separator, line ) for line in obj ]">
+        <column name="value" index="1"/>
+        <column name="name" index="0"/>
+        <filter type="sort_by" column="0"/>
+        <filter type="add_value" name="Auto-detect" value="auto" index="0"/>
+      </options>
+    </param>
+    <param name="async_datasets" type="hidden" value="None"/>
+    <upload_dataset name="files" title="Specify Files for Dataset" file_type_name="file_type" metadata_ref="files_metadata">
+        <param name="file_data" type="file" size="30" label="File" ajax-upload="true" help="TIP: Due to browser limitations, uploading files larger than 2GB is guaranteed to fail.  To upload large files, use the URL method (below) or FTP (if enabled by the site administrator).">
+        <validator type="expression" message="You will need to reselect the file you specified (%s)." substitute_value_in_message="True">not ( ( isinstance( value, unicode ) or isinstance( value, str ) ) and value != "" )</validator> <!-- use validator to post message to user about needing to reselect the file, since most browsers won't accept the value attribute for file inputs -->
+      </param>
+      <param name="url_paste" type="text" area="true" size="5x35" label="URL/Text" help="Here you may specify a list of URLs (one per line) or paste the contents of a file."/> 
+      <param name="ftp_files" type="ftpfile" label="Files uploaded via FTP"/>
+      <param name="space_to_tab" type="select" display="checkboxes" multiple="True" label="Convert spaces to tabs" help="Use this option if you are entering intervals by hand."> 
+        <option value="Yes">Yes</option>
+      </param>
+    </upload_dataset>
+    <param name="dbkey" type="genomebuild" label="Genome" />
+    <conditional name="files_metadata" title="Specify metadata" value_from="self:app.datatypes_registry.get_upload_metadata_params" value_ref="file_type" value_ref_in_group="False" />
+    <!-- <param name="other_dbkey" type="text" label="Or user-defined Genome" /> -->
+  </inputs>
+  <help>
+  
+**Auto-detect**
+
+The system will attempt to detect Axt, Fasta, Fastqsolexa, Gff, Gff3, Html, Lav, Maf, Tabular, Wiggle, Bed and Interval (Bed with headers) formats. If your file is not detected properly as one of the known formats, it most likely means that it has some format problems (e.g., different number of columns on different rows). You can still coerce the system to set your data to the format you think it should be.  You can also upload compressed files, which will automatically be decompressed. 
+
+-----
+
+**Ab1**
+
+A binary sequence file in 'ab1' format with a '.ab1' file extension.  You must manually select this 'File Format' when uploading the file.
+
+-----
+
+**Axt**
+
+blastz pairwise alignment format.  Each alignment block in an axt file contains three lines: a summary line and 2 sequence lines.  Blocks are separated from one another by blank lines.  The summary line contains chromosomal position and size information about the alignment. It consists of 9 required fields.
+
+-----
+
+**Bam**
+
+A binary file compressed in the BGZF format with a '.bam' file extension.
+
+-----
+
+**Bed**
+
+* Tab delimited format (tabular)
+* Does not require header line
+* Contains 3 required fields:
+
+  - chrom - The name of the chromosome (e.g. chr3, chrY, chr2_random) or contig (e.g. ctgY1).
+  - chromStart - The starting position of the feature in the chromosome or contig. The first base in a chromosome is numbered 0.
+  - chromEnd - The ending position of the feature in the chromosome or contig. The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99.
+
+* May contain 9 additional optional BED fields:
+
+  - name - Defines the name of the BED line. This label is displayed to the left of the BED line in the Genome Browser window when the track is open to full display mode or directly to the left of the item in pack mode.
+  - score - A score between 0 and 1000. If the track line useScore attribute is set to 1 for this annotation data set, the score value will determine the level of gray in which this feature is displayed (higher numbers = darker gray).
+  - strand - Defines the strand - either '+' or '-'.
+  - thickStart - The starting position at which the feature is drawn thickly (for example, the start codon in gene displays).
+  - thickEnd - The ending position at which the feature is drawn thickly (for example, the stop codon in gene displays).
+  - itemRgb - An RGB value of the form R,G,B (e.g. 255,0,0). If the track line itemRgb attribute is set to "On", this RGB value will determine the display color of the data contained in this BED line. NOTE: It is recommended that a simple color scheme (eight colors or less) be used with this attribute to avoid overwhelming the color resources of the Genome Browser and your Internet browser.
+  - blockCount - The number of blocks (exons) in the BED line.
+  - blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount.
+  - blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount.
+
+* Example::
+
+    chr22 1000 5000 cloneA 960 + 1000 5000 0 2 567,488, 0,3512
+    chr22 2000 6000 cloneB 900 - 2000 6000 0 2 433,399, 0,3601
+
+-----
+
+**Fasta**
+
+A sequence in FASTA format consists of a single-line description, followed by lines of sequence data.  The first character of the description line is a greater-than (">") symbol in the first column.  All lines should be shorter than 80 characters::
+
+    >sequence1
+    atgcgtttgcgtgc
+    gtcggtttcgttgc
+    >sequence2
+    tttcgtgcgtatag
+    tggcgcggtga
+
+-----
+
+**FastqSolexa**
+
+FastqSolexa is the Illumina (Solexa) variant of the Fastq format, which stores sequences and quality scores in a single file::
+
+    @seq1  
+    GACAGCTTGGTTTTTAGTGAGTTGTTCCTTTCTTT  
+    +seq1  
+    hhhhhhhhhhhhhhhhhhhhhhhhhhPW@hhhhhh  
+    @seq2  
+    GCAATGACGGCAGCAATAAACTCAACAGGTGCTGG  
+    +seq2  
+    hhhhhhhhhhhhhhYhhahhhhWhAhFhSIJGChO
+    
+Or:: 
+
+    @seq1
+    GAATTGATCAGGACATAGGACAACTGTAGGCACCAT
+    +seq1
+    40 40 40 40 35 40 40 40 25 40 40 26 40 9 33 11 40 35 17 40 40 33 40 7 9 15 3 22 15 30 11 17 9 4 9 4
+    @seq2
+    GAGTTCTCGTCGCCTGTAGGCACCATCAATCGTATG
+    +seq2
+    40 15 40 17 6 36 40 40 40 25 40 9 35 33 40 14 14 18 15 17 19 28 31 4 24 18 27 14 15 18 2 8 12 8 11 9
+    
+-----
+
+**Gff**
+
+GFF lines have nine required fields that must be tab-separated.
+
+-----
+
+**Gff3**
+
+The GFF3 format addresses the most common extensions to GFF, while preserving backward compatibility with previous formats.
+
+-----
+
+**Interval (Genomic Intervals)**
+
+- Tab delimited format (tabular)
+- File must start with definition line in the following format (columns may be in any order).::
+
+    #CHROM START END STRAND
+
+- CHROM - The name of the chromosome (e.g. chr3, chrY, chr2_random) or contig (e.g. ctgY1).
+- START - The starting position of the feature in the chromosome or contig. The first base in a chromosome is numbered 0.
+- END - The ending position of the feature in the chromosome or contig. The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99.
+- STRAND - Defines the strand - either '+' or '-'.
+
+- Example::
+
+    #CHROM START END   STRAND NAME COMMENT
+    chr1   10    100   +      exon myExon
+    chrX   1000  10050 -      gene myGene
+
+-----
+
+**Lav**
+
+Lav is the primary output format for BLASTZ.  The first line of a .lav file begins with #:lav.
+
+-----
+
+**MAF**
+
+TBA and multiz multiple alignment format.  The first line of a .maf file begins with ##maf. This word is followed by white-space-separated "variable=value" pairs. There should be no white space surrounding the "=".
+
+-----
+
+**Scf**
+
+A binary sequence file in 'scf' format with a '.scf' file extension.  You must manually select this 'File Format' when uploading the file.
+
+-----
+
+**Sff**
+
+A binary file in 'Standard Flowgram Format' with a '.sff' file extension.
+
+-----
+
+**Tabular (tab delimited)**
+
+Any data in tab delimited format (tabular)
+
+-----
+
+**Wig**
+
+The wiggle format is line-oriented.  Wiggle data is preceded by a track definition line, which adds a number of options for controlling the default display of this track.
+
+-----
+
+**Other text type**
+
+Any text file
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/worm_modencode.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<tool name="modENCODE worm" id="modENCODEworm" tool_type="data_source">
+    <description>server</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://modencode.oicr.on.ca/fgb2/gbrowse/worm" check_values="false" target="_top"> 
+        <display>go to modENCODE worm server $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=modENCODEworm" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="ce6" >
+            <value_translation>
+                <value galaxy_value="ce6" remote_value="worm" />
+            </value_translation>
+        </request_param>
+        <request_param galaxy_name="URL" remote_name="URL" missing="">
+            <append_param separator="&amp;" first_separator="?" join="=">
+                <value name="d" missing="" />
+                <value name="dbkey" missing="ce6" />
+                <value name="q" missing="" />
+                <value name="s" missing="" />
+                <value name="t" missing="" />
+            </append_param>
+        </request_param>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
+        <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" />
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="txt" label="${tool.name} on $getVar( 'q', 'unknown position' )"/>
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/wormbase.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+<tool name="Wormbase" id="wormbase" tool_type="data_source">
+    <description>server</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://www.wormbase.org/db/seq/gbgff/c_elegans/" check_values="false" target="_top"> 
+        <display>go to Wormbase server $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=wormbase" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL" remote_name="URL" missing="">
+            <append_param separator="&amp;" first_separator="?" join="=">
+                <value name="d" missing="" />
+                <value name="dbkey" missing="" />
+                <value name="q" missing="" />
+                <value name="s" missing="" />
+                <value name="t" missing="" />
+            </append_param>
+        </request_param>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
+        <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" />
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="txt" label="${tool.name} on $getVar( 'q', 'unknown position' )"/>
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/wormbase_test.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+<tool name="Wormbase" id="wormbase_test" tool_type="data_source">
+	<description>test server</description>
+	<command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+	<inputs action="http://dev.wormbase.org/db/seq/gbrowse/c_elegans/" check_values="false" target="_top"> 
+		<display>go to Wormbase test server $GALAXY_URL</display>
+		<param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=wormbase_test" />
+	</inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL" remote_name="URL" missing="">
+            <append_param separator="&amp;" first_separator="?" join="=">
+                <value name="d" missing="" />
+                <value name="dbkey" missing="" />
+                <value name="q" missing="" />
+                <value name="s" missing="" />
+                <value name="t" missing="" />
+            </append_param>
+        </request_param>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" />
+        <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" />
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="txt" label="${tool.name} on $getVar( 'q', 'unknown position' )"/>
+	</outputs>
+	<options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/yeastmine.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<tool name="YeastMine" id="yeastmine" tool_type="data_source">
+    <description>server</description>
+    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
+    <inputs action="http://yeastmine.yeastgenome.org/yeastmine/begin.do" check_values="false" method="get"> 
+        <display>go to yeastMine server $GALAXY_URL</display>
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" >
+            <value_translation>
+                <value galaxy_value="auto" remote_value="txt" /> <!-- intermine currently always provides 'txt', make this auto detect -->
+            </value_translation>
+        </request_param>
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="output" format="txt" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/discreteWavelet/execute_dwt_IvC_all.pl	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,210 @@
+#!/usr/bin/perl -w
+use warnings;
+use IO::Handle;
+
+$usage = "execute_dwt_IvC_all.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] [PDF.out]  \n";
+die $usage unless @ARGV == 4;
+
+#get the input arguments
+my $firstInputFile = $ARGV[0];
+my $secondInputFile = $ARGV[1];
+my $firstOutputFile = $ARGV[2];
+my $secondOutputFile = $ARGV[3];
+
+open (INPUT1, "<", $firstInputFile) || die("Could not open file $firstInputFile \n");
+open (INPUT2, "<", $secondInputFile) || die("Could not open file $secondInputFile \n");
+open (OUTPUT1, ">", $firstOutputFile) || die("Could not open file $firstOutputFile \n");
+open (OUTPUT2, ">", $secondOutputFile) || die("Could not open file $secondOutputFile \n");
+open (ERROR,  ">", "error.txt")  or die ("Could not open file error.txt \n");
+
+#save all error messages into the error file $errorFile using the error file handle ERROR
+STDERR -> fdopen( \*ERROR,  "w" ) or die ("Could not direct errors to the error file error.txt \n");
+
+
+print "There are two input data files: \n";
+print "The input data file is: $firstInputFile \n";
+print "The control data file is: $secondInputFile \n";
+
+# IvC test
+$test = "IvC";
+
+# construct an R script to implement the IvC test
+print "\n";
+
+$r_script = "get_dwt_IvC_test.r"; 
+print "$r_script \n";
+
+# R script
+open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n";
+print Rcmd "
+        ###########################################################################################
+        # code to do wavelet Indel vs. Control
+        # signal is the difference I-C; function is second moment i.e. variance from zero not mean
+        # to perform wavelet transf. of signal, scale-by-scale analysis of the function 
+        # create null bands by permuting the original data series
+        # generate plots and table matrix of correlation coefficients including p-values
+        ############################################################################################
+        library(\"Rwave\");
+        library(\"wavethresh\");
+        library(\"waveslim\");
+        
+        options(echo = FALSE)
+        
+        # normalize data
+        norm <- function(data){
+            v <- (data - mean(data))/sd(data);
+            if(sum(is.na(v)) >= 1){
+                v <- data;
+            }
+            return(v);
+        }
+        
+        dwt_cor <- function(data.short, names.short, data.long, names.long, test, pdf, table, filter = 4, bc = \"symmetric\", wf = \"haar\", boundary = \"reflection\") {
+            print(test);
+            print(pdf);
+            print(table);
+            
+            pdf(file = pdf);
+            final_pvalue = NULL;
+            title = NULL;
+                
+            short.levels <- wd(data.short[, 1], filter.number = filter, bc = bc)\$nlevels;
+            title <- c(\"motif\");
+            for (i in 1:short.levels){
+            	title <- c(title, paste(i, \"moment2\", sep = \"_\"), paste(i, \"pval\", sep = \"_\"), paste(i, \"test\", sep = \"_\"));
+            }
+            print(title);
+        
+            # loop to compare a vs a
+            for(i in 1:length(names.short)){
+        		wave1.dwt = NULL;
+        		m2.dwt = diff = var.dwt = NULL;
+        		out = NULL;
+                out <- vector(length = length(title));
+        
+        		print(names.short[i]);
+        		print(names.long[i]);
+                        
+        		# need exit if not comparing motif(a) vs motif(a)
+        		if (names.short[i] != names.long[i]){
+                	stop(paste(\"motif\", names.short[i], \"is not the same as\", names.long[i], sep = \" \"));
+        		}
+        		else {
+                	# signal is the difference I-C data sets
+                    diff<-data.short[,i]-data.long[,i];
+        
+                    # normalize the signal
+                    diff<-norm(diff);
+        
+                    # function is 2nd moment
+                    # 2nd moment m_j = 1/N[sum_N(W_j + V_J)^2] = 1/N sum_N(W_j)^2 + (X_bar)^2 
+            		wave1.dwt <- dwt(diff, wf = wf, short.levels, boundary = boundary);
+            		var.dwt <- wave.variance(wave1.dwt);
+                	m2.dwt <- vector(length = short.levels)
+                    for(level in 1:short.levels){
+                    	m2.dwt[level] <- var.dwt[level, 1] + (mean(diff)^2);
+                    }
+                                
+            		# CI bands by permutation of time series
+            		feature1 = feature2 = NULL;
+            		feature1 = data.short[, i];
+            		feature2 = data.long[, i];
+            		null = results = med = NULL; 
+            		m2_25 = m2_975 = NULL;
+            
+            		for (k in 1:1000) {
+                		nk_1 = nk_2 = NULL;
+                		m2_null = var_null = NULL;
+                		null.levels = null_wave1 = null_diff = NULL;
+                		nk_1 <- sample(feature1, length(feature1), replace = FALSE);
+                		nk_2 <- sample(feature2, length(feature2), replace = FALSE);
+                		null.levels <- wd(nk_1, filter.number = filter, bc = bc)\$nlevels;
+                		null_diff <- nk_1-nk_2;
+                		null_diff <- norm(null_diff);
+                		null_wave1 <- dwt(null_diff, wf = wf, short.levels, boundary = boundary);
+                        var_null <- wave.variance(null_wave1);
+                		m2_null <- vector(length = null.levels);
+                		for(level in 1:null.levels){
+                        	m2_null[level] <- var_null[level, 1] + (mean(null_diff)^2);
+                		}
+                		null= rbind(null, m2_null);
+            		}
+                
+            		null <- apply(null, 2, sort, na.last = TRUE);
+            		m2_25 <- null[25,];
+            		m2_975 <- null[975,];
+            		med <- apply(null, 2, median, na.rm = TRUE);
+
+            		# plot
+            		results <- cbind(m2.dwt, m2_25, m2_975);
+            		matplot(results, type = \"b\", pch = \"*\", lty = 1, col = c(1, 2, 2), xlab = \"Wavelet Scale\", ylab = c(\"Wavelet 2nd Moment\", test), main = (names.short[i]), cex.main = 0.75);
+            		abline(h = 1);
+
+            		# get pvalues by comparison to null distribution
+            		out <- c(names.short[i]);
+            		for (m in 1:length(m2.dwt)){
+                    	print(paste(\"scale\", m, sep = \" \"));
+                        print(paste(\"m2\", m2.dwt[m], sep = \" \"));
+                        print(paste(\"median\", med[m], sep = \" \"));
+                        out <- c(out, format(m2.dwt[m], digits = 4));	
+                        pv = NULL;
+                        if(is.na(m2.dwt[m])){
+                        	pv <- \"NA\"; tail <- \"NA\"; # set tail too: it is appended below even for NA scales (was left undefined)
+                        } 
+                        else {
+                        	if (m2.dwt[m] >= med[m]){
+                            	# R tail test
+                                tail <- \"R\";
+                                pv <- (length(which(null[, m] >= m2.dwt[m])))/(length(na.exclude(null[, m])));
+                            }
+                            else{
+                                if (m2.dwt[m] < med[m]){
+                                	# L tail test
+                                    tail <- \"L\";
+                                    pv <- (length(which(null[, m] <= m2.dwt[m])))/(length(na.exclude(null[, m])));
+                                }
+                            }
+                        }
+                        out <- c(out, pv);
+                        print(pv);  
+                        out <- c(out, tail);
+                    }
+                    final_pvalue <-rbind(final_pvalue, out);
+                    print(out);
+                }
+            }
+            
+            colnames(final_pvalue) <- title;
+            write.table(final_pvalue, file = table, sep = \"\\t\", quote = FALSE, row.names = FALSE);
+            dev.off();
+        }\n";
+
+print Rcmd "
+        # execute
+        # read in data 
+        
+        inputData <- read.delim(\"$firstInputFile\");
+        inputDataNames <- colnames(inputData);
+        
+        controlData <- read.delim(\"$secondInputFile\");
+        controlDataNames <- colnames(controlData);
+        
+        # call the test function to implement IvC test
+        dwt_cor(inputData, inputDataNames, controlData, controlDataNames, test = \"$test\", pdf = \"$secondOutputFile\", table = \"$firstOutputFile\");
+        print (\"done with the correlation test\");
+\n";
+
+print Rcmd "#eof\n";
+
+close Rcmd;
+
+system("echo \"wavelet IvC test started on \`hostname\` at \`date\`\"\n");
+system("R --no-restore --no-save --no-readline < $r_script > $r_script.out\n");
+system("echo \"wavelet IvC test ended on \`hostname\` at \`date\`\"\n");
+
+#close the input and output and error files
+close(ERROR);
+close(OUTPUT2);
+close(OUTPUT1);
+close(INPUT2);
+close(INPUT1);
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/discreteWavelet/execute_dwt_IvC_all.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,112 @@
+<tool id="compute_p-values_second_moments_feature_occurrences_between_two_datasets_using_discrete_wavelet_transfom" name="Compute P-values and Second Moments for Feature Occurrences" version="1.0.0">
+  <description>between two datasets using Discrete Wavelet Transforms</description>
+  
+  <command interpreter="perl">
+  	execute_dwt_IvC_all.pl $inputFile1 $inputFile2 $outputFile1 $outputFile2
+  </command>
+
+  <inputs>
+  	<param format="tabular" name="inputFile1" type="data" label="Select the first input file"/>	
+  	<param format="tabular" name="inputFile2" type="data" label="Select the second input file"/>
+  </inputs>
+  
+  <outputs>
+    <data format="tabular" name="outputFile1"/> 
+    <data format="pdf" name="outputFile2"/>
+  </outputs>
+  	
+  <help> 
+
+.. class:: infomark
+
+**What it does**
+
+This program generates plots and computes table matrix of second moments, p-values, and test orientations at multiple scales for the correlation between the occurrences of features in one dataset and their occurrences in another using multiscale wavelet analysis technique. 
+
+The program assumes that the user has two sets of DNA sequences, S1 and S2, each of which consists of one or more sequences of equal length. Each sequence in each set is divided into the same number of multiple intervals n such that n = 2^k, where k is a positive integer and k >= 1. Thus, n could be any value of the set {2, 4, 8, 16, 32, 64, 128, ...}. k represents the number of scales.
+
+The program has two input files obtained as follows:
+
+For a given set of features, say motifs, the user counts the number of occurrences of each feature in each interval of each sequence in S1 and S2, and builds two tabular files representing the count results in each interval of S1 and S2. These are the input files of the program. 
+
+The program gives two output files:
+
+- The first output file is a TABULAR format file representing the second moments, p-values, and test orientations for each feature at each scale.
+- The second output file is a PDF file consisting of as many figures as the number of features, such that each figure represents the values of the second moment for that feature at every scale.
+
+-----
+
+.. class:: warningmark
+
+**Note**
+
+In order to obtain empirical p-values, a random permutation test is implemented by the program, which results in the fact that the program gives slightly different results each time it is run on the same input file. 
+
+-----
+
+**Example**
+
+Counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S1 gives the following tabular file::
+
+	deletionHoptspot	insertionHoptspot	dnaPolPauseFrameshift	topoisomeraseCleavageSite	translinTarget	
+		226			403			416			221				1165
+		236			444			380			241				1223
+		242			496			391			195				1116
+		243			429			364			191				1118
+		244			410			371			236				1063
+		230			386			370			217				1087
+		275			404			402			214				1044
+		265			443			365			231				1086
+		255			390			354			246				1114
+		281			384			406			232				1102
+		263			459			369			251				1135
+		280			433			400			251				1159
+		278			385			382			231				1147
+		248			393			389			211				1162
+		251			403			385			246				1114
+		239			383			347			227				1172
+
+And counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S2 gives the following tabular file:: 
+
+	deletionHoptspot	insertionHoptspot	dnaPolPauseFrameshift	topoisomeraseCleavageSite	translinTarget
+		235			374			407			257				1159
+		244			356			353			212				1128
+		233			343			322			204				1110
+		222			329			398			253				1054
+		216			325			328			253				1129
+		257			368			352			221				1115
+		238			360			346			224				1102
+		225			350			377			248				1107
+		230			330			365			236				1132
+		241			389			357			220				1120
+		274			354			392			235				1120
+		250			379			354			210				1102
+		254			329			320			251				1080
+		221			355			406			279				1127
+		224			330			390			249				1129
+		246			366			364			218				1176
+
+  
+We notice that the number of scales here is 4 because 16 = 2^4. Running the program on the above input files gives the following output:
+
+The first output file::
+
+	motif				1_moment2	1_pval	1_test	2_moment2	2_pval	2_test	3_moment2	3_pval	3_test	4_moment2	4_pval	4_test
+	
+	deletionHoptspot		0.8751		0.376	L	1.549		0.168	R	0.6152		0.434	L	0.5735		0.488	R
+	insertionHoptspot		0.902		0.396	L	1.172		0.332	R	0.6843		0.456	L	1.728		0.213	R
+	dnaPolPauseFrameshift		1.65		0.013	R	0.267		0.055	L	0.1387		0.124	L	0.4516		0.498	L
+	topoisomeraseCleavageSite	0.7443		0.233	L	1.023		0.432	R	1.933		0.155	R	1.09		0.3	R
+	translinTarget			0.5084		0.057	L	0.8219		0.446	L	3.604		0.019	R	0.4377		0.492	L
+
+The second output file:
+
+.. image:: ./static/operation_icons/dwt_IvC_1.png
+.. image:: ./static/operation_icons/dwt_IvC_2.png
+.. image:: ./static/operation_icons/dwt_IvC_3.png
+.. image:: ./static/operation_icons/dwt_IvC_4.png
+.. image:: ./static/operation_icons/dwt_IvC_5.png
+
+  </help>  
+  
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/discreteWavelet/execute_dwt_cor_aVa_perClass.pl	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,221 @@
+#!/usr/bin/perl -w
+
+use warnings;
+use IO::Handle;
+
+$usage = "execute_dwt_cor_aVa_perClass.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] [PDF.out]  \n";
+die $usage unless @ARGV == 4;
+
+#get the input arguments
+my $firstInputFile = $ARGV[0];
+my $secondInputFile = $ARGV[1];
+my $firstOutputFile = $ARGV[2];
+my $secondOutputFile = $ARGV[3];
+
+open (INPUT1, "<", $firstInputFile) || die("Could not open file $firstInputFile \n");
+open (INPUT2, "<", $secondInputFile) || die("Could not open file $secondInputFile \n");
+open (OUTPUT1, ">", $firstOutputFile) || die("Could not open file $firstOutputFile \n");
+open (OUTPUT2, ">", $secondOutputFile) || die("Could not open file $secondOutputFile \n");
+open (ERROR,  ">", "error.txt")  or die ("Could not open file error.txt \n");
+
+#save all error messages into the error file $errorFile using the error file handle ERROR
+STDERR -> fdopen( \*ERROR,  "w" ) or die ("Could not direct errors to the error file error.txt \n");
+
+print "There are two input data files: \n";
+print "The input data file is: $firstInputFile \n";
+print "The control data file is: $secondInputFile \n";
+
+# cor_aVa test
+$test = "cor_aVa";
+
+# construct an R script to implement the cor_aVa test
+print "\n";
+
+$r_script = "get_dwt_cor_aVa_test.r"; 
+print "$r_script \n";
+
+open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n";
+print Rcmd "
+	##################################################################################
+	# code to do all correlation tests of form: motif(a) vs. motif(a)
+	# add code to create null bands by permuting the original data series
+	# generate plots and table matrix of correlation coefficients including p-values
+	##################################################################################
+	library(\"Rwave\");
+	library(\"wavethresh\");
+	library(\"waveslim\");
+	
+	options(echo = FALSE)
+	
+	# normalize data
+	norm <- function(data){
+        v <- (data - mean(data))/sd(data);
+        if(sum(is.na(v)) >= 1){
+        	v <- data;
+        }
+        return(v);
+	}
+	
+	dwt_cor <- function(data.short, names.short, data.long, names.long, test, pdf, table, filter = 4, bc = \"symmetric\", method = \"kendall\", wf = \"haar\", boundary = \"reflection\") {
+		print(test);
+	    print(pdf);
+		print(table);
+		
+	    pdf(file = pdf);   
+	    final_pvalue = NULL;
+		title = NULL;
+		
+	    short.levels <- wd(data.short[, 1], filter.number = filter, bc = bc)\$nlevels;
+		title <- c(\"motif\");
+        for (i in 1:short.levels){
+	        title <- c(title, paste(i, \"cor\", sep = \"_\"), paste(i, \"pval\", sep = \"_\"));
+        }
+        print(title);
+	
+        # normalize the raw data
+        data.short <- apply(data.short, 2, norm);
+        data.long <- apply(data.long, 2, norm);
+        
+        for(i in 1:length(names.short)){
+        	# Kendall Tau
+            # DWT wavelet correlation function
+            # include significance to compare
+            wave1.dwt = wave2.dwt = NULL;
+            tau.dwt = NULL;
+            out = NULL;
+
+            print(names.short[i]);
+            print(names.long[i]);
+            
+            # need exit if not comparing motif(a) vs motif(a)
+            if (names.short[i] != names.long[i]){
+            	stop(paste(\"motif\", names.short[i], \"is not the same as\", names.long[i], sep = \" \"));
+            }
+            else {
+            	wave1.dwt <- dwt(data.short[, i], wf = wf, short.levels, boundary = boundary);
+                wave2.dwt <- dwt(data.long[, i], wf = wf, short.levels, boundary = boundary);
+                tau.dwt <- vector(length=short.levels)
+                       
+				#perform cor test on wavelet coefficients per scale 
+				for(level in 1:short.levels){
+                	w1_level = w2_level = NULL;
+                    w1_level <- (wave1.dwt[[level]]);
+                    w2_level <- (wave2.dwt[[level]]);
+                    tau.dwt[level] <- cor.test(w1_level, w2_level, method = method)\$estimate;
+                }
+                
+                # CI bands by permutation of time series
+                feature1 = feature2 = NULL;
+                feature1 = data.short[, i];
+                feature2 = data.long[, i];
+                null = results = med = NULL; 
+                cor_25 = cor_975 = NULL;
+                
+                for (k in 1:1000) {
+                	nk_1 = nk_2 = NULL;
+                    null.levels = NULL;
+                    cor = NULL;
+                    null_wave1 = null_wave2 = NULL;
+                    
+                    nk_1 <- sample(feature1, length(feature1), replace = FALSE);
+                    nk_2 <- sample(feature2, length(feature2), replace = FALSE);
+                    null.levels <- wd(nk_1, filter.number = filter, bc = bc)\$nlevels;
+                    cor <- vector(length = null.levels);
+                    null_wave1 <- dwt(nk_1, wf = wf, short.levels, boundary = boundary);
+                    null_wave2 <- dwt(nk_2, wf = wf, short.levels, boundary = boundary);
+
+                    for(level in 1:null.levels){
+                    	null_level1 = null_level2 = NULL;
+                        null_level1 <- (null_wave1[[level]]);
+                        null_level2 <- (null_wave2[[level]]);
+                        cor[level] <- cor.test(null_level1, null_level2, method = method)\$estimate;
+                    }
+                    null = rbind(null, cor);
+                }
+                
+                null <- apply(null, 2, sort, na.last = TRUE);
+                print(paste(\"NAs\", length(which(is.na(null))), sep = \" \"));
+                cor_25 <- null[25,];
+                cor_975 <- null[975,];
+                med <- (apply(null, 2, median, na.rm = TRUE));
+
+				# plot
+                results <- cbind(tau.dwt, cor_25, cor_975);
+                matplot(results, type = \"b\", pch = \"*\" , lty = 1, col = c(1, 2, 2), ylim = c(-1, 1), xlab = \"Wavelet Scale\", ylab = \"Wavelet Correlation Kendall's Tau\", main = (paste(test, names.short[i], sep = \" \")), cex.main = 0.75);
+                abline(h = 0);
+
+                # get pvalues by comparison to null distribution
+ 			    ### modify pval calculation for error type II of T test ####
+                out <- (names.short[i]);
+                for (m in 1:length(tau.dwt)){
+                	print(paste(\"scale\", m, sep = \" \"));
+                    print(paste(\"tau\", tau.dwt[m], sep = \" \"));
+                    print(paste(\"med\", med[m], sep = \" \"));
+					out <- c(out, format(tau.dwt[m], digits = 3));	
+                    pv = NULL;
+                    if(is.na(tau.dwt[m])){
+                    	pv <- \"NA\"; 
+                    } 
+                    else {
+                    	if (tau.dwt[m] >= med[m]){
+                        	# R tail test
+                            print(paste(\"R\"));
+                            ### per sv ok to use inequality not strict
+                            pv <- (length(which(null[, m] >= tau.dwt[m])))/(length(na.exclude(null[, m])));
+                            if (tau.dwt[m] == med[m]){
+								print(\"tau == med\");
+                                print(summary(null[, m]));
+                            }
+                    	}
+                        else if (tau.dwt[m] < med[m]){
+                        	# L tail test
+                            print(paste(\"L\"));
+                            pv <- (length(which(null[, m] <= tau.dwt[m])))/(length(na.exclude(null[, m])));
+                        }
+					}
+					out <- c(out, pv);
+                    print(paste(\"pval\", pv, sep = \" \"));
+                }
+                final_pvalue <- rbind(final_pvalue, out);
+				print(out);
+        	}
+        }
+        colnames(final_pvalue) <- title;
+        write.table(final_pvalue, file = table, sep = \"\\t\", quote = FALSE, row.names = FALSE)
+        dev.off();
+	}\n";
+
+print Rcmd "
+	# execute
+	# read in data 
+		
+	inputData1 = inputData2 = NULL;
+	inputData.short1 = inputData.short2 = NULL;
+	inputDataNames.short1 = inputDataNames.short2 = NULL;
+		
+	inputData1 <- read.delim(\"$firstInputFile\");
+	inputData.short1 <- inputData1[, +c(1:ncol(inputData1))];
+	inputDataNames.short1 <- colnames(inputData.short1);
+		
+	inputData2 <- read.delim(\"$secondInputFile\");
+	inputData.short2 <- inputData2[, +c(1:ncol(inputData2))];
+	inputDataNames.short2 <- colnames(inputData.short2);
+	
+	# cor test for motif(a) in inputData1 vs motif(a) in inputData2
+	dwt_cor(inputData.short1, inputDataNames.short1, inputData.short2, inputDataNames.short2, test = \"$test\", pdf = \"$secondOutputFile\", table = \"$firstOutputFile\");
+	print (\"done with the correlation test\");
+	
+	#eof\n";
+close Rcmd;
+
+system("echo \"wavelet cor_aVa test started on \`hostname\` at \`date\`\"\n"); # log start (was mislabeled 'IvC', copy-paste from sibling script)
+system("R --no-restore --no-save --no-readline < $r_script > $r_script.out\n");
+system("echo \"wavelet cor_aVa test ended on \`hostname\` at \`date\`\"\n");
+
+#close the input and output and error files
+close(ERROR);
+close(OUTPUT2);
+close(OUTPUT1);
+close(INPUT2);
+close(INPUT1);
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/discreteWavelet/execute_dwt_cor_aVa_perClass.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,112 @@
+<tool id="compute_p-values_correlation_coefficients_feature_occurrences_between_two_datasets_using_discrete_wavelet_transfom" name="Compute P-values and Correlation Coefficients for Feature Occurrences" version="1.0.0">
+  <description>between two datasets using Discrete Wavelet Transforms</description>
+  
+  <command interpreter="perl">
+  	execute_dwt_cor_aVa_perClass.pl $inputFile1 $inputFile2 $outputFile1 $outputFile2
+  </command>
+
+  <inputs>
+  	<param format="tabular" name="inputFile1" type="data" label="Select the first input file"/>	
+  	<param format="tabular" name="inputFile2" type="data" label="Select the second input file"/>
+  </inputs>
+  
+  <outputs>
+    <data format="tabular" name="outputFile1"/> 
+    <data format="pdf" name="outputFile2"/>
+  </outputs>
+  	
+  <help> 
+
+.. class:: infomark
+
+**What it does**
+
+This program generates plots and computes table matrix of coefficient correlations and p-values at multiple scales for the correlation between the occurrences of features in one dataset and their occurrences in another using multiscale wavelet analysis technique. 
+
+The program assumes that the user has two sets of DNA sequences, S1 and S2, each of which consists of one or more sequences of equal length. Each sequence in each set is divided into the same number of multiple intervals n such that n = 2^k, where k is a positive integer and k >= 1. Thus, n could be any value of the set {2, 4, 8, 16, 32, 64, 128, ...}. k represents the number of scales.
+
+The program has two input files obtained as follows:
+
+For a given set of features, say motifs, the user counts the number of occurrences of each feature in each interval of each sequence in S1 and S2, and builds two tabular files representing the count results in each interval of S1 and S2. These are the input files of the program. 
+
+The program gives two output files:
+
+- The first output file is a TABULAR format file representing the coefficient correlations and p-values for each feature at each scale.
+- The second output file is a PDF file consisting of as many figures as the number of features, such that each figure represents the values of the coefficient correlation for that feature at every scale.
+
+-----
+
+.. class:: warningmark
+
+**Note**
+
+In order to obtain empirical p-values, a random permutation test is implemented by the program, which results in the fact that the program gives slightly different results each time it is run on the same input file. 
+
+-----
+
+**Example**
+
+Counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S1 gives the following tabular file::
+
+	deletionHoptspot	insertionHoptspot	dnaPolPauseFrameshift	topoisomeraseCleavageSite	translinTarget	
+		269			366			330			238				1129
+		239			328			327			283				1188
+		254			351			358			297				1151
+		262			371			355			256				1107
+		254			361			352			234				1192
+		265			354			367			240				1182
+		255			359			333			235				1217
+		271			389			387			272				1241
+		240			305			341			249				1159
+		272			351			337			257				1169
+		275			351			337			233				1158
+		305			331			361			253				1172
+		277			341			343			253				1113
+		266			362			355			267				1162
+		235			326			329			241				1230
+		254			335			360			251				1172
+
+And counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S2 gives the following tabular file::
+
+	deletionHoptspot	insertionHoptspot	dnaPolPauseFrameshift	topoisomeraseCleavageSite	translinTarget
+		104			146			142			113				478
+		89			146			151			94				495
+		100			176			151			88				435
+		96			163			128			114				468
+		99			138			144			91				513
+		112			126			162			106				468
+		86			127			145			83				491
+		104			145			171			110				496
+		91			121			147			104				469
+		103			141			145			98				458
+		92			134			142			117				468
+		97			146			145			107				471
+		115			121			136			109				470
+		113			135			138			101				491
+		111			150			138			102				451
+		94			128			151			138				481
+
+  
+We notice that the number of scales here is 4 because 16 = 2^4. Running the program on the above input files gives the following output:
+
+The first output file::
+
+	motif				1_cor		1_pval		2_cor		2_pval		3_cor		3_pval		4_cor		4_pval
+	
+	deletionHoptspot		0.4		0.072		0.143		0.394		-0.667		0.244		1		0.491
+	insertionHoptspot		0.343		0.082		-0.0714		0.446		-1		0.12		1		0.502
+	dnaPolPauseFrameshift		0.617		0.004		-0.5		0.13		0.667		0.234		1		0.506
+	topoisomeraseCleavageSite	-0.183		0.242		-0.286		0.256		0.333		0.353		-1		0.489
+	translinTarget			0.0167		0.503		-0.0714		0.469		1		0.136		1		0.485
+
+The second output file:
+
+.. image:: ./static/operation_icons/dwt_cor_aVa_1.png
+.. image:: ./static/operation_icons/dwt_cor_aVa_2.png
+.. image:: ./static/operation_icons/dwt_cor_aVa_3.png
+.. image:: ./static/operation_icons/dwt_cor_aVa_4.png
+.. image:: ./static/operation_icons/dwt_cor_aVa_5.png
+
+  </help>  
+  
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/discreteWavelet/execute_dwt_cor_aVb_all.pl	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,223 @@
+#!/usr/bin/perl -w
+
+use warnings;
+use IO::Handle;
+
+$usage = "execute_dwt_cor_aVb_all.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] [PDF.out]  \n";
+die $usage unless @ARGV == 4;
+
+#get the input arguments
+my $firstInputFile = $ARGV[0];
+my $secondInputFile = $ARGV[1];
+my $firstOutputFile = $ARGV[2];
+my $secondOutputFile = $ARGV[3];
+
+open (INPUT1, "<", $firstInputFile) || die("Could not open file $firstInputFile \n");
+open (INPUT2, "<", $secondInputFile) || die("Could not open file $secondInputFile \n");
+open (OUTPUT1, ">", $firstOutputFile) || die("Could not open file $firstOutputFile \n");
+open (OUTPUT2, ">", $secondOutputFile) || die("Could not open file $secondOutputFile \n");
+open (ERROR,  ">", "error.txt")  or die ("Could not open file error.txt \n");
+
+#save all error messages into the error file $errorFile using the error file handle ERROR
+STDERR -> fdopen( \*ERROR,  "w" ) or die ("Could not direct errors to the error file error.txt \n");
+
+print "There are two input data files: \n";
+print "The input data file is: $firstInputFile \n";
+print "The control data file is: $secondInputFile \n";
+
+# IvC test
+$test = "cor_aVb_all";
+
+# construct an R script to implement the IvC test
+print "\n";
+
+$r_script = "get_dwt_cor_aVa_test.r"; 
+print "$r_script \n";
+
+
+# R script
+open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n";
+print Rcmd "
+	#################################################################################
+	# code to do all correlation tests of form: motif(a) vs. motif(b)
+	# add code to create null bands by permuting the original data series
+	# generate plots and table matrix of correlation coefficients including p-values
+	#################################################################################
+	library(\"Rwave\");
+	library(\"wavethresh\");
+	library(\"waveslim\");
+	
+	options(echo = FALSE)
+	
+	# normalize data
+	norm <- function(data){
+		v <- (data - mean(data))/sd(data);
+		if(sum(is.na(v)) >= 1){
+			v <- data;
+		}
+		return(v);
+	}
+	
+	dwt_cor <- function(data.short, names.short, data.long, names.long, test, pdf, table, filter = 4, bc = \"symmetric\", method = \"kendall\", wf = \"haar\", boundary = \"reflection\") {
+		print(test);
+		print(pdf);
+		print(table);
+		
+		pdf(file = pdf);
+		final_pvalue = NULL;
+		title = NULL;
+		
+		short.levels <- wd(data.short[, 1], filter.number = filter, bc = bc)\$nlevels;
+		title <- c(\"motif1\", \"motif2\");
+		for (i in 1:short.levels){
+			title <- c(title, paste(i, \"cor\", sep = \"_\"), paste(i, \"pval\", sep = \"_\"));
+		}
+		print(title);
+	
+		# normalize the raw data
+		data.short <- apply(data.short, 2, norm);
+		data.long <- apply(data.long, 2, norm);
+	
+		# loop to compare a vs b
+		for(i in 1:length(names.short)){
+			for(j in 1:length(names.long)){
+				if(i >= j){
+					next;
+				} 
+				else { 
+					# Kendall Tau
+					# DWT wavelet correlation function
+					# include significance to compare
+					wave1.dwt = wave2.dwt = NULL;
+					tau.dwt = NULL;
+					out = NULL;
+	
+					print(names.short[i]);
+					print(names.long[j]);
+					
+					# need exit if not comparing motif(a) vs motif(a)
+					if (names.short[i] == names.long[j]){
+						stop(paste(\"motif\", names.short[i], \"is the same as\", names.long[j], sep = \" \"));
+					}
+					else {
+						wave1.dwt <- dwt(data.short[, i], wf = wf, short.levels, boundary = boundary);
+						wave2.dwt <- dwt(data.long[, j], wf = wf, short.levels, boundary = boundary);
+						tau.dwt <-vector(length = short.levels)
+				   
+						# perform cor test on wavelet coefficients per scale 
+						for(level in 1:short.levels){
+							w1_level = w2_level = NULL;
+							w1_level <- (wave1.dwt[[level]]);
+							w2_level <- (wave2.dwt[[level]]);
+							tau.dwt[level] <- cor.test(w1_level, w2_level, method = method)\$estimate;
+						}
+						
+						# CI bands by permutation of time series
+						feature1 = feature2 = NULL;
+						feature1 = data.short[, i];
+						feature2 = data.long[, j];
+						null = results = med = NULL; 
+						cor_25 = cor_975 = NULL;
+						
+						for (k in 1:1000) {
+							nk_1 = nk_2 = NULL;
+							null.levels = NULL;
+							cor = NULL;
+							null_wave1 = null_wave2 = NULL;
+							
+							nk_1 <- sample(feature1, length(feature1), replace = FALSE);
+							nk_2 <- sample(feature2, length(feature2), replace = FALSE);
+							null.levels <- wd(nk_1, filter.number = filter, bc = bc)\$nlevels;
+							cor <- vector(length = null.levels);
+							null_wave1 <- dwt(nk_1, wf = wf, short.levels, boundary = boundary);
+							null_wave2 <- dwt(nk_2, wf = wf, short.levels, boundary = boundary);
+
+							for(level in 1:null.levels){
+								null_level1 = null_level2 = NULL;
+								null_level1 <- (null_wave1[[level]]);
+								null_level2 <- (null_wave2[[level]]);
+								cor[level] <- cor.test(null_level1, null_level2, method = method)\$estimate;
+							}
+							null = rbind(null, cor);
+						}
+							
+						null <- apply(null, 2, sort, na.last = TRUE);
+						cor_25 <- null[25, ];
+						cor_975 <- null[975, ];
+						med <- (apply(null, 2, median, na.rm = TRUE));
+
+						# plot
+						results <- cbind(tau.dwt, cor_25, cor_975);
+						matplot(results, type = \"b\", pch = \"*\", lty = 1, col = c(1, 2, 2), ylim = c(-1, 1), xlab = \"Wavelet Scale\", ylab = \"Wavelet Correlation Kendall's Tau\", main = (paste(test, names.short[i], \"vs.\", names.long[j], sep = \" \")), cex.main = 0.75);
+						abline(h = 0);
+	
+						# get pvalues by comparison to null distribution
+						### modify pval calculation for error type II of T test ####
+						out <- c(names.short[i],names.long[j]);
+						for (m in 1:length(tau.dwt)){
+							print(m);
+							print(tau.dwt[m]);
+							out <- c(out, format(tau.dwt[m], digits = 3));	
+							pv = NULL;
+							if(is.na(tau.dwt[m])){
+								pv <- \"NA\"; 
+							} 
+							else{
+								if (tau.dwt[m] >= med[m]){
+									# R tail test
+									pv <- (length(which(null[, m] >= tau.dwt[m])))/(length(na.exclude(null[, m])));
+								}
+								else{
+									if (tau.dwt[m] < med[m]){
+										# L tail test
+										pv <- (length(which(null[, m] <= tau.dwt[m])))/(length(na.exclude(null[, m])));
+									}
+								}
+							}
+							out <- c(out, pv);
+							print(pv);
+						}
+						final_pvalue <-rbind(final_pvalue, out);
+						print(out);
+					}
+				}
+			}
+		}
+		colnames(final_pvalue) <- title;
+		write.table(final_pvalue, file = table, sep = \"\\t\", quote = FALSE, row.names = FALSE)
+		dev.off();
+	}\n";
+	
+print Rcmd "
+	# execute
+	# read in data 
+
+	inputData1 = inputData2 = NULL;
+	inputData.short1 = inputData.short2 = NULL;
+	inputDataNames.short1 = inputDataNames.short2 = NULL;
+	
+	inputData1 <- read.delim(\"$firstInputFile\");
+	inputData.short1 <- inputData1[, +c(1:ncol(inputData1))];
+	inputDataNames.short1 <- colnames(inputData.short1);
+		
+	inputData2 <- read.delim(\"$secondInputFile\");
+	inputData.short2 <- inputData2[, +c(1:ncol(inputData2))];
+	inputDataNames.short2 <- colnames(inputData.short2);
+	
+	# cor test for motif(a) in inputData1 vs motif(b) in inputData2
+	dwt_cor(inputData.short1, inputDataNames.short1, inputData.short2, inputDataNames.short2, test = \"$test\", pdf = \"$secondOutputFile\", table = \"$firstOutputFile\");
+	print (\"done with the correlation test\");
+
+	#eof\n";
+close Rcmd;
+
+system("echo \"wavelet IvC test started on \`hostname\` at \`date\`\"\n");
+system("R --no-restore --no-save --no-readline < $r_script > $r_script.out\n");
+system("echo \"wavelet IvC test ended on \`hostname\` at \`date\`\"\n");
+
+#close the input and output and error files
+close(ERROR);
+close(OUTPUT2);
+close(OUTPUT1);
+close(INPUT2);
+close(INPUT1);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/discreteWavelet/execute_dwt_cor_aVb_all.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,123 @@
+<tool id="compute_p-values_correlation_coefficients_featureA_featureB_occurrences_between_two_datasets_using_discrete_wavelet_transfom" name="Compute P-values and Correlation Coefficients for Occurrences of Two Set of Features" version="1.0.0">
+  <description>between two datasets using Discrete Wavelet Transforms</description>
+  
+  <command interpreter="perl">
+  	execute_dwt_cor_aVb_all.pl $inputFile1 $inputFile2 $outputFile1 $outputFile2
+  </command>
+
+  <inputs>
+  	<param format="tabular" name="inputFile1" type="data" label="Select the first input file"/>	
+  	<param format="tabular" name="inputFile2" type="data" label="Select the second input file"/>
+  </inputs>
+  
+  <outputs>
+    <data format="tabular" name="outputFile1"/> 
+    <data format="pdf" name="outputFile2"/>
+  </outputs>
+  	
+  <help> 
+
+.. class:: infomark
+
+**What it does**
+
+This program generates plots and computes table matrix of coefficient correlations and p-values at multiple scales for the correlation between the occurrences of features in one dataset and their occurrences in another using multiscale wavelet analysis technique. 
+
+The program assumes that the user has two sets of DNA sequences, S1 and S2, each of which consists of one or more sequences of equal length. Each sequence in each set is divided into the same number of multiple intervals n such that n = 2^k, where k is a positive integer and k >= 1. Thus, n could be any value of the set {2, 4, 8, 16, 32, 64, 128, ...}. k represents the number of scales.
+
+The program has two input files obtained as follows:
+
+For a given set of features, say motifs, the user counts the number of occurrences of each feature in each interval of each sequence in S1 and S2, and builds two tabular files representing the count results in each interval of S1 and S2. These are the input files of the program. 
+
+The program gives two output files:
+
+- The first output file is a TABULAR format file representing the coefficient correlations and p-values for each feature at each scale.
+- The second output file is a PDF file consisting of as many figures as the number of features, such that each figure represents the values of the coefficient correlations for that feature at every scale.
+
+-----
+
+.. class:: warningmark
+
+**Note**
+
+In order to obtain empirical p-values, a random permutation test is implemented by the program, which means the program gives slightly different results each time it is run on the same input file. 
+
+-----
+
+**Example**
+
+Counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S1 gives the following tabular file::
+
+	deletionHoptspot	insertionHoptspot	dnaPolPauseFrameshift	topoisomeraseCleavageSite	translinTarget	
+		82			162			158			79				459
+		111			196			154			75				459
+		98			178			160			79				475
+		113			201			170			113				436
+		113			173			147			95				446
+		107			150			155			84				436
+		106			166			175			96				448
+		113			176			135			106				514
+		113			170			152			87				450
+		95			152			167			93				467
+		91			171			169			118				426
+		84			139			160			100				459
+		92			154			164			104				440
+		100			145			154			98				472
+		91			161			152			71				461
+		117			164			139			97				463
+
+And counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S2 gives the following tabular file::
+
+	deletionHoptspot	insertionHoptspot	dnaPolPauseFrameshift	topoisomeraseCleavageSite	translinTarget
+		269			366			330			238				1129
+		239			328			327			283				1188
+		254			351			358			297				1151
+		262			371			355			256				1107
+		254			361			352			234				1192
+		265			354			367			240				1182
+		255			359			333			235				1217
+		271			389			387			272				1241
+		240			305			341			249				1159
+		272			351			337			257				1169
+		275			351			337			233				1158
+		305			331			361			253				1172
+		277			341			343			253				1113
+		266			362			355			267				1162
+		235			326			329			241				1230
+		254			335			360			251				1172
+
+  
+We notice that the number of scales here is 4 because 16 = 2^4. Running the program on the above input files gives the following output:
+
+The first output file::
+
+	motif1				motif2				1_cor		1_pval		2_cor		2_pval		3_cor		3_pval		4_cor		4_pval
+	
+	deletionHoptspot		insertionHoptspot		-0.1		0.346		-0.214		0.338		1		0.127		1		0.467
+	deletionHoptspot		dnaPolPauseFrameshift		0.167		0.267		-0.214		0.334		1		0.122		1		0.511
+	deletionHoptspot		topoisomeraseCleavageSite	0.167		0.277		0.143		0.412		-0.667		0.243		1		0.521
+	deletionHoptspot		translinTarget			0		0.505		0.0714		0.441		1		0.124		1		0.518
+	insertionHoptspot		dnaPolPauseFrameshift		-0.202		0.238		0.143		0.379		-1		0.122		1		0.517
+	insertionHoptspot		topoisomeraseCleavageSite	-0.0336		0.457		0.214		0.29		0.667		0.252		1		0.503
+	insertionHoptspot		translinTarget			0.0672		0.389		0.429		0.186		-1		0.119		1		0.506
+	dnaPolPauseFrameshift		topoisomeraseCleavageSite	-0.353		0.101		0.357		0.228		0		0.612		-1		0.49
+	dnaPolPauseFrameshift		translinTarget			-0.151		0.303		-0.571		0.09		-0.333		0.37		-1		1
+	topoisomeraseCleavageSite	translinTarget			-0.37		0.077		-0.222		0.297		0.667		0.234		-1		0.471
+
+The second output file:
+
+.. image:: ./static/operation_icons/dwt_cor_aVb_all_1.png
+.. image:: ./static/operation_icons/dwt_cor_aVb_all_2.png
+.. image:: ./static/operation_icons/dwt_cor_aVb_all_3.png
+.. image:: ./static/operation_icons/dwt_cor_aVb_all_4.png
+.. image:: ./static/operation_icons/dwt_cor_aVb_all_5.png
+.. image:: ./static/operation_icons/dwt_cor_aVb_all_6.png
+.. image:: ./static/operation_icons/dwt_cor_aVb_all_7.png
+.. image:: ./static/operation_icons/dwt_cor_aVb_all_8.png
+.. image:: ./static/operation_icons/dwt_cor_aVb_all_9.png
+.. image:: ./static/operation_icons/dwt_cor_aVb_all_10.png
+
+
+  </help>  
+  
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/discreteWavelet/execute_dwt_var_perClass.pl	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,320 @@
+#!/usr/bin/perl -w
+
+use warnings;
+use IO::Handle;
+use POSIX qw(floor ceil);
+
+# example: perl execute_dwt_var_perClass.pl hg18_NCNR_10bp_3flanks_deletionHotspot_data_del.txt deletionHotspot 3flanks del
+
+$usage = "execute_dwt_var_perClass.pl [TABULAR.in] [TABULAR.out] [TABULAR.out] [PDF.out] \n";
+die $usage unless @ARGV == 4;
+
+#get the input arguments
+my $inputFile = $ARGV[0];
+my $firstOutputFile = $ARGV[1];
+my $secondOutputFile = $ARGV[2];
+my $thirdOutputFile = $ARGV[3];
+
+open (INPUT, "<", $inputFile) || die("Could not open file $inputFile \n");
+open (OUTPUT1, ">", $firstOutputFile) || die("Could not open file $firstOutputFile \n");
+open (OUTPUT2, ">", $secondOutputFile) || die("Could not open file $secondOutputFile \n");
+open (OUTPUT3, ">", $thirdOutputFile) || die("Could not open file $thirdOutputFile \n");
+open (ERROR,  ">", "error.txt")  or die ("Could not open file error.txt \n");
+
+#save all error messages into the error file $errorFile using the error file handle ERROR
+STDERR -> fdopen( \*ERROR,  "w" ) or die ("Could not direct errors to the error file error.txt \n");
+
+# choosing meaningful names for the output files
+$max_dwt = $firstOutputFile; 
+$pvalue = $secondOutputFile; 
+$pdf = $thirdOutputFile; 
+
+# count the number of columns in the input file
+while($buffer = <INPUT>){
+	#if ($buffer =~ m/interval/){
+		chomp($buffer);
+		$buffer =~ s/^#\s*//;
+		@contrl = split(/\t/, $buffer);
+		last;
+	#}
+}
+print "The number of columns in the input file is: " . (@contrl) . "\n";
+print "\n";
+
+# count the number of motifs in the input file
+$count = 0;
+for ($i = 0; $i < @contrl; $i++){
+	$count++;
+	print "# $contrl[$i]\n";
+}
+print "The number of motifs in the input file is:  $count \n";
+
+# check if the number of motifs is not a multiple of 12, and round up if so
+$count2 = ($count/12);
+if ($count2 =~ m/(\D)/){
+	print "the number of motifs is not a multiple of 12 \n";
+	$count2 = ceil($count2);
+}
+else {
+	print "the number of motifs is a multiple of 12 \n";
+}
+print "There will be $count2 subfiles\n\n";
+
+# split infile into subfiles only 12 motif per file for R plotting
+for ($x = 1; $x <= $count2; $x++){
+	$a = (($x - 1) * 12 + 1);
+	$b = $x * 12;
+	
+	if ($x < $count2){
+		print "# data.short $x <- data_test[, +c($a:$b)]; \n"; 
+	}
+	else{
+		print "# data.short $x <- data_test[, +c($a:ncol(data_test)]; \n";
+	}
+}
+
+print "\n";
+print "There are 4 output files: \n";
+print "The first output file is a pdf file\n";
+print "The second output file is a max_dwt file\n";
+print "The third output file is a pvalues file\n";
+print "The fourth output file is a test_final_pvalues file\n";
+
+# write R script
+$r_script = "get_dwt_varPermut_getMax.r"; 
+print "The R file name is: $r_script \n";
+
+open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n";
+
+print Rcmd "
+	######################################################################
+	# plot power spectra, i.e. wavelet variance by class
+	# add code to create null bands by permuting the original data series
+	# get class of maximum significant variance per feature
+	# generate plots and table matrix of variance including p-values
+	######################################################################
+	library(\"Rwave\");
+	library(\"wavethresh\");
+	library(\"waveslim\");
+
+	options(echo = FALSE)
+
+	# normalize data
+	norm <- function(data){
+		v <- (data-mean(data))/sd(data);
+    	if(sum(is.na(v)) >= 1){
+    		v<-data;
+    	}
+    	return(v);
+	}
+
+	dwt_var_permut_getMax <- function(data, names, filter = 4, bc = \"symmetric\", method = \"kendall\", wf = \"haar\", boundary = \"reflection\") {
+		max_var = NULL;
+    	matrix = NULL;
+		title = NULL;
+    	final_pvalue = NULL;
+		short.levels = NULL;
+		scale = NULL;
+	
+    	print(names);
+    	
+   	 	par(mfcol = c(length(names), length(names)), mar = c(0, 0, 0, 0), oma = c(4, 3, 3, 2), xaxt = \"s\", cex = 1, las = 1);
+   	 	
+    	short.levels <- wd(data[, 1], filter.number = filter, bc = bc)\$nlevels;
+    	
+    	title <- c(\"motif\");
+    	for (i in 1:short.levels){
+    		title <- c(title, paste(i, \"var\", sep = \"_\"), paste(i, \"pval\", sep = \"_\"), paste(i, \"test\", sep = \"_\"));
+    	}
+    	print(title);
+        
+		# normalize the raw data
+    	data<-apply(data,2,norm);
+
+    	for(i in 1:length(names)){
+    		for(j in 1:length(names)){
+				temp = NULL;
+				results = NULL;
+				wave1.dwt = NULL;
+				out = NULL;
+				
+				out <- vector(length = length(title));
+            	temp <- vector(length = short.levels);
+            	
+            	if(i < j) {
+            		plot(temp, type = \"n\", axes = FALSE, xlab = NA, ylab = NA);
+                	box(col = \"grey\"); 
+                	grid(ny = 0, nx = NULL);
+            	} else {
+            		if (i > j){
+                		plot(temp, type = \"n\", axes = FALSE, xlab = NA, ylab = NA);
+                    	box(col = \"grey\"); 
+                    	grid(ny = 0, nx = NULL);
+                 	} else {
+                 	
+                 		wave1.dwt <- dwt(data[, i], wf = wf, short.levels, boundary = boundary); 
+                		
+                		temp_row = (short.levels + 1 ) * -1;
+                		temp_col = 1;
+                    	temp <- wave.variance(wave1.dwt)[temp_row, temp_col];
+
+                    	#permutations code :
+                    	feature1 = NULL;
+						null = NULL;
+						var_25 = NULL;
+						var_975 = NULL;
+						med = NULL;
+
+                    	feature1 = data[, i];
+                    	for (k in 1:1000) {
+							nk_1 = NULL;
+							null.levels = NULL;
+							var = NULL;
+							null_wave1 = NULL;
+
+                        	nk_1 = sample(feature1, length(feature1), replace = FALSE);
+                        	null.levels <- wd(nk_1, filter.number = filter, bc = bc)\$nlevels;
+                        	var <- vector(length = length(null.levels));
+                        	null_wave1 <- dwt(nk_1, wf = wf, short.levels, boundary = boundary);
+                        	var<- wave.variance(null_wave1)[-8, 1];
+                        	null= rbind(null, var);
+                    	}
+                    	null <- apply(null, 2, sort, na.last = TRUE);
+                    	var_25 <- null[25, ];
+                    	var_975 <- null[975, ];
+                    	med <- (apply(null, 2, median, na.rm = TRUE));
+
+                    	# plot
+                    	results <- cbind(temp, var_25, var_975);
+                    	matplot(results, type = \"b\", pch = \"*\", lty = 1, col = c(1, 2, 2), axes = F);
+
+                    	# get pvalues by comparison to null distribution
+                    	out <- (names[i]);
+                    	for (m in 1:length(temp)){
+                    		print(paste(\"scale\", m, sep = \" \"));
+                        	print(paste(\"var\", temp[m], sep = \" \"));
+                        	print(paste(\"med\", med[m], sep = \" \"));
+                        	pv = tail = NULL;
+							out <- c(out, format(temp[m], digits = 3));	
+                        	if (temp[m] >= med[m]){
+                        		# R tail test
+                            	print(\"R\");
+	                        	tail <- \"R\";
+                            	pv <- (length(which(null[, m] >= temp[m])))/(length(na.exclude(null[, m])));
+
+                        	} else {
+                        		if (temp[m] < med[m]){
+                                	# L tail test
+                                	print(\"L\");
+	                            	tail <- \"L\";
+                                	pv <- (length(which(null[, m] <= temp[m])))/(length(na.exclude(null[, m])));
+                        		}
+							}
+							out <- c(out, pv);
+							print(pv);
+							out <- c(out, tail);
+                    	}
+                    	final_pvalue <-rbind(final_pvalue, out);
+                 	
+                 
+                    	# get variances outside null bands by comparing temp to null
+                    	## temp stores variance for each scale, and null stores permuted variances for null bands
+                    	for (n in 1:length(temp)){
+                    		if (temp[n] <= var_975[n]){
+                        		temp[n] <- NA;
+                        	} else {
+                        		temp[n] <- temp[n];
+                        	}
+                    	}
+                    	matrix <- rbind(matrix, temp)
+            		}
+            	}
+	        	# labels
+	        	if (i == 1){
+	        		mtext(names[j], side = 2, line = 0.5, las = 3, cex = 0.25);
+	        	}
+	        	if (j == 1){
+	        		mtext(names[i], side = 3, line = 0.5, cex = 0.25);
+	        	}
+	        	if (j == length(names)){
+	        		axis(1, at = (1:short.levels), las = 3, cex.axis = 0.5);
+	        	}
+    		}
+    	}
+		colnames(final_pvalue) <- title;
+    	#write.table(final_pvalue, file = \"test_final_pvalue.txt\", sep = \"\\t\", quote = FALSE, row.names = FALSE, append = TRUE);
+
+		# get maximum variance larger than expectation by comparison to null bands
+    	varnames <- vector();
+    	for(i in 1:length(names)){
+    		name1 = paste(names[i], \"var\", sep = \"_\")
+        	varnames <- c(varnames, name1)
+    	}
+   		rownames(matrix) <- varnames;
+    	colnames(matrix) <- (1:short.levels);
+    	max_var <- names;
+    	scale <- vector(length = length(names));
+    	for (x in 1:nrow(matrix)){
+        	if (length(which.max(matrix[x, ])) == 0){
+            	scale[x] <- NA;
+        	}
+        	else{
+        		scale[x] <- colnames(matrix)[which.max(matrix[x, ])];
+        	}
+    	}
+    	max_var <- cbind(max_var, scale);
+    	write.table(max_var, file = \"$max_dwt\", sep = \"\\t\", quote = FALSE, row.names = FALSE, append = TRUE);
+    	return(final_pvalue);
+	}\n";
+
+print Rcmd "
+	# execute
+	# read in data 
+