Mercurial repository: xuebing / sharplabtool
changeset 3:33c067c3ae34
Deleted selected files
author | xuebing |
date | Fri, 09 Mar 2012 19:47:53 -0500 |
parents | c2a356708570 |
children | 3b1e54dc14d4 |
files | fimo2.xml mytools.zip tools/.DS_Store tools/._.DS_Store tools/._mytools tools/._tool_conf.xml tools/annotation_profiler/annotation_profiler.xml tools/annotation_profiler/annotation_profiler_for_interval.py tools/bedtools/._bedToBam.xml tools/bedtools/bedToBam.xml tools/data_destination/epigraph.xml tools/data_destination/epigraph_test.xml tools/data_source/access_libraries.xml tools/data_source/bed_convert.xml tools/data_source/biomart.xml tools/data_source/biomart_test.xml tools/data_source/bx_browser.xml tools/data_source/cbi_rice_mart.xml tools/data_source/data_source.py tools/data_source/echo.py tools/data_source/echo.xml tools/data_source/encode_db.xml tools/data_source/epigraph_import.xml tools/data_source/epigraph_import_test.xml tools/data_source/eupathdb.xml tools/data_source/fetch.py tools/data_source/fly_modencode.xml tools/data_source/flymine.xml tools/data_source/flymine_test.xml tools/data_source/genbank.py tools/data_source/genbank.xml tools/data_source/gramene_mart.xml tools/data_source/hapmapmart.xml tools/data_source/hbvar.xml tools/data_source/hbvar_filter.py tools/data_source/import.py tools/data_source/import.xml tools/data_source/metabolicmine.xml tools/data_source/microbial_import.py tools/data_source/microbial_import.xml tools/data_source/microbial_import_code.py tools/data_source/modmine.xml tools/data_source/ratmine.xml tools/data_source/ucsc_archaea.xml tools/data_source/ucsc_filter.py tools/data_source/ucsc_proxy.py tools/data_source/ucsc_proxy.xml tools/data_source/ucsc_tablebrowser.xml tools/data_source/ucsc_tablebrowser_archaea.xml tools/data_source/ucsc_tablebrowser_test.xml tools/data_source/ucsc_testproxy.xml tools/data_source/upload.py tools/data_source/upload.xml tools/data_source/worm_modencode.xml tools/data_source/wormbase.xml tools/data_source/wormbase_test.xml tools/data_source/yeastmine.xml tools/discreteWavelet/execute_dwt_IvC_all.pl tools/discreteWavelet/execute_dwt_IvC_all.xml tools/discreteWavelet/execute_dwt_cor_aVa_perClass.pl tools/discreteWavelet/execute_dwt_cor_aVa_perClass.xml tools/discreteWavelet/execute_dwt_cor_aVb_all.pl tools/discreteWavelet/execute_dwt_cor_aVb_all.xml tools/discreteWavelet/execute_dwt_var_perClass.pl tools/discreteWavelet/execute_dwt_var_perClass.xml tools/discreteWavelet/execute_dwt_var_perFeature.pl tools/discreteWavelet/execute_dwt_var_perFeature.xml tools/emboss_5/emboss_antigenic.xml tools/emboss_5/emboss_backtranseq.xml tools/emboss_5/emboss_banana.pl tools/emboss_5/emboss_banana.xml tools/emboss_5/emboss_biosed.xml tools/emboss_5/emboss_btwisted.xml tools/emboss_5/emboss_cai.xml tools/emboss_5/emboss_cai_custom.xml tools/emboss_5/emboss_chaos.xml tools/emboss_5/emboss_charge.xml tools/emboss_5/emboss_checktrans.xml tools/emboss_5/emboss_chips.xml tools/emboss_5/emboss_cirdna.xml tools/emboss_5/emboss_codcmp.xml tools/emboss_5/emboss_coderet.xml tools/emboss_5/emboss_compseq.xml tools/emboss_5/emboss_cpgplot.xml tools/emboss_5/emboss_cpgplot_wrapper.pl tools/emboss_5/emboss_cpgreport.xml tools/emboss_5/emboss_cusp.xml tools/emboss_5/emboss_cutseq.xml tools/emboss_5/emboss_dan.xml tools/emboss_5/emboss_degapseq.xml tools/emboss_5/emboss_descseq.xml tools/emboss_5/emboss_diffseq.xml tools/emboss_5/emboss_digest.xml tools/emboss_5/emboss_dotmatcher.xml tools/emboss_5/emboss_dotpath.xml tools/emboss_5/emboss_dottup.xml tools/emboss_5/emboss_dreg.xml tools/emboss_5/emboss_einverted.xml tools/emboss_5/emboss_epestfind.xml tools/emboss_5/emboss_equicktandem.xml tools/emboss_5/emboss_est2genome.xml 
tools/emboss_5/emboss_etandem.xml tools/emboss_5/emboss_extractfeat.xml tools/emboss_5/emboss_extractseq.xml tools/emboss_5/emboss_format_corrector.py tools/emboss_5/emboss_freak.xml tools/emboss_5/emboss_fuzznuc.xml tools/emboss_5/emboss_fuzzpro.xml tools/emboss_5/emboss_fuzztran.xml tools/emboss_5/emboss_garnier.xml tools/emboss_5/emboss_geecee.xml tools/emboss_5/emboss_getorf.xml tools/emboss_5/emboss_helixturnhelix.xml tools/emboss_5/emboss_hmoment.xml tools/emboss_5/emboss_iep.xml tools/emboss_5/emboss_infoseq.xml tools/emboss_5/emboss_infoseq_wrapper.pl tools/emboss_5/emboss_isochore.xml tools/emboss_5/emboss_lindna.xml tools/emboss_5/emboss_marscan.xml tools/emboss_5/emboss_maskfeat.xml tools/emboss_5/emboss_maskseq.xml tools/emboss_5/emboss_matcher.xml tools/emboss_5/emboss_megamerger.xml tools/emboss_5/emboss_merger.xml tools/emboss_5/emboss_msbar.xml tools/emboss_5/emboss_multiple_outputfile_wrapper.pl tools/emboss_5/emboss_needle.xml tools/emboss_5/emboss_newcpgreport.xml tools/emboss_5/emboss_newcpgseek.xml tools/emboss_5/emboss_newseq.xml tools/emboss_5/emboss_noreturn.xml tools/emboss_5/emboss_notseq.xml tools/emboss_5/emboss_nthseq.xml tools/emboss_5/emboss_octanol.xml tools/emboss_5/emboss_oddcomp.xml tools/emboss_5/emboss_palindrome.xml tools/emboss_5/emboss_pasteseq.xml tools/emboss_5/emboss_patmatdb.xml tools/emboss_5/emboss_pepcoil.xml tools/emboss_5/emboss_pepinfo.xml tools/emboss_5/emboss_pepnet.xml tools/emboss_5/emboss_pepstats.xml tools/emboss_5/emboss_pepwheel.xml tools/emboss_5/emboss_pepwindow.xml tools/emboss_5/emboss_pepwindowall.xml tools/emboss_5/emboss_plotcon.xml tools/emboss_5/emboss_plotorf.xml tools/emboss_5/emboss_polydot.xml tools/emboss_5/emboss_preg.xml tools/emboss_5/emboss_prettyplot.xml tools/emboss_5/emboss_prettyseq.xml tools/emboss_5/emboss_primersearch.xml tools/emboss_5/emboss_revseq.xml tools/emboss_5/emboss_seqmatchall.xml tools/emboss_5/emboss_seqret.xml tools/emboss_5/emboss_showfeat.xml tools/emboss_5/emboss_shuffleseq.xml tools/emboss_5/emboss_sigcleave.xml tools/emboss_5/emboss_single_outputfile_wrapper.pl tools/emboss_5/emboss_sirna.xml tools/emboss_5/emboss_sixpack.xml tools/emboss_5/emboss_skipseq.xml tools/emboss_5/emboss_splitter.xml tools/emboss_5/emboss_supermatcher.xml tools/emboss_5/emboss_syco.xml tools/emboss_5/emboss_tcode.xml tools/emboss_5/emboss_textsearch.xml tools/emboss_5/emboss_tmap.xml tools/emboss_5/emboss_tranalign.xml tools/emboss_5/emboss_transeq.xml tools/emboss_5/emboss_trimest.xml tools/emboss_5/emboss_trimseq.xml tools/emboss_5/emboss_twofeat.xml tools/emboss_5/emboss_union.xml tools/emboss_5/emboss_vectorstrip.xml tools/emboss_5/emboss_water.xml tools/emboss_5/emboss_wobble.xml tools/emboss_5/emboss_wordcount.xml tools/emboss_5/emboss_wordmatch.xml tools/encode/gencode_partition.xml tools/encode/random_intervals.xml tools/encode/random_intervals_no_bits.py tools/encode/split_by_partitions.py tools/evolution/add_scores.xml tools/evolution/codingSnps.pl tools/evolution/codingSnps.xml tools/evolution/codingSnps_filter.py tools/evolution/mutate_snp_codon.py tools/evolution/mutate_snp_codon.xml tools/extract/extract_genomic_dna.py tools/extract/extract_genomic_dna.xml tools/extract/liftOver_wrapper.py tools/extract/liftOver_wrapper.xml tools/extract/phastOdds/get_scores_galaxy.py tools/extract/phastOdds/phastOdds_tool.xml tools/fasta_tools/fasta_compute_length.py tools/fasta_tools/fasta_compute_length.xml tools/fasta_tools/fasta_concatenate_by_species.py tools/fasta_tools/fasta_concatenate_by_species.xml 
tools/fasta_tools/fasta_filter_by_length.py tools/fasta_tools/fasta_filter_by_length.xml tools/fasta_tools/fasta_to_tabular.py tools/fasta_tools/fasta_to_tabular.xml tools/fasta_tools/tabular_to_fasta.py tools/fasta_tools/tabular_to_fasta.xml tools/fastq/fastq_combiner.py tools/fastq/fastq_combiner.xml tools/fastq/fastq_filter.py tools/fastq/fastq_filter.xml tools/fastq/fastq_groomer.py tools/fastq/fastq_groomer.xml tools/fastq/fastq_manipulation.py tools/fastq/fastq_manipulation.xml tools/fastq/fastq_masker_by_quality.py tools/fastq/fastq_masker_by_quality.xml tools/fastq/fastq_paired_end_deinterlacer.py tools/fastq/fastq_paired_end_deinterlacer.xml tools/fastq/fastq_paired_end_interlacer.py tools/fastq/fastq_paired_end_interlacer.xml tools/fastq/fastq_paired_end_joiner.py tools/fastq/fastq_paired_end_joiner.xml tools/fastq/fastq_paired_end_splitter.py tools/fastq/fastq_paired_end_splitter.xml tools/fastq/fastq_stats.py tools/fastq/fastq_stats.xml tools/fastq/fastq_to_fasta.py tools/fastq/fastq_to_fasta.xml tools/fastq/fastq_to_tabular.py tools/fastq/fastq_to_tabular.xml tools/fastq/fastq_trimmer.py tools/fastq/fastq_trimmer.xml tools/fastq/fastq_trimmer_by_quality.py tools/fastq/fastq_trimmer_by_quality.xml tools/fastq/tabular_to_fastq.py tools/fastq/tabular_to_fastq.xml tools/fastx_toolkit/fasta_clipping_histogram.xml tools/fastx_toolkit/fasta_formatter.xml tools/fastx_toolkit/fasta_nucleotide_changer.xml tools/fastx_toolkit/fastq_quality_boxplot.xml tools/fastx_toolkit/fastq_quality_converter.xml tools/fastx_toolkit/fastq_quality_filter.xml tools/fastx_toolkit/fastq_to_fasta.xml tools/fastx_toolkit/fastx_artifacts_filter.xml tools/fastx_toolkit/fastx_barcode_splitter.xml tools/fastx_toolkit/fastx_barcode_splitter_galaxy_wrapper.sh tools/fastx_toolkit/fastx_clipper.xml tools/fastx_toolkit/fastx_collapser.xml tools/fastx_toolkit/fastx_nucleotides_distribution.xml tools/fastx_toolkit/fastx_quality_statistics.xml tools/fastx_toolkit/fastx_renamer.xml tools/fastx_toolkit/fastx_reverse_complement.xml tools/fastx_toolkit/fastx_trimmer.xml tools/filters/CreateInterval.pl tools/filters/CreateInterval.xml tools/filters/axt_to_concat_fasta.py tools/filters/axt_to_concat_fasta.xml tools/filters/axt_to_fasta.py tools/filters/axt_to_fasta.xml tools/filters/axt_to_lav.py tools/filters/axt_to_lav.xml tools/filters/axt_to_lav_code.py tools/filters/bed2gff.xml tools/filters/bed_to_bigbed.xml tools/filters/bed_to_gff_converter.py tools/filters/catWrapper.py tools/filters/catWrapper.xml tools/filters/changeCase.pl tools/filters/changeCase.xml tools/filters/commWrapper.pl tools/filters/commWrapper.xml tools/filters/compare.xml tools/filters/condense_characters.pl tools/filters/condense_characters.xml tools/filters/convert_characters.pl tools/filters/convert_characters.py tools/filters/convert_characters.xml tools/filters/cutWrapper.pl tools/filters/cutWrapper.xml tools/filters/fileGrep.xml tools/filters/fixedValueColumn.pl tools/filters/fixedValueColumn.xml tools/filters/gff/extract_GFF_Features.py tools/filters/gff/extract_GFF_Features.xml tools/filters/gff/gff_filter_by_attribute.py tools/filters/gff/gff_filter_by_attribute.xml tools/filters/gff/gff_filter_by_feature_count.py tools/filters/gff/gff_filter_by_feature_count.xml tools/filters/gff/gtf_filter_by_attribute_values_list.py tools/filters/gff/gtf_filter_by_attribute_values_list.xml tools/filters/gff2bed.xml tools/filters/gff_to_bed_converter.py tools/filters/grep.py tools/filters/grep.xml tools/filters/gtf2bedgraph.xml 
tools/filters/gtf_to_bedgraph_converter.py tools/filters/headWrapper.pl tools/filters/headWrapper.xml tools/filters/join.py tools/filters/joinWrapper.pl tools/filters/joinWrapper.py tools/filters/joiner.xml tools/filters/joiner2.xml tools/filters/lav_to_bed.py tools/filters/lav_to_bed.xml tools/filters/lav_to_bed_code.py tools/filters/mergeCols.py tools/filters/mergeCols.xml tools/filters/pasteWrapper.pl tools/filters/pasteWrapper.xml tools/filters/randomlines.py tools/filters/randomlines.xml tools/filters/remove_beginning.pl tools/filters/remove_beginning.xml tools/filters/sff_extract.py tools/filters/sff_extractor.xml tools/filters/sorter.py tools/filters/sorter.xml tools/filters/tailWrapper.pl tools/filters/tailWrapper.xml tools/filters/trimmer.py tools/filters/trimmer.xml tools/filters/ucsc_gene_bed_to_exon_bed.py tools/filters/ucsc_gene_bed_to_exon_bed.xml tools/filters/ucsc_gene_bed_to_intron_bed.py tools/filters/ucsc_gene_bed_to_intron_bed.xml tools/filters/ucsc_gene_table_to_intervals.py tools/filters/ucsc_gene_table_to_intervals.xml tools/filters/uniq.py tools/filters/uniq.xml tools/filters/wc_gnu.xml tools/filters/wig_to_bigwig.xml tools/filters/wiggle_to_simple.py tools/filters/wiggle_to_simple.xml tools/galaxy-loc.tar.gz tools/gatk/analyze_covariates.xml tools/gatk/count_covariates.xml tools/gatk/gatk_wrapper.py tools/gatk/indel_realigner.xml tools/gatk/realigner_target_creator.xml tools/gatk/table_recalibration.xml tools/gatk/unified_genotyper.xml tools/genetrack/genetrack_indexer.py tools/genetrack/genetrack_indexer.xml tools/genetrack/genetrack_peak_prediction.py tools/genetrack/genetrack_peak_prediction.xml tools/genome_diversity/cdblib.py tools/genome_diversity/extract_flanking_dna.py tools/genome_diversity/extract_flanking_dna.xml tools/genome_diversity/extract_primers.py tools/genome_diversity/extract_primers.xml tools/genome_diversity/genome_diversity.py tools/genome_diversity/select_restriction_enzymes.py tools/genome_diversity/select_restriction_enzymes.xml tools/genome_diversity/select_snps.py tools/genome_diversity/select_snps.xml tools/human_genome_variation/BEAM2_wrapper.sh tools/human_genome_variation/beam.xml tools/human_genome_variation/ctd.pl tools/human_genome_variation/ctd.xml tools/human_genome_variation/disease_ontology_gene_fuzzy_selector.pl tools/human_genome_variation/freebayes.xml tools/human_genome_variation/funDo.xml tools/human_genome_variation/gpass.pl tools/human_genome_variation/gpass.xml tools/human_genome_variation/hilbertvis.sh tools/human_genome_variation/hilbertvis.xml tools/human_genome_variation/ldtools.xml tools/human_genome_variation/ldtools_wrapper.sh tools/human_genome_variation/linkToDavid.pl tools/human_genome_variation/linkToDavid.xml tools/human_genome_variation/linkToGProfile.pl tools/human_genome_variation/linkToGProfile.xml tools/human_genome_variation/lped_to_geno.pl tools/human_genome_variation/lps.xml tools/human_genome_variation/lps_tool_wrapper.sh tools/human_genome_variation/mergeSnps.pl tools/human_genome_variation/pagetag.py tools/human_genome_variation/pass.xml tools/human_genome_variation/pass_wrapper.sh tools/human_genome_variation/senatag.py tools/human_genome_variation/sift.xml tools/human_genome_variation/sift_variants_wrapper.sh tools/human_genome_variation/snpFreq.xml tools/human_genome_variation/snpFreq2.pl tools/hyphy/hyphy_branch_lengths_wrapper.py tools/hyphy/hyphy_branch_lengths_wrapper.xml tools/hyphy/hyphy_dnds_wrapper.py tools/hyphy/hyphy_dnds_wrapper.xml tools/hyphy/hyphy_nj_tree_wrapper.py 
tools/hyphy/hyphy_nj_tree_wrapper.xml tools/ilmn_pacbio/abyss.xml tools/ilmn_pacbio/assembly_stats.py tools/ilmn_pacbio/assembly_stats.xml tools/ilmn_pacbio/cov_model.py tools/ilmn_pacbio/quake.py tools/ilmn_pacbio/quake.xml tools/ilmn_pacbio/quake_pe.xml tools/ilmn_pacbio/quake_wrapper.py tools/ilmn_pacbio/smrtpipe.py tools/ilmn_pacbio/smrtpipe_filter.xml tools/ilmn_pacbio/smrtpipe_galaxy.py tools/ilmn_pacbio/smrtpipe_hybrid.xml tools/ilmn_pacbio/soap_denovo.xml tools/indels/indel_analysis.py tools/indels/indel_analysis.xml tools/indels/indel_sam2interval.py tools/indels/indel_sam2interval.xml tools/indels/indel_table.py tools/indels/indel_table.xml tools/indels/sam_indel_filter.py tools/indels/sam_indel_filter.xml tools/maf/genebed_maf_to_fasta.xml tools/maf/interval2maf.py tools/maf/interval2maf.xml tools/maf/interval2maf_pairwise.xml tools/maf/interval_maf_to_merged_fasta.py tools/maf/interval_maf_to_merged_fasta.xml tools/maf/maf_by_block_number.py tools/maf/maf_by_block_number.xml tools/maf/maf_filter.py tools/maf/maf_filter.xml tools/maf/maf_limit_size.py tools/maf/maf_limit_size.xml tools/maf/maf_limit_to_species.py tools/maf/maf_limit_to_species.xml tools/maf/maf_reverse_complement.py tools/maf/maf_reverse_complement.xml tools/maf/maf_split_by_species.py tools/maf/maf_split_by_species.xml tools/maf/maf_stats.py tools/maf/maf_stats.xml tools/maf/maf_thread_for_species.py tools/maf/maf_thread_for_species.xml tools/maf/maf_to_bed.py tools/maf/maf_to_bed.xml tools/maf/maf_to_bed_code.py tools/maf/maf_to_fasta.xml tools/maf/maf_to_fasta_concat.py tools/maf/maf_to_fasta_multiple_sets.py tools/maf/maf_to_interval.py tools/maf/maf_to_interval.xml tools/maf/vcf_to_maf_customtrack.py tools/maf/vcf_to_maf_customtrack.xml tools/meme/._meme.xml tools/meme/fimo.xml tools/meme/fimo_wrapper.py tools/meme/meme.xml tools/metag_tools/blat_coverage_report.py tools/metag_tools/blat_coverage_report.xml tools/metag_tools/blat_mapping.py tools/metag_tools/blat_mapping.xml tools/metag_tools/blat_wrapper.py tools/metag_tools/blat_wrapper.xml tools/metag_tools/convert_SOLiD_color2nuc.py tools/metag_tools/convert_SOLiD_color2nuc.xml tools/metag_tools/fastqsolexa_to_fasta_qual.py tools/metag_tools/fastqsolexa_to_fasta_qual.xml tools/metag_tools/mapping_to_ucsc.py tools/metag_tools/mapping_to_ucsc.xml tools/metag_tools/megablast_wrapper.py tools/metag_tools/megablast_wrapper.xml tools/metag_tools/megablast_xml_parser.py tools/metag_tools/megablast_xml_parser.xml tools/metag_tools/rmap_wrapper.py tools/metag_tools/rmap_wrapper.xml tools/metag_tools/rmapq_wrapper.py tools/metag_tools/rmapq_wrapper.xml tools/metag_tools/short_reads_figure_high_quality_length.py tools/metag_tools/short_reads_figure_high_quality_length.xml tools/metag_tools/short_reads_figure_score.py tools/metag_tools/short_reads_figure_score.xml tools/metag_tools/short_reads_trim_seq.py tools/metag_tools/short_reads_trim_seq.xml tools/metag_tools/shrimp_color_wrapper.py tools/metag_tools/shrimp_color_wrapper.xml tools/metag_tools/shrimp_wrapper.py tools/metag_tools/shrimp_wrapper.xml tools/metag_tools/split_paired_reads.py tools/metag_tools/split_paired_reads.xml tools/multivariate_stats/cca.py tools/multivariate_stats/cca.xml tools/multivariate_stats/kcca.py tools/multivariate_stats/kcca.xml tools/multivariate_stats/kpca.py tools/multivariate_stats/kpca.xml tools/multivariate_stats/pca.py tools/multivariate_stats/pca.xml tools/mutation/visualize.py tools/mutation/visualize.xml tools/ncbi_blast_plus/blastxml_to_tabular.py 
tools/ncbi_blast_plus/blastxml_to_tabular.xml tools/ncbi_blast_plus/hide_stderr.py tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml tools/new_operations/basecoverage.xml tools/new_operations/cluster.xml tools/new_operations/column_join.py tools/new_operations/column_join.xml tools/new_operations/complement.xml tools/new_operations/concat.xml tools/new_operations/coverage.xml tools/new_operations/flanking_features.py tools/new_operations/flanking_features.xml tools/new_operations/get_flanks.py tools/new_operations/get_flanks.xml tools/new_operations/gops_basecoverage.py tools/new_operations/gops_cluster.py tools/new_operations/gops_complement.py tools/new_operations/gops_concat.py tools/new_operations/gops_coverage.py tools/new_operations/gops_intersect.py tools/new_operations/gops_join.py tools/new_operations/gops_merge.py tools/new_operations/gops_subtract.py tools/new_operations/intersect.xml tools/new_operations/join.xml tools/new_operations/merge.xml tools/new_operations/operation_filter.py tools/new_operations/subtract.xml tools/new_operations/subtract_query.py tools/new_operations/subtract_query.xml tools/new_operations/tables_arithmetic_operations.pl tools/new_operations/tables_arithmetic_operations.xml tools/next_gen_conversion/bwa_solid2fastq_modified.pl tools/next_gen_conversion/fastq_conversions.py tools/next_gen_conversion/fastq_conversions.xml tools/next_gen_conversion/fastq_gen_conv.py tools/next_gen_conversion/fastq_gen_conv.xml tools/next_gen_conversion/solid2fastq.py tools/next_gen_conversion/solid2fastq.xml tools/next_gen_conversion/solid_to_fastq.py tools/next_gen_conversion/solid_to_fastq.xml tools/ngs_rna/cuffcompare_wrapper.py tools/ngs_rna/cuffcompare_wrapper.xml tools/ngs_rna/cuffdiff_wrapper.py tools/ngs_rna/cuffdiff_wrapper.xml tools/ngs_rna/cufflinks_wrapper.py tools/ngs_rna/cufflinks_wrapper.xml tools/ngs_rna/filter_transcripts_via_tracking.py tools/ngs_rna/filter_transcripts_via_tracking.xml tools/ngs_rna/tophat_color_wrapper.xml tools/ngs_rna/tophat_wrapper.py tools/ngs_rna/tophat_wrapper.xml tools/ngs_rna/trinity_all.xml tools/ngs_simulation/ngs_simulation.py tools/ngs_simulation/ngs_simulation.xml tools/peak_calling/ccat_2_wrapper.xml tools/peak_calling/ccat_wrapper.py tools/peak_calling/ccat_wrapper.xml tools/peak_calling/macs_wrapper.py tools/peak_calling/macs_wrapper.xml tools/peak_calling/sicer_wrapper.py tools/peak_calling/sicer_wrapper.xml tools/picard/picard_AddOrReplaceReadGroups.xml tools/picard/picard_BamIndexStats.xml tools/picard/picard_MarkDuplicates.xml tools/picard/picard_ReorderSam.xml tools/picard/picard_ReplaceSamHeader.xml tools/picard/picard_wrapper.py tools/picard/rgPicardASMetrics.xml tools/picard/rgPicardFixMate.xml tools/picard/rgPicardGCBiasMetrics.xml tools/picard/rgPicardHsMetrics.xml tools/picard/rgPicardInsertSize.xml tools/picard/rgPicardLibComplexity.xml tools/picard/rgPicardMarkDups.xml tools/plotting/bar_chart.py tools/plotting/bar_chart.xml tools/plotting/boxplot.xml tools/plotting/histogram.py tools/plotting/histogram2.xml tools/plotting/plot_filter.py tools/plotting/plotter.py tools/plotting/r_wrapper.sh tools/plotting/scatterplot.py tools/plotting/scatterplot.xml tools/plotting/xy_plot.xml tools/regVariation/best_regression_subsets.py tools/regVariation/best_regression_subsets.xml tools/regVariation/categorize_elements_satisfying_criteria.pl 
tools/regVariation/categorize_elements_satisfying_criteria.xml tools/regVariation/compute_motif_frequencies_for_all_motifs.pl tools/regVariation/compute_motif_frequencies_for_all_motifs.xml tools/regVariation/compute_motifs_frequency.pl tools/regVariation/compute_motifs_frequency.xml tools/regVariation/compute_q_values.pl tools/regVariation/compute_q_values.xml tools/regVariation/delete_overlapping_indels.pl tools/regVariation/delete_overlapping_indels.xml tools/regVariation/draw_stacked_barplots.pl tools/regVariation/draw_stacked_barplots.xml tools/regVariation/featureCounter.py tools/regVariation/featureCounter.xml tools/regVariation/getIndelRates_3way.py tools/regVariation/getIndelRates_3way.xml tools/regVariation/getIndels.py tools/regVariation/getIndels_2way.xml tools/regVariation/getIndels_3way.xml tools/regVariation/linear_regression.py tools/regVariation/linear_regression.xml tools/regVariation/maf_cpg_filter.py tools/regVariation/maf_cpg_filter.xml tools/regVariation/microsatellite_birthdeath.pl tools/regVariation/microsatellite_birthdeath.xml tools/regVariation/microsats_alignment_level.py tools/regVariation/microsats_alignment_level.xml tools/regVariation/microsats_mutability.py tools/regVariation/microsats_mutability.xml tools/regVariation/multispecies_MicrosatDataGenerator_interrupted_GALAXY.pl tools/regVariation/multispecies_MicrosatDataGenerator_interrupted_GALAXY.xml tools/regVariation/parseMAF_smallIndels.pl tools/regVariation/quality_filter.py tools/regVariation/quality_filter.xml tools/regVariation/qv_to_bqv.py tools/regVariation/qv_to_bqv.xml tools/regVariation/rcve.py tools/regVariation/rcve.xml tools/regVariation/substitution_rates.py tools/regVariation/substitution_rates.xml tools/regVariation/substitutions.py tools/regVariation/substitutions.xml tools/regVariation/t_test_two_samples.pl tools/regVariation/t_test_two_samples.xml tools/regVariation/windowSplitter.py tools/regVariation/windowSplitter.xml tools/rgenetics/listFiles.py tools/rgenetics/plinkbinJZ.py tools/rgenetics/plinkbinJZ.pyc tools/rgenetics/rgCaCo.py tools/rgenetics/rgCaCo.xml tools/rgenetics/rgClean.py tools/rgenetics/rgClean.xml tools/rgenetics/rgClustalw.py tools/rgenetics/rgClustalw.xml tools/rgenetics/rgEigPCA.py tools/rgenetics/rgEigPCA.xml tools/rgenetics/rgFastQC.py tools/rgenetics/rgFastQC.xml tools/rgenetics/rgGLM.py tools/rgenetics/rgGLM.xml tools/rgenetics/rgGLM_code.py tools/rgenetics/rgGRR.py tools/rgenetics/rgGRR.xml tools/rgenetics/rgGTOOL.py tools/rgenetics/rgGTOOL.xml tools/rgenetics/rgHaploView.py tools/rgenetics/rgHaploView.xml tools/rgenetics/rgLDIndep.py tools/rgenetics/rgLDIndep.xml tools/rgenetics/rgLDIndep_code.py tools/rgenetics/rgManQQ.py tools/rgenetics/rgManQQ.xml tools/rgenetics/rgManQQ_code.py tools/rgenetics/rgPedSub.py tools/rgenetics/rgPedSub.xml tools/rgenetics/rgQC.py tools/rgenetics/rgQC.xml tools/rgenetics/rgQQ.py tools/rgenetics/rgQQ.xml tools/rgenetics/rgQQ_code.py tools/rgenetics/rgRegion.py tools/rgenetics/rgRegion.xml tools/rgenetics/rgTDT.py tools/rgenetics/rgTDT.xml tools/rgenetics/rgWebLogo3.py tools/rgenetics/rgWebLogo3.xml tools/rgenetics/rgfakePed.py tools/rgenetics/rgfakePed.xml tools/rgenetics/rgfakePhe.py tools/rgenetics/rgfakePhe.xml tools/rgenetics/rgtest.sh tools/rgenetics/rgtest_one_tool.sh tools/rgenetics/rgutils.py tools/rgenetics/rgutils.pyc tools/rgenetics/test tools/rgenetics/test.eps tools/rgenetics/test.pdf tools/rgenetics/test.png tools/samtools/bam_to_sam.py tools/samtools/bam_to_sam.xml tools/samtools/pileup_interval.py 
tools/samtools/pileup_interval.xml tools/samtools/pileup_parser.pl tools/samtools/pileup_parser.xml tools/samtools/sam2interval.py tools/samtools/sam2interval.xml tools/samtools/sam_bitwise_flag_filter.py tools/samtools/sam_bitwise_flag_filter.xml tools/samtools/sam_merge.py tools/samtools/sam_merge.xml tools/samtools/sam_merge_code.py tools/samtools/sam_pileup.py tools/samtools/sam_pileup.xml tools/samtools/sam_to_bam.py tools/samtools/sam_to_bam.xml tools/samtools/samtools_flagstat.xml tools/solid_tools/maq_cs_wrapper.py tools/solid_tools/maq_cs_wrapper.xml tools/solid_tools/maq_cs_wrapper_code.py tools/solid_tools/qualsolid_boxplot_graph.sh tools/solid_tools/solid_qual_boxplot.xml tools/solid_tools/solid_qual_stats.py tools/solid_tools/solid_qual_stats.xml tools/sr_assembly/velvetg.xml tools/sr_assembly/velvetg_wrapper.py tools/sr_assembly/velveth.xml tools/sr_assembly/velveth_wrapper.py tools/sr_mapping/PerM.xml tools/sr_mapping/bfast_wrapper.py tools/sr_mapping/bfast_wrapper.xml tools/sr_mapping/bowtie_color_wrapper.xml tools/sr_mapping/bowtie_wrapper.py tools/sr_mapping/bowtie_wrapper.xml tools/sr_mapping/bwa_color_wrapper.xml tools/sr_mapping/bwa_wrapper.py tools/sr_mapping/bwa_wrapper.xml tools/sr_mapping/fastq_statistics.xml tools/sr_mapping/lastz_paired_reads_wrapper.py tools/sr_mapping/lastz_paired_reads_wrapper.xml tools/sr_mapping/lastz_wrapper.py tools/sr_mapping/lastz_wrapper.xml tools/sr_mapping/mosaik.xml tools/sr_mapping/srma_wrapper.py tools/sr_mapping/srma_wrapper.xml tools/stats/aggregate_binned_scores_in_intervals.xml tools/stats/aggregate_scores_in_intervals.py tools/stats/column_maker.py tools/stats/column_maker.xml tools/stats/cor.py tools/stats/cor.xml tools/stats/correlation.pl tools/stats/correlation.xml tools/stats/count_gff_features.py tools/stats/count_gff_features.xml tools/stats/dna_filtering.py tools/stats/dna_filtering.xml tools/stats/filtering.py tools/stats/filtering.xml tools/stats/generate_matrix_for_pca_lda.pl tools/stats/generate_matrix_for_pca_lda.xml tools/stats/grouping.py tools/stats/grouping.xml tools/stats/gsummary.py tools/stats/gsummary.xml tools/stats/gsummary.xml.groups tools/stats/lda_analy.xml tools/stats/plot_from_lda.xml tools/stats/r_wrapper.sh tools/stats/wiggle_to_simple.py tools/stats/wiggle_to_simple.xml tools/taxonomy/find_diag_hits.py tools/taxonomy/find_diag_hits.xml tools/taxonomy/gi2taxonomy.py tools/taxonomy/gi2taxonomy.xml tools/taxonomy/lca.py tools/taxonomy/lca.xml tools/taxonomy/poisson2test.py tools/taxonomy/poisson2test.xml tools/taxonomy/t2ps_wrapper.py tools/taxonomy/t2ps_wrapper.xml tools/taxonomy/t2t_report.xml tools/tool_conf.xml tools/unix_tools/._awk_tool.xml tools/unix_tools/._awk_wrapper.sh tools/unix_tools/._cut_tool.xml tools/unix_tools/._cut_wrapper.sh tools/unix_tools/._find_and_replace.pl tools/unix_tools/._find_and_replace.xml tools/unix_tools/._grep_tool.xml tools/unix_tools/._grep_wrapper.sh tools/unix_tools/._grep_wrapper_old.sh tools/unix_tools/._join_tool.sh tools/unix_tools/._join_tool.xml tools/unix_tools/._remove_ending.sh tools/unix_tools/._remove_ending.xml tools/unix_tools/._sed_tool.xml tools/unix_tools/._sed_wrapper.sh tools/unix_tools/._sort_tool.xml tools/unix_tools/._uniq_tool.xml tools/unix_tools/._word_list_grep.pl tools/unix_tools/._word_list_grep.xml tools/unix_tools/awk_tool.xml tools/unix_tools/awk_wrapper.sh tools/unix_tools/cut_tool.xml tools/unix_tools/cut_wrapper.sh tools/unix_tools/find_and_replace.pl tools/unix_tools/find_and_replace.xml tools/unix_tools/grep_tool.xml 
tools/unix_tools/grep_wrapper.sh tools/unix_tools/grep_wrapper_old.sh tools/unix_tools/join_tool.sh tools/unix_tools/join_tool.xml tools/unix_tools/remove_ending.sh tools/unix_tools/remove_ending.xml tools/unix_tools/sed_tool.xml tools/unix_tools/sed_wrapper.sh tools/unix_tools/sort_tool.xml tools/unix_tools/uniq_tool.xml tools/unix_tools/word_list_grep.pl tools/unix_tools/word_list_grep.xml tools/validation/fix_errors.py tools/validation/fix_errors.xml tools/validation/fix_errors_code.py tools/validation/validate.py tools/vcf_tools/annotate.py tools/vcf_tools/annotate.xml tools/vcf_tools/bedClass.py tools/vcf_tools/extract.py tools/vcf_tools/extract.xml tools/vcf_tools/filter.py tools/vcf_tools/filter.xml tools/vcf_tools/intersect.py tools/vcf_tools/intersect.xml tools/vcf_tools/tools.py tools/vcf_tools/vcfClass.py tools/vcf_tools/vcfPytools.py tools/visualization/GMAJ.py tools/visualization/GMAJ.xml tools/visualization/LAJ.py tools/visualization/LAJ.xml tools/visualization/LAJ_code.py tools/visualization/build_ucsc_custom_track.py tools/visualization/build_ucsc_custom_track.xml tools/visualization/build_ucsc_custom_track_code.py |
diffstat | 827 files changed, 0 insertions(+), 107829 deletions(-) |
--- a/fimo2.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-<tool id="fimo" name="motif search">
-  <description>using FIMO</description>
-  <command> fimo
-    #if $background_select.bg_select == "fromfile":
-      -bgfile $bgfile
-    #end if
-
-    $norc --max-stored-scores 5000000 --output-pthresh $pth --verbosity 1 $motif $database
-    && mv fimo_out/fimo.html ${html_outfile}
-
-    && mv fimo_out/fimo.txt ${txt_outfile}
-
-    && rm -rf fimo_out
-
-  </command>
-  <inputs>
-
-    <param name="motif" type="data" format="txt" label="Motif file" help="created using the tool create-motif-file, or import from Shared Data"/>
-    <param name="database" type="data" format="fasta" label="Sequence file (FASTA)"/>
-
-    <conditional name="background_select">
-      <param name="bg_select" type="select" label="Background model" >
-        <option value="uniform" selected="true">uniform</option>
-        <option value="fromfile">load from file</option>
-      </param>
-      <when value="fromfile">
-        <param name="bgfile" type="data" format="txt" label="File for background model"/>
-      </when>
-    </conditional>
-
-    <param name="pth" size="10" type="float" value="0.0001" label="p-value threshold"/>
-    <param name="norc" label="Do not score the reverse complement DNA strand. Both strands are scored by default" type="boolean" truevalue="-norc" falsevalue="" checked="False"/>
-  </inputs>
-  <outputs>
-    <data format="html" name="html_outfile" label="${tool.name} on ${on_string} (html)"/>
-    <data format="txt" name="txt_outfile" label="${tool.name} on ${on_string} (txt)"/>
-  </outputs>
-  <help>
-
-**What it does**
-
-This tool uses FIMO to find matches of a motif in a fasta file. See more details:
-
-http://meme.sdsc.edu/meme/fimo-intro.html
-
-  </help>
-</tool>
--- a/tools/annotation_profiler/annotation_profiler.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,136 +0,0 @@
-<tool id="Annotation_Profiler_0" name="Profile Annotations" version="1.0.0">
-  <description>for a set of genomic intervals</description>
-  <command interpreter="python">annotation_profiler_for_interval.py -i $input1 -c ${input1.metadata.chromCol} -s ${input1.metadata.startCol} -e ${input1.metadata.endCol} -o $out_file1 $keep_empty -p ${GALAXY_DATA_INDEX_DIR}/annotation_profiler/$dbkey $summary -b 3 -t $table_names</command>
-  <inputs>
-    <param format="interval" name="input1" type="data" label="Choose Intervals">
-      <validator type="dataset_metadata_in_file" filename="annotation_profiler_valid_builds.txt" metadata_name="dbkey" metadata_column="0" message="Profiling is not currently available for this species."/>
-    </param>
-    <param name="keep_empty" type="select" label="Keep Region/Table Pairs with 0 Coverage">
-      <option value="-k">Keep</option>
-      <option value="" selected="true">Discard</option>
-    </param>
-    <param name="summary" type="select" label="Output per Region/Summary">
-      <option value="-S">Summary</option>
-      <option value="" selected="true">Per Region</option>
-    </param>
-    <param name="table_names" type="drill_down" display="checkbox" hierarchy="recurse" multiple="true" label="Choose Tables to Use" help="Selecting no tables will result in using all tables." from_file="annotation_profiler_options.xml"/>
-  </inputs>
-  <outputs>
-    <data format="input" name="out_file1">
-      <change_format>
-        <when input="summary" value="-S" format="tabular" />
-      </change_format>
-    </data>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="4.bed" dbkey="hg18"/>
-      <param name="keep_empty" value=""/>
-      <param name="summary" value=""/>
-      <param name="table_names" value="acembly,affyGnf1h,knownAlt,knownGene,mrna,multiz17way,multiz28way,refGene,snp126"/>
-      <output name="out_file1" file="annotation_profiler_1.out" />
-    </test>
-    <test>
-      <param name="input1" value="3.bed" dbkey="hg18"/>
-      <param name="keep_empty" value=""/>
-      <param name="summary" value="Summary"/>
-      <param name="table_names" value="acembly,affyGnf1h,knownAlt,knownGene,mrna,multiz17way,multiz28way,refGene,snp126"/>
-      <output name="out_file1" file="annotation_profiler_2.out" />
-    </test>
-  </tests>
-  <help>
-**What it does**
-
-Takes an input set of intervals and for each interval determines the base coverage of the interval by a set of features (tables) available from UCSC. Genomic regions from the input feature data have been merged by overlap / direct adjacency (e.g. a table having ranges of: 1-10, 6-12, 12-20 and 25-28 results in two merged ranges of: 1-20 and 25-28).
-
-By default, this tool will check the coverage of your intervals against all available features; you may, however, choose to select only those tables that you want to include. Selecting a section heading will effectively cause all of its children to be selected.
-
-You may alternatively choose to receive a summary across all of the intervals that you provide.
-
------
-
-**Example**
-
-Using the interval below and selecting several tables::
-
-  chr1 4558 14764 uc001aab.1 0 -
-
-results in::
-
-  chr1 4558 14764 uc001aab.1 0 - snp126Exceptions 151 142
-  chr1 4558 14764 uc001aab.1 0 - genomicSuperDups 10206 1
-  chr1 4558 14764 uc001aab.1 0 - chainOryLat1 3718 1
-  chr1 4558 14764 uc001aab.1 0 - multiz28way 10206 1
-  chr1 4558 14764 uc001aab.1 0 - affyHuEx1 3553 32
-  chr1 4558 14764 uc001aab.1 0 - netXenTro2 3050 1
-  chr1 4558 14764 uc001aab.1 0 - intronEst 10206 1
-  chr1 4558 14764 uc001aab.1 0 - xenoMrna 10203 1
-  chr1 4558 14764 uc001aab.1 0 - ctgPos 10206 1
-  chr1 4558 14764 uc001aab.1 0 - clonePos 10206 1
-  chr1 4558 14764 uc001aab.1 0 - chainStrPur2Link 1323 29
-  chr1 4558 14764 uc001aab.1 0 - affyTxnPhase3HeLaNuclear 9011 8
-  chr1 4558 14764 uc001aab.1 0 - snp126orthoPanTro2RheMac2 61 58
-  chr1 4558 14764 uc001aab.1 0 - snp126 205 192
-  chr1 4558 14764 uc001aab.1 0 - chainEquCab1 10206 1
-  chr1 4558 14764 uc001aab.1 0 - netGalGal3 3686 1
-  chr1 4558 14764 uc001aab.1 0 - phastCons28wayPlacMammal 10172 3
-
-Where::
-
-  The first added column is the table name.
-  The second added column is the number of bases covered by the table.
-  The third added column is the number of regions from the table that is covered by the interval.
-
-Alternatively, requesting a summary, using the intervals below and selecting several tables::
-
-  chr1 4558 14764 uc001aab.1 0 -
-  chr1 4558 19346 uc001aac.1 0 -
-
-results in::
-
-  #tableName tableSize tableRegionCount allIntervalCount allIntervalSize allCoverage allTableRegionsOverlaped allIntervalsOverlapingTable nrIntervalCount nrIntervalSize nrCoverage nrTableRegionsOverlaped nrIntervalsOverlapingTable
-  snp126Exceptions 133601 92469 2 24994 388 359 2 1 14788 237 217 1
-  genomicSuperDups 12268847 657 2 24994 24994 2 2 1 14788 14788 1 1
-  chainOryLat1 70337730 2542 2 24994 7436 2 2 1 14788 3718 1 1
-  affyHuEx1 15703901 112274 2 24994 7846 70 2 1 14788 4293 38 1
-  netXenTro2 111440392 1877 2 24994 6100 2 2 1 14788 3050 1 1
-  snp126orthoPanTro2RheMac2 700436 690674 2 24994 124 118 2 1 14788 63 60 1
-  intronEst 135796064 2332 2 24994 24994 2 2 1 14788 14788 1 1
-  xenoMrna 129031327 1586 2 24994 20406 2 2 1 14788 10203 1 1
-  snp126 956976 838091 2 24994 498 461 2 1 14788 293 269 1
-  clonePos 224999719 39 2 24994 24994 2 2 1 14788 14788 1 1
-  chainStrPur2Link 7948016 119841 2 24994 2646 58 2 1 14788 1323 29 1
-  affyTxnPhase3HeLaNuclear 136797870 140244 2 24994 22601 17 2 1 14788 13590 9 1
-  multiz28way 225928588 38 2 24994 24994 2 2 1 14788 14788 1 1
-  ctgPos 224999719 39 2 24994 24994 2 2 1 14788 14788 1 1
-  chainEquCab1 246306414 141 2 24994 24994 2 2 1 14788 14788 1 1
-  netGalGal3 203351973 461 2 24994 7372 2 2 1 14788 3686 1 1
-  phastCons28wayPlacMammal 221017670 22803 2 24994 24926 6 2 1 14788 14754 3 1
-
-Where::
-
-  tableName is the name of the table
-  tableChromosomeCoverage is the number of positions existing in the table for only the chromosomes that were referenced by the interval file
-  tableChromosomeCount is the number of regions existing in the table for only the chromosomes that were referenced by the interval file
-  tableRegionCoverage is the number of positions existing in the table between the minimal and maximal bounding regions that were referenced by the interval file
-  tableRegionCount is the number of regions existing in the table between the minimal and maximal bounding regions that were referenced by the interval file
-
-  allIntervalCount is the number of provided intervals
-  allIntervalSize is the sum of the lengths of the provided interval file
-  allCoverage is the sum of the coverage for each provided interval
-  allTableRegionsOverlapped is the sum of the number of regions of the table (non-unique) that were overlapped for each interval
-  allIntervalsOverlappingTable is the number of provided intervals which overlap the table
-
-  nrIntervalCount is the number of non-redundant intervals
-  nrIntervalSize is the sum of the lengths of non-redundant intervals
-  nrCoverage is the sum of the coverage of non-redundant intervals
-  nrTableRegionsOverlapped is the number of regions of the table (unique) that were overlapped by the non-redundant intervals
-  nrIntervalsOverlappingTable is the number of non-redundant intervals which overlap the table
-
-
-.. class:: infomark
-
-**TIP:** non-redundant (nr) refers to the set of intervals that remains after the intervals provided have been merged to resolve overlaps
-
-  </help>
-</tool>
--- a/tools/annotation_profiler/annotation_profiler_for_interval.py Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,360 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-#For a set of intervals, this tool returns the same set of intervals
-#with 2 additional fields: the name of a Table/Feature and the number of
-#bases covered. The original intervals are repeated for each Table/Feature.
-
-import sys, struct, optparse, os, random
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-import bx.intervals.io
-import bx.bitset
-try:
-    import psyco
-    psyco.full()
-except:
-    pass
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-class CachedRangesInFile:
-    DEFAULT_STRUCT_FORMAT = '<I'
-    def __init__( self, filename, profiler_info ):
-        self.file_size = os.stat( filename ).st_size
-        self.file = open( filename, 'rb' )
-        self.filename = filename
-        self.fmt = profiler_info.get( 'profiler_struct_format', self.DEFAULT_STRUCT_FORMAT )
-        self.fmt_size = int( profiler_info.get( 'profiler_struct_size', struct.calcsize( self.fmt ) ) )
-        self.length = int( self.file_size / self.fmt_size / 2 )
-        self._cached_ranges = [ None for i in xrange( self.length ) ]
-    def __getitem__( self, i ):
-        if self._cached_ranges[i] is not None:
-            return self._cached_ranges[i]
-        if i < 0: i = self.length + i
-        offset = i * self.fmt_size * 2
-        self.file.seek( offset )
-        try:
-            start = struct.unpack( self.fmt, self.file.read( self.fmt_size ) )[0]
-            end = struct.unpack( self.fmt, self.file.read( self.fmt_size ) )[0]
-        except Exception, e:
-            raise IndexError, e
-        self._cached_ranges[i] = ( start, end )
-        return start, end
-    def __len__( self ):
-        return self.length
-
-class RegionCoverage:
-    def __init__( self, filename_base, profiler_info ):
-        try:
-            self._coverage = CachedRangesInFile( "%s.covered" % filename_base, profiler_info )
-        except Exception, e:
-            #print "Error loading coverage file %s: %s" % ( "%s.covered" % filename_base, e )
-            self._coverage = []
-        try:
-            self._total_coverage = int( open( "%s.total_coverage" % filename_base ).read() )
-        except Exception, e:
-            #print "Error loading total coverage file %s: %s" % ( "%s.total_coverage" % filename_base, e )
-            self._total_coverage = 0
-    def get_start_index( self, start ):
-        #binary search: returns index of range closest to start
-        if start > self._coverage[-1][1]:
-            return len( self._coverage ) - 1
-        i = 0
-        j = len( self._coverage) - 1
-        while i < j:
-            k = ( i + j ) / 2
-            if start <= self._coverage[k][1]:
-                j = k
-            else:
-                i = k + 1
-        return i
-    def get_coverage( self, start, end ):
-        return self.get_coverage_regions_overlap( start, end )[0]
-    def get_coverage_regions_overlap( self, start, end ):
-        return self.get_coverage_regions_index_overlap( start, end )[0:2]
-    def get_coverage_regions_index_overlap( self, start, end ):
-        if len( self._coverage ) < 1 or start > self._coverage[-1][1] or end < self._coverage[0][0]:
-            return 0, 0, 0
-        if self._total_coverage and start <= self._coverage[0][0] and end >= self._coverage[-1][1]:
-            return self._total_coverage, len( self._coverage ), 0
-        coverage = 0
-        region_count = 0
-        start_index = self.get_start_index( start )
-        for i in xrange( start_index, len( self._coverage ) ):
-            c_start, c_end = self._coverage[i]
-            if c_start > end:
-                break
-            if c_start <= end and c_end >= start:
-                coverage += min( end, c_end ) - max( start, c_start )
-                region_count += 1
-        return coverage, region_count, start_index
-
-class CachedCoverageReader:
-    def __init__( self, base_file_path, buffer = 10, table_names = None, profiler_info = None ):
-        self._base_file_path = base_file_path
-        self._buffer = buffer #number of chromosomes to keep in memory at a time
-        self._coverage = {}
-        if table_names is None: table_names = [ table_dir for table_dir in os.listdir( self._base_file_path ) if os.path.isdir( os.path.join( self._base_file_path, table_dir ) ) ]
-        for tablename in table_names: self._coverage[tablename] = {}
-        if profiler_info is None: profiler_info = {}
-        self._profiler_info = profiler_info
-    def iter_table_coverage_by_region( self, chrom, start, end ):
-        for tablename, coverage, regions in self.iter_table_coverage_regions_by_region( chrom, start, end ):
-            yield tablename, coverage
-    def iter_table_coverage_regions_by_region( self, chrom, start, end ):
-        for tablename, coverage, regions, index in self.iter_table_coverage_regions_index_by_region( chrom, start, end ):
-            yield tablename, coverage, regions
-    def iter_table_coverage_regions_index_by_region( self, chrom, start, end ):
-        for tablename, chromosomes in self._coverage.iteritems():
-            if chrom not in chromosomes:
-                if len( chromosomes ) >= self._buffer:
-                    #randomly remove one chromosome from this table
-                    del chromosomes[ chromosomes.keys().pop( random.randint( 0, self._buffer - 1 ) ) ]
-                chromosomes[chrom] = RegionCoverage( os.path.join ( self._base_file_path, tablename, chrom ), self._profiler_info )
-            coverage, regions, index = chromosomes[chrom].get_coverage_regions_index_overlap( start, end )
-            yield tablename, coverage, regions, index
-
-class TableCoverageSummary:
-    def __init__( self, coverage_reader, chrom_lengths ):
-        self.coverage_reader = coverage_reader
-        self.chrom_lengths = chrom_lengths
-        self.chromosome_coverage = {} #dict of bitset by chromosome holding user's collapsed input intervals
-        self.total_interval_size = 0 #total size of user's input intervals
-        self.total_interval_count = 0 #total number of user's input intervals
-        self.table_coverage = {} #dict of total coverage by user's input intervals by table
-        self.table_chromosome_size = {} #dict of dict of table:chrom containing total coverage of table for a chrom
-        self.table_chromosome_count = {} #dict of dict of table:chrom containing total number of coverage ranges of table for a chrom
-        self.table_regions_overlaped_count = {} #total number of table regions overlaping user's input intervals (non unique)
-        self.interval_table_overlap_count = {} #total number of user input intervals which overlap table
-        self.region_size_errors = {} #dictionary of lists of invalid ranges by chromosome
-    def add_region( self, chrom, start, end ):
-        chrom_length = self.chrom_lengths.get( chrom )
-        region_start = min( start, chrom_length )
-        region_end = min( end, chrom_length )
-        region_length = region_end - region_start
-
-        if region_length < 1 or region_start != start or region_end != end:
-            if chrom not in self.region_size_errors:
-                self.region_size_errors[chrom] = []
-            self.region_size_errors[chrom].append( ( start, end ) )
-            if region_length < 1: return
-
-        self.total_interval_size += region_length
-        self.total_interval_count += 1
-        if chrom not in self.chromosome_coverage:
-            self.chromosome_coverage[chrom] = bx.bitset.BitSet( chrom_length )
-
-        self.chromosome_coverage[chrom].set_range( region_start, region_length )
-        for table_name, coverage, regions in self.coverage_reader.iter_table_coverage_regions_by_region( chrom, region_start, region_end ):
-            if table_name not in self.table_coverage:
-                self.table_coverage[table_name] = 0
-                self.table_chromosome_size[table_name] = {}
-                self.table_regions_overlaped_count[table_name] = 0
-                self.interval_table_overlap_count[table_name] = 0
-                self.table_chromosome_count[table_name] = {}
-            if chrom not in self.table_chromosome_size[table_name]:
-                self.table_chromosome_size[table_name][chrom] = self.coverage_reader._coverage[table_name][chrom]._total_coverage
-                self.table_chromosome_count[table_name][chrom] = len( self.coverage_reader._coverage[table_name][chrom]._coverage )
-            self.table_coverage[table_name] += coverage
-            if coverage:
-                self.interval_table_overlap_count[table_name] += 1
-            self.table_regions_overlaped_count[table_name] += regions
-    def iter_table_coverage( self ):
-        def get_nr_coverage():
-            #returns non-redundant coverage, where user's input intervals have been collapse to resolve overlaps
-            table_coverage = {} #dictionary of tables containing number of table bases overlaped by nr intervals
-            interval_table_overlap_count = {} #dictionary of tables containing number of nr intervals overlaping table
-            table_regions_overlap_count = {} #dictionary of tables containing number of regions overlaped (unique)
-            interval_count = 0 #total number of nr intervals
-            interval_size = 0 #holds total size of nr intervals
-            region_start_end = {} #holds absolute start,end for each user input chromosome
-            for chrom, chromosome_bitset in self.chromosome_coverage.iteritems():
-                #loop through user's collapsed input intervals
-                end = 0
-                last_end_index = {}
-                interval_size += chromosome_bitset.count_range()
-                while True:
-                    if end >= chromosome_bitset.size: break
-                    start = chromosome_bitset.next_set( end )
-                    if start >= chromosome_bitset.size: break
-                    end = chromosome_bitset.next_clear( start )
-                    interval_count += 1
-                    if chrom not in region_start_end:
-                        region_start_end[chrom] = [start, end]
-                    else:
-                        region_start_end[chrom][1] = end
-                    for table_name, coverage, region_count, start_index in self.coverage_reader.iter_table_coverage_regions_index_by_region( chrom, start, end ):
-                        if table_name not in table_coverage:
-                            table_coverage[table_name] = 0
-                            interval_table_overlap_count[table_name] = 0
-                            table_regions_overlap_count[table_name] = 0
-                        table_coverage[table_name] += coverage
-                        if coverage:
-                            interval_table_overlap_count[table_name] += 1
-                            table_regions_overlap_count[table_name] += region_count
-                            if table_name in last_end_index and last_end_index[table_name] == start_index:
-                                table_regions_overlap_count[table_name] -= 1
-                            last_end_index[table_name] = start_index + region_count - 1
-            table_region_coverage = {} #total coverage for tables by bounding nr interval region
-            table_region_count = {} #total number for tables by bounding nr interval region
-            for chrom, start_end in region_start_end.items():
-                for table_name, coverage, region_count in self.coverage_reader.iter_table_coverage_regions_by_region( chrom, start_end[0], start_end[1] ):
-                    if table_name not in table_region_coverage:
-                        table_region_coverage[table_name] = 0
-                        table_region_count[table_name] = 0
-                    table_region_coverage[table_name] += coverage
-                    table_region_count[table_name] += region_count
-            return table_region_coverage, table_region_count, interval_count, interval_size, table_coverage, table_regions_overlap_count, interval_table_overlap_count
-        table_region_coverage, table_region_count, nr_interval_count, nr_interval_size, nr_table_coverage, nr_table_regions_overlap_count, nr_interval_table_overlap_count = get_nr_coverage()
-        for table_name in self.table_coverage:
-            #TODO: determine a type of statistic, then calculate and report here
-            yield table_name, sum( self.table_chromosome_size.get( table_name, {} ).values() ), sum( self.table_chromosome_count.get( table_name, {} ).values() ), table_region_coverage.get( table_name, 0 ), table_region_count.get( table_name, 0 ), self.total_interval_count, self.total_interval_size, self.table_coverage[table_name], self.table_regions_overlaped_count.get( table_name, 0), self.interval_table_overlap_count.get( table_name, 0 ), nr_interval_count, nr_interval_size, nr_table_coverage[table_name], nr_table_regions_overlap_count.get( table_name, 0 ), nr_interval_table_overlap_count.get( table_name, 0 )
-
-def profile_per_interval( interval_filename, chrom_col, start_col, end_col, out_filename, keep_empty, coverage_reader ):
-    out = open( out_filename, 'wb' )
-    for region in bx.intervals.io.NiceReaderWrapper( open( interval_filename, 'rb' ), chrom_col = chrom_col, start_col = start_col, end_col = end_col, fix_strand = True, return_header = False, return_comments = False ):
-        for table_name, coverage, region_count in coverage_reader.iter_table_coverage_regions_by_region( region.chrom, region.start, region.end ):
-            if keep_empty or coverage:
-                #only output regions that have atleast 1 base covered unless empty are requested
-                out.write( "%s\t%s\t%s\t%s\n" % ( "\t".join( region.fields ), table_name, coverage, region_count ) )
-    out.close()
-
-def profile_summary( interval_filename, chrom_col, start_col, end_col, out_filename, keep_empty, coverage_reader, chrom_lengths ):
-    out = open( out_filename, 'wb' )
-    table_coverage_summary = TableCoverageSummary( coverage_reader, chrom_lengths )
-    for region in bx.intervals.io.NiceReaderWrapper( open( interval_filename, 'rb' ), chrom_col = chrom_col, start_col = start_col, end_col = end_col, fix_strand = True, return_header = False, return_comments = False ):
-        table_coverage_summary.add_region( region.chrom, region.start, region.end )
-
-    out.write( "#tableName\ttableChromosomeCoverage\ttableChromosomeCount\ttableRegionCoverage\ttableRegionCount\tallIntervalCount\tallIntervalSize\tallCoverage\tallTableRegionsOverlaped\tallIntervalsOverlapingTable\tnrIntervalCount\tnrIntervalSize\tnrCoverage\tnrTableRegionsOverlaped\tnrIntervalsOverlapingTable\n" )
-    for table_name, table_chromosome_size, table_chromosome_count, table_region_coverage, table_region_count, total_interval_count, total_interval_size, total_coverage, table_regions_overlaped_count, interval_region_overlap_count, nr_interval_count, nr_interval_size, nr_coverage, nr_table_regions_overlaped_count, nr_interval_table_overlap_count in table_coverage_summary.iter_table_coverage():
-        if keep_empty or total_coverage:
-            #only output tables that have atleast 1 base covered unless empty are requested
-            out.write( "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ( table_name, table_chromosome_size, table_chromosome_count, table_region_coverage, table_region_count, total_interval_count, total_interval_size, total_coverage, table_regions_overlaped_count, interval_region_overlap_count, nr_interval_count, nr_interval_size, nr_coverage, nr_table_regions_overlaped_count, nr_interval_table_overlap_count ) )
-    out.close()
-
-    #report chrom size errors as needed:
-    if table_coverage_summary.region_size_errors:
-        print "Regions provided extended beyond known chromosome lengths, and have been truncated as necessary, for the following intervals:"
-        for chrom, regions in table_coverage_summary.region_size_errors.items():
-            if len( regions ) > 3:
-                extra_region_info = ", ... "
-            else:
-                extra_region_info = ""
-            print "%s has max length of %s, exceeded by %s%s." % ( chrom, chrom_lengths.get( chrom ), ", ".join( map( str, regions[:3] ) ), extra_region_info )
-
-class ChromosomeLengths:
-    def __init__( self, profiler_info ):
-        self.chroms = {}
-        self.default_bitset_size = int( profiler_info.get( 'bitset_size', bx.bitset.MAX ) )
-        chroms = profiler_info.get( 'chromosomes', None )
-        if chroms:
-            for chrom in chroms.split( ',' ):
-                for fields in chrom.rsplit( '=', 1 ):
-                    if len( fields ) == 2:
-                        self.chroms[ fields[0] ] = int( fields[1] )
-                    else:
-                        self.chroms[ fields[0] ] = self.default_bitset_size
-    def get( self, name ):
-        return self.chroms.get( name, self.default_bitset_size )
-
-def parse_profiler_info( filename ):
-    profiler_info = {}
-    try:
-        for line in open( filename ):
-            fields = line.rstrip( '\n\r' ).split( '\t', 1 )
-            if len( fields ) == 2:
-                if fields[0] in profiler_info:
-                    if not isinstance( profiler_info[ fields[0] ], list ):
-                        profiler_info[ fields[0] ] = [ profiler_info[ fields[0] ] ]
-                    profiler_info[ fields[0] ].append( fields[1] )
-                else:
-                    profiler_info[ fields[0] ] = fields[1]
-    except:
-        pass #likely missing file
-    return profiler_info
-
-def __main__():
-    parser = optparse.OptionParser()
-    parser.add_option(
-        '-k','--keep_empty',
-        action="store_true",
-        dest='keep_empty',
-        default=False,
-        help='Keep tables with 0 coverage'
-    )
-    parser.add_option(
-        '-b','--buffer',
-        dest='buffer',
-        type='int',default=10,
-        help='Number of Chromosomes to keep buffered'
-    )
-    parser.add_option(
-        '-c','--chrom_col',
-        dest='chrom_col',
-        type='int',default=1,
-        help='Chromosome column'
-    )
-    parser.add_option(
-        '-s','--start_col',
-        dest='start_col',
-        type='int',default=2,
-        help='Start Column'
-    )
-    parser.add_option(
-        '-e','--end_col',
-        dest='end_col',
-        type='int',default=3,
-        help='End Column'
-    )
-    parser.add_option(
-        '-p','--path',
-        dest='path',
-        type='str',default='/galaxy/data/annotation_profiler/hg18',
-        help='Path to profiled data for this organism'
-    )
-    parser.add_option(
-        '-t','--table_names',
-        dest='table_names',
-        type='str',default='None',
-        help='Table names requested'
-    )
-    parser.add_option(
-        '-i','--input',
-        dest='interval_filename',
-        type='str',
-        help='Input Interval File'
-    )
-    parser.add_option(
-        '-o','--output',
-        dest='out_filename',
-        type='str',
-        help='Input Interval File'
-    )
-    parser.add_option(
-        '-S','--summary',
-        action="store_true",
-        dest='summary',
-        default=False,
-        help='Display Summary Results'
-    )
-
-    options, args = parser.parse_args()
-
-    assert os.path.isdir( options.path ), IOError( "Configuration error: Table directory is missing (%s)" % options.path )
-
-    #get profiler_info
-    profiler_info = parse_profiler_info( os.path.join( options.path, 'profiler_info.txt' ) )
-
-    table_names = options.table_names.split( "," )
-    if table_names == ['None']: table_names = None
-    coverage_reader = CachedCoverageReader( options.path, buffer = options.buffer, table_names = table_names, profiler_info = profiler_info )
-
-    if options.summary:
-        profile_summary( options.interval_filename, options.chrom_col - 1, options.start_col - 1, options.end_col -1, options.out_filename, options.keep_empty, coverage_reader, ChromosomeLengths( profiler_info ) )
-    else:
-        profile_per_interval( options.interval_filename, options.chrom_col - 1, options.start_col - 1, options.end_col -1, options.out_filename, options.keep_empty, coverage_reader )
-
-    #print out data version info
-    print 'Data version (%s:%s:%s)' % ( profiler_info.get( 'dbkey', 'unknown' ), profiler_info.get( 'profiler_hash', 'unknown' ), profiler_info.get( 'dump_time', 'unknown' ) )
-
-if __name__ == "__main__": __main__()
--- a/tools/bedtools/bedToBam.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-<tool id="bedToBam" name="bedToBam">
-    <description>convert BED or GFF or VCF to BAM</description>
-    <command>bedToBam -i $input -g $genome $bed12 -mapq $mapq $ubam > $outfile </command>
-    <inputs>
-        <param name="input" format="bed,gff,vcf" type="data" label="Input file (BED,GFF,VCF)" help="BED files must be at least BED4 to be amenable to BAM (needs name field)"/>
-        <param name="genome" type="select" label="Select genome">
-            <option value="/Users/xuebing/tools/BEDTools-Version-2.13.3/genomes/mouse.mm9.genome" selected="true">mm9</option>
-            <option value="/Users/xuebing/tools/BEDTools-Version-2.13.3/genomes/mouse.mm8.genome">mm8</option>
-            <option value="/Users/xuebing/tools/BEDTools-Version-2.13.3/genomes/mouse.hg18.genome">hg18</option>
-            <option value="/Users/xuebing/tools/BEDTools-Version-2.13.3/genomes/mouse.hg19.genome">hg19</option>
-        </param>
-        <param name="mapq" size="10" type="integer" value="255" label="Set the mapping quality for the BAM records"/>
-        <param name="bed12" label="The BED file is in BED12 format" help="The BAM CIGAR string will reflect BED blocks" type="boolean" truevalue="-bed12" falsevalue="" checked="False"/>
-        <param name="ubam" label="Write uncompressed BAM output" help="Default is to write compressed BAM" type="boolean" truevalue="-ubam" falsevalue="" checked="False"/>
-    </inputs>
-    <outputs>
-        <data format="bam" name="outfile" />
-    </outputs>
-    <help>
-
-**What it does**
-
-Program: bedToBam (v2.13.3)
-Author: Aaron Quinlan (aaronquinlan@gmail.com)
-Summary: Converts feature records to BAM format.
-
-
-    </help>
-</tool>
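The <command> line above is a Cheetah template that Galaxy fills in before shelling out. A hedged sketch of one expanded call, assuming the mm9 genome option is selected; input.bed and output.bam stand in for Galaxy's dataset paths:

    import subprocess
    # Illustrative expansion of the <command> template; bedToBam's -mapq flag
    # takes the integer value of the mapq parameter.
    cmd = ( "bedToBam -i input.bed "
            "-g /Users/xuebing/tools/BEDTools-Version-2.13.3/genomes/mouse.mm9.genome "
            "-mapq 255 > output.bam" )
    subprocess.call( cmd, shell=True )  # shell=True because the template relies on '>' redirection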
--- a/tools/data_destination/epigraph.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -<?xml version="1.0"?> -<tool name="Perform genome analysis" id="epigraph_export"> - <description> and prediction with EpiGRAPH</description> - <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params> - <inputs> - <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH"> - <validator type="unspecified_build" /> - </param> - <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/DataImport.jsp" /> - <param name="DATA_URL" type="baseurl" value="/datasets" /> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <outputs/> - <help> - -.. class:: warningmark - -After clicking the **Execute** button, you will be redirected to the EpiGRAPH website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance. - ------ - -.. class:: infomark - -**What it does** - -This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods. - ------ - -.. class:: infomark - -**EpiGRAPH outline** - -The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties. - -.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/ - - </help> -</tool> - -
--- a/tools/data_destination/epigraph_test.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ -<?xml version="1.0"?> -<tool name="Perform genome analysis" id="epigraph_test_export"> - <description> and prediction with EpiGRAPH Test</description> - <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params> - <inputs> - <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH"> - <validator type="unspecified_build" /> - </param> - <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/DataImport.jsp" /> - <param name="DATA_URL" type="baseurl" value="/datasets" /> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <outputs/> - <help> - -.. class:: warningmark - -After clicking the **Execute** button, you will be redirected to the EpiGRAPH test website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance. - ------ - -.. class:: infomark - -**What it does** - -This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods. - ------ - -.. class:: infomark - -**EpiGRAPH outline** - -The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties. - -.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/ - - </help> -</tool> -
--- a/tools/data_source/access_libraries.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-<?xml version="1.0"?>
-<tool name="Access Libraries" id="library_access1">
-    <description>stored locally</description>
-    <inputs action="/library/index" method="get" target="_parent">
-        <param name="default_action" type="hidden" value="import_to_histories" />
-    </inputs>
-    <uihints minwidth="800"/>
-</tool>
--- a/tools/data_source/bed_convert.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-<tool id="BED File Converter1" name="BED File Converter">
-    <description>creates a bed or xbed file from a text query</description>
-    <command>noop</command>
-    <inputs>
-        <display>creates a bed or xbed file containing user assigned input of $input</display>
-        <param format="tabular" name="input" type="data" />
-        <param name="chrom" size="4" type="text" value="all" />
-    </inputs>
-    <outputs>
-        <data format="bed" name="out_file1" />
-    </outputs>
-    <help>User specifies delimiter, header information, and column assignments and the file will be converted to BED or xBED.
-</help>
-</tool>
\ No newline at end of file
--- a/tools/data_source/biomart.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. - - TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile - everything including and beyond the first '&' is truncated from URL. They said they'll let us know when this is fixed at their end. ---> -<tool name="BioMart" id="biomart" tool_type="data_source" version="1.0.1"> - <description>Central server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.biomart.org/biomart/martview" check_values="false" method="get" target="_top"> - <display>go to BioMart Central $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL" remote_name="URL" missing=""> - <append_param separator="&" first_separator="?" join="="> - <value name="_export" missing="1" /> - <value name="GALAXY_URL" missing="0" /> - </append_param> - </request_param> - <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" > - <value_translation> - <value galaxy_value="tabular" remote_value="TSV" /> - </value_translation> - </request_param> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="Biomart query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="tabular" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
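For reference, the append_param block above is what turns the bare martview URL BioMart sends back into an export request. A rough Python equivalent of that translation (hypothetical helper, not Galaxy's implementation):

    def append_params( url, values, first_separator='?', separator='&', join='=' ):
        # Approximates <append_param>: first_separator when the URL has no
        # query string yet, separator between subsequent pairs.
        sep = separator if '?' in url else first_separator
        return url + sep + separator.join( join.join( pair ) for pair in values )

    print( append_params( 'http://www.biomart.org/biomart/martview',
                          [ ( '_export', '1' ), ( 'GALAXY_URL', '0' ) ] ) )
    # -> http://www.biomart.org/biomart/martview?_export=1&GALAXY_URL=0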
--- a/tools/data_source/biomart_test.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. - - TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile - everything including and beyond the first '&' is truncated from URL. They said they'll let us know when this is fixed at their end. ---> -<tool name="BioMart" id="biomart_test" tool_type="data_source" version="1.0.1"> - <description>Test server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://test.biomart.org/biomart/martview" check_values="false" method="get" target="_top"> - <display>go to BioMart Central $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL" remote_name="URL" missing=""> - <append_param separator="&" first_separator="?" join="="> - <value name="_export" missing="1" /> - <value name="GALAXY_URL" missing="0" /> - </append_param> - </request_param> - <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" > - <value_translation> - <value galaxy_value="tabular" remote_value="TSV" /> - </value_translation> - </request_param> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="Biomart test query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="tabular" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
--- a/tools/data_source/bx_browser.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="BX main" id="bx_browser" tool_type="data_source"> - <description>browser</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://main.genome-browser.bx.psu.edu/cgi-bin/hgTables" check_values="false" method="get"> - <display>go to BX Browser $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner" /> - <param name="tool_id" type="hidden" value="bx_browser" /> - <param name="sendToGalaxy" type="hidden" value="1" /> - <param name="hgta_compressType" type="hidden" value="none" /> - <param name="hgta_outputType" type="hidden" value="bed" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> - <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" /> - <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" /> - <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="tabular" > - <value_translation> - <value galaxy_value="tabular" remote_value="primaryTable" /> - <value galaxy_value="tabular" remote_value="selectedFields" /> - <value galaxy_value="wig" remote_value="wigData" /> - <value galaxy_value="interval" remote_value="tab" /> - <value galaxy_value="html" remote_value="hyperlinks" /> - <value galaxy_value="fasta" remote_value="sequence" /> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="tabular" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
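The value_translation blocks above are plain remote-to-Galaxy lookups with a fallback. Sketched in Python, with the dictionary copied from the hgta_outputType translation in bx_browser.xml and the fallback playing the role of missing="tabular" (function name made up):

    UCSC_TO_GALAXY = {
        'primaryTable': 'tabular',
        'selectedFields': 'tabular',
        'wigData': 'wig',
        'tab': 'interval',
        'hyperlinks': 'html',
        'sequence': 'fasta',
    }

    def translate_data_type( remote_value, missing='tabular' ):
        return UCSC_TO_GALAXY.get( remote_value, missing )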
--- a/tools/data_source/cbi_rice_mart.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="CBI Rice Mart" id="cbi_rice_mart" tool_type="data_source" version="1.0.1"> - <description>rice mart</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://ricemart.cbi.edu.cn/biomart/martview/" check_values="false" method="get" target="_top"> - <display>go to RMap rice mart $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL" remote_name="URL" missing=""> - <append_param separator="&" first_separator="?" join="="> - <value name="_export" missing="1" /> - <value name="GALAXY_URL" missing="0" /> - </append_param> - </request_param> - <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" > - <value_translation> - <value galaxy_value="tabular" remote_value="TSV" /> - </value_translation> - </request_param> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="Rice mart query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="tabular" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
--- a/tools/data_source/data_source.py Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,110 +0,0 @@
-#!/usr/bin/env python
-# Retrieves data from external data source applications and stores in a dataset file.
-# Data source application parameters are temporarily stored in the dataset file.
-import socket, urllib, sys, os
-from galaxy import eggs #eggs needs to be imported so that galaxy.util can find docutils egg...
-from galaxy.util.json import from_json_string, to_json_string
-import galaxy.model # need to import model before sniff to resolve a circular import dependency
-from galaxy.datatypes import sniff
-from galaxy.datatypes.registry import Registry
-from galaxy.jobs import TOOL_PROVIDED_JOB_METADATA_FILE
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-GALAXY_PARAM_PREFIX = 'GALAXY'
-GALAXY_ROOT_DIR = os.path.realpath( os.path.join( os.path.split( os.path.realpath( __file__ ) )[0], '..', '..' ) )
-GALAXY_DATATYPES_CONF_FILE = os.path.join( GALAXY_ROOT_DIR, 'datatypes_conf.xml' )
-
-def load_input_parameters( filename, erase_file = True ):
-    datasource_params = {}
-    try:
-        json_params = from_json_string( open( filename, 'r' ).read() )
-        datasource_params = json_params.get( 'param_dict' )
-    except:
-        json_params = None
-        for line in open( filename, 'r' ):
-            try:
-                line = line.strip()
-                fields = line.split( '\t' )
-                datasource_params[ fields[0] ] = fields[1]
-            except:
-                continue
-    if erase_file:
-        open( filename, 'w' ).close() #open file for writing, then close, removes params from file
-    return json_params, datasource_params
-
-def __main__():
-    filename = sys.argv[1]
-    try:
-        max_file_size = int( sys.argv[2] )
-    except:
-        max_file_size = 0
-
-    job_params, params = load_input_parameters( filename )
-    if job_params is None: #using an older tabular file
-        enhanced_handling = False
-        job_params = dict( param_dict = params )
-        job_params[ 'output_data' ] = [ dict( out_data_name = 'output',
-                                              ext = 'data',
-                                              file_name = filename,
-                                              extra_files_path = None ) ]
-        job_params[ 'job_config' ] = dict( GALAXY_ROOT_DIR=GALAXY_ROOT_DIR, GALAXY_DATATYPES_CONF_FILE=GALAXY_DATATYPES_CONF_FILE, TOOL_PROVIDED_JOB_METADATA_FILE = TOOL_PROVIDED_JOB_METADATA_FILE )
-    else:
-        enhanced_handling = True
-        json_file = open( job_params[ 'job_config' ][ 'TOOL_PROVIDED_JOB_METADATA_FILE' ], 'w' ) #specially named file for output junk to pass onto set metadata
-
-    datatypes_registry = Registry( root_dir = job_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = job_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )
-
-    URL = params.get( 'URL', None ) #using exactly URL indicates that only one dataset is being downloaded
-    URL_method = params.get( 'URL_method', None )
-
-    # The Python support for fetching resources from the web is layered. urllib uses the httplib
-    # library, which in turn uses the socket library. As of Python 2.3 you can specify how long
-    # a socket should wait for a response before timing out. By default the socket module has no
-    # timeout and can hang. Currently, the socket timeout is not exposed at the httplib or urllib2
-    # levels. However, you can set the default timeout ( in seconds ) globally for all sockets by
-    # doing the following.
-    socket.setdefaulttimeout( 600 )
-
-    for data_dict in job_params[ 'output_data' ]:
-        cur_filename = data_dict.get( 'file_name', filename )
-        cur_URL = params.get( '%s|%s|URL' % ( GALAXY_PARAM_PREFIX, data_dict[ 'out_data_name' ] ), URL )
-        if not cur_URL:
-            open( cur_filename, 'w' ).write( "" )
-            stop_err( 'The remote data source application has not sent back a URL parameter in the request.' )
-
-        # The following calls to urllib.urlopen() will use the above default timeout
-        try:
-            if not URL_method or URL_method == 'get':
-                page = urllib.urlopen( cur_URL )
-            elif URL_method == 'post':
-                page = urllib.urlopen( cur_URL, urllib.urlencode( params ) )
-        except Exception, e:
-            stop_err( 'The remote data source application may be off line, please try again later. Error: %s' % str( e ) )
-        if max_file_size:
-            file_size = int( page.info().get( 'Content-Length', 0 ) )
-            if file_size > max_file_size:
-                stop_err( 'The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.' % ( file_size, max_file_size ) )
-        #do sniff stream for multi_byte
-        try:
-            cur_filename, is_multi_byte = sniff.stream_to_open_named_file( page, os.open( cur_filename, os.O_WRONLY | os.O_CREAT ), cur_filename )
-        except Exception, e:
-            stop_err( 'Unable to fetch %s:\n%s' % ( cur_URL, e ) )
-
-        #here import checks that upload tool performs
-        if enhanced_handling:
-            try:
-                ext = sniff.handle_uploaded_dataset_file( filename, datatypes_registry, ext = data_dict[ 'ext' ], is_multi_byte = is_multi_byte )
-            except Exception, e:
-                stop_err( str( e ) )
-            info = dict( type = 'dataset',
-                         dataset_id = data_dict[ 'dataset_id' ],
-                         ext = ext)
-
-            json_file.write( "%s\n" % to_json_string( info ) )
-
-if __name__ == "__main__": __main__()
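load_input_parameters() above first tries the dataset file as JSON with a top-level param_dict, and only falls back to the older one-name<TAB>value-per-line layout. A minimal sketch of the newer layout (stdlib json standing in for galaxy.util.json; the parameter values are illustrative):

    import json
    # Hypothetical params as a data source tool might hand them to data_source.py;
    # real enhanced requests also carry 'output_data' and 'job_config' entries,
    # which the script reads when the JSON parse succeeds.
    job_params = { 'param_dict': { 'URL': 'http://example.org/regions.bed',
                                   'URL_method': 'get' } }
    with open( 'dataset.dat', 'w' ) as f:
        f.write( json.dumps( job_params ) )
    # data_source.py reads this file, pulls out param_dict, truncates the file,
    # then streams the downloaded data into it.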
--- a/tools/data_source/echo.py Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Script that just echoes the command line.
-"""
-
-import sys
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-print '-' * 20, "<br>"
-for elem in sys.argv:
-    print elem, "<br>"
-print '-' * 20, "<br>"
\ No newline at end of file
--- a/tools/data_source/echo.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -<?xml version="1.0"?> - -<tool name="Echo" id="echo1"> - - <description> - echoes parameters - </description> - - <command interpreter="python">echo.py $input $database $output </command> - - <inputs> - <param format="tabular" name="input" type="data" label="Input stuff"/> - <param type="select" name="database" label="Database"> - <option value="alignseq.loc">Human (hg18)</option> - <option value="faseq.loc">Fly (dm3)</option> - </param> - </inputs> - - <outputs> - <data format="input" name="output" label="Blat on ${database.value_label}" /> - </outputs> - -</tool>
--- a/tools/data_source/encode_db.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -<?xml version="1.0"?> - -<tool name="EncodeDB" id="encode_db1"> - - <description> - at NHGRI - </description> - - <command interpreter="python"> - fetch.py "$url" $output - </command> - - <inputs action="http://research.nhgri.nih.gov/projects/ENCODEdb/cgi-bin/power_query.cgi" target="_top"> -<!-- <inputs action="http://localhost:9000/prepared"> --> - <display>go to EncodeDB $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/async/encode_db1" /> - </inputs> - - <uihints minwidth="800"/> - - <outputs> - <data format="bed" name="output" /> - </outputs> - - <options sanitize="False" refresh="True"/> - -</tool> \ No newline at end of file
--- a/tools/data_source/epigraph_import.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="EpiGRAPH" id="epigraph_import" tool_type="data_source"> - <description> server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/Login.jsp" check_values="false" method="get"> - <display>go to EpiGRAPH server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" /> - <request_param galaxy_name="info" remote_name="INFO" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
--- a/tools/data_source/epigraph_import_test.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="EpiGRAPH" id="epigraph_import_test" tool_type="data_source"> - <description> test server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get"> - <display>go to EpiGRAPH server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import_test" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" /> - <request_param galaxy_name="info" remote_name="INFO" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
--- a/tools/data_source/eupathdb.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-<tool name="EuPathDB" id="eupathdb" tool_type="data_source" url_method="post">
-    <description>server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://eupathdb.org/eupathdb/queries_tools.jsp" check_values="false" method="get">
-        <display>go to EuPathDB server $GALAXY_URL</display>
-        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=eupathdb" />
-    </inputs>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="tabular" />
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/fetch.py Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Script that fetches a URL and saves the response to a file.
-"""
-
-import sys, os, urllib
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-BUFFER = 1048576
-
-url = sys.argv[1]
-out_name = sys.argv[2]
-
-out = open(out_name, 'wt')
-try:
-    page = urllib.urlopen(url)
-    while 1:
-        data = page.read(BUFFER)
-        if not data:
-            break
-        out.write(data)
-except Exception, e:
-    print 'Error getting the data -> %s' % e
-out.close()
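The read/write loop above is a fixed-buffer streaming copy. A sketch of the same copy on Python 3, where urllib.urlopen no longer exists (the URL and output name are illustrative):

    import shutil
    import urllib.request
    # shutil.copyfileobj performs the same chunked copy as the while loop above,
    # using the same 1 MB buffer size.
    with urllib.request.urlopen( 'http://example.org/data.txt' ) as page:
        with open( 'out.txt', 'wb' ) as out:
            shutil.copyfileobj( page, out, 1048576 )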
--- a/tools/data_source/fly_modencode.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ -<?xml version="1.0"?> -<tool name="modENCODE fly" id="modENCODEfly" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://modencode.oicr.on.ca/fgb2/gbrowse/fly" check_values="false" target="_top"> - <display>go to modENCODE fly server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=modENCODEfly" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="dbkey" remote_name="dbkey" missing="dm2" > - <value_translation> - <value galaxy_value="dm2" remote_value="fly" /> - </value_translation> - </request_param> - <request_param galaxy_name="URL" remote_name="URL" missing=""> - <append_param separator="&" first_separator="?" join="="> - <value name="d" missing="" /> - <value name="dbkey" missing="dm2" /> - <value name="q" missing="" /> - <value name="s" missing="" /> - <value name="t" missing="" /> - </append_param> - </request_param> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" label="${tool.name} on $getVar( 'q', 'unknown position' )"/> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
--- a/tools/data_source/flymine.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,35 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="Flymine" id="flymine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.flymine.org" check_values="false" method="get"> - <display>go to Flymine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /> <!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> -
--- a/tools/data_source/flymine_test.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="Flymine test" id="flymine_test" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://preview.flymine.org/preview/begin.do" check_values="false" method="get"> - <display>go to Flymine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> -
--- a/tools/data_source/genbank.py Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-#!/usr/bin/env python
-from Bio import GenBank
-import sys, os, textwrap
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def make_fasta(rec):
-    '''Creates fasta format from a record'''
-    gi = rec.annotations.get('gi','')
-    org = rec.annotations.get('organism','')
-    date = rec.annotations.get('date','')
-    head = '>gi:%s, id:%s, org:%s, date:%s\n' % (gi, rec.id, org, date)
-    body = '\n'.join(textwrap.wrap(rec.seq.data, width=80))
-    return head, body
-
-if __name__ == '__main__':
-
-    mode = sys.argv[1]
-    text = sys.argv[2]
-    output_file = sys.argv[3]
-
-    print 'Searching for %s <br>' % text
-
-    # check if inputs are all numbers
-    try:
-        gi_list = text.split()
-        tmp = map(int, gi_list)
-    except ValueError:
-        gi_list = GenBank.search_for(text, max_ids=10)
-
-    fp = open(output_file, 'wt')
-    record_parser = GenBank.FeatureParser()
-    ncbi_dict = GenBank.NCBIDictionary(mode, 'genbank', parser = record_parser)
-    for gid in gi_list:
-        res = ncbi_dict[gid]
-        head, body = make_fasta(res)
-        fp.write(head+body+'\n')
-        print head
-    fp.close()
-
-
-
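The try/except around map(int, ...) above is the whole search heuristic: all-numeric input is treated as a list of GI numbers, anything else falls through to GenBank.search_for(). The same check in isolation (helper name made up; the sample IDs come from the tool's own help text):

    def looks_like_gi_list( text ):
        # True when every whitespace-separated token parses as an integer GI.
        try:
            [ int( token ) for token in text.split() ]
            return True
        except ValueError:
            return False

    assert looks_like_gi_list( '6273291 51594135' )
    assert not looks_like_gi_list( 'human hbb1' )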
--- a/tools/data_source/genbank.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -<tool id="genbank" name="Connect to Genbank"> -<!-- <description>queries genbank</description> --> - <command interpreter="python">genbank.py $mode "$text" $output</command> - <inputs> - <param name="mode" type="select"> - <option value="nucleotide">nucleotide database</option> - <option value="protein">proteins database</option> - <label>Get sequences from the</label> - </param> - <param name="text" size="40" type="text" value="6273291"> - <label>with accession ID</label> - </param> - </inputs> - <outputs> - <data format="fasta" name="output" /> - </outputs> - <help> -At the moment this tool allows the following simple searches: - -- by GI: **51594135** -- by accession: **CF622840** -- using text: **human hbb1** (this feature is experimental) - </help> - -</tool> \ No newline at end of file
--- a/tools/data_source/gramene_mart.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. - - TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile - everything including and beyond the first '&' is truncated from URL. They said they'll let us know when this is fixed at their end. ---> -<tool name="GrameneMart" id="gramenemart" tool_type="data_source" version="1.0.1"> - <description> Central server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.gramene.org/biomart/martview" check_values="false" method="get" target="_top"> - <display>go to GrameneMart Central $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL" remote_name="URL" missing=""> - <append_param separator="&" first_separator="?" join="="> - <value name="_export" missing="1" /> - <value name="GALAXY_URL" missing="0" /> - </append_param> - </request_param> - <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular"> - <value_translation> - <value galaxy_value="tabular" remote_value="TSV" /> - </value_translation> - </request_param> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="Biomart query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="tabular" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
--- a/tools/data_source/hapmapmart.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ -<?xml version="1.0"?> -<!-- - hacked from biomart.xml - testing hapmap biomart - problem is going to be converting these to lped/pbed - the data returned will be in all sorts of different shapes - and the sample ids need to be obtained separately - to create reliable pedigrees. eesh... - - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. - - TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile - everything including and beyond the first '&' is truncated from URL. They said they'll let us know when this is fixed at their end. ---> -<tool name="HapMapMart" id="hapmapmart" tool_type="data_source" version="0.0.01"> - <description>HapMap Biomart</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://hapmap.ncbi.nlm.nih.gov/biomart/martview" check_values="false" method="get" target="_top"> - <display>go to HapMap BioMart $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner/hapmapmart" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL" remote_name="URL" missing=""> - <append_param separator="&" first_separator="?" join="="> - <value name="_export" missing="1" /> - <value name="GALAXY_URL" missing="0" /> - </append_param> - </request_param> - <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" > - <value_translation> - <value galaxy_value="tabular" remote_value="TSV" /> - </value_translation> - </request_param> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="dbkey" remote_name="dbkey" missing="hg18" /> - <request_param galaxy_name="organism" remote_name="organism" missing="human" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="HapMap query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="tabular" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
--- a/tools/data_source/hbvar.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -<?xml version="1.0"?> -<tool name="HbVar" id="hbvar"> - - <description>Human Hemoglobin Variants and Thalassemias</description> - - <command/> - - <inputs action="http://globin.bx.psu.edu/cgi-bin/hbvar/query_vars3" check_values="false" method="get" target="_top"> - <display>go to HbVar database $GALAXY_URL $tool_id</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner/hbvar" /> - <param name="tool_id" type="hidden" value = "hbvar"/> - </inputs> - - <uihints minwidth="800"/> - - <code file="hbvar_filter.py"/> - - <outputs> - <data name="output" format="txt" /> - </outputs> - - <options sanitize="False" refresh="True"/> - -</tool> -
--- a/tools/data_source/hbvar_filter.py Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,77 +0,0 @@ -#TODO: Set dbkey to proper UCSC build, if known -import urllib - -from galaxy import datatypes, config -import tempfile, shutil - -def exec_before_job( app, inp_data, out_data, param_dict, tool=None): - """Sets the name of the data""" - data_name = param_dict.get( 'name', 'HbVar query' ) - data_type = param_dict.get( 'type', 'txt' ) - if data_type == 'txt': data_type='interval' #All data is TSV, assume interval - name, data = out_data.items()[0] - data = app.datatypes_registry.change_datatype(data, data_type) - data.name = data_name - out_data[name] = data - -def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None): - """Verifies the data after the run""" - - URL = param_dict.get( 'URL', None ) - URL = URL + '&_export=1&GALAXY_URL=0' - if not URL: - raise Exception('Datasource has not sent back a URL parameter') - - CHUNK_SIZE = 2**20 # 1Mb - MAX_SIZE = CHUNK_SIZE * 100 - - try: - page = urllib.urlopen(URL) - except Exception, exc: - raise Exception('Problems connecting to %s (%s)' % (URL, exc) ) - - name, data = out_data.items()[0] - - fp = open(data.file_name, 'wb') - size = 0 - while 1: - chunk = page.read(CHUNK_SIZE) - if not chunk: - break - if size > MAX_SIZE: - raise Exception('----- maximum datasize exceeded ---') - size += len(chunk) - fp.write(chunk) - - fp.close() - #Set meta data, format file to be valid interval type - if isinstance(data.datatype, datatypes.interval.Interval): - data.set_meta(first_line_is_header=True) - #check for missing meta data, if all there, comment first line and process file - if not data.missing_meta(): - line_ctr = -1 - temp = tempfile.NamedTemporaryFile('w') - temp_filename = temp.name - temp.close() - temp = open(temp_filename,'w') - chromCol = int(data.metadata.chromCol) - 1 - startCol = int(data.metadata.startCol) - 1 - strandCol = int(data.metadata.strandCol) - 1 - - - for line in open(data.file_name, 'r'): - line_ctr += 1 - - fields = line.strip().split('\t') - - temp.write("%s\n" % '\t'.join(fields)) - - temp.close() - shutil.move(temp_filename,data.file_name) - - else: - data = app.datatypes_registry.change_datatype(data, 'tabular') - data.set_size() - data.set_peek() - app.model.context.add( data ) - app.model.context.flush()
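One quirk in the download loop above: size is compared against MAX_SIZE before the current chunk is counted, so the written file can overshoot the cap by up to one chunk before the exception fires. A sketch of a stricter version of the same loop (constants copied from the code above):

    CHUNK_SIZE = 2**20 # 1Mb, as above
    MAX_SIZE = CHUNK_SIZE * 100

    def copy_capped( page, fp ):
        # Count the chunk before the comparison so the cap cannot be overshot.
        size = 0
        while True:
            chunk = page.read( CHUNK_SIZE )
            if not chunk:
                break
            size += len( chunk )
            if size > MAX_SIZE:
                raise Exception( '----- maximum datasize exceeded ---' )
            fp.write( chunk )
        return size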
--- a/tools/data_source/import.py Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Script that imports locally stored data as a new dataset for the user
-Usage: import id outputfile
-"""
-import sys, os
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-BUFFER = 1048576
-
-dataid = sys.argv[1]
-out_name = sys.argv[2]
-
-
-id2name = {
-    'eryth' : 'ErythPreCRMmm3_cusTrk.txt',
-    'cishg16' : 'ReglRegHBBhg16CusTrk.txt',
-    'cishg17' : 'ReglRegHBBhg17CusTrk.txt',
-    'exons' : 'ExonsKnownGenes_mm3.txt',
-    'krhg16' : 'known_regulatory_hg16.bed',
-    'krhg17' : 'known_regulatory_hg17.bed',
-    'tARhg16mmc' : 'hg16.mouse.t_AR.cold.bed',
-    'tARhg16mmm' : 'hg16.mouse.t_AR.medium.bed',
-    'tARhg16mmh' : 'hg16.mouse.t_AR.hot.bed',
-    'tARhg16rnc' : 'hg16.rat.t_AR.cold.bed',
-    'tARhg16rnm' : 'hg16.rat.t_AR.medium.bed',
-    'tARhg16rnh' : 'hg16.rat.t_AR.hot.bed',
-    'phastConsHg16' : 'phastConsMost_hg16.bed',
-    'omimhg16' : 'omimDisorders_hg16.tab',
-    'omimhg17' : 'omimDisorders_hg17.tab',
-
-}
-
-fname = id2name.get(dataid, '')
-if not fname:
-    print 'Importing invalid data %s' % dataid
-    sys.exit()
-else:
-    print 'Imported %s' % fname
-
-# this path is hardcoded
-inp_name = os.path.join('database', 'import', fname)
-
-try:
-    inp = open(inp_name, 'rt')
-except:
-    print 'Could not find file %s' % inp_name
-    sys.exit()
-
-out = open(out_name, 'wt')
-
-while 1:
-    data = inp.read(BUFFER)
-    if not data:
-        break
-    out.write(data)
-
-inp.close()
-out.close()
--- a/tools/data_source/import.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -<tool id="Featured datasets4" name="Featured datasets"> - <description>(PSU prepared queries)</description> - <command interpreter="python">import.py $data $output</command> - <inputs> - <display>$data</display> - <param name="data" type="select" display="radio"> - <option value="eryth">Erythroid predicted cis-regulatory modules</option> - <option value="exons">Exons of protein-coding genes in the mouse genome, assembly mm3</option> - <option value="cishg16 ">Known cis-regulatory modules in the human HBB gene complex (hg16)</option> - <option value="cishg17">Known cis-regulatory modules in the human HBB gene complex (hg17)</option> - <option value="krhg16">Known regulatory regions (hg16)</option> - <option value="krhg17">Known regulatory regions (hg17)</option> - <option value="tARhg16mmc">Human (hg16) evolutionary cold region (vs mouse)</option> - <option value="tARhg16mmm">Human (hg16) evolutionary medium region (vs mouse)</option> - <option value="tARhg16mmh">Human (hg16) evolutionary hot region (vs mouse)</option> - <option value="tARhg16rnc">Human (hg16) evolutionary cold region (vs rat)</option> - <option value="tARhg16rnm">Human (hg16) evolutionary medium region (vs rat)</option> - <option value="tARhg16rnh">Human (hg16) evolutionary hot region (vs rat)</option> - <option value="phastConsHg16">phastCons hg16 (stringent, top ~5%) from UCSC</option> - <option value="omimhg16">OMIM disorders (hg16)</option> - <option value="omimhg17">OMIM disorders (hg17)</option> - </param> - </inputs> - <outputs> - <data format="bed" name="output" /> - </outputs> -</tool>
--- a/tools/data_source/metabolicmine.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-<?xml version="1.0"?>
-<tool name="metabolicMine" id="metabolicmine" tool_type="data_source">
-    <description>server</description>
-    <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command>
-    <inputs action="http://www.metabolicmine.org/beta/begin.do" check_values="false" method="get">
-        <display>go to metabolicMine server $GALAXY_URL</display>
-    </inputs>
-    <uihints minwidth="800"/>
-    <outputs>
-        <data name="output" format="txt" />
-    </outputs>
-    <options sanitize="False" refresh="True"/>
-</tool>
--- a/tools/data_source/microbial_import.py Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,86 +0,0 @@ -#!/usr/bin/env python - -""" -Script that imports locally stored data as a new dataset for the user -Usage: import id outputfile -""" -import sys, os -from shutil import copyfile - -assert sys.version_info[:2] >= ( 2, 4 ) - -BUFFER = 1048576 - -uids = sys.argv[1].split(",") -out_file1 = sys.argv[2] - -#remove NONE from uids -have_none = True -while have_none: - try: - uids.remove('None') - except: - have_none = False - - -#create dictionary keyed by uid of tuples of (displayName,filePath,build) for all files -available_files = {} -try: - filename = sys.argv[-1] - for i, line in enumerate( file( filename ) ): - if not line or line[0:1] == "#" : continue - fields = line.split('\t') - try: - info_type = fields.pop(0) - - if info_type.upper()=="DATA": - uid = fields.pop(0) - org_num = fields.pop(0) - chr_acc = fields.pop(0) - feature = fields.pop(0) - filetype = fields.pop(0) - path = fields.pop(0).replace("\r","").replace("\n","") - - file_type = filetype - build = org_num - description = uid - else: - continue - except: - continue - - available_files[uid]=(description,path,build,file_type,chr_acc) -except: - print >>sys.stderr, "It appears that the configuration file for this tool is missing." - -#create list of tuples of (displayName,FileName,build) for desired files -desired_files = [] -for uid in uids: - try: - desired_files.append(available_files[uid]) - except: - continue - -#copy first file to contents of given output file -file1_copied = False -while not file1_copied: - try: - first_file = desired_files.pop(0) - except: - print >>sys.stderr, "There were no valid files requested." - sys.exit() - file1_desc, file1_path, file1_build, file1_type,file1_chr_acc = first_file - try: - copyfile(file1_path,out_file1) - print "#File1\t"+file1_desc+"\t"+file1_chr_acc+"\t"+file1_build+"\t"+file1_type - file1_copied = True - except: - print >>sys.stderr, "The file specified is missing." - continue - #print >>sys.stderr, "The file specified is missing." - - -#Tell post-process filter where remaining files reside -for extra_output in desired_files: - file_desc, file_path, file_build, file_type,file_chr_acc = extra_output - print "#NewFile\t"+file_desc+"\t"+file_chr_acc+"\t"+file_build+"\t"+file_path+"\t"+file_type
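Everything microbial_import.py does is keyed off tab-separated DATA lines in microbial_data.loc. A sketch of how one such line (the sample quoted in microbial_import_code.py below) becomes an available_files entry:

    # The sample DATA record from microbial_data.loc, split the same way
    # microbial_import.py splits it.
    line = ( "DATA\t12521_12521_CDS\t12521\tCP000315\tCDS\tbed\t"
             "/home/djb396/alignments/playground/bacteria/12521/CP000315.CDS.bed" )
    info_type, uid, org_num, chr_acc, feature, filetype, path = line.split( '\t' )
    # available_files[uid] = (description, path, build, file_type, chr_acc),
    # with description = uid and build = org_num:
    available_files = { uid: ( uid, path, org_num, filetype, chr_acc ) }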
--- a/tools/data_source/microbial_import.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,115 +0,0 @@ -<tool id="microbial_import1" name="Get Microbial Data"> - <command interpreter="python">microbial_import.py $CDS,$tRNA,$rRNA,$sequence,$GeneMark,$GeneMarkHMM,$Glimmer3 $output ${GALAXY_DATA_INDEX_DIR}/microbial_data.loc</command> - <inputs> - <param name="kingdom" type="select" label="Select the Desired Kingdom"> - <options from_file="microbial_data.loc" startswith="ORG"> - <column name="name" index="3"/> - <column name="value" index="3"/> - <filter type="unique_value" name="unique" column="3"/> - </options> - </param> - <param name="org" type="select" label="Select the Desired Organism"> - <options from_file="microbial_data.loc" startswith="ORG"> - <column name="name" index="2"/> - <column name="value" index="1"/> - <filter type="param_value" ref="kingdom" name="kingdom" column="3"/> - <filter type="sort_by" column="2"/> - </options> - </param> - <param name="CDS" type="select" label="Select Desired Coding Sequences" display="checkboxes" multiple="True"> - <options from_file="microbial_data.loc" startswith="DATA"> - <column name="name" index="3"/> - <column name="value" index="1"/> - <column name="feature" index="4"/> - <filter type="param_value" ref="org" name="kingdom" column="2"/> - <filter type="static_value" name="feature" value="CDS" column="4"/> - </options> - </param> - <param name="tRNA" type="select" label="Select Desired tRNA" display="checkboxes" multiple="True"> - <options from_file="microbial_data.loc" startswith="DATA"> - <column name="name" index="3"/> - <column name="value" index="1"/> - <column name="feature" index="4"/> - <filter type="param_value" ref="org" name="kingdom" column="2"/> - <filter type="static_value" name="feature" value="tRNA" column="4"/> - </options> - </param> - <param name="rRNA" type="select" label="Select Desired rRNA" display="checkboxes" multiple="True"> - <options from_file="microbial_data.loc" startswith="DATA"> - <column name="name" index="3"/> - <column name="value" index="1"/> - <column name="feature" index="4"/> - <filter type="param_value" ref="org" name="kingdom" column="2"/> - <filter type="static_value" name="feature" value="rRNA" column="4"/> - </options> - </param> - <param name="sequence" type="select" label="Select Desired DNA Sequences" display="checkboxes" multiple="True"> - <options from_file="microbial_data.loc" startswith="DATA"> - <column name="name" index="3"/> - <column name="value" index="1"/> - <column name="feature" index="4"/> - <filter type="param_value" ref="org" name="kingdom" column="2"/> - <filter type="static_value" name="feature" value="sequence" column="4"/> - </options> - </param> - <param name="GeneMark" type="select" label="Select Desired GeneMark Annotations" display="checkboxes" multiple="True"> - <options from_file="microbial_data.loc" startswith="DATA"> - <column name="name" index="3"/> - <column name="value" index="1"/> - <column name="feature" index="4"/> - <filter type="param_value" ref="org" name="kingdom" column="2"/> - <filter type="static_value" name="feature" value="GeneMark" column="4"/> - </options> - </param> - <param name="GeneMarkHMM" type="select" label="Select Desired GeneMarkHMM Annotations" display="checkboxes" multiple="True"> - <options from_file="microbial_data.loc" startswith="DATA"> - <column name="name" index="3"/> - <column name="value" index="1"/> - <column name="feature" index="4"/> - <filter type="param_value" ref="org" name="kingdom" 
column="2"/> - <filter type="static_value" name="feature" value="GeneMarkHMM" column="4"/> - </options> - </param> - <param name="Glimmer3" type="select" label="Select Desired Glimmer3 Annotations" display="checkboxes" multiple="True"> - <options from_file="microbial_data.loc" startswith="DATA"> - <column name="name" index="3"/> - <column name="value" index="1"/> - <column name="feature" index="4"/> - <filter type="param_value" ref="org" name="kingdom" column="2"/> - <filter type="static_value" name="feature" value="Glimmer3" column="4"/> - </options> - </param> - </inputs> - <outputs> - <data format="bed" name="output"/> - </outputs> - <code file="microbial_import_code.py"/> - <help> - -This tool will allow you to obtain various genomic datasets for any completed Microbial Genome Project as listed at NCBI_. - -.. _NCBI: http://www.ncbi.nlm.nih.gov/genomes/lproks.cgi?view=1 - -Current datasets available include - 1. CDS - 2. tRNA - 3. rRNA - 4. FASTA Sequences - 5. GeneMark Annotations - 6. GeneMarkHMM Annotations - 7. Glimmer3 Annotations - ------ - -Organisms in **bold** are available at the UCSC Browser. - ------ - -.. class:: infomark - -**Note:** Having trouble locating your organism? Click here_ for a list of available species and their location. - -.. _here: http://wiki.g2.bx.psu.edu/Main/Data%20Libraries/Microbes - - </help> -</tool>
--- a/tools/data_source/microbial_import_code.py Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,154 +0,0 @@ - -def load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' ): - # FIXME: this function is duplicated in the DynamicOptions class. It is used here only to - # set data.name in exec_after_process(). - microbe_info= {} - orgs = {} - - filename = "%s/microbial_data.loc" % GALAXY_DATA_INDEX_DIR - for i, line in enumerate( open( filename ) ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ): - fields = line.split( sep ) - #read each line, if not enough fields, go to next line - try: - info_type = fields.pop(0) - if info_type.upper() == "ORG": - #ORG 12521 Clostridium perfringens SM101 bacteria Firmicutes CP000312,CP000313,CP000314,CP000315 http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=genomeprj&cmd=Retrieve&dopt=Overview&list_uids=12521 - org_num = fields.pop(0) - name = fields.pop(0) - kingdom = fields.pop(0) - group = fields.pop(0) - chromosomes = fields.pop(0) - info_url = fields.pop(0) - link_site = fields.pop(0) - if org_num not in orgs: - orgs[ org_num ] = {} - orgs[ org_num ][ 'chrs' ] = {} - orgs[ org_num ][ 'name' ] = name - orgs[ org_num ][ 'kingdom' ] = kingdom - orgs[ org_num ][ 'group' ] = group - orgs[ org_num ][ 'chromosomes' ] = chromosomes - orgs[ org_num ][ 'info_url' ] = info_url - orgs[ org_num ][ 'link_site' ] = link_site - elif info_type.upper() == "CHR": - #CHR 12521 CP000315 Clostridium perfringens phage phiSM101, complete genome 38092 110684521 CP000315.1 - org_num = fields.pop(0) - chr_acc = fields.pop(0) - name = fields.pop(0) - length = fields.pop(0) - gi = fields.pop(0) - gb = fields.pop(0) - info_url = fields.pop(0) - chr = {} - chr[ 'name' ] = name - chr[ 'length' ] = length - chr[ 'gi' ] = gi - chr[ 'gb' ] = gb - chr[ 'info_url' ] = info_url - if org_num not in orgs: - orgs[ org_num ] = {} - orgs[ org_num ][ 'chrs' ] = {} - orgs[ org_num ][ 'chrs' ][ chr_acc ] = chr - elif info_type.upper() == "DATA": - #DATA 12521_12521_CDS 12521 CP000315 CDS bed /home/djb396/alignments/playground/bacteria/12521/CP000315.CDS.bed - uid = fields.pop(0) - org_num = fields.pop(0) - chr_acc = fields.pop(0) - feature = fields.pop(0) - filetype = fields.pop(0) - path = fields.pop(0) - data = {} - data[ 'filetype' ] = filetype - data[ 'path' ] = path - data[ 'feature' ] = feature - - if org_num not in orgs: - orgs[ org_num ] = {} - orgs[ org_num ][ 'chrs' ] = {} - if 'data' not in orgs[ org_num ][ 'chrs' ][ chr_acc ]: - orgs[ org_num ][ 'chrs' ][ chr_acc ][ 'data' ] = {} - orgs[ org_num ][ 'chrs' ][ chr_acc ][ 'data' ][ uid ] = data - else: continue - except: continue - for org_num in orgs: - org = orgs[ org_num ] - if org[ 'kingdom' ] not in microbe_info: - microbe_info[ org[ 'kingdom' ] ] = {} - if org_num not in microbe_info[ org[ 'kingdom' ] ]: - microbe_info[ org[ 'kingdom' ] ][org_num] = org - return microbe_info - -#post processing, set build for data and add additional data to history -from galaxy import datatypes, config, jobs, tools -from shutil import copyfile - -def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr): - base_dataset = out_data.items()[0][1] - history = base_dataset.history - if history == None: - print "unknown history!" - return - kingdom = param_dict.get( 'kingdom', None ) - #group = param_dict.get( 'group', None ) - org = param_dict.get( 'org', None ) - - #if not (kingdom or group or org): - if not (kingdom or org): - print "Parameters are not available." 
- #workflow passes galaxy.tools.parameters.basic.UnvalidatedValue instead of values - if isinstance( kingdom, tools.parameters.basic.UnvalidatedValue ): - kingdom = kingdom.value - if isinstance( org, tools.parameters.basic.UnvalidatedValue ): - org = org.value - - GALAXY_DATA_INDEX_DIR = app.config.tool_data_path - microbe_info = load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' ) - new_stdout = "" - split_stdout = stdout.split("\n") - basic_name = "" - for line in split_stdout: - fields = line.split("\t") - if fields[0] == "#File1": - description = fields[1] - chr = fields[2] - dbkey = fields[3] - file_type = fields[4] - name, data = out_data.items()[0] - data.set_size() - basic_name = data.name - data.name = data.name + " (" + microbe_info[kingdom][org]['chrs'][chr]['data'][description]['feature'] +" for " + microbe_info[kingdom][org]['name'] + ":" + chr + ")" - data.dbkey = dbkey - data.info = data.name - data = app.datatypes_registry.change_datatype( data, file_type ) - data.init_meta() - data.set_peek() - app.model.context.add( data ) - app.model.context.flush() - elif fields[0] == "#NewFile": - description = fields[1] - chr = fields[2] - dbkey = fields[3] - filepath = fields[4] - file_type = fields[5] - newdata = app.model.HistoryDatasetAssociation( create_dataset = True, sa_session = app.model.context ) #This import should become a library - newdata.set_size() - newdata.extension = file_type - newdata.name = basic_name + " (" + microbe_info[kingdom][org]['chrs'][chr]['data'][description]['feature'] +" for "+microbe_info[kingdom][org]['name']+":"+chr + ")" - app.model.context.add( newdata ) - app.model.context.flush() - app.security_agent.copy_dataset_permissions( base_dataset.dataset, newdata.dataset ) - history.add_dataset( newdata ) - app.model.context.add( history ) - app.model.context.flush() - try: - copyfile(filepath,newdata.file_name) - newdata.info = newdata.name - newdata.state = jobs.JOB_OK - except: - newdata.info = "The requested file is missing from the system." - newdata.state = jobs.JOB_ERROR - newdata.dbkey = dbkey - newdata.init_meta() - newdata.set_peek() - app.model.context.flush()
--- a/tools/data_source/modmine.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="modENCODE modMine" id="modmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://intermine.modencode.org/" check_values="false" method="get"> - <display>go to modENCODE modMine server $GALAXY_URL</display> - </inputs> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> -
--- a/tools/data_source/ratmine.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,34 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="Ratmine" id="ratmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://ratmine.mcw.edu/ratmine/begin.do" check_values="false" method="get"> - <display>go to Ratmine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=ratmine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="Ratmine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /> <!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
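The get/post hand-back described in the comment headers of modmine.xml and ratmine.xml above is a two-step exchange: the remote site's initial response hands parameters back to Galaxy, which then fetches the real data. A minimal sketch of how a receiver might honor that contract, in Python 2 (the function name and flow are assumed for illustration; the actual logic lives in data_source.py):

    import urllib

    def fetch_remote_result(params):
        # 'params' holds the key/value pairs the remote site sent back in its
        # initial response, including 'URL' and 'URL_method'
        url = params.pop('URL')
        method = params.pop('URL_method', 'get')
        if method == 'get':
            # 'get': simply request the URL that came back
            return urllib.urlopen(url).read()
        # 'post': encode any additional params and submit them with a POST
        return urllib.urlopen(url, urllib.urlencode(params)).read()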
--- a/tools/data_source/ucsc_archaea.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -<?xml version="1.0"?> -<tool name="UCSC Archaea" id="ucsc_proxy"> - - <description>table browser</description> - - <command interpreter="python"> - ucsc_proxy.py $param_file $output - </command> - - <inputs action="/ucsc_proxy/index" check_values="false"> - <display>go to UCSC $init $hgta_outputType</display> - <param type="hidden" name="init" value="3"/> - <param type="hidden" name="hgta_outputType" value="bed"/> - </inputs> - - <code file="ucsc_filter.py"/> - - <outputs> - <data name="output" format="bed" /> - </outputs> - -</tool> -
--- a/tools/data_source/ucsc_filter.py Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,68 +0,0 @@ -# runs after the job (and after the default post-filter) -from galaxy import datatypes, jobs - -def validate(incoming): - """Validator""" - #raise Exception, 'not quite right' - pass - -def exec_before_job( app, inp_data, out_data, param_dict, tool=None): - """Sets the name of the data""" - outputType = param_dict.get( 'hgta_outputType', None ) - if isinstance(outputType, list) and len(outputType)>0: outputType = outputType[-1] - items = out_data.items() - - for name, data in items: - data.name = param_dict.get('display', data.name) - data.dbkey = param_dict.get('dbkey', '???') - - if outputType == 'wigData': - ext = "wig" - elif outputType == 'maf': - ext = "maf" - elif outputType == 'gff': - ext = "gff" - elif outputType == 'gff3': - ext = "gff3" - else: - if 'hgta_doPrintSelectedFields' in param_dict: - ext = "interval" - elif 'hgta_doGetBed' in param_dict: - ext = "bed" - elif 'hgta_doGenomicDna' in param_dict: - ext = "fasta" - elif 'hgta_doGenePredSequence' in param_dict: - ext = "fasta" - else: - ext = "interval" - - data = app.datatypes_registry.change_datatype(data, ext) - out_data[name] = data - -def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None): - """Verifies the data after the run""" - items = out_data.items() - for name, data in items: - data.set_size() - try: - err_msg, err_flag = 'Errors:', False - line_count = 0 - num_lines = len(file(data.file_name).readlines()) - for line in file(data.file_name): - line_count += 1 - if line and line[0] == '-': - if line_count + 3 == num_lines and not err_flag: - err_flag = True - err_msg = "Warning: It appears that your results have been truncated by UCSC. View the bottom of your result file for details." - break - err_flag = True - err_msg = err_msg +" (line "+str(line_count)+")"+line - data.set_peek() - if isinstance(data.datatype, datatypes.interval.Interval) and data.missing_meta(): - data = app.datatypes_registry.change_datatype(data, 'tabular') - out_data[name] = data - if err_flag: - raise Exception(err_msg) - except Exception, exc: - data.info = data.info + "\n" + str(exc) - data.blurb = "error"
--- a/tools/data_source/ucsc_proxy.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-#!/usr/bin/env python
-import urllib
-import sys, os
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-CHUNK = 2**20 # 1Mb
-MAXSIZE = CHUNK * 100
-if __name__ == '__main__':
-
-    if len(sys.argv) != 3:
-        print 'Usage: ucsc_proxy.py input_params output_file'
-        sys.exit()
-
-    inp_file = sys.argv[1]
-    out_file = sys.argv[2]
-
-    DEFAULT_URL = "http://genome.ucsc.edu/hgTables?"
-
-    # this must stay a list to allow multiple selections for the same widget name (checkboxes)
-    params = []
-    for line in file(inp_file):
-        line = line.strip()
-        if line:
-            parts = line.split('=')
-            if len(parts) == 0:
-                key = ""
-                value = ""
-            elif len(parts) == 1:
-                key = parts[0]
-                value = ""
-            else:
-                key = parts[0]
-                value = parts[1]
-            if key == 'display':
-                print value
-            # get url from params, referred from proxy.py, initialized by the tool xml
-            elif key == 'proxy_url':
-                DEFAULT_URL = value
-            else:
-                params.append( (key, value) )
-
-    #print params
-
-    encoded_params = urllib.urlencode(params)
-    url = DEFAULT_URL + encoded_params
-
-    #print url
-
-    page = urllib.urlopen(url)
-
-    fp = open(out_file, 'wt')
-    size = 0
-    while 1:
-        data = page.read(CHUNK)
-        if not data:
-            break
-        if size > MAXSIZE:
-            fp.write('----- maximum datasize exceeded ---\n')
-            break
-        size += len(data)
-        fp.write(data)
-
-    fp.close()
-
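For reference, the $param_file read above is a plain key=value listing, one pair per line: 'display' lines are echoed to the user, 'proxy_url' overrides the default hgTables URL, and everything else is forwarded to UCSC. A hypothetical example file (values invented; 'init' and 'hgta_outputType' come from the proxy tool XML below):

    display=UCSC Main on Human: knownGene (chr22:1000-5000)
    proxy_url=http://genome.ucsc.edu/hgTables?
    init=1
    hgta_outputType=bed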
--- a/tools/data_source/ucsc_proxy.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -<?xml version="1.0"?> -<tool name="UCSC Main" id="ucsc_proxy"> - - <description>table browser proxy</description> - - <command interpreter="python"> - ucsc_proxy.py $param_file $output - </command> - - <inputs action="/ucsc_proxy/index" check_values="false"> - <display>go to UCSC $init $hgta_outputType</display> - <param type="hidden" name="init" value="1"/> - <param type="hidden" name="hgta_outputType" value="bed"/> - </inputs> - - <code file="ucsc_filter.py"/> - - <outputs> - <data name="output" format="bed" /> - </outputs> - -</tool> -
--- a/tools/data_source/ucsc_tablebrowser.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="UCSC Main" id="ucsc_table_direct1" tool_type="data_source"> - <description>table browser</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://genome.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get"> - <display>go to UCSC Table Browser $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner" /> - <param name="tool_id" type="hidden" value="ucsc_table_direct1" /> - <param name="sendToGalaxy" type="hidden" value="1" /> - <param name="hgta_compressType" type="hidden" value="none" /> - <param name="hgta_outputType" type="hidden" value="bed" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> - <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" /> - <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" /> - <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="primaryTable" /> - <value galaxy_value="auto" remote_value="selectedFields" /> - <value galaxy_value="wig" remote_value="wigData" /> - <value galaxy_value="interval" remote_value="tab" /> - <value galaxy_value="html" remote_value="hyperlinks" /> - <value galaxy_value="fasta" remote_value="sequence" /> - <value galaxy_value="gtf" remote_value="gff" /> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="tabular" label="${tool.name} on ${organism}: ${table} (#if $description == 'range' then $getVar( 'position', 'unknown position' ) else $description#)"/> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
--- a/tools/data_source/ucsc_tablebrowser_archaea.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="UCSC Archaea" id="ucsc_table_direct_archaea1" tool_type="data_source"> - <description>table browser</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://archaea.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get"> - <display>go to UCSC Table Browser $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner" /> - <param name="tool_id" type="hidden" value="ucsc_table_direct_archaea1" /> - <param name="sendToGalaxy" type="hidden" value="1" /> - <param name="hgta_compressType" type="hidden" value="none" /> - <param name="hgta_outputType" type="hidden" value="bed" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> - <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" /> - <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" /> - <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="primaryTable" /> - <value galaxy_value="auto" remote_value="selectedFields" /> - <value galaxy_value="wig" remote_value="wigData" /> - <value galaxy_value="interval" remote_value="tab" /> - <value galaxy_value="html" remote_value="hyperlinks" /> - <value galaxy_value="fasta" remote_value="sequence" /> - <value galaxy_value="gtf" remote_value="gff" /> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="tabular" label="${tool.name} on ${organism}: ${table} (#if $description == 'range' then $getVar( 'position', 'unknown position' ) else $description#)"/> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
--- a/tools/data_source/ucsc_tablebrowser_test.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="UCSC Test" id="ucsc_table_direct_test1" tool_type="data_source"> - <description>table browser</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://genome-test.cse.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get"> - <display>go to UCSC Table Browser $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner" /> - <param name="tool_id" type="hidden" value="ucsc_table_direct_test1" /> - <param name="sendToGalaxy" type="hidden" value="1" /> - <param name="hgta_compressType" type="hidden" value="none" /> - <param name="hgta_outputType" type="hidden" value="bed" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> - <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" /> - <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" /> - <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="primaryTable" /> - <value galaxy_value="auto" remote_value="selectedFields" /> - <value galaxy_value="wig" remote_value="wigData" /> - <value galaxy_value="interval" remote_value="tab" /> - <value galaxy_value="html" remote_value="hyperlinks" /> - <value galaxy_value="fasta" remote_value="sequence" /> - <value galaxy_value="gtf" remote_value="gff" /> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="tabular" label="${tool.name} on ${organism}: ${table} (#if $description == 'range' then $getVar( 'position', 'unknown position' ) else $description#)"/> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
--- a/tools/data_source/ucsc_testproxy.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -<?xml version="1.0"?> -<tool name="UCSC Test" id="ucsc_testproxy"> - - <description>table browser proxy</description> - - <command interpreter="python"> - ucsc_proxy.py $param_file $output - </command> - - <inputs action="/ucsc_proxy/index" check_values="false"> - <display>go to UCSC genome-test $init $hgta_outputType</display> - <param type="hidden" name="init" value="2"/> - <param type="hidden" name="hgta_outputType" value="bed"/> - </inputs> - - <code file="ucsc_filter.py"/> - - <outputs> - <data name="output" format="bed" /> - </outputs> - -</tool> -
--- a/tools/data_source/upload.py	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,394 +0,0 @@
-#!/usr/bin/env python
-#Processes uploads from the user.
-
-# WARNING: Changes in this tool (particularly as related to parsing) may need
-# to be reflected in galaxy.web.controllers.tool_runner and galaxy.tools
-
-import urllib, sys, os, gzip, tempfile, shutil, re, zipfile, codecs, binascii
-from galaxy import eggs
-# need to import model before sniff to resolve a circular import dependency
-import galaxy.model
-from galaxy.datatypes.checkers import *
-from galaxy.datatypes import sniff
-from galaxy.datatypes.binary import *
-from galaxy.datatypes.images import Pdf
-from galaxy.datatypes.registry import Registry
-from galaxy import util
-from galaxy.datatypes.util.image_util import *
-from galaxy.util.json import *
-
-try:
-    import Image as PIL
-except ImportError:
-    try:
-        from PIL import Image as PIL
-    except:
-        PIL = None
-
-try:
-    import bz2
-except:
-    bz2 = None
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg, ret=1 ):
-    sys.stderr.write( msg )
-    sys.exit( ret )
-def file_err( msg, dataset, json_file ):
-    json_file.write( to_json_string( dict( type = 'dataset',
-                                           ext = 'data',
-                                           dataset_id = dataset.dataset_id,
-                                           stderr = msg ) ) + "\n" )
-    # never remove a server-side upload
-    if dataset.type in ( 'server_dir', 'path_paste' ):
-        return
-    try:
-        os.remove( dataset.path )
-    except:
-        pass
-def safe_dict(d):
-    """
-    Recursively clone json structure with UTF-8 dictionary keys
-    http://mellowmachines.com/blog/2009/06/exploding-dictionary-with-unicode-keys-as-python-arguments/
-    """
-    if isinstance(d, dict):
-        return dict([(k.encode('utf-8'), safe_dict(v)) for k,v in d.iteritems()])
-    elif isinstance(d, list):
-        return [safe_dict(x) for x in d]
-    else:
-        return d
-def check_bam( file_path ):
-    return Bam().sniff( file_path )
-def check_sff( file_path ):
-    return Sff().sniff( file_path )
-def check_pdf( file_path ):
-    return Pdf().sniff( file_path )
-def check_bigwig( file_path ):
-    return BigWig().sniff( file_path )
-def check_bigbed( file_path ):
-    return BigBed().sniff( file_path )
-def parse_outputs( args ):
-    rval = {}
-    for arg in args:
-        id, files_path, path = arg.split( ':', 2 )
-        rval[int( id )] = ( path, files_path )
-    return rval
-def add_file( dataset, registry, json_file, output_path ):
-    data_type = None
-    line_count = None
-    converted_path = None
-    stdout = None
-    link_data_only = dataset.get( 'link_data_only', 'copy_files' )
-
-    try:
-        ext = dataset.file_type
-    except AttributeError:
-        file_err( 'Unable to process uploaded file, missing file_type parameter.', dataset, json_file )
-        return
-
-    if dataset.type == 'url':
-        try:
-            temp_name, dataset.is_multi_byte = sniff.stream_to_file( urllib.urlopen( dataset.path ), prefix='url_paste' )
-        except Exception, e:
-            file_err( 'Unable to fetch %s\n%s' % ( dataset.path, str( e ) ), dataset, json_file )
-            return
-        dataset.path = temp_name
-    # Make sure the file exists and is not empty
-    if not os.path.exists( dataset.path ):
-        file_err( 'Uploaded temporary file (%s) does not exist.'
% dataset.path, dataset, json_file )
-        return
-    if not os.path.getsize( dataset.path ) > 0:
-        file_err( 'The uploaded file is empty', dataset, json_file )
-        return
-    if not dataset.type == 'url':
-        # Already set is_multi_byte above if type == 'url'
-        try:
-            dataset.is_multi_byte = util.is_multi_byte( codecs.open( dataset.path, 'r', 'utf-8' ).read( 100 ) )
-        except UnicodeDecodeError, e:
-            dataset.is_multi_byte = False
-    # Is dataset an image?
-    image = check_image( dataset.path )
-    if image:
-        if not PIL:
-            image = None
-        # get_image_ext() returns None if not a supported image type
-        ext = get_image_ext( dataset.path, image )
-        data_type = ext
-    # Is dataset content multi-byte?
-    elif dataset.is_multi_byte:
-        data_type = 'multi-byte char'
-        ext = sniff.guess_ext( dataset.path, is_multi_byte=True )
-    # Is dataset content supported sniffable binary?
-    elif check_bam( dataset.path ):
-        ext = 'bam'
-        data_type = 'bam'
-    elif check_sff( dataset.path ):
-        ext = 'sff'
-        data_type = 'sff'
-    elif check_pdf( dataset.path ):
-        ext = 'pdf'
-        data_type = 'pdf'
-    elif check_bigwig( dataset.path ):
-        ext = 'bigwig'
-        data_type = 'bigwig'
-    elif check_bigbed( dataset.path ):
-        ext = 'bigbed'
-        data_type = 'bigbed'
-    if not data_type:
-        # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress
-        is_gzipped, is_valid = check_gzip( dataset.path )
-        if is_gzipped and not is_valid:
-            file_err( 'The gzipped uploaded file contains inappropriate content', dataset, json_file )
-            return
-        elif is_gzipped and is_valid:
-            if link_data_only == 'copy_files':
-                # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format
-                CHUNK_SIZE = 2**20 # 1Mb
-                fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False )
-                gzipped_file = gzip.GzipFile( dataset.path, 'rb' )
-                while 1:
-                    try:
-                        chunk = gzipped_file.read( CHUNK_SIZE )
-                    except IOError:
-                        os.close( fd )
-                        os.remove( uncompressed )
-                        file_err( 'Problem decompressing gzipped data', dataset, json_file )
-                        return
-                    if not chunk:
-                        break
-                    os.write( fd, chunk )
-                os.close( fd )
-                gzipped_file.close()
-                # Replace the gzipped file with the decompressed file if it's safe to do so
-                if dataset.type in ( 'server_dir', 'path_paste' ):
-                    dataset.path = uncompressed
-                else:
-                    shutil.move( uncompressed, dataset.path )
-            # trim the '.gz' suffix (rstrip strips characters, not a suffix, and would mangle names ending in 'g' or 'z')
-            if dataset.name.endswith( '.gz' ):
-                dataset.name = dataset.name[ :-len( '.gz' ) ]
-            data_type = 'gzip'
-        if not data_type and bz2 is not None:
-            # See if we have a bz2 file, much like gzip
-            is_bzipped, is_valid = check_bz2( dataset.path )
-            if is_bzipped and not is_valid:
-                file_err( 'The bz2 compressed uploaded file contains inappropriate content', dataset, json_file )
-                return
-            elif is_bzipped and is_valid:
-                if link_data_only == 'copy_files':
-                    # We need to uncompress the temp_name file
-                    CHUNK_SIZE = 2**20 # 1Mb
-                    fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_bunzip2_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False )
-                    bzipped_file = bz2.BZ2File( dataset.path, 'rb' )
-                    while 1:
-                        try:
-                            chunk = bzipped_file.read( CHUNK_SIZE )
-                        except IOError:
-                            os.close( fd )
-                            os.remove( uncompressed )
-                            file_err( 'Problem decompressing bz2 compressed data', dataset, json_file )
-                            return
-                        if not chunk:
-                            break
-                        os.write( fd, chunk )
-                    os.close( fd )
-                    bzipped_file.close()
-                    # Replace the bzipped file with the decompressed file if it's safe to do so
-                    if dataset.type in ( 'server_dir', 'path_paste' ):
-                        dataset.path = uncompressed
-
else: - shutil.move( uncompressed, dataset.path ) - dataset.name = dataset.name.rstrip( '.bz2' ) - data_type = 'bz2' - if not data_type: - # See if we have a zip archive - is_zipped = check_zip( dataset.path ) - if is_zipped: - if link_data_only == 'copy_files': - CHUNK_SIZE = 2**20 # 1Mb - uncompressed = None - uncompressed_name = None - unzipped = False - z = zipfile.ZipFile( dataset.path ) - for name in z.namelist(): - if name.endswith('/'): - continue - if unzipped: - stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.' - break - fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False ) - if sys.version_info[:2] >= ( 2, 6 ): - zipped_file = z.open( name ) - while 1: - try: - chunk = zipped_file.read( CHUNK_SIZE ) - except IOError: - os.close( fd ) - os.remove( uncompressed ) - file_err( 'Problem decompressing zipped data', dataset, json_file ) - return - if not chunk: - break - os.write( fd, chunk ) - os.close( fd ) - zipped_file.close() - uncompressed_name = name - unzipped = True - else: - # python < 2.5 doesn't have a way to read members in chunks(!) - try: - outfile = open( uncompressed, 'wb' ) - outfile.write( z.read( name ) ) - outfile.close() - uncompressed_name = name - unzipped = True - except IOError: - os.close( fd ) - os.remove( uncompressed ) - file_err( 'Problem decompressing zipped data', dataset, json_file ) - return - z.close() - # Replace the zipped file with the decompressed file if it's safe to do so - if uncompressed is not None: - if dataset.type in ( 'server_dir', 'path_paste' ): - dataset.path = uncompressed - else: - shutil.move( uncompressed, dataset.path ) - dataset.name = uncompressed_name - data_type = 'zip' - if not data_type: - if check_binary( dataset.path ): - # We have a binary dataset, but it is not Bam, Sff or Pdf - data_type = 'binary' - #binary_ok = False - parts = dataset.name.split( "." ) - if len( parts ) > 1: - ext = parts[1].strip().lower() - if ext not in unsniffable_binary_formats: - file_err( 'The uploaded binary file contains inappropriate content', dataset, json_file ) - return - elif ext in unsniffable_binary_formats and dataset.file_type != ext: - err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % ( ext.capitalize(), ext ) - file_err( err_msg, dataset, json_file ) - return - if not data_type: - # We must have a text file - if check_html( dataset.path ): - file_err( 'The uploaded file contains inappropriate HTML content', dataset, json_file ) - return - if data_type != 'binary': - if link_data_only == 'copy_files': - in_place = True - if dataset.type in ( 'server_dir', 'path_paste' ) and data_type not in [ 'gzip', 'bz2', 'zip' ]: - in_place = False - if dataset.space_to_tab: - line_count, converted_path = sniff.convert_newlines_sep2tabs( dataset.path, in_place=in_place ) - else: - line_count, converted_path = sniff.convert_newlines( dataset.path, in_place=in_place ) - if dataset.file_type == 'auto': - ext = sniff.guess_ext( dataset.path, registry.sniff_order ) - else: - ext = dataset.file_type - data_type = ext - # Save job info for the framework - if ext == 'auto' and dataset.ext: - ext = dataset.ext - if ext == 'auto': - ext = 'data' - datatype = registry.get_datatype_by_extension( ext ) - if dataset.type in ( 'server_dir', 'path_paste' ) and link_data_only == 'link_to_files': - # Never alter a file that will not be copied to Galaxy's local file store. 
-        if datatype.dataset_content_needs_grooming( dataset.path ):
-            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
-                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
-            file_err( err_msg, dataset, json_file )
-            return
-    if link_data_only == 'copy_files' and dataset.type in ( 'server_dir', 'path_paste' ) and data_type not in [ 'gzip', 'bz2', 'zip' ]:
-        # Move the dataset to its "real" path
-        if converted_path is not None:
-            shutil.copy( converted_path, output_path )
-            try:
-                os.remove( converted_path )
-            except:
-                pass
-        else:
-            # This should not happen, but it's here just in case
-            shutil.copy( dataset.path, output_path )
-    elif link_data_only == 'copy_files':
-        shutil.move( dataset.path, output_path )
-    # Write the job info
-    stdout = stdout or 'uploaded %s file' % data_type
-    info = dict( type = 'dataset',
-                 dataset_id = dataset.dataset_id,
-                 ext = ext,
-                 stdout = stdout,
-                 name = dataset.name,
-                 line_count = line_count )
-    json_file.write( to_json_string( info ) + "\n" )
-    if link_data_only == 'copy_files' and datatype.dataset_content_needs_grooming( output_path ):
-        # Groom the dataset content if necessary
-        datatype.groom_dataset_content( output_path )
-def add_composite_file( dataset, registry, json_file, output_path, files_path ):
-    if dataset.composite_files:
-        os.mkdir( files_path )
-        for name, value in dataset.composite_files.iteritems():
-            value = util.bunch.Bunch( **value )
-            if dataset.composite_file_paths[ value.name ] is None and not value.optional:
-                file_err( 'A required composite data file was not provided (%s)' % name, dataset, json_file )
-                break
-            elif dataset.composite_file_paths[value.name] is not None:
-                dp = dataset.composite_file_paths[value.name][ 'path' ]
-                isurl = dp.find('://') != -1 # todo fixme
-                if isurl:
-                    try:
-                        temp_name, dataset.is_multi_byte = sniff.stream_to_file( urllib.urlopen( dp ), prefix='url_paste' )
-                    except Exception, e:
-                        file_err( 'Unable to fetch %s\n%s' % ( dp, str( e ) ), dataset, json_file )
-                        return
-                    dataset.path = temp_name
-                    dp = temp_name
-                if not value.is_binary:
-                    if dataset.composite_file_paths[ value.name ].get( 'space_to_tab', value.space_to_tab ):
-                        sniff.convert_newlines_sep2tabs( dp )
-                    else:
-                        sniff.convert_newlines( dp )
-                shutil.move( dp, os.path.join( files_path, name ) )
-    # Move the dataset to its "real" path
-    shutil.move( dataset.primary_file, output_path )
-    # Write the job info
-    info = dict( type = 'dataset',
-                 dataset_id = dataset.dataset_id,
-                 stdout = 'uploaded %s file' % dataset.file_type )
-    json_file.write( to_json_string( info ) + "\n" )
-
-def __main__():
-
-    if len( sys.argv ) < 4:
-        print >>sys.stderr, 'usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...'
- sys.exit( 1 ) - - output_paths = parse_outputs( sys.argv[4:] ) - json_file = open( 'galaxy.json', 'w' ) - - registry = Registry( sys.argv[1], sys.argv[2] ) - - for line in open( sys.argv[3], 'r' ): - dataset = from_json_string( line ) - dataset = util.bunch.Bunch( **safe_dict( dataset ) ) - try: - output_path = output_paths[int( dataset.dataset_id )][0] - except: - print >>sys.stderr, 'Output path for dataset %s not found on command line' % dataset.dataset_id - sys.exit( 1 ) - if dataset.type == 'composite': - files_path = output_paths[int( dataset.dataset_id )][1] - add_composite_file( dataset, registry, json_file, output_path, files_path ) - else: - add_file( dataset, registry, json_file, output_path ) - # clean up paramfile - try: - os.remove( sys.argv[3] ) - except: - pass - -if __name__ == '__main__': - __main__()
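Each dataset processed above appends one JSON line to galaxy.json, built from the info dict in add_file() or add_composite_file(). An illustrative line, with invented values, to show the shape of that record:

    {"type": "dataset", "dataset_id": 42, "ext": "bed", "stdout": "uploaded bed file", "name": "my_intervals.bed", "line_count": 1500}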
--- a/tools/data_source/upload.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,213 +0,0 @@ -<?xml version="1.0"?> - -<tool name="Upload File" id="upload1" version="1.1.3"> - <description> - from your computer - </description> - <action module="galaxy.tools.actions.upload" class="UploadToolAction"/> - <command interpreter="python"> - upload.py $GALAXY_ROOT_DIR $GALAXY_DATATYPES_CONF_FILE $paramfile - #set $outnum = 0 - #while $varExists('output%i' % $outnum): - #set $output = $getVar('output%i' % $outnum) - #set $outnum += 1 - #set $file_name = $output.file_name - ## FIXME: This is not future-proof for other uses of external_filename (other than for use by the library upload's "link data" feature) - #if $output.dataset.dataset.external_filename: - #set $file_name = "None" - #end if - ${output.dataset.dataset.id}:${output.files_path}:${file_name} - #end while - </command> - <inputs nginx_upload="true"> - <param name="file_type" type="select" label="File Format" help="Which format? See help below"> - <options from_parameter="tool.app.datatypes_registry.upload_file_formats" transform_lines="[ "%s%s%s" % ( line, self.separator, line ) for line in obj ]"> - <column name="value" index="1"/> - <column name="name" index="0"/> - <filter type="sort_by" column="0"/> - <filter type="add_value" name="Auto-detect" value="auto" index="0"/> - </options> - </param> - <param name="async_datasets" type="hidden" value="None"/> - <upload_dataset name="files" title="Specify Files for Dataset" file_type_name="file_type" metadata_ref="files_metadata"> - <param name="file_data" type="file" size="30" label="File" ajax-upload="true" help="TIP: Due to browser limitations, uploading files larger than 2GB is guaranteed to fail. To upload large files, use the URL method (below) or FTP (if enabled by the site administrator)."> - <validator type="expression" message="You will need to reselect the file you specified (%s)." substitute_value_in_message="True">not ( ( isinstance( value, unicode ) or isinstance( value, str ) ) and value != "" )</validator> <!-- use validator to post message to user about needing to reselect the file, since most browsers won't accept the value attribute for file inputs --> - </param> - <param name="url_paste" type="text" area="true" size="5x35" label="URL/Text" help="Here you may specify a list of URLs (one per line) or paste the contents of a file."/> - <param name="ftp_files" type="ftpfile" label="Files uploaded via FTP"/> - <param name="space_to_tab" type="select" display="checkboxes" multiple="True" label="Convert spaces to tabs" help="Use this option if you are entering intervals by hand."> - <option value="Yes">Yes</option> - </param> - </upload_dataset> - <param name="dbkey" type="genomebuild" label="Genome" /> - <conditional name="files_metadata" title="Specify metadata" value_from="self:app.datatypes_registry.get_upload_metadata_params" value_ref="file_type" value_ref_in_group="False" /> - <!-- <param name="other_dbkey" type="text" label="Or user-defined Genome" /> --> - </inputs> - <help> - -**Auto-detect** - -The system will attempt to detect Axt, Fasta, Fastqsolexa, Gff, Gff3, Html, Lav, Maf, Tabular, Wiggle, Bed and Interval (Bed with headers) formats. If your file is not detected properly as one of the known formats, it most likely means that it has some format problems (e.g., different number of columns on different rows). You can still coerce the system to set your data to the format you think it should be. 
You can also upload compressed files, which will automatically be decompressed.
-
------
-
-**Ab1**
-
-A binary sequence file in 'ab1' format with a '.ab1' file extension. You must manually select this 'File Format' when uploading the file.
-
------
-
-**Axt**
-
-blastz pairwise alignment format. Each alignment block in an axt file contains three lines: a summary line and 2 sequence lines. Blocks are separated from one another by blank lines. The summary line contains chromosomal position and size information about the alignment. It consists of 9 required fields.
-
------
-
-**Bam**
-
-A binary file compressed in the BGZF format with a '.bam' file extension.
-
------
-
-**Bed**
-
-* Tab delimited format (tabular)
-* Does not require header line
-* Contains 3 required fields:
-
-  - chrom - The name of the chromosome (e.g. chr3, chrY, chr2_random) or contig (e.g. ctgY1).
-  - chromStart - The starting position of the feature in the chromosome or contig. The first base in a chromosome is numbered 0.
-  - chromEnd - The ending position of the feature in the chromosome or contig. The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99.
-
-* May contain 9 additional optional BED fields:
-
-  - name - Defines the name of the BED line. This label is displayed to the left of the BED line in the Genome Browser window when the track is open to full display mode or directly to the left of the item in pack mode.
-  - score - A score between 0 and 1000. If the track line useScore attribute is set to 1 for this annotation data set, the score value will determine the level of gray in which this feature is displayed (higher numbers = darker gray).
-  - strand - Defines the strand - either '+' or '-'.
-  - thickStart - The starting position at which the feature is drawn thickly (for example, the start codon in gene displays).
-  - thickEnd - The ending position at which the feature is drawn thickly (for example, the stop codon in gene displays).
-  - itemRgb - An RGB value of the form R,G,B (e.g. 255,0,0). If the track line itemRgb attribute is set to "On", this RGB value will determine the display color of the data contained in this BED line. NOTE: It is recommended that a simple color scheme (eight colors or less) be used with this attribute to avoid overwhelming the color resources of the Genome Browser and your Internet browser.
-  - blockCount - The number of blocks (exons) in the BED line.
-  - blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount.
-  - blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount.
-
-* Example::
-
-    chr22  1000  5000  cloneA  960  +  1000  5000  0  2  567,488,  0,3512
-    chr22  2000  6000  cloneB  900  -  2000  6000  0  2  433,399,  0,3601
-
------
-
-**Fasta**
-
-A sequence in FASTA format consists of a single-line description, followed by lines of sequence data. The first character of the description line is a greater-than (">") symbol in the first column.
All lines should be shorter than 80 characters:: - - >sequence1 - atgcgtttgcgtgc - gtcggtttcgttgc - >sequence2 - tttcgtgcgtatag - tggcgcggtga - ------ - -**FastqSolexa** - -FastqSolexa is the Illumina (Solexa) variant of the Fastq format, which stores sequences and quality scores in a single file:: - - @seq1 - GACAGCTTGGTTTTTAGTGAGTTGTTCCTTTCTTT - +seq1 - hhhhhhhhhhhhhhhhhhhhhhhhhhPW@hhhhhh - @seq2 - GCAATGACGGCAGCAATAAACTCAACAGGTGCTGG - +seq2 - hhhhhhhhhhhhhhYhhahhhhWhAhFhSIJGChO - -Or:: - - @seq1 - GAATTGATCAGGACATAGGACAACTGTAGGCACCAT - +seq1 - 40 40 40 40 35 40 40 40 25 40 40 26 40 9 33 11 40 35 17 40 40 33 40 7 9 15 3 22 15 30 11 17 9 4 9 4 - @seq2 - GAGTTCTCGTCGCCTGTAGGCACCATCAATCGTATG - +seq2 - 40 15 40 17 6 36 40 40 40 25 40 9 35 33 40 14 14 18 15 17 19 28 31 4 24 18 27 14 15 18 2 8 12 8 11 9 - ------ - -**Gff** - -GFF lines have nine required fields that must be tab-separated. - ------ - -**Gff3** - -The GFF3 format addresses the most common extensions to GFF, while preserving backward compatibility with previous formats. - ------ - -**Interval (Genomic Intervals)** - -- Tab delimited format (tabular) -- File must start with definition line in the following format (columns may be in any order).:: - - #CHROM START END STRAND - -- CHROM - The name of the chromosome (e.g. chr3, chrY, chr2_random) or contig (e.g. ctgY1). -- START - The starting position of the feature in the chromosome or contig. The first base in a chromosome is numbered 0. -- END - The ending position of the feature in the chromosome or contig. The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99. -- STRAND - Defines the strand - either '+' or '-'. - -- Example:: - - #CHROM START END STRAND NAME COMMENT - chr1 10 100 + exon myExon - chrX 1000 10050 - gene myGene - ------ - -**Lav** - -Lav is the primary output format for BLASTZ. The first line of a .lav file begins with #:lav.. - ------ - -**MAF** - -TBA and multiz multiple alignment format. The first line of a .maf file begins with ##maf. This word is followed by white-space-separated "variable=value" pairs. There should be no white space surrounding the "=". - ------ - -**Scf** - -A binary sequence file in 'scf' format with a '.scf' file extension. You must manually select this 'File Format' when uploading the file. - ------ - -**Sff** - -A binary file in 'Standard Flowgram Format' with a '.sff' file extension. - ------ - -**Tabular (tab delimited)** - -Any data in tab delimited format (tabular) - ------ - -**Wig** - -The wiggle format is line-oriented. Wiggle data is preceded by a track definition line, which adds a number of options for controlling the default display of this track. - ------ - -**Other text type** - -Any text file - - </help> -</tool>
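As a worked example of the BED layout described in the help text above, here is a minimal parser for the three required fields plus any of the nine optional ones (a sketch only, with an invented function name; this is not part of the upload tool):

    def parse_bed_line(line):
        # required fields: chrom, 0-based start, exclusive end; up to 9 optional fields follow
        fields = line.rstrip('\r\n').split('\t')
        chrom, start, end = fields[0], int(fields[1]), int(fields[2])
        return chrom, start, end, fields[3:]

    # parse_bed_line('chr22\t1000\t5000\tcloneA\t960\t+')
    # -> ('chr22', 1000, 5000, ['cloneA', '960', '+'])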
--- a/tools/data_source/worm_modencode.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ -<?xml version="1.0"?> -<tool name="modENCODE worm" id="modENCODEworm" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://modencode.oicr.on.ca/fgb2/gbrowse/worm" check_values="false" target="_top"> - <display>go to modENCODE worm server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=modENCODEworm" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="dbkey" remote_name="dbkey" missing="ce6" > - <value_translation> - <value galaxy_value="ce6" remote_value="worm" /> - </value_translation> - </request_param> - <request_param galaxy_name="URL" remote_name="URL" missing=""> - <append_param separator="&" first_separator="?" join="="> - <value name="d" missing="" /> - <value name="dbkey" missing="ce6" /> - <value name="q" missing="" /> - <value name="s" missing="" /> - <value name="t" missing="" /> - </append_param> - </request_param> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" label="${tool.name} on $getVar( 'q', 'unknown position' )"/> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
--- a/tools/data_source/wormbase.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -<?xml version="1.0"?> -<tool name="Wormbase" id="wormbase" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.wormbase.org/db/seq/gbgff/c_elegans/" check_values="false" target="_top"> - <display>go to Wormbase server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=wormbase" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL" remote_name="URL" missing=""> - <append_param separator="&" first_separator="?" join="="> - <value name="d" missing="" /> - <value name="dbkey" missing="" /> - <value name="q" missing="" /> - <value name="s" missing="" /> - <value name="t" missing="" /> - </append_param> - </request_param> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" label="${tool.name} on $getVar( 'q', 'unknown position' )"/> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
--- a/tools/data_source/wormbase_test.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -<?xml version="1.0"?> -<tool name="Wormbase" id="wormbase_test" tool_type="data_source"> - <description>test server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://dev.wormbase.org/db/seq/gbrowse/c_elegans/" check_values="false" target="_top"> - <display>go to Wormbase test server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=wormbase_test" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL" remote_name="URL" missing=""> - <append_param separator="&" first_separator="?" join="="> - <value name="d" missing="" /> - <value name="dbkey" missing="" /> - <value name="q" missing="" /> - <value name="s" missing="" /> - <value name="t" missing="" /> - </append_param> - </request_param> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" label="${tool.name} on $getVar( 'q', 'unknown position' )"/> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
--- a/tools/data_source/yeastmine.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -<?xml version="1.0"?> -<tool name="YeastMine" id="yeastmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://yeastmine.yeastgenome.org/yeastmine/begin.do" check_values="false" method="get"> - <display>go to yeastMine server $GALAXY_URL</display> - </inputs> - <request_param_translation> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /> <!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool>
--- a/tools/discreteWavelet/execute_dwt_IvC_all.pl Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,210 +0,0 @@ -#!/usr/bin/perl -w -use warnings; -use IO::Handle; - -$usage = "execute_dwt_IvC_all.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] [PDF.out] \n"; -die $usage unless @ARGV == 4; - -#get the input arguments -my $firstInputFile = $ARGV[0]; -my $secondInputFile = $ARGV[1]; -my $firstOutputFile = $ARGV[2]; -my $secondOutputFile = $ARGV[3]; - -open (INPUT1, "<", $firstInputFile) || die("Could not open file $firstInputFile \n"); -open (INPUT2, "<", $secondInputFile) || die("Could not open file $secondInputFile \n"); -open (OUTPUT1, ">", $firstOutputFile) || die("Could not open file $firstOutputFile \n"); -open (OUTPUT2, ">", $secondOutputFile) || die("Could not open file $secondOutputFile \n"); -open (ERROR, ">", "error.txt") or die ("Could not open file error.txt \n"); - -#save all error messages into the error file $errorFile using the error file handle ERROR -STDERR -> fdopen( \*ERROR, "w" ) or die ("Could not direct errors to the error file error.txt \n"); - - -print "There are two input data files: \n"; -print "The input data file is: $firstInputFile \n"; -print "The control data file is: $secondInputFile \n"; - -# IvC test -$test = "IvC"; - -# construct an R script to implement the IvC test -print "\n"; - -$r_script = "get_dwt_IvC_test.r"; -print "$r_script \n"; - -# R script -open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n"; -print Rcmd " - ########################################################################################### - # code to do wavelet Indel vs. Control - # signal is the difference I-C; function is second moment i.e. variance from zero not mean - # to perform wavelet transf. of signal, scale-by-scale analysis of the function - # create null bands by permuting the original data series - # generate plots and table matrix of correlation coefficients including p-values - ############################################################################################ - library(\"Rwave\"); - library(\"wavethresh\"); - library(\"waveslim\"); - - options(echo = FALSE) - - # normalize data - norm <- function(data){ - v <- (data - mean(data))/sd(data); - if(sum(is.na(v)) >= 1){ - v <- data; - } - return(v); - } - - dwt_cor <- function(data.short, names.short, data.long, names.long, test, pdf, table, filter = 4, bc = \"symmetric\", wf = \"haar\", boundary = \"reflection\") { - print(test); - print(pdf); - print(table); - - pdf(file = pdf); - final_pvalue = NULL; - title = NULL; - - short.levels <- wd(data.short[, 1], filter.number = filter, bc = bc)\$nlevels; - title <- c(\"motif\"); - for (i in 1:short.levels){ - title <- c(title, paste(i, \"moment2\", sep = \"_\"), paste(i, \"pval\", sep = \"_\"), paste(i, \"test\", sep = \"_\")); - } - print(title); - - # loop to compare a vs a - for(i in 1:length(names.short)){ - wave1.dwt = NULL; - m2.dwt = diff = var.dwt = NULL; - out = NULL; - out <- vector(length = length(title)); - - print(names.short[i]); - print(names.long[i]); - - # need exit if not comparing motif(a) vs motif(a) - if (names.short[i] != names.long[i]){ - stop(paste(\"motif\", names.short[i], \"is not the same as\", names.long[i], sep = \" \")); - } - else { - # signal is the difference I-C data sets - diff<-data.short[,i]-data.long[,i]; - - # normalize the signal - diff<-norm(diff); - - # function is 2nd moment - # 2nd moment m_j = 1/N[sum_N(W_j + V_J)^2] = 1/N sum_N(W_j)^2 + (X_bar)^2 - wave1.dwt <- dwt(diff, wf = wf, 
short.levels, boundary = boundary); - var.dwt <- wave.variance(wave1.dwt); - m2.dwt <- vector(length = short.levels) - for(level in 1:short.levels){ - m2.dwt[level] <- var.dwt[level, 1] + (mean(diff)^2); - } - - # CI bands by permutation of time series - feature1 = feature2 = NULL; - feature1 = data.short[, i]; - feature2 = data.long[, i]; - null = results = med = NULL; - m2_25 = m2_975 = NULL; - - for (k in 1:1000) { - nk_1 = nk_2 = NULL; - m2_null = var_null = NULL; - null.levels = null_wave1 = null_diff = NULL; - nk_1 <- sample(feature1, length(feature1), replace = FALSE); - nk_2 <- sample(feature2, length(feature2), replace = FALSE); - null.levels <- wd(nk_1, filter.number = filter, bc = bc)\$nlevels; - null_diff <- nk_1-nk_2; - null_diff <- norm(null_diff); - null_wave1 <- dwt(null_diff, wf = wf, short.levels, boundary = boundary); - var_null <- wave.variance(null_wave1); - m2_null <- vector(length = null.levels); - for(level in 1:null.levels){ - m2_null[level] <- var_null[level, 1] + (mean(null_diff)^2); - } - null= rbind(null, m2_null); - } - - null <- apply(null, 2, sort, na.last = TRUE); - m2_25 <- null[25,]; - m2_975 <- null[975,]; - med <- apply(null, 2, median, na.rm = TRUE); - - # plot - results <- cbind(m2.dwt, m2_25, m2_975); - matplot(results, type = \"b\", pch = \"*\", lty = 1, col = c(1, 2, 2), xlab = \"Wavelet Scale\", ylab = c(\"Wavelet 2nd Moment\", test), main = (names.short[i]), cex.main = 0.75); - abline(h = 1); - - # get pvalues by comparison to null distribution - out <- c(names.short[i]); - for (m in 1:length(m2.dwt)){ - print(paste(\"scale\", m, sep = \" \")); - print(paste(\"m2\", m2.dwt[m], sep = \" \")); - print(paste(\"median\", med[m], sep = \" \")); - out <- c(out, format(m2.dwt[m], digits = 4)); - pv = NULL; - if(is.na(m2.dwt[m])){ - pv <- \"NA\"; - } - else { - if (m2.dwt[m] >= med[m]){ - # R tail test - tail <- \"R\"; - pv <- (length(which(null[, m] >= m2.dwt[m])))/(length(na.exclude(null[, m]))); - } - else{ - if (m2.dwt[m] < med[m]){ - # L tail test - tail <- \"L\"; - pv <- (length(which(null[, m] <= m2.dwt[m])))/(length(na.exclude(null[, m]))); - } - } - } - out <- c(out, pv); - print(pv); - out <- c(out, tail); - } - final_pvalue <-rbind(final_pvalue, out); - print(out); - } - } - - colnames(final_pvalue) <- title; - write.table(final_pvalue, file = table, sep = \"\\t\", quote = FALSE, row.names = FALSE); - dev.off(); - }\n"; - -print Rcmd " - # execute - # read in data - - inputData <- read.delim(\"$firstInputFile\"); - inputDataNames <- colnames(inputData); - - controlData <- read.delim(\"$secondInputFile\"); - controlDataNames <- colnames(controlData); - - # call the test function to implement IvC test - dwt_cor(inputData, inputDataNames, controlData, controlDataNames, test = \"$test\", pdf = \"$secondOutputFile\", table = \"$firstOutputFile\"); - print (\"done with the correlation test\"); -\n"; - -print Rcmd "#eof\n"; - -close Rcmd; - -system("echo \"wavelet IvC test started on \`hostname\` at \`date\`\"\n"); -system("R --no-restore --no-save --no-readline < $r_script > $r_script.out\n"); -system("echo \"wavelet IvC test ended on \`hostname\` at \`date\`\"\n"); - -#close the input and output and error files -close(ERROR); -close(OUTPUT2); -close(OUTPUT1); -close(INPUT2); -close(INPUT1); \ No newline at end of file
--- a/tools/discreteWavelet/execute_dwt_IvC_all.xml	Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-<tool id="compute_p-values_second_moments_feature_occurrences_between_two_datasets_using_discrete_wavelet_transfom" name="Compute P-values and Second Moments for Feature Occurrences" version="1.0.0">
-  <description>between two datasets using Discrete Wavelet Transforms</description>
-
-  <command interpreter="perl">
-    execute_dwt_IvC_all.pl $inputFile1 $inputFile2 $outputFile1 $outputFile2
-  </command>
-
-  <inputs>
-    <param format="tabular" name="inputFile1" type="data" label="Select the first input file"/>
-    <param format="tabular" name="inputFile2" type="data" label="Select the second input file"/>
-  </inputs>
-
-  <outputs>
-    <data format="tabular" name="outputFile1"/>
-    <data format="pdf" name="outputFile2"/>
-  </outputs>
-
-  <help>
-
-.. class:: infomark
-
-**What it does**
-
-This program generates plots and computes a table matrix of second moments, p-values, and test orientations at multiple scales for the correlation between the occurrences of features in one dataset and their occurrences in another, using a multiscale wavelet analysis technique.
-
-The program assumes that the user has two sets of DNA sequences, S1 and S2, each of which consists of one or more sequences of equal length. Each sequence in each set is divided into the same number of intervals n, such that n = 2^k, where k is a positive integer and k >= 1. Thus, n could be any value of the set {2, 4, 8, 16, 32, 64, 128, ...}. k represents the number of scales.
-
-The program has two input files obtained as follows:
-
-For a given set of features, say motifs, the user counts the number of occurrences of each feature in each interval of each sequence in S1 and S2, and builds two tabular files representing the count results in each interval of S1 and S2. These are the input files of the program.
-
-The program gives two output files:
-
-- The first output file is a TABULAR format file representing the second moments, p-values, and test orientations for each feature at each scale.
-- The second output file is a PDF file consisting of as many figures as the number of features, such that each figure represents the values of the second moment for that feature at every scale.
-
------
-
-.. class:: warningmark
-
-**Note**
-
-To obtain empirical p-values, the program implements a random permutation test, so it gives slightly different results each time it is run on the same input file.
- ------ - -**Example** - -Counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S1 gives the following tabular file:: - - deletionHoptspot insertionHoptspot dnaPolPauseFrameshift topoisomeraseCleavageSite translinTarget - 226 403 416 221 1165 - 236 444 380 241 1223 - 242 496 391 195 1116 - 243 429 364 191 1118 - 244 410 371 236 1063 - 230 386 370 217 1087 - 275 404 402 214 1044 - 265 443 365 231 1086 - 255 390 354 246 1114 - 281 384 406 232 1102 - 263 459 369 251 1135 - 280 433 400 251 1159 - 278 385 382 231 1147 - 248 393 389 211 1162 - 251 403 385 246 1114 - 239 383 347 227 1172 - -And counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S2 gives the following tabular file:: - - deletionHoptspot insertionHoptspot dnaPolPauseFrameshift topoisomeraseCleavageSite translinTarget - 235 374 407 257 1159 - 244 356 353 212 1128 - 233 343 322 204 1110 - 222 329 398 253 1054 - 216 325 328 253 1129 - 257 368 352 221 1115 - 238 360 346 224 1102 - 225 350 377 248 1107 - 230 330 365 236 1132 - 241 389 357 220 1120 - 274 354 392 235 1120 - 250 379 354 210 1102 - 254 329 320 251 1080 - 221 355 406 279 1127 - 224 330 390 249 1129 - 246 366 364 218 1176 - - -We notice that the number of scales here is 4 because 16 = 2^4. Runnig the program on the above input files gives the following output: - -The first output file:: - - motif 1_moment2 1_pval 1_test 2_moment2 2_pval 2_test 3_moment2 3_pval 3_test 4_moment2 4_pval 4_test - - deletionHoptspot 0.8751 0.376 L 1.549 0.168 R 0.6152 0.434 L 0.5735 0.488 R - insertionHoptspot 0.902 0.396 L 1.172 0.332 R 0.6843 0.456 L 1.728 0.213 R - dnaPolPauseFrameshift 1.65 0.013 R 0.267 0.055 L 0.1387 0.124 L 0.4516 0.498 L - topoisomeraseCleavageSite 0.7443 0.233 L 1.023 0.432 R 1.933 0.155 R 1.09 0.3 R - translinTarget 0.5084 0.057 L 0.8219 0.446 L 3.604 0.019 R 0.4377 0.492 L - -The second output file: - -.. image:: ./static/operation_icons/dwt_IvC_1.png -.. image:: ./static/operation_icons/dwt_IvC_2.png -.. image:: ./static/operation_icons/dwt_IvC_3.png -.. image:: ./static/operation_icons/dwt_IvC_4.png -.. image:: ./static/operation_icons/dwt_IvC_5.png - - </help> - -</tool>
--- a/tools/discreteWavelet/execute_dwt_cor_aVa_perClass.pl Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,221 +0,0 @@ -#!/usr/bin/perl -w - -use warnings; -use IO::Handle; - -$usage = "execute_dwt_cor_aVa_perClass.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] [PDF.out] \n"; -die $usage unless @ARGV == 4; - -#get the input arguments -my $firstInputFile = $ARGV[0]; -my $secondInputFile = $ARGV[1]; -my $firstOutputFile = $ARGV[2]; -my $secondOutputFile = $ARGV[3]; - -open (INPUT1, "<", $firstInputFile) || die("Could not open file $firstInputFile \n"); -open (INPUT2, "<", $secondInputFile) || die("Could not open file $secondInputFile \n"); -open (OUTPUT1, ">", $firstOutputFile) || die("Could not open file $firstOutputFile \n"); -open (OUTPUT2, ">", $secondOutputFile) || die("Could not open file $secondOutputFile \n"); -open (ERROR, ">", "error.txt") or die ("Could not open file error.txt \n"); - -#save all error messages into the error file $errorFile using the error file handle ERROR -STDERR -> fdopen( \*ERROR, "w" ) or die ("Could not direct errors to the error file error.txt \n"); - -print "There are two input data files: \n"; -print "The input data file is: $firstInputFile \n"; -print "The control data file is: $secondInputFile \n"; - -# IvC test -$test = "cor_aVa"; - -# construct an R script to implement the IvC test -print "\n"; - -$r_script = "get_dwt_cor_aVa_test.r"; -print "$r_script \n"; - -open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n"; -print Rcmd " - ################################################################################## - # code to do all correlation tests of form: motif(a) vs. motif(a) - # add code to create null bands by permuting the original data series - # generate plots and table matrix of correlation coefficients including p-values - ################################################################################## - library(\"Rwave\"); - library(\"wavethresh\"); - library(\"waveslim\"); - - options(echo = FALSE) - - # normalize data - norm <- function(data){ - v <- (data - mean(data))/sd(data); - if(sum(is.na(v)) >= 1){ - v <- data; - } - return(v); - } - - dwt_cor <- function(data.short, names.short, data.long, names.long, test, pdf, table, filter = 4, bc = \"symmetric\", method = \"kendall\", wf = \"haar\", boundary = \"reflection\") { - print(test); - print(pdf); - print(table); - - pdf(file = pdf); - final_pvalue = NULL; - title = NULL; - - short.levels <- wd(data.short[, 1], filter.number = filter, bc = bc)\$nlevels; - title <- c(\"motif\"); - for (i in 1:short.levels){ - title <- c(title, paste(i, \"cor\", sep = \"_\"), paste(i, \"pval\", sep = \"_\")); - } - print(title); - - # normalize the raw data - data.short <- apply(data.short, 2, norm); - data.long <- apply(data.long, 2, norm); - - for(i in 1:length(names.short)){ - # Kendall Tau - # DWT wavelet correlation function - # include significance to compare - wave1.dwt = wave2.dwt = NULL; - tau.dwt = NULL; - out = NULL; - - print(names.short[i]); - print(names.long[i]); - - # need exit if not comparing motif(a) vs motif(a) - if (names.short[i] != names.long[i]){ - stop(paste(\"motif\", names.short[i], \"is not the same as\", names.long[i], sep = \" \")); - } - else { - wave1.dwt <- dwt(data.short[, i], wf = wf, short.levels, boundary = boundary); - wave2.dwt <- dwt(data.long[, i], wf = wf, short.levels, boundary = boundary); - tau.dwt <- vector(length=short.levels) - - #perform cor test on wavelet coefficients per scale - for(level in 1:short.levels){ - 
w1_level = w2_level = NULL; - w1_level <- (wave1.dwt[[level]]); - w2_level <- (wave2.dwt[[level]]); - tau.dwt[level] <- cor.test(w1_level, w2_level, method = method)\$estimate; - } - - # CI bands by permutation of time series - feature1 = feature2 = NULL; - feature1 = data.short[, i]; - feature2 = data.long[, i]; - null = results = med = NULL; - cor_25 = cor_975 = NULL; - - for (k in 1:1000) { - nk_1 = nk_2 = NULL; - null.levels = NULL; - cor = NULL; - null_wave1 = null_wave2 = NULL; - - nk_1 <- sample(feature1, length(feature1), replace = FALSE); - nk_2 <- sample(feature2, length(feature2), replace = FALSE); - null.levels <- wd(nk_1, filter.number = filter, bc = bc)\$nlevels; - cor <- vector(length = null.levels); - null_wave1 <- dwt(nk_1, wf = wf, short.levels, boundary = boundary); - null_wave2 <- dwt(nk_2, wf = wf, short.levels, boundary = boundary); - - for(level in 1:null.levels){ - null_level1 = null_level2 = NULL; - null_level1 <- (null_wave1[[level]]); - null_level2 <- (null_wave2[[level]]); - cor[level] <- cor.test(null_level1, null_level2, method = method)\$estimate; - } - null = rbind(null, cor); - } - - null <- apply(null, 2, sort, na.last = TRUE); - print(paste(\"NAs\", length(which(is.na(null))), sep = \" \")); - cor_25 <- null[25,]; - cor_975 <- null[975,]; - med <- (apply(null, 2, median, na.rm = TRUE)); - - # plot - results <- cbind(tau.dwt, cor_25, cor_975); - matplot(results, type = \"b\", pch = \"*\" , lty = 1, col = c(1, 2, 2), ylim = c(-1, 1), xlab = \"Wavelet Scale\", ylab = \"Wavelet Correlation Kendall's Tau\", main = (paste(test, names.short[i], sep = \" \")), cex.main = 0.75); - abline(h = 0); - - # get pvalues by comparison to null distribution - ### modify pval calculation for error type II of T test #### - out <- (names.short[i]); - for (m in 1:length(tau.dwt)){ - print(paste(\"scale\", m, sep = \" \")); - print(paste(\"tau\", tau.dwt[m], sep = \" \")); - print(paste(\"med\", med[m], sep = \" \")); - out <- c(out, format(tau.dwt[m], digits = 3)); - pv = NULL; - if(is.na(tau.dwt[m])){ - pv <- \"NA\"; - } - else { - if (tau.dwt[m] >= med[m]){ - # R tail test - print(paste(\"R\")); - ### per sv ok to use inequality not strict - pv <- (length(which(null[, m] >= tau.dwt[m])))/(length(na.exclude(null[, m]))); - if (tau.dwt[m] == med[m]){ - print(\"tau == med\"); - print(summary(null[, m])); - } - } - else if (tau.dwt[m] < med[m]){ - # L tail test - print(paste(\"L\")); - pv <- (length(which(null[, m] <= tau.dwt[m])))/(length(na.exclude(null[, m]))); - } - } - out <- c(out, pv); - print(paste(\"pval\", pv, sep = \" \")); - } - final_pvalue <- rbind(final_pvalue, out); - print(out); - } - } - colnames(final_pvalue) <- title; - write.table(final_pvalue, file = table, sep = \"\\t\", quote = FALSE, row.names = FALSE) - dev.off(); - }\n"; - -print Rcmd " - # execute - # read in data - - inputData1 = inputData2 = NULL; - inputData.short1 = inputData.short2 = NULL; - inputDataNames.short1 = inputDataNames.short2 = NULL; - - inputData1 <- read.delim(\"$firstInputFile\"); - inputData.short1 <- inputData1[, +c(1:ncol(inputData1))]; - inputDataNames.short1 <- colnames(inputData.short1); - - inputData2 <- read.delim(\"$secondInputFile\"); - inputData.short2 <- inputData2[, +c(1:ncol(inputData2))]; - inputDataNames.short2 <- colnames(inputData.short2); - - # cor test for motif(a) in inputData1 vs motif(a) in inputData2 - dwt_cor(inputData.short1, inputDataNames.short1, inputData.short2, inputDataNames.short2, test = \"$test\", pdf = \"$secondOutputFile\", table = 
\"$firstOutputFile\"); - print (\"done with the correlation test\"); - - #eof\n"; -close Rcmd; - -system("echo \"wavelet IvC test started on \`hostname\` at \`date\`\"\n"); -system("R --no-restore --no-save --no-readline < $r_script > $r_script.out\n"); -system("echo \"wavelet IvC test ended on \`hostname\` at \`date\`\"\n"); - -#close the input and output and error files -close(ERROR); -close(OUTPUT2); -close(OUTPUT1); -close(INPUT2); -close(INPUT1); -
--- a/tools/discreteWavelet/execute_dwt_cor_aVa_perClass.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -<tool id="compute_p-values_correlation_coefficients_feature_occurrences_between_two_datasets_using_discrete_wavelet_transfom" name="Compute P-values and Correlation Coefficients for Feature Occurrences" version="1.0.0"> - <description>between two datasets using Discrete Wavelet Transfoms</description> - - <command interpreter="perl"> - execute_dwt_cor_aVa_perClass.pl $inputFile1 $inputFile2 $outputFile1 $outputFile2 - </command> - - <inputs> - <param format="tabular" name="inputFile1" type="data" label="Select the first input file"/> - <param format="tabular" name="inputFile2" type="data" label="Select the second input file"/> - </inputs> - - <outputs> - <data format="tabular" name="outputFile1"/> - <data format="pdf" name="outputFile2"/> - </outputs> - - <help> - -.. class:: infomark - -**What it does** - -This program generates plots and computes table matrix of coefficient correlations and p-values at multiple scales for the correlation between the occurrences of features in one dataset and their occurrences in another using multiscale wavelet analysis technique. - -The program assumes that the user has two sets of DNA sequences, S1 and S1, each of which consists of one or more sequences of equal length. Each sequence in each set is divided into the same number of multiple intervals n such that n = 2^k, where k is a positive integer and k >= 1. Thus, n could be any value of the set {2, 4, 8, 16, 32, 64, 128, ...}. k represents the number of scales. - -The program has two input files obtained as follows: - -For a given set of features, say motifs, the user counts the number of occurrences of each feature in each interval of each sequence in S1 and S1, and builds two tabular files representing the count results in each interval of S1 and S1. These are the input files of the program. - -The program gives two output files: - -- The first output file is a TABULAR format file representing the coefficient correlations and p-values for each feature at each scale. -- The second output file is a PDF file consisting of as many figures as the number of features, such that each figure represents the values of the coefficient correlation for that feature at every scale. - ------ - -.. class:: warningmark - -**Note** - -In order to obtain empirical p-values, a random perumtation test is implemented by the program, which results in the fact that the program gives slightly different results each time it is run on the same input file. 
- ------ - -**Example** - -Counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S1 gives the following tabular file:: - - deletionHoptspot insertionHoptspot dnaPolPauseFrameshift topoisomeraseCleavageSite translinTarget - 269 366 330 238 1129 - 239 328 327 283 1188 - 254 351 358 297 1151 - 262 371 355 256 1107 - 254 361 352 234 1192 - 265 354 367 240 1182 - 255 359 333 235 1217 - 271 389 387 272 1241 - 240 305 341 249 1159 - 272 351 337 257 1169 - 275 351 337 233 1158 - 305 331 361 253 1172 - 277 341 343 253 1113 - 266 362 355 267 1162 - 235 326 329 241 1230 - 254 335 360 251 1172 - -And counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S2 gives the following tabular file:: - - deletionHoptspot insertionHoptspot dnaPolPauseFrameshift topoisomeraseCleavageSite translinTarget - 104 146 142 113 478 - 89 146 151 94 495 - 100 176 151 88 435 - 96 163 128 114 468 - 99 138 144 91 513 - 112 126 162 106 468 - 86 127 145 83 491 - 104 145 171 110 496 - 91 121 147 104 469 - 103 141 145 98 458 - 92 134 142 117 468 - 97 146 145 107 471 - 115 121 136 109 470 - 113 135 138 101 491 - 111 150 138 102 451 - 94 128 151 138 481 - - -We notice that the number of scales here is 4 because 16 = 2^4. Running the program on the above input files gives the following output: - -The first output file:: - - motif 1_cor 1_pval 2_cor 2_pval 3_cor 3_pval 4_cor 4_pval - - deletionHoptspot 0.4 0.072 0.143 0.394 -0.667 0.244 1 0.491 - insertionHoptspot 0.343 0.082 -0.0714 0.446 -1 0.12 1 0.502 - dnaPolPauseFrameshift 0.617 0.004 -0.5 0.13 0.667 0.234 1 0.506 - topoisomeraseCleavageSite -0.183 0.242 -0.286 0.256 0.333 0.353 -1 0.489 - translinTarget 0.0167 0.503 -0.0714 0.469 1 0.136 1 0.485 - -The second output file: - -.. image:: ./static/operation_icons/dwt_cor_aVa_1.png -.. image:: ./static/operation_icons/dwt_cor_aVa_2.png -.. image:: ./static/operation_icons/dwt_cor_aVa_3.png -.. image:: ./static/operation_icons/dwt_cor_aVa_4.png -.. image:: ./static/operation_icons/dwt_cor_aVa_5.png - - </help> - -</tool>
--- a/tools/discreteWavelet/execute_dwt_cor_aVb_all.pl Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,223 +0,0 @@ -#!/usr/bin/perl -w - -use warnings; -use IO::Handle; - -$usage = "execute_dwt_cor_aVb_all.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] [PDF.out] \n"; -die $usage unless @ARGV == 4; - -#get the input arguments -my $firstInputFile = $ARGV[0]; -my $secondInputFile = $ARGV[1]; -my $firstOutputFile = $ARGV[2]; -my $secondOutputFile = $ARGV[3]; - -open (INPUT1, "<", $firstInputFile) || die("Could not open file $firstInputFile \n"); -open (INPUT2, "<", $secondInputFile) || die("Could not open file $secondInputFile \n"); -open (OUTPUT1, ">", $firstOutputFile) || die("Could not open file $firstOutputFile \n"); -open (OUTPUT2, ">", $secondOutputFile) || die("Could not open file $secondOutputFile \n"); -open (ERROR, ">", "error.txt") or die ("Could not open file error.txt \n"); - -#save all error messages into the error file $errorFile using the error file handle ERROR -STDERR -> fdopen( \*ERROR, "w" ) or die ("Could not direct errors to the error file error.txt \n"); - -print "There are two input data files: \n"; -print "The input data file is: $firstInputFile \n"; -print "The control data file is: $secondInputFile \n"; - -# IvC test -$test = "cor_aVb_all"; - -# construct an R script to implement the IvC test -print "\n"; - -$r_script = "get_dwt_cor_aVa_test.r"; -print "$r_script \n"; - - -# R script -open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n"; -print Rcmd " - ################################################################################# - # code to do all correlation tests of form: motif(a) vs. motif(b) - # add code to create null bands by permuting the original data series - # generate plots and table matrix of correlation coefficients including p-values - ################################################################################# - library(\"Rwave\"); - library(\"wavethresh\"); - library(\"waveslim\"); - - options(echo = FALSE) - - # normalize data - norm <- function(data){ - v <- (data - mean(data))/sd(data); - if(sum(is.na(v)) >= 1){ - v <- data; - } - return(v); - } - - dwt_cor <- function(data.short, names.short, data.long, names.long, test, pdf, table, filter = 4, bc = \"symmetric\", method = \"kendall\", wf = \"haar\", boundary = \"reflection\") { - print(test); - print(pdf); - print(table); - - pdf(file = pdf); - final_pvalue = NULL; - title = NULL; - - short.levels <- wd(data.short[, 1], filter.number = filter, bc = bc)\$nlevels; - title <- c(\"motif1\", \"motif2\"); - for (i in 1:short.levels){ - title <- c(title, paste(i, \"cor\", sep = \"_\"), paste(i, \"pval\", sep = \"_\")); - } - print(title); - - # normalize the raw data - data.short <- apply(data.short, 2, norm); - data.long <- apply(data.long, 2, norm); - - # loop to compare a vs b - for(i in 1:length(names.short)){ - for(j in 1:length(names.long)){ - if(i >= j){ - next; - } - else { - # Kendall Tau - # DWT wavelet correlation function - # include significance to compare - wave1.dwt = wave2.dwt = NULL; - tau.dwt = NULL; - out = NULL; - - print(names.short[i]); - print(names.long[j]); - - # need exit if not comparing motif(a) vs motif(a) - if (names.short[i] == names.long[j]){ - stop(paste(\"motif\", names.short[i], \"is the same as\", names.long[j], sep = \" \")); - } - else { - wave1.dwt <- dwt(data.short[, i], wf = wf, short.levels, boundary = boundary); - wave2.dwt <- dwt(data.long[, j], wf = wf, short.levels, boundary = boundary); - tau.dwt 
<-vector(length = short.levels) - - # perform cor test on wavelet coefficients per scale - for(level in 1:short.levels){ - w1_level = w2_level = NULL; - w1_level <- (wave1.dwt[[level]]); - w2_level <- (wave2.dwt[[level]]); - tau.dwt[level] <- cor.test(w1_level, w2_level, method = method)\$estimate; - } - - # CI bands by permutation of time series - feature1 = feature2 = NULL; - feature1 = data.short[, i]; - feature2 = data.long[, j]; - null = results = med = NULL; - cor_25 = cor_975 = NULL; - - for (k in 1:1000) { - nk_1 = nk_2 = NULL; - null.levels = NULL; - cor = NULL; - null_wave1 = null_wave2 = NULL; - - nk_1 <- sample(feature1, length(feature1), replace = FALSE); - nk_2 <- sample(feature2, length(feature2), replace = FALSE); - null.levels <- wd(nk_1, filter.number = filter, bc = bc)\$nlevels; - cor <- vector(length = null.levels); - null_wave1 <- dwt(nk_1, wf = wf, short.levels, boundary = boundary); - null_wave2 <- dwt(nk_2, wf = wf, short.levels, boundary = boundary); - - for(level in 1:null.levels){ - null_level1 = null_level2 = NULL; - null_level1 <- (null_wave1[[level]]); - null_level2 <- (null_wave2[[level]]); - cor[level] <- cor.test(null_level1, null_level2, method = method)\$estimate; - } - null = rbind(null, cor); - } - - null <- apply(null, 2, sort, na.last = TRUE); - cor_25 <- null[25, ]; - cor_975 <- null[975, ]; - med <- (apply(null, 2, median, na.rm = TRUE)); - - # plot - results <- cbind(tau.dwt, cor_25, cor_975); - matplot(results, type = \"b\", pch = \"*\", lty = 1, col = c(1, 2, 2), ylim = c(-1, 1), xlab = \"Wavelet Scale\", ylab = \"Wavelet Correlation Kendall's Tau\", main = (paste(test, names.short[i], \"vs.\", names.long[j], sep = \" \")), cex.main = 0.75); - abline(h = 0); - - # get pvalues by comparison to null distribution - ### modify pval calculation for error type II of T test #### - out <- c(names.short[i],names.long[j]); - for (m in 1:length(tau.dwt)){ - print(m); - print(tau.dwt[m]); - out <- c(out, format(tau.dwt[m], digits = 3)); - pv = NULL; - if(is.na(tau.dwt[m])){ - pv <- \"NA\"; - } - else{ - if (tau.dwt[m] >= med[m]){ - # R tail test - pv <- (length(which(null[, m] >= tau.dwt[m])))/(length(na.exclude(null[, m]))); - } - else{ - if (tau.dwt[m] < med[m]){ - # L tail test - pv <- (length(which(null[, m] <= tau.dwt[m])))/(length(na.exclude(null[, m]))); - } - } - } - out <- c(out, pv); - print(pv); - } - final_pvalue <-rbind(final_pvalue, out); - print(out); - } - } - } - } - colnames(final_pvalue) <- title; - write.table(final_pvalue, file = table, sep = \"\\t\", quote = FALSE, row.names = FALSE) - dev.off(); - }\n"; - -print Rcmd " - # execute - # read in data - - inputData1 = inputData2 = NULL; - inputData.short1 = inputData.short2 = NULL; - inputDataNames.short1 = inputDataNames.short2 = NULL; - - inputData1 <- read.delim(\"$firstInputFile\"); - inputData.short1 <- inputData1[, +c(1:ncol(inputData1))]; - inputDataNames.short1 <- colnames(inputData.short1); - - inputData2 <- read.delim(\"$secondInputFile\"); - inputData.short2 <- inputData2[, +c(1:ncol(inputData2))]; - inputDataNames.short2 <- colnames(inputData.short2); - - # cor test for motif(a) in inputData1 vs motif(b) in inputData2 - dwt_cor(inputData.short1, inputDataNames.short1, inputData.short2, inputDataNames.short2, test = \"$test\", pdf = \"$secondOutputFile\", table = \"$firstOutputFile\"); - print (\"done with the correlation test\"); - - #eof\n"; -close Rcmd; - -system("echo \"wavelet IvC test started on \`hostname\` at \`date\`\"\n"); -system("R --no-restore --no-save 
--no-readline < $r_script > $r_script.out\n"); -system("echo \"wavelet IvC test ended on \`hostname\` at \`date\`\"\n"); - -#close the input and output and error files -close(ERROR); -close(OUTPUT2); -close(OUTPUT1); -close(INPUT2); -close(INPUT1);
--- a/tools/discreteWavelet/execute_dwt_cor_aVb_all.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,123 +0,0 @@ -<tool id="compute_p-values_correlation_coefficients_featureA_featureB_occurrences_between_two_datasets_using_discrete_wavelet_transfom" name="Compute P-values and Correlation Coefficients for Occurrences of Two Set of Features" version="1.0.0"> - <description>between two datasets using Discrete Wavelet Transfoms</description> - - <command interpreter="perl"> - execute_dwt_cor_aVb_all.pl $inputFile1 $inputFile2 $outputFile1 $outputFile2 - </command> - - <inputs> - <param format="tabular" name="inputFile1" type="data" label="Select the first input file"/> - <param format="tabular" name="inputFile2" type="data" label="Select the second input file"/> - </inputs> - - <outputs> - <data format="tabular" name="outputFile1"/> - <data format="pdf" name="outputFile2"/> - </outputs> - - <help> - -.. class:: infomark - -**What it does** - -This program generates plots and computes table matrix of coefficient correlations and p-values at multiple scales for the correlation between the occurrences of features in one dataset and their occurrences in another using multiscale wavelet analysis technique. - -The program assumes that the user has two sets of DNA sequences, S1 and S1, each of which consists of one or more sequences of equal length. Each sequence in each set is divided into the same number of multiple intervals n such that n = 2^k, where k is a positive integer and k >= 1. Thus, n could be any value of the set {2, 4, 8, 16, 32, 64, 128, ...}. k represents the number of scales. - -The program has two input files obtained as follows: - -For a given set of features, say motifs, the user counts the number of occurrences of each feature in each interval of each sequence in S1 and S1, and builds two tabular files representing the count results in each interval of S1 and S1. These are the input files of the program. - -The program gives two output files: - -- The first output file is a TABULAR format file representing the coefficient correlations and p-values for each feature at each scale. -- The second output file is a PDF file consisting of as many figures as the number of features, such that each figure represents the values of the coefficient correlations for that feature at every scale. - ------ - -.. class:: warningmark - -**Note** - -In order to obtain empirical p-values, a random perumtation test is implemented by the program, which results in the fact that the program gives slightly different results each time it is run on the same input file. 
- ------ - -**Example** - -Counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S1 gives the following tabular file:: - - deletionHoptspot insertionHoptspot dnaPolPauseFrameshift topoisomeraseCleavageSite translinTarget - 82 162 158 79 459 - 111 196 154 75 459 - 98 178 160 79 475 - 113 201 170 113 436 - 113 173 147 95 446 - 107 150 155 84 436 - 106 166 175 96 448 - 113 176 135 106 514 - 113 170 152 87 450 - 95 152 167 93 467 - 91 171 169 118 426 - 84 139 160 100 459 - 92 154 164 104 440 - 100 145 154 98 472 - 91 161 152 71 461 - 117 164 139 97 463 - -And counting the occurrences of 5 features (motifs) in 16 intervals (one line per interval) of the DNA sequences in S2 gives the following tabular file:: - - deletionHoptspot insertionHoptspot dnaPolPauseFrameshift topoisomeraseCleavageSite translinTarget - 269 366 330 238 1129 - 239 328 327 283 1188 - 254 351 358 297 1151 - 262 371 355 256 1107 - 254 361 352 234 1192 - 265 354 367 240 1182 - 255 359 333 235 1217 - 271 389 387 272 1241 - 240 305 341 249 1159 - 272 351 337 257 1169 - 275 351 337 233 1158 - 305 331 361 253 1172 - 277 341 343 253 1113 - 266 362 355 267 1162 - 235 326 329 241 1230 - 254 335 360 251 1172 - - -We notice that the number of scales here is 4 because 16 = 2^4. Running the program on the above input files gives the following output: - -The first output file:: - - motif1 motif2 1_cor 1_pval 2_cor 2_pval 3_cor 3_pval 4_cor 4_pval - - deletionHoptspot insertionHoptspot -0.1 0.346 -0.214 0.338 1 0.127 1 0.467 - deletionHoptspot dnaPolPauseFrameshift 0.167 0.267 -0.214 0.334 1 0.122 1 0.511 - deletionHoptspot topoisomeraseCleavageSite 0.167 0.277 0.143 0.412 -0.667 0.243 1 0.521 - deletionHoptspot translinTarget 0 0.505 0.0714 0.441 1 0.124 1 0.518 - insertionHoptspot dnaPolPauseFrameshift -0.202 0.238 0.143 0.379 -1 0.122 1 0.517 - insertionHoptspot topoisomeraseCleavageSite -0.0336 0.457 0.214 0.29 0.667 0.252 1 0.503 - insertionHoptspot translinTarget 0.0672 0.389 0.429 0.186 -1 0.119 1 0.506 - dnaPolPauseFrameshift topoisomeraseCleavageSite -0.353 0.101 0.357 0.228 0 0.612 -1 0.49 - dnaPolPauseFrameshift translinTarget -0.151 0.303 -0.571 0.09 -0.333 0.37 -1 1 - topoisomeraseCleavageSite translinTarget -0.37 0.077 -0.222 0.297 0.667 0.234 -1 0.471 - -The second output file: - -.. image:: ./static/operation_icons/dwt_cor_aVb_all_1.png -.. image:: ./static/operation_icons/dwt_cor_aVb_all_2.png -.. image:: ./static/operation_icons/dwt_cor_aVb_all_3.png -.. image:: ./static/operation_icons/dwt_cor_aVb_all_4.png -.. image:: ./static/operation_icons/dwt_cor_aVb_all_5.png -.. image:: ./static/operation_icons/dwt_cor_aVb_all_6.png -.. image:: ./static/operation_icons/dwt_cor_aVb_all_7.png -.. image:: ./static/operation_icons/dwt_cor_aVb_all_8.png -.. image:: ./static/operation_icons/dwt_cor_aVb_all_9.png -.. image:: ./static/operation_icons/dwt_cor_aVb_all_10.png - - - </help> - -</tool>
--- a/tools/discreteWavelet/execute_dwt_var_perClass.pl Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,320 +0,0 @@ -#!/usr/bin/perl -w - -use warnings; -use IO::Handle; -use POSIX qw(floor ceil); - -# example: perl execute_dwt_var_perClass.pl hg18_NCNR_10bp_3flanks_deletionHotspot_data_del.txt deletionHotspot 3flanks del - -$usage = "execute_dwt_var_perClass.pl [TABULAR.in] [TABULAR.out] [TABULAR.out] [PDF.out] \n"; -die $usage unless @ARGV == 4; - -#get the input arguments -my $inputFile = $ARGV[0]; -my $firstOutputFile = $ARGV[1]; -my $secondOutputFile = $ARGV[2]; -my $thirdOutputFile = $ARGV[3]; - -open (INPUT, "<", $inputFile) || die("Could not open file $inputFile \n"); -open (OUTPUT1, ">", $firstOutputFile) || die("Could not open file $firstOutputFile \n"); -open (OUTPUT2, ">", $secondOutputFile) || die("Could not open file $secondOutputFile \n"); -open (OUTPUT3, ">", $thirdOutputFile) || die("Could not open file $thirdOutputFile \n"); -open (ERROR, ">", "error.txt") or die ("Could not open file error.txt \n"); - -#save all error messages into the error file $errorFile using the error file handle ERROR -STDERR -> fdopen( \*ERROR, "w" ) or die ("Could not direct errors to the error file error.txt \n"); - -# choosing meaningful names for the output files -$max_dwt = $firstOutputFile; -$pvalue = $secondOutputFile; -$pdf = $thirdOutputFile; - -# count the number of columns in the input file -while($buffer = <INPUT>){ - #if ($buffer =~ m/interval/){ - chomp($buffer); - $buffer =~ s/^#\s*//; - @contrl = split(/\t/, $buffer); - last; - #} -} -print "The number of columns in the input file is: " . (@contrl) . "\n"; -print "\n"; - -# count the number of motifs in the input file -$count = 0; -for ($i = 0; $i < @contrl; $i++){ - $count++; - print "# $contrl[$i]\n"; -} -print "The number of motifs in the input file is: $count \n"; - -# check if the number of motifs is not a multiple of 12, and round up is so -$count2 = ($count/12); -if ($count2 =~ m/(\D)/){ - print "the number of motifs is not a multiple of 12 \n"; - $count2 = ceil($count2); -} -else { - print "the number of motifs is a multiple of 12 \n"; -} -print "There will be $count2 subfiles\n\n"; - -# split infile into subfiles only 12 motif per file for R plotting -for ($x = 1; $x <= $count2; $x++){ - $a = (($x - 1) * 12 + 1); - $b = $x * 12; - - if ($x < $count2){ - print "# data.short $x <- data_test[, +c($a:$b)]; \n"; - } - else{ - print "# data.short $x <- data_test[, +c($a:ncol(data_test)]; \n"; - } -} - -print "\n"; -print "There are 4 output files: \n"; -print "The first output file is a pdf file\n"; -print "The second output file is a max_dwt file\n"; -print "The third output file is a pvalues file\n"; -print "The fourth output file is a test_final_pvalues file\n"; - -# write R script -$r_script = "get_dwt_varPermut_getMax.r"; -print "The R file name is: $r_script \n"; - -open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n"; - -print Rcmd " - ###################################################################### - # plot power spectra, i.e. 
wavelet variance by class - # add code to create null bands by permuting the original data series - # get class of maximum significant variance per feature - # generate plots and table matrix of variance including p-values - ###################################################################### - library(\"Rwave\"); - library(\"wavethresh\"); - library(\"waveslim\"); - - options(echo = FALSE) - - # normalize data - norm <- function(data){ - v <- (data-mean(data))/sd(data); - if(sum(is.na(v)) >= 1){ - v<-data; - } - return(v); - } - - dwt_var_permut_getMax <- function(data, names, filter = 4, bc = \"symmetric\", method = \"kendall\", wf = \"haar\", boundary = \"reflection\") { - max_var = NULL; - matrix = NULL; - title = NULL; - final_pvalue = NULL; - short.levels = NULL; - scale = NULL; - - print(names); - - par(mfcol = c(length(names), length(names)), mar = c(0, 0, 0, 0), oma = c(4, 3, 3, 2), xaxt = \"s\", cex = 1, las = 1); - - short.levels <- wd(data[, 1], filter.number = filter, bc = bc)\$nlevels; - - title <- c(\"motif\"); - for (i in 1:short.levels){ - title <- c(title, paste(i, \"var\", sep = \"_\"), paste(i, \"pval\", sep = \"_\"), paste(i, \"test\", sep = \"_\")); - } - print(title); - - # normalize the raw data - data<-apply(data,2,norm); - - for(i in 1:length(names)){ - for(j in 1:length(names)){ - temp = NULL; - results = NULL; - wave1.dwt = NULL; - out = NULL; - - out <- vector(length = length(title)); - temp <- vector(length = short.levels); - - if(i < j) { - plot(temp, type = \"n\", axes = FALSE, xlab = NA, ylab = NA); - box(col = \"grey\"); - grid(ny = 0, nx = NULL); - } else { - if (i > j){ - plot(temp, type = \"n\", axes = FALSE, xlab = NA, ylab = NA); - box(col = \"grey\"); - grid(ny = 0, nx = NULL); - } else { - - wave1.dwt <- dwt(data[, i], wf = wf, short.levels, boundary = boundary); - - temp_row = (short.levels + 1 ) * -1; - temp_col = 1; - temp <- wave.variance(wave1.dwt)[temp_row, temp_col]; - - #permutations code : - feature1 = NULL; - null = NULL; - var_25 = NULL; - var_975 = NULL; - med = NULL; - - feature1 = data[, i]; - for (k in 1:1000) { - nk_1 = NULL; - null.levels = NULL; - var = NULL; - null_wave1 = NULL; - - nk_1 = sample(feature1, length(feature1), replace = FALSE); - null.levels <- wd(nk_1, filter.number = filter, bc = bc)\$nlevels; - var <- vector(length = length(null.levels)); - null_wave1 <- dwt(nk_1, wf = wf, short.levels, boundary = boundary); - var<- wave.variance(null_wave1)[-8, 1]; - null= rbind(null, var); - } - null <- apply(null, 2, sort, na.last = TRUE); - var_25 <- null[25, ]; - var_975 <- null[975, ]; - med <- (apply(null, 2, median, na.rm = TRUE)); - - # plot - results <- cbind(temp, var_25, var_975); - matplot(results, type = \"b\", pch = \"*\", lty = 1, col = c(1, 2, 2), axes = F); - - # get pvalues by comparison to null distribution - out <- (names[i]); - for (m in 1:length(temp)){ - print(paste(\"scale\", m, sep = \" \")); - print(paste(\"var\", temp[m], sep = \" \")); - print(paste(\"med\", med[m], sep = \" \")); - pv = tail = NULL; - out <- c(out, format(temp[m], digits = 3)); - if (temp[m] >= med[m]){ - # R tail test - print(\"R\"); - tail <- \"R\"; - pv <- (length(which(null[, m] >= temp[m])))/(length(na.exclude(null[, m]))); - - } else { - if (temp[m] < med[m]){ - # L tail test - print(\"L\"); - tail <- \"L\"; - pv <- (length(which(null[, m] <= temp[m])))/(length(na.exclude(null[, m]))); - } - } - out <- c(out, pv); - print(pv); - out <- c(out, tail); - } - final_pvalue <-rbind(final_pvalue, out); - - - # get variances outside 
null bands by comparing temp to null - ## temp stores variance for each scale, and null stores permuted variances for null bands - for (n in 1:length(temp)){ - if (temp[n] <= var_975[n]){ - temp[n] <- NA; - } else { - temp[n] <- temp[n]; - } - } - matrix <- rbind(matrix, temp) - } - } - # labels - if (i == 1){ - mtext(names[j], side = 2, line = 0.5, las = 3, cex = 0.25); - } - if (j == 1){ - mtext(names[i], side = 3, line = 0.5, cex = 0.25); - } - if (j == length(names)){ - axis(1, at = (1:short.levels), las = 3, cex.axis = 0.5); - } - } - } - colnames(final_pvalue) <- title; - #write.table(final_pvalue, file = \"test_final_pvalue.txt\", sep = \"\\t\", quote = FALSE, row.names = FALSE, append = TRUE); - - # get maximum variance larger than expectation by comparison to null bands - varnames <- vector(); - for(i in 1:length(names)){ - name1 = paste(names[i], \"var\", sep = \"_\") - varnames <- c(varnames, name1) - } - rownames(matrix) <- varnames; - colnames(matrix) <- (1:short.levels); - max_var <- names; - scale <- vector(length = length(names)); - for (x in 1:nrow(matrix)){ - if (length(which.max(matrix[x, ])) == 0){ - scale[x] <- NA; - } - else{ - scale[x] <- colnames(matrix)[which.max(matrix[x, ])]; - } - } - max_var <- cbind(max_var, scale); - write.table(max_var, file = \"$max_dwt\", sep = \"\\t\", quote = FALSE, row.names = FALSE, append = TRUE); - return(final_pvalue); - }\n"; - -print Rcmd " - # execute - # read in data - - data_test = NULL; - data_test <- read.delim(\"$inputFile\"); - - pdf(file = \"$pdf\", width = 11, height = 8); - - # loop to read and execute on all $count2 subfiles - final = NULL; - for (x in 1:$count2){ - sub = NULL; - sub_names = NULL; - a = NULL; - b = NULL; - - a = ((x - 1) * 12 + 1); - b = x * 12; - - if (x < $count2){ - sub <- data_test[, +c(a:b)]; - sub_names <- colnames(data_test)[a:b]; - final <- rbind(final, dwt_var_permut_getMax(sub, sub_names)); - } - else{ - sub <- data_test[, +c(a:ncol(data_test))]; - sub_names <- colnames(data_test)[a:ncol(data_test)]; - final <- rbind(final, dwt_var_permut_getMax(sub, sub_names)); - - } - } - - dev.off(); - - write.table(final, file = \"$pvalue\", sep = \"\\t\", quote = FALSE, row.names = FALSE); - - #eof\n"; - -close Rcmd; - -system("echo \"wavelet ANOVA started on \`hostname\` at \`date\`\"\n"); -system("R --no-restore --no-save --no-readline < $r_script > $r_script.out"); -system("echo \"wavelet ANOVA ended on \`hostname\` at \`date\`\"\n"); - -#close the input and output and error files -close(ERROR); -close(OUTPUT3); -close(OUTPUT2); -close(OUTPUT1); -close(INPUT); \ No newline at end of file
--- a/tools/discreteWavelet/execute_dwt_var_perClass.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,105 +0,0 @@ -<tool id="compute_p-values_max_variances_feature_occurrences_in_one_dataset_using_discrete_wavelet_transfom" name="Compute P-values and Max Variances for Feature Occurrences" version="1.0.0"> - <description>in one dataset using Discrete Wavelet Transfoms</description> - - <command interpreter="perl"> - execute_dwt_var_perClass.pl $inputFile $outputFile1 $outputFile2 $outputFile3 - </command> - - <inputs> - <param format="tabular" name="inputFile" type="data" label="Select the input file"/> - </inputs> - - <outputs> - <data format="tabular" name="outputFile1"/> - <data format="tabular" name="outputFile2"/> - <data format="pdf" name="outputFile3"/> - </outputs> - - <help> - -.. class:: infomark - -**What it does** - -This program generates plots and computes table matrix of maximum variances, p-values, and test orientations at multiple scales for the occurrences of a class of features in one dataset of DNA sequences using multiscale wavelet analysis technique. - -The program assumes that the user has one set of DNA sequences, S, which consists of one or more sequences of equal length. Each sequence in S is divided into the same number of multiple intervals n such that n = 2^k, where k is a positive integer and k >= 1. Thus, n could be any value of the set {2, 4, 8, 16, 32, 64, 128, ...}. k represents the number of scales. - -The program has one input file obtained as follows: - -For a given set of features, say motifs, the user counts the number of occurrences of each feature in each interval of each sequence in S, and builds a tabular file representing the count results in each interval of S. This is the input file of the program. - -The program gives three output files: - -- The first output file is a TABULAR format file giving the scales at which each features has a maximum variances. -- The second output file is a TABULAR format file representing the variances, p-values, and test orientation for the occurrences of features at each scale based on a random permutation test and using multiscale wavelet analysis technique. -- The third output file is a PDF file plotting the wavelet variances of each feature at each scale. - ------ - -.. class:: warningmark - -**Note** - -- If the number of features is greater than 12, the program will divide each output file into subfiles, such that each subfile represents the results of a group of 12 features except the last subfile that will represents the results of the rest. For example, if the number of features is 17, the p-values file will consists of two subfiles, the first for the features 1-12 and the second for the features 13-17. As for the PDF file, it will consists of two pages in this case. -- In order to obtain empirical p-values, a random perumtation test is implemented by the program, which results in the fact that the program gives slightly different results each time it is run on the same input file. 
- ------ - - -**Example** - -Counting the occurrences of 8 features (motifs) in 16 intervals (one line per interval) of set of DNA sequences in S gives the following tabular file:: - - deletionHoptspot insertionHoptspot dnaPolPauseFrameshift indelHotspot topoisomeraseCleavageSite translinTarget vDjRecombinationSignal x-likeSite - 226 403 416 221 1165 832 749 1056 - 236 444 380 241 1223 746 782 1207 - 242 496 391 195 1116 643 770 1219 - 243 429 364 191 1118 694 783 1223 - 244 410 371 236 1063 692 805 1233 - 230 386 370 217 1087 657 787 1215 - 275 404 402 214 1044 697 831 1188 - 265 443 365 231 1086 694 782 1184 - 255 390 354 246 1114 642 773 1176 - 281 384 406 232 1102 719 787 1191 - 263 459 369 251 1135 643 810 1215 - 280 433 400 251 1159 701 777 1151 - 278 385 382 231 1147 697 707 1161 - 248 393 389 211 1162 723 759 1183 - 251 403 385 246 1114 752 776 1153 - 239 383 347 227 1172 759 789 1141 - -We notice that the number of scales here is 4 because 16 = 2^4. Runnig the program on the above input file gives the following 3 output files: - -The first output file:: - - motifs max_var at scale - deletionHoptspot NA - insertionHoptspot NA - dnaPolPauseFrameshift NA - indelHotspot NA - topoisomeraseCleavageSite 3 - translinTarget NA - vDjRecombinationSignal NA - x.likeSite NA - -The second output file:: - - motif 1_var 1_pval 1_test 2_var 2_pval 2_test 3_var 3_pval 3_test 4_var 4_pval 4_test - - deletionHoptspot 0.457 0.048 L 1.18 0.334 R 1.61 0.194 R 3.41 0.055 R - insertionHoptspot 0.556 0.109 L 1.34 0.272 R 1.59 0.223 R 2.02 0.157 R - dnaPolPauseFrameshift 1.42 0.089 R 0.66 0.331 L 0.421 0.305 L 0.121 0.268 L - indelHotspot 0.373 0.021 L 1.36 0.254 R 1.24 0.301 R 4.09 0.047 R - topoisomeraseCleavageSite 0.305 0.002 L 0.936 0.489 R 3.78 0.01 R 1.25 0.272 R - translinTarget 0.525 0.061 L 1.69 0.11 R 2.02 0.131 R 0.00891 0.069 L - vDjRecombinationSignal 0.68 0.138 L 0.957 0.46 R 2.35 0.071 R 1.03 0.357 R - x.likeSite 0.928 0.402 L 1.33 0.261 R 0.735 0.431 L 0.783 0.422 R - -The third output file: - -.. image:: ./static/operation_icons/dwt_var_perClass.png - - </help> - -</tool>
--- a/tools/discreteWavelet/execute_dwt_var_perFeature.pl Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,199 +0,0 @@ -#!/usr/bin/perl -w -# Author: Erika Kvikstad - -use warnings; -use IO::Handle; -use POSIX qw(floor ceil); - -$usage = "execute_dwt_var_perFeature.pl [TABULAR.in] [FEATURE] [ALPHA] [TABULAR.out] [PDF.out] \n"; -die $usage unless @ARGV == 5; - -#get the input arguments -my $inputFile = $ARGV[0]; -my @features = split(/,/,$ARGV[1]); -my $features_count = scalar(@features); -my $alpha = $ARGV[2]; -my $outFile1 = $ARGV[3]; -my $outFile2 = $ARGV[4]; - -open (INPUT, "<", $inputFile) || die("Could not open file $inputFile \n"); -open (OUTPUT2, ">", $outFile1) || die("Could not open file $outFile1 \n"); -open (OUTPUT3, ">", $outFile2) || die("Could not open file $outFile2 \n"); -#open (ERROR, ">", "error.txt") or die ("Could not open file error.txt \n"); - -# choosing meaningful names for the output files -$pvalue = $outFile1; -$pdf = $outFile2; - -# write R script -$r_script = "get_dwt_varPermut.r"; - -open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n"; - -print Rcmd " - ###################################################################### - # plot multiscale wavelet variance - # create null bands by permuting the original data series - # generate plots and table of wavelet variance including p-values - ###################################################################### - options(echo = FALSE) - #library(\"Rwave\"); - #library(\"wavethresh\"); - #library(\"waveslim\"); - # turn off diagnostics for de-bugging only, turn back on for functional tests on test - require(\"Rwave\",quietly=TRUE,warn.conflicts = FALSE); - require(\"wavethresh\",quietly=TRUE,warn.conflicts = FALSE); - require(\"waveslim\",quietly=TRUE,warn.conflicts = FALSE); - require(\"bitops\",quietly=TRUE,warn.conflicts = FALSE); - - # to determine if data is properly formatted 2^N observations - is.power2<- function(x){x && !(bitAnd(x,x - 1));} - - # dwt : discrete wavelet transform using Haar wavelet filter, simplest wavelet function but later can modify to let user-define the wavelet filter function - dwt_var_permut_getMax <- function(data, names, alpha, filter = 1,family=\"DaubExPhase\", bc = \"symmetric\", method = \"kendall\", wf = \"haar\", boundary = \"reflection\") { - max_var = NULL; - matrix = NULL; - title = NULL; - final_pvalue = NULL; - J = NULL; - scale = NULL; - out = NULL; - - print(class(data)); - print(names); - print(alpha); - - par(mar=c(5,4,4,3),oma = c(4, 4, 3, 2), xaxt = \"s\", cex = 1, las = 1); - - title<-c(\"Wavelet\",\"Variance\",\"Pvalue\",\"Test\"); - print(title); - - for(i in 1:length(names)){ - temp = NULL; - results = NULL; - wave1.dwt = NULL; - - # if data fails formatting check, do something - - print(is.numeric(as.matrix(data)[, i])); - if(!is.numeric(as.matrix(data)[, i])) - stop(\"data must be a numeric vector\"); - - print(length(as.matrix(data)[, i])); - print(is.power2(length(as.matrix(data)[, i]))); - if(!is.power2(length(as.matrix(data)[, i]))) - stop(\"data length must be a power of two\"); - - - J <- wd(as.matrix(data)[, i], filter.number = filter, family=family, bc = bc)\$nlevels; - print(J); - temp <- vector(length = J); - wave1.dwt <- dwt(as.matrix(data)[, i], wf = wf, J, boundary = boundary); - #print(wave1.dwt); - - temp <- wave.variance(wave1.dwt)[-(J+1), 1]; - print(temp); - - #permutations code : - feature1 = NULL; - null = NULL; - var_lower=limit_lower=NULL; - var_upper=limit_upper=NULL; - med = NULL; - - 
limit_lower = alpha/2*1000; - print(limit_lower); - limit_upper = (1-alpha/2)*1000; - print(limit_upper); - - feature1 = as.matrix(data)[,i]; - for (k in 1:1000) { - nk_1 = NULL; - null.levels = NULL; - var = NULL; - null_wave1 = NULL; - - nk_1 = sample(feature1, length(feature1), replace = FALSE); - null.levels <- wd(nk_1, filter.number = filter,family=family ,bc = bc)\$nlevels; - var <- vector(length = length(null.levels)); - null_wave1 <- dwt(nk_1, wf = wf, J, boundary = boundary); - var<- wave.variance(null_wave1)[-(null.levels+1), 1]; - null= rbind(null, var); - } - null <- apply(null, 2, sort, na.last = TRUE); - var_lower <- null[limit_lower, ]; - var_upper <- null[limit_upper, ]; - med <- (apply(null, 2, median, na.rm = TRUE)); - - # plot - results <- cbind(temp, var_lower, var_upper); - print(results); - matplot(results, type = \"b\", pch = \"*\", lty = 1, col = c(1, 2, 2),xaxt='n',xlab=\"Wavelet Scale\",ylab=\"Wavelet variance\" ); - mtext(names[i], side = 3, line = 0.5, cex = 1); - axis(1, at = 1:J , labels=c(2^(0:(J-1))), las = 3, cex.axis = 1); - - # get pvalues by comparison to null distribution - #out <- (names[i]); - for (m in 1:length(temp)){ - print(paste(\"scale\", m, sep = \" \")); - print(paste(\"var\", temp[m], sep = \" \")); - print(paste(\"med\", med[m], sep = \" \")); - pv = tail =scale = NULL; - scale=2^(m-1); - #out <- c(out, format(temp[m], digits = 3)); - if (temp[m] >= med[m]){ - # R tail test - print(\"R\"); - tail <- \"R\"; - pv <- (length(which(null[, m] >= temp[m])))/(length(na.exclude(null[, m]))); - - } else { - if (temp[m] < med[m]){ - # L tail test - print(\"L\"); - tail <- \"L\"; - pv <- (length(which(null[, m] <= temp[m])))/(length(na.exclude(null[, m]))); - } - } - print(pv); - out<-rbind(out,c(paste(\"Scale\", scale, sep=\"_\"),format(temp[m], digits = 3),pv,tail)); - } - final_pvalue <-rbind(final_pvalue, out); - } - colnames(final_pvalue) <- title; - return(final_pvalue); -}\n"; - -print Rcmd " -# execute -# read in data -data_test = final = NULL; -sub = sub_names = NULL; -data_test <- read.delim(\"$inputFile\",header=FALSE); -pdf(file = \"$pdf\", width = 11, height = 8)\n"; - -for ($x=0;$x<$features_count;$x++){ - $feature=$features[$x]; -print Rcmd " - if ($feature > ncol(data_test)) - stop(\"column $feature doesn't exist\"); - sub<-data_test[,$feature]; - #sub_names <- colnames(data_test); - sub_names<-colnames(data_test)[$feature]; - final <- rbind(final,dwt_var_permut_getMax(sub, sub_names,$alpha));\n"; -} - -print Rcmd " - - dev.off(); - write.table(final, file = \"$pvalue\", sep = \"\\t\", quote = FALSE, row.names = FALSE); - -#eof\n"; - -close Rcmd; -system("R --no-restore --no-save --no-readline < $r_script > $r_script.out"); - -#close the input and output and error files -close(OUTPUT3); -close(OUTPUT2); -close(INPUT);
--- a/tools/discreteWavelet/execute_dwt_var_perFeature.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,57 +0,0 @@ -<tool id="dwt_var1" name="Wavelet variance" version="1.0.0"> - <description>using Discrete Wavelet Transfoms</description> - - <command interpreter="perl"> - execute_dwt_var_perFeature.pl $inputFile $feature $alpha $outputFile1 $outputFile2 - </command> - - <inputs> - <param format="tabular" name="inputFile" type="data" label="Select data"/> - <param name="feature" label="Feature column" type="data_column" data_ref="inputFile" multiple="true" help="Please select at least one column"/> - <param name="alpha" size="10" type="float" value="0.05" label="alpha (significance level)" /> - </inputs> - - <outputs> - <data format="tabular" name="outputFile1"/> - <data format="pdf" name="outputFile2"/> - </outputs> - <tests> - <test> - <param name="inputFile" value="discreteWavelet/dwt_var1/dwt_var_in.interval"/> - <param name="feature" value="4"/> - <param name="alpha" value="0.05"/> - <output name="outputFile1" file="discreteWavelet/dwt_var1/dwt_var_out1.tabular" compare="re_match"/> - <output name="outputFile2" file="discreteWavelet/dwt_var1/dwt_var_out2.pdf" compare="sim_size"/> - </test> - </tests> - - <help> - -.. class:: infomark - -**What it does** - -This tool computes the scale-specific variance in wavelet coeffients obtained from the discrete wavelet transform of a feature of interest. - -Input data consists of an ordered series of data, S, equispaced and of sample size N, where N is of the form N = 2^k, and k is a positive integer and represents the number of levels of wavelet decomposition. S could be a time series, or a set of DNA sequences. The user calculates a statistic of interest for each feature in each interval of S: say, expression level of a particular gene in a time course, or the number of LINE elements per window across a chromosome. This tool then performs a discrete wavelet transform of the feature of interest, and plots the resulting variance in wavelet coefficients per wavelet scale. In addition, statistical significance of variances are determined by 1,000 random permutations of the intervals in S, to generate null bands (representing the user provided alpha value) corresponding to the empirical distribution of wavelet variances under the null hypothesis of no inherent order to the series in S. - -This tool generates two output files: - -- The first output file is a TABULAR format file representing the variances, p-values, and test orientation for the features at each wavelet scale based on a random permutation test. -- The second output file is a PDF image plotting the wavelet variances of each feature at each scale. - ------ - -.. class:: warningmark - -**Note** -In order to obtain empirical p-values, a random perumtation scheme is implemented by the tool, such that the output may generate slightly variations in results each time it is run on the same input file. - ------ - - -**Example** - - </help> - -</tool>
--- a/tools/emboss_5/emboss_antigenic.xml Fri Mar 09 19:45:42 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ -<tool id="EMBOSS: antigenic1" name="antigenic" version="5.0.0"> - <description>Predicts potentially antigenic regions of a protein sequence, using the method of Kolaskar and Tongaonkar.</description> - <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements> - <command>antigenic -sequence $input1 -outfile $out_file1 -minlen $minlen -rformat2 $out_format1 -auto</command> - <inputs> - <param format="data" name="input1" type="data"> - <label>On query</label> - </param> - <param name="minlen" size="4" type="text" value="6"> - <label>Minimum Length of region</label> - </param> - <param name="out_format1" type="select"> - <label>Output format</label> - <option value="gff">GFF</option> - <option value="pir">PIR</option> - <option value="swiss">SwissProt</option> - <option value="dbmotif">DbMotif</option> - <option value="diffseq">diffseq</option> - <option value="excel">Excel (TAB Delimited)</option> - <option value="feattable">FeatTable</option> - <option value="motif">Motif</option> - <option value="nametable">NameTable</option> - <option value="regions">Regions</option> - <option value="seqtable">SeqTable</option> - <option value="simple">SRS simple</option> - <option value="srs">SRS</option> - <option value="table">Table</option> - <option value="tagseq">Tagseq</option> - <option value="antigenic">Antigenic Output File</option> - </param> - </inputs> - <outputs> - <data format="antigenic" name="out_file1" /> - </outputs> - <tests> - <test> - <param name="input1" value="2.fasta"/> - <param name="minlen" value="6"/> - <param name="out_format1" value="excel"/> - <output name="out_file1" file="emboss_antigenic_out.tabular"/> - </test> - </tests> - <code file="emboss_format_corrector.py" /> - <help> - You can view the original documentation here_. - - .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/antigenic.html - </help> -</tool> \ No newline at end of file
--- a/tools/emboss_5/emboss_backtranseq.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,220 +0,0 @@
-<tool id="EMBOSS: backtranseq2" name="backtranseq" version="5.0.0">
-  <description>Back translate a protein sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>backtranseq -sequence $input1 -outfile $out_file1 -cfile $cfile -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="cfile" type="select">
-      <label>Codon Usage File</label>
-      <option value="Ehum.cut">Ehum.cut</option>
-      <option value="Eacc.cut">Eacc.cut</option>
-      <option value="Eadenovirus5.cut">Eadenovirus5.cut</option>
-      <option value="Eadenovirus7.cut">Eadenovirus7.cut</option>
-      <option value="Eaidlav.cut">Eaidlav.cut</option>
-      <option value="Eanasp.cut">Eanasp.cut</option>
-      <option value="Eani.cut">Eani.cut</option>
-      <option value="Eani_h.cut">Eani_h.cut</option>
-      <option value="Eanidmit.cut">Eanidmit.cut</option>
-      <option value="Easn.cut">Easn.cut</option>
-      <option value="Eath.cut">Eath.cut</option>
-      <option value="Eatu.cut">Eatu.cut</option>
-      <option value="Eavi.cut">Eavi.cut</option>
-      <option value="Ebja.cut">Ebja.cut</option>
-      <option value="Ebly.cut">Ebly.cut</option>
-      <option value="Ebme.cut">Ebme.cut</option>
-      <option value="Ebmo.cut">Ebmo.cut</option>
-      <option value="Ebna.cut">Ebna.cut</option>
-      <option value="Ebov.cut">Ebov.cut</option>
-      <option value="Ebovsp.cut">Ebovsp.cut</option>
-      <option value="Ebst.cut">Ebst.cut</option>
-      <option value="Ebsu.cut">Ebsu.cut</option>
-      <option value="Ebsu_h.cut">Ebsu_h.cut</option>
-      <option value="Ecac.cut">Ecac.cut</option>
-      <option value="Ecal.cut">Ecal.cut</option>
-      <option value="Eccr.cut">Eccr.cut</option>
-      <option value="Ecel.cut">Ecel.cut</option>
-      <option value="Echi.cut">Echi.cut</option>
-      <option value="Echicken.cut">Echicken.cut</option>
-      <option value="Echisp.cut">Echisp.cut</option>
-      <option value="Echk.cut">Echk.cut</option>
-      <option value="Echmp.cut">Echmp.cut</option>
-      <option value="Echnt.cut">Echnt.cut</option>
-      <option value="Echos.cut">Echos.cut</option>
-      <option value="Echzm.cut">Echzm.cut</option>
-      <option value="Echzmrubp.cut">Echzmrubp.cut</option>
-      <option value="Ecpx.cut">Ecpx.cut</option>
-      <option value="Ecre.cut">Ecre.cut</option>
-      <option value="Ecrisp.cut">Ecrisp.cut</option>
-      <option value="Ectr.cut">Ectr.cut</option>
-      <option value="Edayhoff.cut">Edayhoff.cut</option>
-      <option value="Eddi.cut">Eddi.cut</option>
-      <option value="Eddi_h.cut">Eddi_h.cut</option>
-      <option value="Edog.cut">Edog.cut</option>
-      <option value="Edro.cut">Edro.cut</option>
-      <option value="Edro_h.cut">Edro_h.cut</option>
-      <option value="Edrosophila.cut">Edrosophila.cut</option>
-      <option value="Eeca.cut">Eeca.cut</option>
-      <option value="Eeco.cut">Eeco.cut</option>
-      <option value="Eeco_h.cut">Eeco_h.cut</option>
-      <option value="Eecoli.cut">Eecoli.cut</option>
-      <option value="Ef1.cut">Ef1.cut</option>
-      <option value="Efish.cut">Efish.cut</option>
-      <option value="Efmdvpolyp.cut">Efmdvpolyp.cut</option>
-      <option value="Eham.cut">Eham.cut</option>
-      <option value="Ehha.cut">Ehha.cut</option>
-      <option value="Ehin.cut">Ehin.cut</option>
-      <option value="Ehma.cut">Ehma.cut</option>
-      <option value="Ehuman.cut">Ehuman.cut</option>
-      <option value="Ekla.cut">Ekla.cut</option>
-      <option value="Ekpn.cut">Ekpn.cut</option>
-      <option value="Ella.cut">Ella.cut</option>
-      <option value="Emac.cut">Emac.cut</option>
-      <option value="Emaize.cut">Emaize.cut</option>
-      <option value="Emam_h.cut">Emam_h.cut</option>
-      <option value="Emixlg.cut">Emixlg.cut</option>
-      <option value="Emouse.cut">Emouse.cut</option>
-      <option value="Emsa.cut">Emsa.cut</option>
-      <option value="Emse.cut">Emse.cut</option>
-      <option value="Emta.cut">Emta.cut</option>
-      <option value="Emtu.cut">Emtu.cut</option>
-      <option value="Emus.cut">Emus.cut</option>
-      <option value="Emussp.cut">Emussp.cut</option>
-      <option value="Emva.cut">Emva.cut</option>
-      <option value="Emze.cut">Emze.cut</option>
-      <option value="Emzecp.cut">Emzecp.cut</option>
-      <option value="Encr.cut">Encr.cut</option>
-      <option value="Eneu.cut">Eneu.cut</option>
-      <option value="Engo.cut">Engo.cut</option>
-      <option value="Eoncsp.cut">Eoncsp.cut</option>
-      <option value="Epae.cut">Epae.cut</option>
-      <option value="Epea.cut">Epea.cut</option>
-      <option value="Epet.cut">Epet.cut</option>
-      <option value="Epfa.cut">Epfa.cut</option>
-      <option value="Ephix174.cut">Ephix174.cut</option>
-      <option value="Ephv.cut">Ephv.cut</option>
-      <option value="Ephy.cut">Ephy.cut</option>
-      <option value="Epig.cut">Epig.cut</option>
-      <option value="Epolyomaa2.cut">Epolyomaa2.cut</option>
-      <option value="Epombe.cut">Epombe.cut</option>
-      <option value="Epombecai.cut">Epombecai.cut</option>
-      <option value="Epot.cut">Epot.cut</option>
-      <option value="Eppu.cut">Eppu.cut</option>
-      <option value="Epse.cut">Epse.cut</option>
-      <option value="Epsy.cut">Epsy.cut</option>
-      <option value="Epvu.cut">Epvu.cut</option>
-      <option value="Erab.cut">Erab.cut</option>
-      <option value="Erabbit.cut">Erabbit.cut</option>
-      <option value="Erabsp.cut">Erabsp.cut</option>
-      <option value="Erat.cut">Erat.cut</option>
-      <option value="Eratsp.cut">Eratsp.cut</option>
-      <option value="Erca.cut">Erca.cut</option>
-      <option value="Erhm.cut">Erhm.cut</option>
-      <option value="Eric.cut">Eric.cut</option>
-      <option value="Erle.cut">Erle.cut</option>
-      <option value="Erme.cut">Erme.cut</option>
-      <option value="Ersp.cut">Ersp.cut</option>
-      <option value="Esalsp.cut">Esalsp.cut</option>
-      <option value="Esau.cut">Esau.cut</option>
-      <option value="Esco.cut">Esco.cut</option>
-      <option value="Esgi.cut">Esgi.cut</option>
-      <option value="Eshp.cut">Eshp.cut</option>
-      <option value="Eshpsp.cut">Eshpsp.cut</option>
-      <option value="Esli.cut">Esli.cut</option>
-      <option value="Eslm.cut">Eslm.cut</option>
-      <option value="Esma.cut">Esma.cut</option>
-      <option value="Esmi.cut">Esmi.cut</option>
-      <option value="Esmu.cut">Esmu.cut</option>
-      <option value="Esoy.cut">Esoy.cut</option>
-      <option value="Espi.cut">Espi.cut</option>
-      <option value="Espn.cut">Espn.cut</option>
-      <option value="Espo.cut">Espo.cut</option>
-      <option value="Espo_h.cut">Espo_h.cut</option>
-      <option value="Espu.cut">Espu.cut</option>
-      <option value="Esta.cut">Esta.cut</option>
-      <option value="Esty.cut">Esty.cut</option>
-      <option value="Esus.cut">Esus.cut</option>
-      <option value="Esv40.cut">Esv40.cut</option>
-      <option value="Esyhsp.cut">Esyhsp.cut</option>
-      <option value="Esynsp.cut">Esynsp.cut</option>
-      <option value="Etbr.cut">Etbr.cut</option>
-      <option value="Etcr.cut">Etcr.cut</option>
-      <option value="Eter.cut">Eter.cut</option>
-      <option value="Etetsp.cut">Etetsp.cut</option>
-      <option value="Etob.cut">Etob.cut</option>
-      <option value="Etobcp.cut">Etobcp.cut</option>
-      <option value="Etom.cut">Etom.cut</option>
-      <option value="Etrb.cut">Etrb.cut</option>
-      <option value="Evco.cut">Evco.cut</option>
-      <option value="Ewht.cut">Ewht.cut</option>
-      <option value="Exel.cut">Exel.cut</option>
-      <option value="Exenopus.cut">Exenopus.cut</option>
-      <option value="Eyeast.cut">Eyeast.cut</option>
-      <option value="Eyeastcai.cut">Eyeastcai.cut</option>
-      <option value="Eyen.cut">Eyen.cut</option>
-      <option value="Eysc.cut">Eysc.cut</option>
-      <option value="Eysc_h.cut">Eysc_h.cut</option>
-      <option value="Eyscmt.cut">Eyscmt.cut</option>
-      <option value="Eysp.cut">Eysp.cut</option>
-      <option value="Ezebrafish.cut">Ezebrafish.cut</option>
-      <option value="Ezma.cut">Ezma.cut</option>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="txt" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="cfile" value="Ehum.cut"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_backtranseq_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
- You can view the original documentation here_.
-
- .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/backtranseq.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_banana.pl Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-#! /usr/bin/perl -w
-use strict;
-
-my $cmd_string = join (" ",@ARGV);
-#my $cmd_string = "/home/djb396/temp/emboss/bin/banana -sequence /home/djb396/universe-prototype/test.fasta -outfile result.txt -graph png -goutfile results -auto";
-my $results = `$cmd_string`;
-my @files = split("\n",$results);
-foreach my $thisLine (@files)
-{
-    if ($thisLine =~ /Created /i)
-    {
-        $thisLine =~ /[\w|\.]+$/;
-        $thisLine =$&;
-        print "outfile: $thisLine\n";
-    }
-}
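The deleted wrapper above runs an EMBOSS command and scrapes its stdout for "Created <file>" messages to learn which graph file was written. A rough Python equivalent of the same scraping logic (hypothetical, not a file in this repository; note the Perl character class [\w|\.] also matches a literal '|', which was probably unintended):

    import re
    import subprocess
    import sys

    # Run the EMBOSS command passed as arguments and capture its stdout.
    result = subprocess.run(sys.argv[1:], capture_output=True, text=True)
    for line in result.stdout.splitlines():
        # EMBOSS graphics drivers report 'Created <filename>' on stdout.
        if re.search(r"Created ", line, re.IGNORECASE):
            match = re.search(r"[\w.]+$", line)
            if match:
                print("outfile: %s" % match.group(0))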
--- a/tools/emboss_5/emboss_banana.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-<tool id="EMBOSS: banana3" name="banana" version="5.0.0">
-  <description>Bending and curvature plot in B-DNA</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>banana -sequence $input1 -outfile $out_file1 -graph none -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="txt" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <output name="out_file1" file="emboss_banana_out.txt"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/banana.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_biosed.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-<tool id="EMBOSS: biosed4" name="biosed" version="5.0.0">
-  <description>Replace or delete sequence sections</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>biosed -sequence $input1 -outseq $out_file1 -target $target -replace $replace -osformat2 $out_format1 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="target" size="6" type="text" value="atg">
-      <label>Replace all</label>
-    </param>
-    <param name="replace" size="6" type="text" value="atg">
-      <label>with</label>
-    </param>
-    <param name="out_format1" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="txt" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="target" value="atg"/>
-      <param name="replace" value="agt"/>
-      <param name="out_format1" value="fasta"/>
-      <output name="out_file1" file="emboss_biosed_out.fasta"/>
-    </test>
-  </tests>
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
- You can view the original documentation here_.
-
- .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/biosed.html
-  </help>
-</tool>
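Conceptually, the -target/-replace pair asks biosed to substitute one subsequence for another everywhere it occurs (the real tool also supports deletion and operates on parsed FASTA records). The tool's test case above amounts to:

    # Illustration only: what 'biosed ... -target atg -replace agt' does to
    # the sequence letters themselves.
    sequence = "atgcccatgtta"
    print(sequence.replace("atg", "agt"))  # agtcccagttta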
--- a/tools/emboss_5/emboss_btwisted.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-<tool id="EMBOSS: btwisted5" name="btwisted" version="5.0.0">
-  <description>Calculates the twisting in a B-DNA sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>btwisted -sequence $input1 -outfile $out_file1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="btwisted" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <output name="out_file1" file="emboss_btwisted_out.btwisted"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/btwisted.html
-  </help>
-</tool>
\ No newline at end of file
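btwisted totals the helical twist contributed by each dinucleotide step of the input, using angles read from an EMBOSS data file. A toy version with invented angles, just to show the shape of the calculation (the values below are illustrative assumptions, not EMBOSS's real data):

    # Illustrative per-dinucleotide twist angles in degrees; the real values
    # come from EMBOSS's data files, not from this table.
    TWIST_DEGREES = {"AA": 35.6, "AT": 31.1, "TA": 36.0, "GC": 40.0}

    def total_twist(seq, default=34.3):
        steps = (seq[i:i + 2] for i in range(len(seq) - 1))
        return sum(TWIST_DEGREES.get(step, default) for step in steps)

    print(total_twist("AATAT"))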
--- a/tools/emboss_5/emboss_cai.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,185 +0,0 @@
-<tool id="EMBOSS: cai6" name="cai" version="5.0.0">
-  <description>CAI codon adaptation index</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>cai -seqall $input1 -outfile $out_file1 -cfile $cfile -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="cfile" type="select">
-      <label>Codon Usage File</label>
-      <option value="Eyeastcai.cut">Eyeastcai.cut</option>
-      <option value="Ehum.cut">Ehum.cut</option>
-      <option value="Eacc.cut">Eacc.cut</option>
-      <option value="Eadenovirus5.cut">Eadenovirus5.cut</option>
-      <option value="Eadenovirus7.cut">Eadenovirus7.cut</option>
-      <option value="Eaidlav.cut">Eaidlav.cut</option>
-      <option value="Eanasp.cut">Eanasp.cut</option>
-      <option value="Eani.cut">Eani.cut</option>
-      <option value="Eani_h.cut">Eani_h.cut</option>
-      <option value="Eanidmit.cut">Eanidmit.cut</option>
-      <option value="Easn.cut">Easn.cut</option>
-      <option value="Eath.cut">Eath.cut</option>
-      <option value="Eatu.cut">Eatu.cut</option>
-      <option value="Eavi.cut">Eavi.cut</option>
-      <option value="Ebja.cut">Ebja.cut</option>
-      <option value="Ebly.cut">Ebly.cut</option>
-      <option value="Ebme.cut">Ebme.cut</option>
-      <option value="Ebmo.cut">Ebmo.cut</option>
-      <option value="Ebna.cut">Ebna.cut</option>
-      <option value="Ebov.cut">Ebov.cut</option>
-      <option value="Ebovsp.cut">Ebovsp.cut</option>
-      <option value="Ebst.cut">Ebst.cut</option>
-      <option value="Ebsu.cut">Ebsu.cut</option>
-      <option value="Ebsu_h.cut">Ebsu_h.cut</option>
-      <option value="Ecac.cut">Ecac.cut</option>
-      <option value="Ecal.cut">Ecal.cut</option>
-      <option value="Eccr.cut">Eccr.cut</option>
-      <option value="Ecel.cut">Ecel.cut</option>
-      <option value="Echi.cut">Echi.cut</option>
-      <option value="Echicken.cut">Echicken.cut</option>
-      <option value="Echisp.cut">Echisp.cut</option>
-      <option value="Echk.cut">Echk.cut</option>
-      <option value="Echmp.cut">Echmp.cut</option>
-      <option value="Echnt.cut">Echnt.cut</option>
-      <option value="Echos.cut">Echos.cut</option>
-      <option value="Echzm.cut">Echzm.cut</option>
-      <option value="Echzmrubp.cut">Echzmrubp.cut</option>
-      <option value="Ecpx.cut">Ecpx.cut</option>
-      <option value="Ecre.cut">Ecre.cut</option>
-      <option value="Ecrisp.cut">Ecrisp.cut</option>
-      <option value="Ectr.cut">Ectr.cut</option>
-      <option value="Edayhoff.cut">Edayhoff.cut</option>
-      <option value="Eddi.cut">Eddi.cut</option>
-      <option value="Eddi_h.cut">Eddi_h.cut</option>
-      <option value="Edog.cut">Edog.cut</option>
-      <option value="Edro.cut">Edro.cut</option>
-      <option value="Edro_h.cut">Edro_h.cut</option>
-      <option value="Edrosophila.cut">Edrosophila.cut</option>
-      <option value="Eeca.cut">Eeca.cut</option>
-      <option value="Eeco.cut">Eeco.cut</option>
-      <option value="Eeco_h.cut">Eeco_h.cut</option>
-      <option value="Eecoli.cut">Eecoli.cut</option>
-      <option value="Ef1.cut">Ef1.cut</option>
-      <option value="Efish.cut">Efish.cut</option>
-      <option value="Efmdvpolyp.cut">Efmdvpolyp.cut</option>
-      <option value="Eham.cut">Eham.cut</option>
-      <option value="Ehha.cut">Ehha.cut</option>
-      <option value="Ehin.cut">Ehin.cut</option>
-      <option value="Ehma.cut">Ehma.cut</option>
-      <option value="Ehuman.cut">Ehuman.cut</option>
-      <option value="Ekla.cut">Ekla.cut</option>
-      <option value="Ekpn.cut">Ekpn.cut</option>
-      <option value="Ella.cut">Ella.cut</option>
-      <option value="Emac.cut">Emac.cut</option>
-      <option value="Emaize.cut">Emaize.cut</option>
-      <option value="Emam_h.cut">Emam_h.cut</option>
-      <option value="Emixlg.cut">Emixlg.cut</option>
-      <option value="Emouse.cut">Emouse.cut</option>
-      <option value="Emsa.cut">Emsa.cut</option>
-      <option value="Emse.cut">Emse.cut</option>
-      <option value="Emta.cut">Emta.cut</option>
-      <option value="Emtu.cut">Emtu.cut</option>
-      <option value="Emus.cut">Emus.cut</option>
-      <option value="Emussp.cut">Emussp.cut</option>
-      <option value="Emva.cut">Emva.cut</option>
-      <option value="Emze.cut">Emze.cut</option>
-      <option value="Emzecp.cut">Emzecp.cut</option>
-      <option value="Encr.cut">Encr.cut</option>
-      <option value="Eneu.cut">Eneu.cut</option>
-      <option value="Engo.cut">Engo.cut</option>
-      <option value="Eoncsp.cut">Eoncsp.cut</option>
-      <option value="Epae.cut">Epae.cut</option>
-      <option value="Epea.cut">Epea.cut</option>
-      <option value="Epet.cut">Epet.cut</option>
-      <option value="Epfa.cut">Epfa.cut</option>
-      <option value="Ephix174.cut">Ephix174.cut</option>
-      <option value="Ephv.cut">Ephv.cut</option>
-      <option value="Ephy.cut">Ephy.cut</option>
-      <option value="Epig.cut">Epig.cut</option>
-      <option value="Epolyomaa2.cut">Epolyomaa2.cut</option>
-      <option value="Epombe.cut">Epombe.cut</option>
-      <option value="Epombecai.cut">Epombecai.cut</option>
-      <option value="Epot.cut">Epot.cut</option>
-      <option value="Eppu.cut">Eppu.cut</option>
-      <option value="Epse.cut">Epse.cut</option>
-      <option value="Epsy.cut">Epsy.cut</option>
-      <option value="Epvu.cut">Epvu.cut</option>
-      <option value="Erab.cut">Erab.cut</option>
-      <option value="Erabbit.cut">Erabbit.cut</option>
-      <option value="Erabsp.cut">Erabsp.cut</option>
-      <option value="Erat.cut">Erat.cut</option>
-      <option value="Eratsp.cut">Eratsp.cut</option>
-      <option value="Erca.cut">Erca.cut</option>
-      <option value="Erhm.cut">Erhm.cut</option>
-      <option value="Eric.cut">Eric.cut</option>
-      <option value="Erle.cut">Erle.cut</option>
-      <option value="Erme.cut">Erme.cut</option>
-      <option value="Ersp.cut">Ersp.cut</option>
-      <option value="Esalsp.cut">Esalsp.cut</option>
-      <option value="Esau.cut">Esau.cut</option>
-      <option value="Esco.cut">Esco.cut</option>
-      <option value="Esgi.cut">Esgi.cut</option>
-      <option value="Eshp.cut">Eshp.cut</option>
-      <option value="Eshpsp.cut">Eshpsp.cut</option>
-      <option value="Esli.cut">Esli.cut</option>
-      <option value="Eslm.cut">Eslm.cut</option>
-      <option value="Esma.cut">Esma.cut</option>
-      <option value="Esmi.cut">Esmi.cut</option>
-      <option value="Esmu.cut">Esmu.cut</option>
-      <option value="Esoy.cut">Esoy.cut</option>
-      <option value="Espi.cut">Espi.cut</option>
-      <option value="Espn.cut">Espn.cut</option>
-      <option value="Espo.cut">Espo.cut</option>
-      <option value="Espo_h.cut">Espo_h.cut</option>
-      <option value="Espu.cut">Espu.cut</option>
-      <option value="Esta.cut">Esta.cut</option>
-      <option value="Esty.cut">Esty.cut</option>
-      <option value="Esus.cut">Esus.cut</option>
-      <option value="Esv40.cut">Esv40.cut</option>
-      <option value="Esyhsp.cut">Esyhsp.cut</option>
-      <option value="Esynsp.cut">Esynsp.cut</option>
-      <option value="Etbr.cut">Etbr.cut</option>
-      <option value="Etcr.cut">Etcr.cut</option>
-      <option value="Eter.cut">Eter.cut</option>
-      <option value="Etetsp.cut">Etetsp.cut</option>
-      <option value="Etob.cut">Etob.cut</option>
-      <option value="Etobcp.cut">Etobcp.cut</option>
-      <option value="Etom.cut">Etom.cut</option>
-      <option value="Etrb.cut">Etrb.cut</option>
-      <option value="Evco.cut">Evco.cut</option>
-      <option value="Ewht.cut">Ewht.cut</option>
-      <option value="Exel.cut">Exel.cut</option>
-      <option value="Exenopus.cut">Exenopus.cut</option>
-      <option value="Eyeast.cut">Eyeast.cut</option>
-      <option value="Eyen.cut">Eyen.cut</option>
-      <option value="Eysc.cut">Eysc.cut</option>
-      <option value="Eysc_h.cut">Eysc_h.cut</option>
-      <option value="Eyscmt.cut">Eyscmt.cut</option>
-      <option value="Eysp.cut">Eysp.cut</option>
-      <option value="Ezebrafish.cut">Ezebrafish.cut</option>
-      <option value="Ezma.cut">Ezma.cut</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="cai" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="cfile" value="Eyeastcai.cut"/>
-      <output name="out_file1" file="emboss_cai_out.cai"/>
-    </test>
-  </tests>
-  <help>
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
- You can view the original documentation here_.
-
- .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/cai.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_cai_custom.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-<tool id="EMBOSS: cai_custom6" name="cai custom" version="5.0.0">
-  <description>CAI codon adaptation index using custom codon usage file</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>cai -seqall $input1 -outfile $out_file1 -cfile $input2 -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param format="txt" name="input2" type="data">
-      <label>Codon Usage File</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="txt" name="out_file1" />
-  </outputs>
-  <help>
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
- You can view the original documentation here_.
-
- .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/cai_custom.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_chaos.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-<tool id="EMBOSS: chaos7" name="chaos" version="5.0.0">
-  <description>Create a chaos game representation plot for a sequence</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl chaos -sequence $input1 -graph png -goutfile $out_file1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-<!-- <tests>
-    <test>
-      puts name of file into the png
-    </test>
-  </tests> -->
-  <help>
-    You can view the original documentation here_.
-
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/chaos.html
-  </help>
-</tool>
\ No newline at end of file
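A chaos game representation assigns each base a corner of the unit square and plots one point per base, each time moving halfway from the current position toward that base's corner. A minimal sketch of the point generation (corner assignments vary between implementations, so the mapping below is an assumption):

    # Assumed corner layout: a bottom-left, c top-left, g top-right, t bottom-right.
    CORNERS = {"a": (0.0, 0.0), "c": (0.0, 1.0), "g": (1.0, 1.0), "t": (1.0, 0.0)}

    def cgr_points(seq):
        x, y = 0.5, 0.5  # start at the centre of the square
        for base in seq.lower():
            cx, cy = CORNERS[base]
            x, y = (x + cx) / 2.0, (y + cy) / 2.0
            yield x, y

    print(list(cgr_points("acgt")))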
--- a/tools/emboss_5/emboss_charge.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-<tool id="EMBOSS: charge8" name="charge" version="5.0.0">
-  <description>Protein charge plot</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>charge -seqall $input1 -outfile $out_file1 -window $window -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="window" size="4" type="text" value="5">
-      <label>Window Size</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="charge" name="out_file1" />
-  </outputs>
-  <!-- <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="window" value="5"/>
-      <output name="out_file1" file="emboss_charge_out.charge"/>
-    </test>
-  </tests>-->
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
- You can view the original documentation here_.
-
- .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/charge.html
-  </help>
-</tool>
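charge reports the mean charge of each window of residues as the window slides along the protein. A toy version with a hard-coded charge table (EMBOSS reads its residue properties from a data file, so these values are illustrative assumptions):

    # Illustrative residue charges; the real values come from EMBOSS data files.
    CHARGE = {"D": -1.0, "E": -1.0, "K": 1.0, "R": 1.0, "H": 0.5}

    def window_charges(protein, window=5):
        for i in range(len(protein) - window + 1):
            segment = protein[i:i + window]
            yield sum(CHARGE.get(aa, 0.0) for aa in segment) / window

    print(list(window_charges("MKKDERAH")))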
--- a/tools/emboss_5/emboss_checktrans.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-<tool id="EMBOSS: checktrans9" name="checktrans" version="5.0.0">
-  <description>Reports STOP codons and ORF statistics of a protein</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>checktrans -sequence $input1 -outfile $out_file1 -outseq $out_file2 -osformat3 $out_format2 -outfeat $out_file3 -offormat4 $out_format3 -orfml $orfml -addlast $addlast -auto</command>
-  <inputs>
-    <param format="fasta" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="orfml" size="4" type="text" value="100">
-      <label>Minimum ORF Length to report</label>
-    </param>
-    <param name="addlast" type="select">
-      <label>An asterisk in the protein sequence indicates the position of a STOP codon. Checktrans assumes that all ORFs end in a STOP codon. Forcing the sequence to end with an asterisk, if there
-      is not one there already, makes checktrans treat the end as a potential ORF. If an asterisk is added, it is not included in the reported count of STOPs</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <param name="out_format2" type="select">
-      <label>Output Sequence File Format</label>
-      <option value="fasta">FASTA (m)</option>
-      <option value="acedb">ACeDB (m)</option>
-      <option value="asn1">ASN.1 (m)</option>
-      <option value="clustal">Clustal (m)</option>
-      <option value="codata">CODATA (m)</option>
-      <option value="embl">EMBL (m)</option>
-      <option value="fitch">Fitch (m)</option>
-      <option value="gcg">Wisconsin Package GCG 9.x and 10.x (s)</option>
-      <option value="genbank">GENBANK (m)</option>
-      <option value="gff">GFF (m)</option>
-      <option value="hennig86">Hennig86 (m)</option>
-      <option value="ig">Intelligenetics (m)</option>
-      <option value="jackknifer">Jackknifer (m)</option>
-      <option value="jackknifernon">Jackknifernon (m)</option>
-      <option value="mega">Mega (m)</option>
-      <option value="meganon">Meganon (m)</option>
-      <option value="msf">Wisconsin Package GCG's MSF (m)</option>
-      <option value="pir">NBRF (PIR) (m)</option>
-      <option value="ncbi">NCBI style FASTA (m)</option>
-      <option value="nexus">Nexus/PAUP (m)</option>
-      <option value="nexusnon">Nexusnon/PAUPnon (m)</option>
-      <option value="phylip">PHYLIP interleaved (m)</option>
-      <option value="phylipnon">PHYLIP non-interleaved (m)</option>
-      <option value="selex">SELEX (m)</option>
-      <option value="staden">Staden (s)</option>
-      <option value="strider">DNA strider (m)</option>
-      <option value="swiss">SwisProt entry (m)</option>
-      <option value="text">Plain sequence (s)</option>
-      <option value="treecon">Treecon (m)</option>
-    </param>
-    <param name="out_format3" type="select">
-      <label>Output Feature File Format</label>
-      <option value="gff">GFF</option>
-      <option value="embl">EMBL</option>
-      <option value="swiss">SwissProt</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="checktrans" name="out_file1" />
-    <data format="fasta" name="out_file2" />
-    <data format="gff" name="out_file3" />
-  </outputs>
-  <!-- <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="orfml" value="100"/>
-      <param name="addlast" value="yes"/>
-      <param name="out_format2" value="fasta"/>
-      <param name="out_format3" value="gff"/>
-      <output name="out_file1" file="emboss_checktrans_out1.txt"/>
-      <output name="out_file2" file="emboss_checktrans_out2.fasta"/>
-      <output name="out_file3" file="emboss_checktrans_out3.gff"/>
-    </test>
-  </tests> -->
-  <code file="emboss_format_corrector.py" />
-  <help>
-
-.. class:: warningmark
-
-The input dataset needs to be sequences.
-
------
-
- You can view the original documentation here_.
-
- .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/checktrans.html
-  </help>
-</tool>
--- a/tools/emboss_5/emboss_chips.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-<tool id="EMBOSS: chips10" name="chips" version="5.0.0">
-  <description>Codon usage statistics</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>chips -seqall $input1 -outfile $out_file1 -sum $sum -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-    <param name="sum" type="select">
-      <label>Sum codons over all sequences</label>
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="chips" name="out_file1" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1" value="2.fasta"/>
-      <param name="sum" value="yes"/>
-      <output name="out_file1" file="emboss_chips_out.chips"/>
-    </test>
-  </tests>
-  <help>
-    You can view the original documentation here_.
-
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/chips.html
-  </help>
-</tool>
\ No newline at end of file
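At its core, chips reduces to tallying codon frequencies, pooled across all input sequences when -sum is yes. The tally itself:

    from collections import Counter

    def codon_counts(seq):
        seq = seq.lower()
        # Non-overlapping triplets from the first reading frame.
        return Counter(seq[i:i + 3] for i in range(0, len(seq) - 2, 3))

    print(codon_counts("atggcgatg"))  # Counter({'atg': 2, 'gcg': 1})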
--- a/tools/emboss_5/emboss_cirdna.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-<tool id="EMBOSS: cirdna11" name="cirdna" version="5.0.0">
-  <description>Draws circular maps of DNA constructs</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command interpreter="perl">emboss_single_outputfile_wrapper.pl cirdna -infile $input1 -graphout png -goutfile $out_file1 -auto</command>
-  <inputs>
-    <param format="data" name="input1" type="data">
-      <label>On query</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-  <!-- <tests>
-    <test>
-      puts name of file into the png
-    </test>
-  </tests> -->
-  <help>
-    You can view the original documentation here_.
-
-    .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/cirdna.html
-  </help>
-</tool>
\ No newline at end of file
--- a/tools/emboss_5/emboss_codcmp.xml Fri Mar 09 19:45:42 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,330 +0,0 @@
-<tool id="EMBOSS: codcmp12" name="codcmp" version="5.0.0">
-  <description>Codon usage table comparison</description>
-  <requirements><requirement type="package" version="5.0.0">emboss</requirement></requirements>
-  <command>codcmp -first $cfile1 -second $cfile2 -outfile $out_file1 -auto</command>
-  <inputs>
-    <param name="cfile1" type="select">
-      <label>Codon Usage File 1</label>
-      <option value="Ehum.cut">Ehum.cut</option>
-      <option value="Eacc.cut">Eacc.cut</option>
-      <option value="Eadenovirus5.cut">Eadenovirus5.cut</option>
-      <option value="Eadenovirus7.cut">Eadenovirus7.cut</option>
-      <option value="Eaidlav.cut">Eaidlav.cut</option>
-      <option value="Eanasp.cut">Eanasp.cut</option>
-      <option value="Eani.cut">Eani.cut</option>
-      <option value="Eani_h.cut">Eani_h.cut</option>
-      <option value="Eanidmit.cut">Eanidmit.cut</option>
-      <option value="Easn.cut">Easn.cut</option>
-      <option value="Eath.cut">Eath.cut</option>
-      <option value="Eatu.cut">Eatu.cut</option>
-      <option value="Eavi.cut">Eavi.cut</option>
-      <option value="Ebja.cut">Ebja.cut</option>
-      <option value="Ebly.cut">Ebly.cut</option>
-      <option value="Ebme.cut">Ebme.cut</option>
-      <option value="Ebmo.cut">Ebmo.cut</option>
-      <option value="Ebna.cut">Ebna.cut</option>
-      <option value="Ebov.cut">Ebov.cut</option>
-      <option value="Ebovsp.cut">Ebovsp.cut</option>
-      <option value="Ebst.cut">Ebst.cut</option>
-      <option value="Ebsu.cut">Ebsu.cut</option>
-      <option value="Ebsu_h.cut">Ebsu_h.cut</option>
-      <option value="Ecac.cut">Ecac.cut</option>
-      <option value="Ecal.cut">Ecal.cut</option>
-      <option value="Eccr.cut">Eccr.cut</option>
-      <option value="Ecel.cut">Ecel.cut</option>
-      <option value="Echi.cut">Echi.cut</option>
-      <option value="Echicken.cut">Echicken.cut</option>
-      <option value="Echisp.cut">Echisp.cut</option>
-      <option value="Echk.cut">Echk.cut</option>
-      <option value="Echmp.cut">Echmp.cut</option>
-      <option value="Echnt.cut">Echnt.cut</option>
-      <option value="Echos.cut">Echos.cut</option>
-      <option value="Echzm.cut">Echzm.cut</option>
-      <option value="Echzmrubp.cut">Echzmrubp.cut</option>
-      <option value="Ecpx.cut">Ecpx.cut</option>
-      <option value="Ecre.cut">Ecre.cut</option>
-      <option value="Ecrisp.cut">Ecrisp.cut</option>
-      <option value="Ectr.cut">Ectr.cut</option>
-      <option value="Edayhoff.cut">Edayhoff.cut</option>
-      <option value="Eddi.cut">Eddi.cut</option>
-      <option value="Eddi_h.cut">Eddi_h.cut</option>
-      <option value="Edog.cut">Edog.cut</option>
-      <option value="Edro.cut">Edro.cut</option>
-      <option value="Edro_h.cut">Edro_h.cut</option>
-      <option value="Edrosophila.cut">Edrosophila.cut</option>
-      <option value="Eeca.cut">Eeca.cut</option>
-      <option value="Eeco.cut">Eeco.cut</option>
-      <option value="Eeco_h.cut">Eeco_h.cut</option>
-      <option value="Eecoli.cut">Eecoli.cut</option>
-      <option value="Ef1.cut">Ef1.cut</option>
-      <option value="Efish.cut">Efish.cut</option>
-      <option value="Efmdvpolyp.cut">Efmdvpolyp.cut</option>
-      <option value="Eham.cut">Eham.cut</option>
-      <option value="Ehha.cut">Ehha.cut</option>
-      <option value="Ehin.cut">Ehin.cut</option>
-      <option value="Ehma.cut">Ehma.cut</option>
-      <option value="Ehuman.cut">Ehuman.cut</option>
-      <option value="Ekla.cut">Ekla.cut</option>
-      <option value="Ekpn.cut">Ekpn.cut</option>
-      <option value="Ella.cut">Ella.cut</option>
-      <option value="Emac.cut">Emac.cut</option>
-      <option value="Emaize.cut">Emaize.cut</option>
-      <option value="Emam_h.cut">Emam_h.cut</option>
-      <option value="Emixlg.cut">Emixlg.cut</option>
-      <option value="Emouse.cut">Emouse.cut</option>
-      <option value="Emsa.cut">Emsa.cut</option>
-      <option value="Emse.cut">Emse.cut</option>
-      <option value="Emta.cut">Emta.cut</option>
-      <option value="Emtu.cut">Emtu.cut</option>
-      <option value="Emus.cut">Emus.cut</option>
-      <option value="Emussp.cut">Emussp.cut</option>
-      <option value="Emva.cut">Emva.cut</option>
-      <option value="Emze.cut">Emze.cut</option>
-      <option value="Emzecp.cut">Emzecp.cut</option>
-      <option value="Encr.cut">Encr.cut</option>
-      <option value="Eneu.cut">Eneu.cut</option>
-      <option value="Engo.cut">Engo.cut</option>
-      <option value="Eoncsp.cut">Eoncsp.cut</option>
-      <option value="Epae.cut">Epae.cut</option>
-      <option value="Epea.cut">Epea.cut</option>
-      <option value="Epet.cut">Epet.cut</option>
-      <option value="Epfa.cut">Epfa.cut</option>
-      <option value="Ephix174.cut">Ephix174.cut</option>
-      <option value="Ephv.cut">Ephv.cut</option>
-      <option value="Ephy.cut">Ephy.cut</option>
-      <option value="Epig.cut">Epig.cut</option>
-      <option value="Epolyomaa2.cut">Epolyomaa2.cut</option>
-      <option value="Epombe.cut">Epombe.cut</option>
-      <option value="Epombecai.cut">Epombecai.cut</option>
-      <option value="Epot.cut">Epot.cut</option>
-      <option value="Eppu.cut">Eppu.cut</option>
-      <option value="Epse.cut">Epse.cut</option>
-      <option value="Epsy.cut">Epsy.cut</option>
-      <option value="Epvu.cut">Epvu.cut</option>
-      <option value="Erab.cut">Erab.cut</option>
-      <option value="Erabbit.cut">Erabbit.cut</option>
-      <option value="Erabsp.cut">Erabsp.cut</option>
-      <option value="Erat.cut">Erat.cut</option>
-      <option value="Eratsp.cut">Eratsp.cut</option>
-      <option value="Erca.cut">Erca.cut</option>
-      <option value="Erhm.cut">Erhm.cut</option>
-      <option value="Eric.cut">Eric.cut</option>
-      <option value="Erle.cut">Erle.cut</option>
-      <option value="Erme.cut">Erme.cut</option>
-      <option value="Ersp.cut">Ersp.cut</option>
-      <option value="Esalsp.cut">Esalsp.cut</option>
-      <option value="Esau.cut">Esau.cut</option>
-      <option value="Esco.cut">Esco.cut</option>
-      <option value="Esgi.cut">Esgi.cut</option>
-      <option value="Eshp.cut">Eshp.cut</option>
-      <option value="Eshpsp.cut">Eshpsp.cut</option>
-      <option value="Esli.cut">Esli.cut</option>
-      <option value="Eslm.cut">Eslm.cut</option>
-      <option value="Esma.cut">Esma.cut</option>
-      <option value="Esmi.cut">Esmi.cut</option>
-      <option value="Esmu.cut">Esmu.cut</option>
-      <option value="Esoy.cut">Esoy.cut</option>
-      <option value="Espi.cut">Espi.cut</option>
-      <option value="Espn.cut">Espn.cut</option>
-      <option value="Espo.cut">Espo.cut</option>
-      <option value="Espo_h.cut">Espo_h.cut</option>
-      <option value="Espu.cut">Espu.cut</option>
-      <option value="Esta.cut">Esta.cut</option>
-      <option value="Esty.cut">Esty.cut</option>
-      <option value="Esus.cut">Esus.cut</option>
-      <option value="Esv40.cut">Esv40.cut</option>
-      <option value="Esyhsp.cut">Esyhsp.cut</option>
-      <option value="Esynsp.cut">Esynsp.cut</option>
-      <option value="Etbr.cut">Etbr.cut</option>
-      <option value="Etcr.cut">Etcr.cut</option>
-      <option value="Eter.cut">Eter.cut</option>
-      <option value="Etetsp.cut">Etetsp.cut</option>
-      <option value="Etob.cut">Etob.cut</option>
-      <option value="Etobcp.cut">Etobcp.cut</option>
-      <option value="Etom.cut">Etom.cut</option>
-      <option value="Etrb.cut">Etrb.cut</option>
-      <option value="Evco.cut">Evco.cut</option>
-      <option value="Ewht.cut">Ewht.cut</option>
-      <option value="Exel.cut">Exel.cut</option>
-      <option value="Exenopus.cut">Exenopus.cut</option>
-      <option value="Eyeast.cut">Eyeast.cut</option>
-      <option value="Eyeastcai.cut">Eyeastcai.cut</option>
-      <option value="Eyen.cut">Eyen.cut</option>
-      <option value="Eysc.cut">Eysc.cut</option>
-      <option value="Eysc_h.cut">Eysc_h.cut</option>
-      <option value="Eyscmt.cut">Eyscmt.cut</option>
-      <option value="Eysp.cut">Eysp.cut</option>
-      <option value="Ezebrafish.cut">Ezebrafish.cut</option>
-      <option value="Ezma.cut">Ezma.cut</option>
-    </param>
-    <param name="cfile2" type="select">
-      <label>Codon Usage File 2</label>
-      <option value="Ehum.cut">Ehum.cut</option>
-      <option value="Eacc.cut">Eacc.cut</option>
-      <option value="Eadenovirus5.cut">Eadenovirus5.cut</option>
-      <option value="Eadenovirus7.cut">Eadenovirus7.cut</option>
-      <option value="Eaidlav.cut">Eaidlav.cut</option>
-      <option value="Eanasp.cut">Eanasp.cut</option>
-      <option value="Eani.cut">Eani.cut</option>
-      <option value="Eani_h.cut">Eani_h.cut</option>
-      <option value="Eanidmit.cut">Eanidmit.cut</option>
-      <option value="Easn.cut">Easn.cut</option>
-      <option value="Eath.cut">Eath.cut</option>
-      <option value="Eatu.cut">Eatu.cut</option>
-      <option value="Eavi.cut">Eavi.cut</option>
-      <option value="Ebja.cut">Ebja.cut</option>
-      <option value="Ebly.cut">Ebly.cut</option>
-      <option value="Ebme.cut">Ebme.cut</option>
-      <option value="Ebmo.cut">Ebmo.cut</option>
-      <option value="Ebna.cut">Ebna.cut</option>
-      <option value="Ebov.cut">Ebov.cut</option>
-      <option value="Ebovsp.cut">Ebovsp.cut</option>
-      <option value="Ebst.cut">Ebst.cut</option>
-      <option value="Ebsu.cut">Ebsu.cut</option>
-      <option value="Ebsu_h.cut">Ebsu_h.cut</option>
-      <option value="Ecac.cut">Ecac.cut</option>
-      <option value="Ecal.cut">Ecal.cut</option>
-      <option value="Eccr.cut">Eccr.cut</option>
-      <option value="Ecel.cut">Ecel.cut</option>
-      <option value="Echi.cut">Echi.cut</option>
-      <option value="Echicken.cut">Echicken.cut</option>
-      <option value="Echisp.cut">Echisp.cut</option>
-      <option value="Echk.cut">Echk.cut</option>
-      <option value="Echmp.cut">Echmp.cut</option>
-      <option value="Echnt.cut">Echnt.cut</option>
-      <option value="Echos.cut">Echos.cut</option>
-      <option value="Echzm.cut">Echzm.cut</option>
-      <option value="Echzmrubp.cut">Echzmrubp.cut</option>
-      <option value="Ecpx.cut">Ecpx.cut</option>
-      <option value="Ecre.cut">Ecre.cut</option>
-      <option value="Ecrisp.cut">Ecrisp.cut</option>
-      <option value="Ectr.cut">Ectr.cut</option>
-      <option value="Edayhoff.cut">Edayhoff.cut</option>
-      <option value="Eddi.cut">Eddi.cut</option>
-      <option value="Eddi_h.cut">Eddi_h.cut</option>
-      <option value="Edog.cut">Edog.cut</option>
-      <option value="Edro.cut">Edro.cut</option>
-      <option value="Edro_h.cut">Edro_h.cut</option>
-      <option value="Edrosophila.cut">Edrosophila.cut</option>
-      <option value="Eeca.cut">Eeca.cut</option>
-      <option value="Eeco.cut">Eeco.cut</option>
-      <option value="Eeco_h.cut">Eeco_h.cut</option>
-      <option value="Eecoli.cut">Eecoli.cut</option>
-      <option value="Ef1.cut">Ef1.cut</option>
-      <option value="Efish.cut">Efish.cut</option>
-      <option value="Efmdvpolyp.cut">Efmdvpolyp.cut</option>
-      <option value="Eham.cut">Eham.cut</option>
-      <option value="Ehha.cut">Ehha.cut</option>
-      <option value="Ehin.cut">Ehin.cut</option>
-      <option value="Ehma.cut">Ehma.cut</option>
-      <option value="Ehuman.cut">Ehuman.cut</option>
-      <option value="Ekla.cut">Ekla.cut</option>
-      <option value="Ekpn.cut">Ekpn.cut</option>
-      <option value="Ella.cut">Ella.cut</option>
-      <option value="Emac.cut">Emac.cut</option>
-      <option value="Emaize.cut">Emaize.cut</option>
-      <option value="Emam_h.cut">Emam_h.cut</option>
-      <option value="Emixlg.cut">Emixlg.cut</option>
-      <option value="Emouse.cut">Emouse.cut</option>
-      <option value="Emsa.cut">Emsa.cut</option>
-      <option value="Emse.cut">Emse.cut</option>
-      <option value="Emta.cut">Emta.cut</option>
-      <option value="Emtu.cut">Emtu.cut</option>
-      <option value="Emus.cut">Emus.cut</option>
-      <option value="Emussp.cut">Emussp.cut</option>
-      <option value="Emva.cut">Emva.cut</option>
-      <option value="Emze.cut">Emze.cut</option>
-      <option value="Emzecp.cut">Emzecp.cut</option>
-      <option value="Encr.cut">Encr.cut</option>
-      <option value="Eneu.cut">Eneu.cut</option>
-      <option value="Engo.cut">Engo.cut</option>
-      <option value="Eoncsp.cut">Eoncsp.cut</option>
-      <option value="Epae.cut">Epae.cut</option>
-      <option value="Epea.cut">Epea.cut</option>
-      <option value="Epet.cut">Epet.cut</option>
-      <option value="Epfa.cut">Epfa.cut</option>
-      <option value="Ephix174.cut">Ephix174.cut</option>
-      <option value="Ephv.cut">Ephv.cut</option>
-      <option value="Ephy.cut">Ephy.cut</option>
-      <option value="Epig.cut">Epig.cut</option>
-      <option value="Epolyomaa2.cut">Epolyomaa2.cut</option>
-      <option value="Epombe.cut">Epombe.cut</option>
-      <option value="Epombecai.cut">Epombecai.cut</option>
-      <option value="Epot.cut">Epot.cut</option>
-      <option value="Eppu.cut">Eppu.cut</option>
-      <option value="Epse.cut">Epse.cut</option>
-      <option value="Epsy.cut">Epsy.cut</option>
-      <option value="Epvu.cut">Epvu.cut</option>
-      <option value="Erab.cut">Erab.cut</option>
-      <option value="Erabbit.cut">Erabbit.cut</option>
-      <option value="Erabsp.cut">Erabsp.cut</option>
-      <option value="Erat.cut">Erat.cut</option>
-      <option value="Eratsp.cut">Eratsp.cut</option>
-      <option value="Erca.cut">Erca.cut</option>
-      <option value="Erhm.cut">Erhm.cut</option>
-      <option value="Eric.cut">Eric.cut</option>
-      <option value="Erle.cut">Erle.cut</option>
-      <option value="Erme.cut">Erme.cut</option>
-      <option value="Ersp.cut">Ersp.cut</option>
-      <option value="Esalsp.cut">Esalsp.cut</option>
-      <option value="Esau.cut">Esau.cut</option>
-      <option value="Esco.cut">Esco.cut</option>
-      <option value="Esgi.cut">Esgi.cut</option>
-      <option valu