Mercurial > repos > pjbriggs > rnachipintegrator
view rnachipintegrator_canonical_genes.xml @ 1:5f69a2c1b9c9 draft
Uploaded version 1.0.0.0.
author | pjbriggs |
---|---|
date | Wed, 24 Feb 2016 09:39:14 -0500 |
parents | d9c1f2133124 |
children | b695071de766 |
line wrap: on
line source
<tool id="rnachipintegrator_canonical_genes" name="Analyse canonical genes against 'peak' data" version="@VERSION@.0">
  <description>using RnaChipIntegrator</description>
  <!--
    Galaxy tool wrapper: runs rnachipintegrator_wrapper.sh on a tabular peaks
    dataset and a canonical gene list selected from the
    "rnachipintegrator_canonical_genes" data table.

    The @VERSION@ token and the "requirements", "version_command",
    "analysis_options", "output_options" and "citations" macros are expected
    to be supplied by rnachipintegrator_macros.xml (not shown here).
  -->
  <macros>
    <import>rnachipintegrator_macros.xml</import>
  </macros>
  <expand macro="requirements" />
  <expand macro="version_command" />
  <command interpreter="bash"><![CDATA[
rnachipintegrator_wrapper.sh
## Pass the chrom/start/end column assignments only when the input dataset
## has chromCol metadata set
#if $peaks_in.metadata.chromCol
  --peak_cols=${peaks_in.metadata.chromCol},${peaks_in.metadata.startCol},${peaks_in.metadata.endCol}
#end if
## A blank cutoff field is passed to the wrapper as an explicit zero
#if str( $cutoff ) != ""
  --cutoff=$cutoff
#else
  --cutoff=0
#end if
## A blank number field means no --number option is passed at all
#if str( $number ) != ""
  --number=$number
#end if
--promoter_region=$promoter_start,$promoter_end
--edge=$edge
--xlsx_file "$xlsx_out"
--output_files "$peaks_per_feature_out" "$features_per_peak_out"
## Summary files and padding are only requested when compact output is off
#if $output.compact_format
  --compact
#else
  #if $output.summary
    --summary_files "$peaks_per_feature_summary" "$features_per_peak_summary"
  #end if
  ${output.pad_output}
#end if
"${canonical_genes.fields.path}"
"$peaks_in"
  ]]></command>
  <inputs>
    <param format="tabular" name="peaks_in" type="data" label="Peaks" />
    <!-- Choices come from the rnachipintegrator_canonical_genes data table;
         the command uses the selected entry's "path" field and the output
         labels use its "name" field -->
    <param name="canonical_genes" type="select" label="Canonical genes to analyse peaks against">
      <options from_data_table="rnachipintegrator_canonical_genes">
      </options>
    </param>
    <!-- NOTE(review): cutoff, number, promoter_start, promoter_end, edge and
         the "output" group (compact_format, summary, pad_output) are used by
         the command but not declared here; presumably they come from these
         two macros - confirm against rnachipintegrator_macros.xml -->
    <expand macro="analysis_options" />
    <expand macro="output_options" />
  </inputs>
  <outputs>
    <!-- Always produce XLSX output -->
    <data format="xlsx" name="xlsx_out" label="All RnaChipIntegrator analyses: ${canonical_genes.fields.name} vs ${peaks_in.name} (Excel spreadsheet)" />
    <data format="tabular" name="peaks_per_feature_out" label="Nearest peaks to each gene: ${canonical_genes.fields.name} vs ${peaks_in.name}" />
    <data format="tabular" name="features_per_peak_out" label="Nearest genes to each peak: ${canonical_genes.fields.name} vs ${peaks_in.name}" />
    <!-- Summary datasets are only created when compact output is disabled
         and summaries are requested, matching the command-line logic -->
    <data format="tabular" name="peaks_per_feature_summary" label="Nearest peaks to each gene (summary): ${canonical_genes.fields.name} vs ${peaks_in.name}" >
      <filter>output['compact_format'] is False</filter>
      <filter>output['summary'] is True</filter>
    </data>
    <data format="tabular" name="features_per_peak_summary" label="Nearest genes to each peak (summary): ${canonical_genes.fields.name} vs ${peaks_in.name}" >
      <filter>output['compact_format'] is False</filter>
      <filter>output['summary'] is True</filter>
    </data>
  </outputs>
  <tests>
    <!-- NOTE(review): the first two tests below do not set compact_format or
         summary explicitly, so they rely on the defaults defined in the
         output_options macro - confirm these match the "+summary" and
         "+compact" annotations in the comments -->
    <!-- RnaChipIntegrator +name=mm9 +cutoff=50000 +xlsx +summary mm9_canonical_genes.tsv mm9_summits.txt -->
    <test>
      <param name="peaks_in" value="mm9_summits.txt" ftype="tabular" />
      <param name="canonical_genes" value="mm9_test" />
      <param name="cutoff" value="50000" />
      <output name="xlsx_out" file="mm9_summits.xlsx" compare="sim_size" />
      <output name="peaks_per_feature_out" ftype="tabular" file="mm9_summits_per_feature.out" />
      <output name="features_per_peak_out" ftype="tabular" file="mm9_features_per_summit.out" />
    </test>
    <!-- RnaChipIntegrator +name=mm9 +cutoff=50000 +xlsx +compact mm9_canonical_genes.tsv mm9_peaks.txt -->
    <test>
      <param name="peaks_in" value="mm9_peaks.txt" ftype="tabular" />
      <param name="canonical_genes" value="mm9_test" />
      <param name="cutoff" value="50000" />
      <output name="xlsx_out" file="mm9_peaks1.xlsx" compare="sim_size" />
      <output name="peaks_per_feature_out" ftype="tabular" file="mm9_peaks_per_feature1.out" />
      <output name="features_per_peak_out" ftype="tabular" file="mm9_features_per_peak1.out" />
    </test>
    <!-- RnaChipIntegrator +name=mm9 +cutoff=50000 +xlsx +summary +pad mm9_canonical_genes.tsv mm9_peaks.txt -->
    <test>
      <param name="peaks_in" value="mm9_peaks.txt" ftype="tabular" />
      <param name="canonical_genes" value="mm9_test" />
      <param name="cutoff" value="50000" />
      <param name="compact_format" value="false" />
      <param name="summary" value="true" />
      <param name="pad_output" value="true" />
      <output name="xlsx_out" file="mm9_peaks3.xlsx" compare="sim_size" />
      <output name="peaks_per_feature_out" ftype="tabular" file="mm9_peaks_per_feature3.out" />
      <output name="features_per_peak_out" ftype="tabular" file="mm9_features_per_peak3.out" />
      <output name="peaks_per_feature_summary" ftype="tabular" file="mm9_peaks_per_feature3.summary" />
      <output name="features_per_peak_summary" ftype="tabular" file="mm9_features_per_peak3.summary" />
    </test>
  </tests>
  <help>

.. class:: infomark

**What it does**

Performs integrated analyses of a set of peaks (e.g. ChIP data) against a list
of "canonical genes" for a specific organism and genome build, identifying the
nearest peaks to each canonical gene (and vice versa).

RnaChipIntegrator can be obtained from
http://fls-bioinformatics-core.github.com/RnaChipIntegrator/

-------------

.. class:: infomark

**Input**

The peak data must be in a tabular file with at least 3 columns of data for each
peak (one peak per line):

====== ========== =================================
Column Name       Description
====== ========== =================================
1      chr        Chromosome name
2      start      Start position of the peak
3      end        End position of the peak
====== ========== =================================

-------------

.. class:: infomark

**Outputs**

The key outputs from the tool are two lists comprising the nearest peaks for
each gene, and the nearest gene for each peak (one dataset for each list).

There are two formats for reporting: "compact" and "full":

* **Compact output** reports all the hits for each peak or gene on a single
  line of output;
* **Full output** reports each peak/gene pair on a separate line (i.e. a
  multi-line output format).

In "full" output mode, additional options are available:

* The output files can be "padded" with extra (empty) lines to ensure that
  there are always the same number of lines for each peak or gene, if fewer
  than the requested number of hits are found.
* "Summary" datasets can also be requested, which include just the nearest
  peak reported for each gene (and vice versa).

In either mode these data will also be output in a single MS Excel file, which
contains one sheet per result set.

.. class:: warning

Using "compact" output with the number of hits limited to more than 4 peak/gene
pairs (or with no limit at all) can result in a large number of columns in the
output files, which in some versions of Galaxy will not be properly displayed.
However the data files themselves should be okay.

-------------

.. class:: infomark

**More information**

It is recommended that you refer to the ``RnaChipIntegrator`` documentation for
information on the contents of each output file:

* http://rnachipintegrator.readthedocs.org/en/latest/

-------------

.. class:: infomark

**Credits**

This Galaxy tool has been developed within the Bioinformatics Core Facility at
the University of Manchester. It runs the RnaChipIntegrator package which has
also been developed by this group, and is documented at
https://pypi.python.org/pypi/RnaChipIntegrator/

Please kindly acknowledge the Bioinformatics Core Facility if you use this tool.
  </help>
  <expand macro="citations" />
</tool>