Mercurial > repos > pjbriggs > rnachipintegrator
view rnachipintegrator_wrapper.xml @ 4:b695071de766 draft
Uploaded version 1.0.3.1 (switch to conda dependency resolution)
author | pjbriggs |
---|---|
date | Fri, 09 Mar 2018 05:07:31 -0500 |
parents | dc498b03ca9a |
children | 466c68008537 |
line wrap: on
line source
<?xml version="1.0" encoding="utf-8"?>
<!--
  Galaxy tool wrapper for RnaChipIntegrator.

  Runs rnachipintegrator_wrapper.sh (shipped in the tool directory) on a
  tabular gene/feature dataset and a tabular peak dataset. Always produces
  an XLSX workbook plus two tabular outputs; two extra summary datasets are
  produced only in full (non-compact) mode when summaries are requested.

  The requirements, version command, analysis options, output options and
  citations are expanded from rnachipintegrator_macros.xml.
-->
<tool id="rnachipintegrator_wrapper" name="RnaChipIntegrator" version="@VERSION@">
  <description>Integrated analysis of 'gene' and 'peak' data</description>
  <macros>
    <import>rnachipintegrator_macros.xml</import>
  </macros>
  <expand macro="requirements" />
  <expand macro="version_command" />
  <!-- The script is invoked explicitly through bash from the tool directory
       (replaces the deprecated 'interpreter' attribute). -->
  <command><![CDATA[
  bash '$__tool_directory__/rnachipintegrator_wrapper.sh'
  ## Pass explicit peak columns only when the dataset metadata defines them
  #if $peaks_in.metadata.chromCol
    --peak_cols=${peaks_in.metadata.chromCol},${peaks_in.metadata.startCol},${peaks_in.metadata.endCol}
  #end if
  ## A blank cutoff is passed to the script as an explicit zero
  #if str( $cutoff ) != ""
    --cutoff=$cutoff
  #else
    --cutoff=0
  #end if
  ## The number of hits is only passed when a value was supplied
  #if str( $number ) != ""
    --number=$number
  #end if
  --promoter_region=$promoter_start,$promoter_end
  --edge=$edge
  $diff_expressed_only
  --xlsx_file "$xlsx_out"
  --output_files "$peaks_per_feature_out" "$features_per_peak_out"
  ## Summary files and padding are only applicable to the full (non-compact) format
  #if $output.compact_format
    --compact
  #else
    #if $output.summary
      --summary_files "$peaks_per_feature_summary" "$features_per_peak_summary"
    #end if
    ${output.pad_output}
  #end if
  "$features_in" "$peaks_in"
  ]]></command>
  <inputs>
    <param format="tabular" name="features_in" type="data"
           label="Genes/genomic features" />
    <param format="tabular" name="peaks_in" type="data"
           label="Peaks/regions" />
    <expand macro="analysis_options" />
    <param name="diff_expressed_only" type="boolean"
           truevalue="--only-DE" falsevalue="" checked="false"
           label="Only consider genes which are flagged as differentially expressed"
           help="NB input feature data must include differential expression flags (--only-DE)" />
    <expand macro="output_options" />
  </inputs>
  <outputs>
    <!-- Always produce XLSX output -->
    <data format="xlsx" name="xlsx_out"
          label="All RnaChipIntegrator analyses: ${features_in.name} vs ${peaks_in.name} (Excel spreadsheet)" />
    <data format="tabular" name="peaks_per_feature_out"
          label="Nearest peaks to each gene: ${features_in.name} vs ${peaks_in.name}" />
    <data format="tabular" name="features_per_peak_out"
          label="Nearest genes to each peak: ${features_in.name} vs ${peaks_in.name}" />
    <!-- Summary datasets: only kept when compact format is off and
         summaries were requested (both filters must hold) -->
    <data format="tabular" name="peaks_per_feature_summary"
          label="Nearest peaks to each gene (summary): ${features_in.name} vs ${peaks_in.name}" >
      <filter>output['compact_format'] is False</filter>
      <filter>output['summary'] is True</filter>
    </data>
    <data format="tabular" name="features_per_peak_summary"
          label="Nearest gene to each peak (summary): ${features_in.name} vs ${peaks_in.name}" >
      <filter>output['compact_format'] is False</filter>
      <filter>output['summary'] is True</filter>
    </data>
  </outputs>
  <tests>
    <!-- The comments below give the equivalent command line for each test,
         with '+' standing in for the option prefix (a double hyphen is not
         permitted inside an XML comment). XLSX outputs are compared by
         size only (sim_size). -->
    <!-- RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt summits.txt -->
    <test>
      <param name="features_in" value="features.txt" ftype="tabular" />
      <param name="peaks_in" value="summits.txt" ftype="tabular" />
      <param name="cutoff" value="130000" />
      <param name="promoter_start" value="-10000" />
      <param name="promoter_end" value="2500" />
      <output name="xlsx_out" file="summits.xlsx" compare="sim_size" />
      <output name="peaks_per_feature_out" ftype="tabular" file="summits_per_feature.out" />
      <output name="features_per_peak_out" ftype="tabular" file="features_per_summit.out" />
    </test>
    <!-- RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt peaks.txt -->
    <test>
      <param name="features_in" value="features.txt" ftype="tabular" />
      <param name="peaks_in" value="peaks.txt" ftype="tabular" />
      <param name="cutoff" value="130000" />
      <param name="promoter_start" value="-10000" />
      <param name="promoter_end" value="2500" />
      <output name="xlsx_out" file="peaks1.xlsx" compare="sim_size" />
      <output name="peaks_per_feature_out" ftype="tabular" file="peaks_per_feature1.out" />
      <output name="features_per_peak_out" ftype="tabular" file="features_per_peak1.out" />
    </test>
    <!-- RnaChipIntegrator +name=test +cutoff=130000 +xlsx features.txt peaks.txt -->
    <test>
      <param name="features_in" value="features.txt" ftype="tabular" />
      <param name="peaks_in" value="peaks.txt" ftype="tabular" />
      <param name="cutoff" value="130000" />
      <param name="compact_format" value="false" />
      <output name="xlsx_out" file="peaks2.xlsx" compare="sim_size" />
      <output name="peaks_per_feature_out" ftype="tabular" file="peaks_per_feature2.out" />
      <output name="features_per_peak_out" ftype="tabular" file="features_per_peak2.out" />
    </test>
    <!-- RnaChipIntegrator +name=test +cutoff=130000 +only-DE +xlsx +compact features.txt peaks.txt -->
    <test>
      <param name="features_in" value="features.txt" ftype="tabular" />
      <param name="peaks_in" value="peaks.txt" ftype="tabular" />
      <param name="cutoff" value="130000" />
      <param name="diff_expressed_only" value="true" />
      <output name="xlsx_out" file="peaks3.xlsx" compare="sim_size" />
      <output name="peaks_per_feature_out" ftype="tabular" file="peaks_per_feature3.out" />
      <output name="features_per_peak_out" ftype="tabular" file="features_per_peak3.out" />
    </test>
    <!-- RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +summary features.txt peaks.txt -->
    <test>
      <param name="features_in" value="features.txt" ftype="tabular" />
      <param name="peaks_in" value="peaks.txt" ftype="tabular" />
      <param name="cutoff" value="130000" />
      <param name="compact_format" value="false" />
      <param name="summary" value="true" />
      <param name="pad_output" value="true" />
      <output name="xlsx_out" file="peaks4.xlsx" compare="sim_size" />
      <output name="peaks_per_feature_out" ftype="tabular" file="peaks_per_feature4.out" />
      <output name="features_per_peak_out" ftype="tabular" file="features_per_peak4.out" />
      <output name="peaks_per_feature_summary" ftype="tabular" file="peaks_per_feature4.summary" />
      <output name="features_per_peak_summary" ftype="tabular" file="features_per_peak4.summary" />
    </test>
    <!-- RnaChipIntegrator +name=test +cutoff=0 +promoter_region=-10000,2500 +xlsx +summary features.txt peaks.txt -->
    <test>
      <param name="features_in" value="features.txt" ftype="tabular" />
      <param name="peaks_in" value="peaks.txt" ftype="tabular" />
      <param name="cutoff" value="" />
      <param name="compact_format" value="false" />
      <param name="summary" value="true" />
      <param name="pad_output" value="true" />
      <output name="xlsx_out" file="peaks6.xlsx" compare="sim_size" />
      <output name="peaks_per_feature_out" ftype="tabular" file="peaks_per_feature6.out" />
      <output name="features_per_peak_out" ftype="tabular" file="features_per_peak6.out" />
      <output name="peaks_per_feature_summary" ftype="tabular" file="peaks_per_feature6.summary" />
      <output name="features_per_peak_summary" ftype="tabular" file="features_per_peak6.summary" />
    </test>
  </tests>
  <help>

.. class:: infomark

**What it does**

Performs integrated analyses of genes (or other genomic feature data)
against a set of peaks (e.g. ChIP data), identifying the nearest peaks
to each feature and vice versa.

The program was originally written specifically for ChIP-Seq and RNA-Seq
data but works equally well for ChIP-chip and microarray expression data,
and can also be used to integrate any set of genomic features (e.g.
canonical genes, CpG islands) with expression data.

RnaChipIntegrator can be obtained from
https://pypi.python.org/pypi/RnaChipIntegrator/

-------------

.. class:: infomark

**Input**

The gene data must be in a tabular file with the following columns of
data for each gene or genomic feature (one gene per line):

====== ========== ======================================================================
Column Name       Description
====== ========== ======================================================================
     1 ID         Name used to identify the gene in the output
     2 chr        Chromosome name
     3 start      Start position of the gene
     4 end        End position of the gene
     5 strand     Must be either '+' or '-'
     6 diff_expr  Optional: indicates gene is differentially expressed (1) or not (0)
====== ========== ======================================================================

The peak data must be in a tabular file with at least 3 columns of data
for each peak (one peak per line):

====== ========== =================================
Column Name       Description
====== ========== =================================
     1 chr        Chromosome name
     2 start      Start position of the peak
     3 end        End position of the peak
====== ========== =================================

If peak data is in ``bed`` format then the tool will automatically assign
the correct columns, otherwise the first three columns of data will be
used.

-------------

.. class:: infomark

**Outputs**

The key outputs from the tool are two lists comprising the nearest peaks
for each gene, and the nearest gene for each peak (one dataset for each
list).

There are two formats for reporting: "compact" and "full":

* **Compact output** reports all the hits for each peak or gene on a
  single line of output;
* **Full output** reports each peak/gene pair on a separate line (i.e. a
  multi-line output format).

In "full" output mode, additional options are available:

* The output files can be "padded" with extra (empty) lines to ensure
  that there are always the same number of lines for each peak or gene,
  if fewer than the requested number of hits are found.
* "Summary" datasets can also be requested, which include just the
  nearest peak reported for each gene (and vice versa).

In either mode these data will also be output in a single MS Excel file,
which contains one sheet per result set.

.. class:: warning

Using "compact" output with the number of hits limited to more than 4
peak/gene pairs (or with no limit at all) can result in a large number of
columns in the output files, which in some versions of Galaxy will not be
properly displayed. However the data files themselves should be okay.

-------------

.. class:: infomark

**More information**

It is recommended that you refer to the ``RnaChipIntegrator``
documentation for information on the contents of each output file:

* http://rnachipintegrator.readthedocs.org/en/latest/

-------------

.. class:: infomark

**Credits**

This Galaxy tool has been developed within the Bioinformatics Core
Facility at the University of Manchester. It runs the RnaChipIntegrator
package which has also been developed by this group, and is documented at
http://fls-bioinformatics-core.github.com/RnaChipIntegrator/

Please kindly acknowledge the Bioinformatics Core Facility if you use
this tool.
  </help>
  <expand macro="citations" />
</tool>