Mercurial > repos > greg > vsnp_add_zero_coverage
comparison vsnp_add_zero_coverage.xml @ 0:3cb0bf7e1b2d draft
Uploaded
| author | greg |
|---|---|
| date | Tue, 21 Apr 2020 09:44:38 -0400 |
| parents | |
| children | 01312f8a6ca9 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3cb0bf7e1b2d |
|---|---|
| 1 <tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="1.0.0"> | |
| 2 <description></description> | |
| 3 <requirements> | |
| 4 <requirement type="package" version="1.76">biopython</requirement> | |
| 5 <requirement type="package" version="1.16.5">numpy</requirement> | |
| 6 <requirement type="package" version="0.25.3">pandas</requirement> | |
| 7 <requirement type="package" version="0.15.4">pysam</requirement> | |
| 8 </requirements> | |
| 9 <!-- Cheetah command template: creates the staging directories, symlinks the BAM and VCF inputs into them (single files keep their dataset base name, collection elements use a sanitised element identifier), then runs vsnp_add_zero_coverage.py. Link operands are single-quoted because the sanitising regex keeps whitespace in identifiers. --> <command detect_errors="exit_code"><![CDATA[ | |
| 10 #import os | |
| 11 #import re | |
| 12 #set input_type = $input_type_cond.input_type | |
| 13 #set input_bam_dir = 'input_bam_dir' | |
| 14 #set input_vcf_dir = 'input_vcf_dir' | |
| 15 #set output_vcf_dir = 'output_vcf_dir' | |
| 16 #set output_metrics_dir = 'output_metrics_dir' | |
| 17 mkdir -p $input_bam_dir && | |
| 18 mkdir -p $input_vcf_dir && | |
| 19 mkdir -p $output_vcf_dir && | |
| 20 mkdir -p $output_metrics_dir && | |
| 21 #if str($input_type) == "single": | |
| 22 #set bam_input = $input_type_cond.bam_input | |
| 23 #set file_name = $bam_input.file_name | |
| 24 #set file_name_base = $os.path.basename($file_name) | |
| 25 ln -s '$file_name' '$input_bam_dir/$file_name_base' && | |
| 26 #set vcf_input = $input_type_cond.vcf_input | |
| 27 #set file_name = $vcf_input.file_name | |
| 28 #set file_name_base = $os.path.basename($file_name) | |
| 29 ln -s '$file_name' '$input_vcf_dir/$file_name_base' && | |
| 30 #else: | |
| 31 #for $i in $input_type_cond.bam_input_collection: | |
| 32 #set filename = $i.file_name | |
| 33 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier)) | |
| 34 ln -s '$filename' '$input_bam_dir/$identifier' && | |
| 35 #end for | |
| 36 #for $i in $input_type_cond.vcf_input_collection: | |
| 37 #set filename = $i.file_name | |
| 38 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier)) | |
| 39 ln -s '$filename' '$input_vcf_dir/$identifier' && | |
| 40 #end for | |
| 41 #end if | |
| 42 python '$__tool_directory__/vsnp_add_zero_coverage.py' | |
| 43 --processes $processes | |
| 44 #if str($reference_cond.reference_source) == "cached": | |
| 45 --reference '$reference_cond.reference.fields.path' | |
| 46 #else: | |
| 47 --reference '$reference_cond.reference' | |
| 48 #end if | |
| 49 #if str($input_type) == "single": | |
| 50 --output_metrics '$output_metrics' | |
| 51 --output_vcf '$output_vcf' | |
| 52 #end if | |
| 53 ]]></command> | |
| 54 <inputs> <!-- Tool parameters: input mode (single files or collections), reference genome source, and process count. --> | |
| 55 <conditional name="input_type_cond"> <!-- Switches between one BAM/VCF pair and two list collections; the command template branches on input_type. --> | |
| 56 <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> | |
| 57 <option value="single" selected="true">Single files</option> | |
| 58 <option value="collection">Collections of files</option> | |
| 59 </param> | |
| 60 <when value="single"> <!-- One BAM dataset and one VCF dataset, each carrying an unspecified_build validator. --> | |
| 61 <param name="bam_input" type="data" format="bam" label="BAM file"> | |
| 62 <validator type="unspecified_build"/> | |
| 63 </param> | |
| 64 <param name="vcf_input" type="data" format="vcf" label="VCF file"> | |
| 65 <validator type="unspecified_build"/> | |
| 66 </param> | |
| 67 </when> | |
| 68 <when value="collection"> <!-- Two list collections (BAM and VCF), each carrying an unspecified_build validator. NOTE(review): how BAM and VCF elements are paired is decided by the script, not visible here. --> | |
| 69 <param name="bam_input_collection" type="data_collection" format="bam" collection_type="list" label="Collection of BAM files"> | |
| 70 <validator type="unspecified_build"/> | |
| 71 </param> | |
| 72 <param name="vcf_input_collection" type="data_collection" format="vcf" collection_type="list" label="Collection of VCF files"> | |
| 73 <validator type="unspecified_build"/> | |
| 74 </param> | |
| 75 </when> | |
| 76 </conditional> | |
| 77 <conditional name="reference_cond"> <!-- Reference genome comes either from the fasta_indexes data table (cached) or from a fasta dataset in the history. --> | |
| 78 <param name="reference_source" type="select" label="Choose the source for the reference genome"> | |
| 79 <option value="cached" selected="true">locally cached</option> | |
| 80 <option value="history">from history</option> | |
| 81 </param> | |
| 82 <when value="cached"> <!-- The command template passes the selected entry's "path" field as the reference. --> | |
| 83 <param name="reference" type="select" label="Using reference genome"> <!-- NOTE(review): the no_options message below mentions the build of the selected BAM file, yet the options are not filtered by build; confirm the wording. --> | |
| 84 <options from_data_table="fasta_indexes"/> | |
| 85 <!-- No <filter> tag here! --> | |
| 86 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file"/> | |
| 87 </param> | |
| 88 </when> | |
| 89 <when value="history"> <!-- The command template passes the dataset itself as the reference. --> | |
| 90 <param name="reference" type="data" format="fasta" label="Using reference genome"> <!-- NOTE(review): no_options is normally documented for select parameters; verify it is accepted on a data parameter. --> | |
| 91 <validator type="no_options" message="The current history does not include a fasta dataset"/> | |
| 92 </param> | |
| 93 </when> | |
| 94 </conditional> | |
| 95 <param name="processes" type="integer" min="1" max="20" value="8" label="Number of processes for job splitting"/> <!-- Integer from 1 to 20, default 8; forwarded to the script as its processes option. --> | |
| 96 </inputs> | |
| 97 <outputs> <!-- Single-file mode yields two datasets; collection mode yields two list collections discovered from the output directories. The filters make the two modes mutually exclusive. --> | |
| 98 <data name="output_vcf" format="vcf" label="${tool.name} (filtered VCF) on ${on_string}"> <!-- Created only when input_type is "single". --> | |
| 99 <filter>input_type_cond['input_type'] == 'single'</filter> | |
| 100 </data> | |
| 101 <collection name="output_vcf_collection" type="list" label="${tool.name} (filtered VCFs) on ${on_string}"> <!-- Created only when input_type is "collection"; elements are the files found in output_vcf_dir, named by the __name__ pattern. --> | |
| 102 <discover_datasets pattern="__name__" directory="output_vcf_dir" format="vcf" /> | |
| 103 <filter>input_type_cond['input_type'] == 'collection'</filter> | |
| 104 </collection> | |
| 105 <data name="output_metrics" format="tabular" label="${tool.name} (metrics) on ${on_string}"> <!-- Created only when input_type is "single". --> | |
| 106 <filter>input_type_cond['input_type'] == 'single'</filter> | |
| 107 </data> | |
| 108 <collection name="output_metrics_collection" type="list" label="${tool.name} (metrics) on ${on_string}"> <!-- Created only when input_type is "collection"; elements are the files found in output_metrics_dir, named by the __name__ pattern. --> | |
| 109 <discover_datasets pattern="__name__" directory="output_metrics_dir" format="tabular" /> | |
| 110 <filter>input_type_cond['input_type'] == 'collection'</filter> | |
| 111 </collection> | |
| 112 </outputs> | |
| 113 <tests> <!-- Test 1 covers collection mode with a history reference; test 2 covers single-file mode (input_type left at its default) and checks both the VCF and the metrics outputs. --> | |
| 114 <test> | |
| 115 <param name="input_type" value="collection"/> | |
| 116 <param name="bam_input_collection"> | |
| 117 <collection type="list"> | |
| 118 <element name="bam_input.bam" value="bam_input.bam" dbkey="89"/> | |
| 119 <element name="bam_input2.bam" value="bam_input2.bam" dbkey="89"/> | |
| 120 </collection> | |
| 121 </param> | |
| 122 <param name="vcf_input_collection"> | |
| 123 <collection type="list"> | |
| 124 <element name="vcf_input.vcf" value="vcf_input.vcf" dbkey="89"/> | |
| 125 <element name="vcf_input2.vcf" value="vcf_input2.vcf" dbkey="89"/> | |
| 126 </collection> | |
| 127 </param> | |
| 128 <param name="reference_source" value="history"/> | |
| 129 <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/> | |
| 130 <output_collection name="output_vcf_collection" type="list"> | |
| 131 <element name="vcf_input.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/> | |
| 132 <element name="vcf_input2.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/> | |
| 133 </output_collection> | |
| 134 <output_collection name="output_metrics_collection" type="list"> | |
| 135 <element name="vcf_input.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/> | |
| 136 <element name="vcf_input2.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/> | |
| 137 </output_collection> | |
| 138 </test> | |
| 139 <test> | |
| 140 <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/> | |
| 141 <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/> | |
| 142 <param name="reference_source" value="history"/> | |
| 143 <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/> | |
| 144 <output name="output_vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/> | |
| 145 <output name="output_metrics" file="output_metrics.tabular" ftype="tabular" compare="contains"/> | |
| 146 </test> | |
| 147 </tests> | |
| 148 <help> | |
| 149 **What it does** | |
| 150 | |
| 151 Accepts a combination of single BAM and associated VCF files (or associated collections of each) to produce a VCF file for each | |
| 152 combination whose positions with no coverage are represented as "N". These outputs are restricted to SNPs and those regions | |
| 153 along the reference with no coverage. | |
| 154 | |
| 155 A metrics file is produced for each combination which provides the number of good SNPs, the average coverage and the genome | |
| 156 coverage percentage. | |
| 157 | |
| 158 **Required Options** | |
| 159 | |
| 160 * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single BAM and VCF files or collections of BAM and VCF files) based on the selected option. | |
| 161 * **Choose the source for the reference genome** - select "locally cached" if the reference associated with the BAM and VCF files is available within the Galaxy environment or "from history" to select the reference from the current history. | |
| 162 * **Number of processes for job splitting** - Select the number of processes for splitting the job to shorten execution time. | |
| 163 </help> | |
| 164 <citations> | |
| 165 <citation type="bibtex"> | |
| 166 @misc{None, | |
| 167 journal = {None}, | |
| 168 author = {1. Stuber T}, | |
| 169 title = {Manuscript in preparation}, | |
| 170 year = {None}, | |
| 171 url = {https://github.com/USDA-VS/vSNP},} | |
| 172 </citation> | |
| 173 </citations> | |
| 174 </tool> | |
| 175 |
