Galaxy |

Changeset 0:78df8fc2b3ab (2023-01-11)

Next changeset 1:d9965e143053 (2023-09-21)

Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ucsc-tools/fatovcf commit 16910aa7b33e5ff73430be1ca9d7727f4ea5786a

added:
fatovcf.xml
test-data/excl.txt
test-data/input.fa
test-data/mask.vcf
test-data/out1.vcf
test-data/out2.vcf

diff -r 000000000000 -r 78df8fc2b3ab fatovcf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fatovcf.xml Wed Jan 11 13:14:29 2023 +0000

[

b'@@ -0,0 +1,143 @@\n+<tool id="fatovcf" name="faToVcf" version="@TOOL_VERSION@+galaxy0" profile="21.05" license="MIT">\n+ <description>\n+ Convert a FASTA alignment file to Variant Call Format (VCF) single-nucleotide diffs\n+ </description>\n+ <macros>\n+ <token name="@TOOL_VERSION@">426</token>\n+ </macros>\n+ <xrefs>\n+ <xref type="bio.tools">UCSC_Genome_Browser_Utilities</xref>\n+ </xrefs>\n+ <requirements>\n+ <requirement type="package" version="@TOOL_VERSION@">ucsc-fatovcf</requirement>\n+ </requirements>\n+ <version_command><![CDATA[ echo "@TOOL_VERSION@" ]]></version_command>\n+ <command detect_errors="exit_code"><![CDATA[\n+ #if $in_fasta\n+ ln -s \'$in_fasta\' in.fa &&\n+ #end if\n+ faToVcf\n+ in.fa\n+ \'$out\'\n+ #if $ref_seq.refSeq == "customRef"\n+ -ref=$ref_seq.ref\n+ #end if\n+ $ambiguous\n+ #if $excl_seq.excludeFile\n+ -excludeFile=\'$excl_seq.excludeFile\'\n+ #end if\n+ -maxDiff=$excl_seq.maxDiff\n+ #if $mask_sites.maskSites\n+ -maskSites=\'$mask_sites.maskSites\'\n+ #end if\n+ #if $mask_sites.windowSize > 0\n+ -windowSize=$mask_sites.windowSize\n+ -minAmbigInWindow=$mask_sites.minAmbigInWindow\n+ #end if\n+ $includeNoAltN\n+ -minAc=$minAc\n+ -minAf=$minAf\n+ #if $output.startOffset > 0\n+ -startOffset=$output.startOffset\n+ #end if\n+ $output.includeRef\n+ $output.noGenotypes\n+ #if $output.vcfChrom\n+ -vcfChrom=\'$output.vcfChrom\'\n+ #end if\n+ ]]></command>\n+ <inputs>\n+ <param name="in_fasta" format="fasta" type="data" label="FASTA Alignment" help="Must contain a series of sequences with different names and the same length. Both N and - are treated as missing information." />\n+ \n+ <conditional name="ref_seq">\n+ <param name="refSeq" type="select" label="Determine reference sequence" help="Which sequence from the FASTA file should be used as the reference sequence.">\n+ <option value="" selected="true">Use the first sequence as reference</option>\n+ <option value="customRef">Use a different sequence as reference</option>\n+ </param>\n+ <when value="customRef">\n+ <param argument="-ref" type="text" label="Name of sequence that should be used as reference sequence:" help="Must be present in the FASTA file." />\n+ </when>\n+ <when value="" />\n+ </conditional>\n+\n+ <param name="ambiguous" type="select" label="Treat ambiguous bases" help="If 1: Treat ambiguous bases as N, ambiguous bases (N, R, V etc.) are treated as N (no call). If 2: Resolve ambiguous characters, if the character represents two bases and one is the reference base, convert it to the non-reference base. Otherwise convert it to N. Default: 0: Don\'t treat ambiguous bases">\n+ <option value="" selected="true">0: Don\'t treat ambiguous bases</option>\n+ <option value="-ambiguousToN">1: Treat ambiguous bases as N (no call)</option>\n+ <option value="-resolveAmbiguous">2: Resolve ambiguous characters (convert)</option>\n+ </param>\n+\n+ <section name="excl_seq" title="Exclude sequences" expanded="true">\n+ <param argument="-excludeFile" format="txt" type="data" optional="true" label="Exclude sequences from text file" help="Exclude sequences named in file which has one sequence name per line." />\n+ <param argument="-maxDiff" type="integer" min="0" value="0" label="Maximum number of mismatches" help="Exclude sequences with more than N mismatches with the reference sequence. If -windowSize is used, sequences are masked accordingly before the mismatches are counted. Default: 0" />\n+ </section>\n+\n+ <section name="mask_sites" title="Mask sites" expanded="true">\n+ <param argument="-maskSites" format="vcf" type'..b'minAmbigWindow bases in a window of +-N bases around the base. Masking approach adapted from https://github.com/roblanf/sarscov2phylo/blob/master/scripts/mask_seq.py Use -windowSize=7 for same results. Default: 0" />\n+ <param argument="-minAmbigInWindow" type="integer" min="1" value="2" label="Minimum of ambiguous characters within the window given above" help="When -windowSize is provided, mask any base for which there are at least this many N, ambiguous or gap characters within the window. Default: 2" />\n+ </section>\n+ \n+ <param argument="-includeNoAltN" type="boolean" truevalue="-includeNoAltN" falsevalue="" label="Include positions without defined ALT allele" help="Include base positions with no alternate alleles observed, but at least one N (missing base/no-call). Default: false" />\n+ <param argument="-minAc" type="integer" min="0" value="0" label="Minimum allele count" help="Ignore alternate alleles observed fewer than N times. Default: 0" />\n+ <param argument="-minAf" type="float" min="0.0" max="1.0" value="0.0" label="Minimum allele frequency" help="Ignore alternate alleles observed in less than F of non-N bases. Default: 0.0" />\n+\n+ <section name="output" title="Output VCF options" expanded="true">\n+ <param argument="-startOffset" type="integer" min="0" value="0" label="Start offset" help="Add N bases to each position, e.g. for trimmed alignments. Default: 0" />\n+ <param argument="-includeRef" type="boolean" truevalue="-includeRef" falsevalue="" label="Include the reference in the genotype columns" help="Default: omitted as redundant (false)" />\n+ <param argument="-noGenotypes" type="boolean" truevalue="-noGenotypes" falsevalue="" label="Output 8-column VCF" help="VCF without the sample genotype columns. Default: false" />\n+ <param argument="-vcfChrom" type="text" optional="true" label="Use this sequence for the CHROM column in the VCF" help="Default: name of the reference sequence." />\n+ </section>\n+ </inputs>\n+ <outputs>\n+ <data name="out" format="vcf" />\n+ </outputs>\n+ <tests>\n+ <test expect_num_outputs="1"> \n+ <param name="in_fasta" value="input.fa" />\n+ <output name="out" ftype="vcf" file="out1.vcf" lines_diff="4" />\n+ </test>\n+ <test expect_num_outputs="1"> \n+ <param name="in_fasta" value="input.fa" />\n+\n+ <conditional name="ref_seq">\n+ <param name="refSeq" value="customRef" />\n+ <param name="ref" value="sample3" />\n+ </conditional>\n+\n+ <param name="ambiguous" value="-ambiguousToN" />\n+\n+ <section name="excl_seq">\n+ <param name="excludeFile" value="excl.txt" />\n+ <param name="maxDiff" value="3" />\n+ </section>\n+\n+ <section name="mask_sites">\n+ <param name="maskSites" value="mask.vcf" />\n+ <param name="windowSize" value="7" />\n+ <param name="minAmbigInWindow" value="3" />\n+ </section>\n+\n+ <param name="includeNoAltN" value="true" />\n+ <param name="minAc" value="1" />\n+ <param name="minAf" value="0.1" />\n+\n+ <section name="output">\n+ <param name="startOffset" value="1" />\n+ <param name="includeRef" value="true" />\n+ <param name="noGenotypes" value="true" />\n+ <param name="vcfChrom" value="sample1" />\n+ </section>\n+\n+ <output name="out" ftype="vcf" file="out2.vcf" lines_diff="4" />\n+ </test>\n+ </tests>\n+ <help><![CDATA[\n+**What it does**\n+\n+`faToVcf`_ is a tool to extract a VCF from a multi-sequence FASTA alignment.\n+\n+.. _faToVcf: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/FOOTER.txt \n+ ]]> </help>\n+ <citations>\n+ </citations>\n+</tool>\n'

diff -r 000000000000 -r 78df8fc2b3ab test-data/excl.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/excl.txt Wed Jan 11 13:14:29 2023 +0000

@@ -0,0 +1,1 @@
+sample2

diff -r 000000000000 -r 78df8fc2b3ab test-data/input.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fa Wed Jan 11 13:14:29 2023 +0000

@@ -0,0 +1,6 @@
+>sample1
+ANRGACACAGTCAC
+>sample2
+ARNGACAC----AC
+>sample3
+ARRAACGCATTCAN

diff -r 000000000000 -r 78df8fc2b3ab test-data/mask.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mask.vcf Wed Jan 11 13:14:29 2023 +0000

@@ -0,0 +1,7 @@
+##fileformat=VCFv4.3
+##INFO=<ID=GENE,Number=1,Type=String,Description="Position falls into range of this gene">
+##INFO=<ID=AA_POS,Number=1,Type=Integer,Description="Position of amino acid residue within gene">
+##INFO=<ID=AA_REF,Number=1,Type=String,Description="Reference amino acid residue">
+##INFO=<ID=AA_ALT,Number=.,Type=String,Description="List of alternative amino acid residues (IUPAC ambiguity code)">
+#CHROM POS ID REF ALT QUAL FILTER INFO
+sample3 3 . T . . mask SUB=NDM;EXC=seq_end

diff -r 000000000000 -r 78df8fc2b3ab test-data/out1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out1.vcf Wed Jan 11 13:14:29 2023 +0000

@@ -0,0 +1,7 @@
+##fileformat=VCFv4.2
+##reference=test-data/input.fa:sample1
+##source=faToVcf test-data/input.fa test-data/out1.vcf
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample2 sample3
+sample1 4 G4A G A . . AC=1;AN=2 GT 0 1
+sample1 7 A7G A G . . AC=1;AN=2 GT 0 1
+sample1 10 G10T G T . . AC=1;AN=1 GT . 1

diff -r 000000000000 -r 78df8fc2b3ab test-data/out2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out2.vcf Wed Jan 11 13:14:29 2023 +0000

@@ -0,0 +1,8 @@
+##fileformat=VCFv4.2
+##reference=test-data/input.fa:sample3
+##source=faToVcf test-data/input.fa out-test.vcf
+#CHROM POS ID REF ALT QUAL FILTER INFO
+sample1 3 R3R R * . . AC=0;AN=0
+sample1 5 A5G A G . . AC=1;AN=2
+sample1 8 G8A G A . . AC=1;AN=2
+sample1 11 T11G T G . . AC=1;AN=2