Repository 'fermi2'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/fermi2

Changeset 0:b59546214e63 (2017-01-05)
Next changeset 1:06d22a2d3c64 (2022-06-27)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fermikit commit 16dcfc0fb84fad80fcf18417ae46c5499c96147a
added:
all_fasta.loc.sample
fermi2.xml
test-data/aligned_contigs.bam
test-data/flt.vcf
test-data/small.fa.gz
test-data/sv.vcf
test-data/test.fastq.gz
test-data/unitigs.gz
tool_data_table_conf.xml.sample
b
diff -r 000000000000 -r b59546214e63 all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all_fasta.loc.sample Thu Jan 05 08:35:31 2017 -0500
b
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
b
diff -r 000000000000 -r b59546214e63 fermi2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fermi2.xml Thu Jan 05 08:35:31 2017 -0500
[
@@ -0,0 +1,71 @@
+<tool id="fermi2" name="fermi2" version="0.14.dev1">
+    <description>assembles Illumina reads into unitigs</description>
+    <requirements>
+        <requirement type="package" version="r193">fermi2</requirement>
+    </requirements>
+    <!-- Writes a makefile with "fermi2.pl unitig" (redirected to prefix.mak) and runs it with make.
+         The genome size is collected from the user in kilobases, so it is passed with a "k" suffix;
+         fermi2.pl -s takes a size string with a unit suffix (its documented default is 100m). -->
+    <command detect_errors="aggressive"><![CDATA[
+        fermi2.pl unitig
+        -s${genome_size}k
+        -t\${GALAXY_SLOTS:-4}
+        -l$readlength
+        -p prefix "cat
+        #for fastq in $input1:
+            '$fastq'
+        #end for
+        "
+        -T $T
+        $two_pass_error
+        $E
+        > prefix.mak &&
+        make -f prefix.mak
+    ]]></command>
+    <inputs>
+        <param type="data" multiple="true" name="input1" format="fastqsanger,fastqsanger.gz" label="FASTQ reads to assemble"/>
+        <param argument="-l" name="readlength" type="integer" label="primary read length" value="101" min="70"/>
+        <param argument="-s" name="genome_size" type="integer" value="180000" min="1" label="approximate genome size in kilobases" help="Enter approximate genome size in kilobases. For a human genome of 3.2 gigabases enter 3200000"/>
+        <param argument="-T" type="integer" value="61" label="use INT-mer for post-trimming/filtering" min="10"/>
+        <param argument="-2" name="two_pass_error" type="boolean" checked="false" truevalue="-2" falsevalue="" label="Use 2-pass error correction"/>
+        <param argument="-E" type="boolean" checked="false" truevalue="-E" falsevalue="" label="Do not apply error correction"/>
+    </inputs>
+    <outputs>
+        <data name="unitigs" format="fastqsanger.gz" from_work_dir="prefix.mag.gz"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input1" value="test.fastq.gz,test.fastq.gz"/>
+            <param name="readlength" value="150"/>
+            <param name="genome_size" value="1"/>
+            <output name="unitigs" file="unitigs.gz" compare="sim_size"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+fermi2 can assemble reads into unitigs.
+Unitig output can be further analysed by alignment to a reference genome using bwa-mem,
+and based on the alignment variants can be called using the fermi-variants tool.
+
+Note: in this Galaxy tool the approximate genome size is entered in kilobases
+and is handed to fermi2.pl as ``-s <size>k``.
+
+::
+
+  Usage:   fermi2.pl unitig [options] <in.fq>
+
+  Options: -p STR    output prefix [fmdef]
+           -s STR    approximate genome size [100m]
+           -2        2-pass error correction
+           -l INT    primary read length [101]
+           -T INT    use INT-mer for post-trimming/filtering [61]
+           -k INT    min overlap length during unitig construction [based on -l]
+           -o INT    min overlap length during graph cleaning [based on -l]
+           -m INT    min overlap length for unambiguous merging [based on -l]
+           -t INT    number of threads [4]
+           -E        don't apply error correction
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv440</citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r b59546214e63 test-data/aligned_contigs.bam
b
Binary file test-data/aligned_contigs.bam has changed
b
diff -r 000000000000 -r b59546214e63 test-data/flt.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/flt.vcf Thu Jan 05 08:35:31 2017 -0500
b
@@ -0,0 +1,49 @@
+##fileformat=VCFv4.1
+##source=htsbox-pileup-r327
+##reference=/tmp/tmpIcvwsb/files/000/dataset_2.dat
+##contig=<ID=11_1910000_1940000,length=30001>
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##INFO=<ID=_DP,Number=1,Type=Integer,Description="Raw read depth">
+##INFO=<ID=_DS,Number=1,Type=Integer,Description="min{alt_DP_on_forward, alt_DP_on_reverse}">
+##INFO=<ID=_AB,Number=1,Type=Integer,Description="Percentage of non-reference reads">
+##INFO=<ID=_FS,Number=1,Type=Integer,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##FILTER=<ID=DPhigh,Description="High read depth: _DP>62.82">
+##FILTER=<ID=DPlow,Description="Low read depth: _DP<3">
+##FILTER=<ID=FShigh,Description="Large Fisher-Strand bias: _FS>30">
+##FILTER=<ID=ABlow,Description="Low fraction of non-reference reads: _AB<30 at SNPs or _AB<30 at INDELs">
+##FILTER=<ID=DSlow,Description="Low double-strand support at SNPs: _DS<1">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT aligned_contigs_bam
+11_1910000_1940000 636 . c T 25 . _DP=39;_AB=64 GT:AD 1/0:14,25
+11_1910000_1940000 708 . t C 40 . _DP=40;_AB=100 GT:AD 1/1:0,40
+11_1910000_1940000 1116 . t C 43 . _DP=43;_AB=100 GT:AD 1/1:0,43
+11_1910000_1940000 1891 . t C 44 . _DP=44;_AB=100 GT:AD 1/1:0,44
+11_1910000_1940000 2297 . a G 43 . _DP=43;_AB=100 GT:AD 1/1:0,43
+11_1910000_1940000 2727 . a G 42 . _DP=42;_AB=100 GT:AD 1/1:0,42
+11_1910000_1940000 3378 . a G 50 . _DP=50;_AB=100 GT:AD 1/1:0,50
+11_1910000_1940000 4140 . c T 21 . _DP=36;_AB=58 GT:AD 1/0:15,21
+11_1910000_1940000 4820 . g A 35 . _DP=35;_AB=100 GT:AD 1/1:0,35
+11_1910000_1940000 4860 . t G 36 . _DP=36;_AB=100 GT:AD 1/1:0,36
+11_1910000_1940000 4976 . g A 33 . _DP=33;_AB=100 GT:AD 1/1:0,33
+11_1910000_1940000 5455 . a AGT 12 . _DP=25;_AB=48 GT:AD 0/1:13,12
+11_1910000_1940000 5559 . g T 39 . _DP=39;_AB=100 GT:AD 1/1:0,39
+11_1910000_1940000 6369 . ct C 8 ABlow _DP=42;_AB=19 GT:AD 0/1:34,8
+11_1910000_1940000 6654 . g A 43 . _DP=43;_AB=100 GT:AD 1/1:0,43
+11_1910000_1940000 7873 . g A 45 . _DP=45;_AB=100 GT:AD 1/1:0,45
+11_1910000_1940000 8084 . t C 54 . _DP=54;_AB=100 GT:AD 1/1:0,54
+11_1910000_1940000 10894 . t G 40 . _DP=40;_AB=100 GT:AD 1/1:0,40
+11_1910000_1940000 12259 . a G 35 . _DP=35;_AB=100 GT:AD 1/1:0,35
+11_1910000_1940000 15695 . g A 36 . _DP=36;_AB=100 GT:AD 1/1:0,36
+11_1910000_1940000 16353 . ctt C 14 . _DP=14;_AB=100 GT:AD 1/1:0,14
+11_1910000_1940000 20714 . c T 24 . _DP=42;_AB=57 GT:AD 1/0:18,24
+11_1910000_1940000 24531 . t G 32 . _DP=32;_AB=100 GT:AD 1/1:0,32
+11_1910000_1940000 24546 . gt G 25 . _DP=25;_AB=100 GT:AD 1/1:0,25
+11_1910000_1940000 25710 . t TTG 17 . _DP=46;_AB=37 GT:AD 0/1:29,17
+11_1910000_1940000 25813 . c CTG,CTGTG 13 . _DP=33;_AB=70 GT:AD 1/0:10,13,10
+11_1910000_1940000 26084 . ctg C,CTGTGTG 17 . _DP=36;_AB=75 GT:AD 1/2:9,17,10
+11_1910000_1940000 26212 . g A 29 . _DP=55;_AB=53 GT:AD 1/0:26,29
+11_1910000_1940000 26370 . c CTG,CTGTGTG 12 . _DP=34;_AB=68 GT:AD 1/0:11,12,11
+11_1910000_1940000 26521 . a G 30 . _DP=30;_AB=100 GT:AD 1/1:0,30
+11_1910000_1940000 26713 . a G 32 . _DP=32;_AB=100 GT:AD 1/1:0,32
+11_1910000_1940000 27735 . tc T 41 . _DP=41;_AB=100 GT:AD 1/1:0,41
+11_1910000_1940000 28524 . c A 18 . _DP=38;_AB=47 GT:AD 0/1:20,18
b
diff -r 000000000000 -r b59546214e63 test-data/small.fa.gz
b
Binary file test-data/small.fa.gz has changed
b
diff -r 000000000000 -r b59546214e63 test-data/sv.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sv.vcf Thu Jan 05 08:35:31 2017 -0500
b
@@ -0,0 +1,16 @@
+##fileformat=VCFv4.1
+##source=htsbox-abreak-r327
+##reference=/tmp/tmpIcvwsb/files/000/dataset_2.dat
+##contig=<ID=11_1910000_1940000,length=30001>
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=INS,Description="Insertion">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="SV length">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End coordinate of this variant">
+##INFO=<ID=QGAP,Number=1,Type=Integer,Description="Length of gap on the query sequence">
+##INFO=<ID=MINMAPQ,Number=1,Type=Integer,Description="Min flanking mapping quality">
+##INFO=<ID=MINSC,Number=1,Type=Integer,Description="Min flanking alignment score">
+##INFO=<ID=MINTIPQ,Number=1,Type=Integer,Description="Min quality/depth flanking the break point">
+##FILTER=<ID=LowSupp,Description="MINTIPQ < 10">
+#CHROM POS ID REF ALT QUAL FILTER INFO
+11_1910000_1940000 5276 . C <DEL> 30 . SVTYPE=DEL;END=26956;SVLEN=21678;QGAP=3;MINMAPQ=60;MINSC=793;MINTIPQ=23
b
diff -r 000000000000 -r b59546214e63 test-data/test.fastq.gz
b
Binary file test-data/test.fastq.gz has changed
b
diff -r 000000000000 -r b59546214e63 test-data/unitigs.gz
b
Binary file test-data/unitigs.gz has changed
b
diff -r 000000000000 -r b59546214e63 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Jan 05 08:35:31 2017 -0500
b
@@ -0,0 +1,9 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc -->
+<tables>
+    <!-- Locations of all fasta files under the genome directory; entries come from tool-data/all_fasta.loc, where "#" is the comment character -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>
+