diff recover_samples_discarded_by_subsample.xml @ 0:607c5e7e0a64 draft

planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/galaxytools-emc/tree/master/tools/mycrobiota commit 1c4c58018b64ff3531a719e789ce71cb0a1244c5
author erasmus-medical-center
date Wed, 13 Dec 2017 10:09:50 -0500
parents
children e93e39c121b1
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/recover_samples_discarded_by_subsample.xml	Wed Dec 13 10:09:50 2017 -0500
@@ -0,0 +1,63 @@
+<tool id="mycrobiota_subsample_add_discarded_samples" name="Recover samples" version="0.1" profile="16.07">
+    <description> discarded by sub.sample</description>
+    <requirements>
+        <requirement type="package" version="1.36.1">mothur</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+        ln -s "$in_fasta" fasta.dat &&
+        ln -s "$in_group" group.dat &&
+        ln -s "$in_fasta_subsampled" fasta2.dat &&
+        ln -s "$in_group_subsampled" group2.dat
+
+        ## mothur count.groups on in_fasta
+        && echo 'count.groups(group=group.dat)' | sed 's/ //g' | mothur
+
+        ## get group names with fewer than threshold reads and make a dash-separated list
+        && samples=`python -c "print('-'.join([g[0] for g in [ l.strip().split('\t') for l in open('group.count.summary').readlines() ] if int(g[1]) < $threshold]))"`
+
+        ## get.groups on in_fasta with this list of groups, if list not empty, otherwise create empty file
+        &&
+        if [ -z "\$samples"];
+            then
+                touch fasta.pick.dat;
+                touch group.pick.dat;
+            else
+                echo "get.groups(fasta=fasta.dat, group=group.dat, groups=\$samples)" | sed 's/ //g' | mothur;
+        fi
+
+        ## merge selected reads (fasta.pick.dat) with the fasta file from after sub.sample
+        && echo "merge.files(input=fasta2.dat-fasta.pick.dat, output=final_fasta)" | sed 's/ //g' | mothur
+
+        ## merge group files
+        && echo "merge.files(input=group2.dat-group.pick.dat, output=final_group)" | sed 's/ //g' | mothur
+
+    ]]></command>
+    <inputs>
+        <param name="in_fasta" type="data" format="fasta" label="Fasta before subsample"/>
+        <param name="in_fasta_subsampled" type="data" format="fasta" label="Fasta after subsample"/>
+        <param name="in_group" type="data" format="mothur.groups" label="Group file before subsample"/>
+        <param name="in_group_subsampled" type="data" format="mothur.groups" label="Group file after subsample"/>
+        <param name="threshold" type="integer" value="" min="0" label="Subsample level - cutoff value used in the subsampling" help="any samples with fewer reads than this value would have been discarded by sub.sample, but we want to add them back in" />
+    </inputs>
+    <outputs>
+        <data name="out_fasta" format="fasta" from_work_dir="final_fasta" label="${tool.name} on ${on_string}: fasta"/>
+        <data name="out_group" format="mothur.groups" from_work_dir="final_group" label="${tool.name} on ${on_string}: group"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="in_fasta" value="fasta_before_subsample_small.fasta" ftype="fasta"/>
+            <param name="in_fasta_subsampled" value="fasta_after_subsample_small.fasta" ftype="fasta"/>
+            <param name="in_group" value="groups_before_subsample_small.groups" ftype="mothur.groups"/>
+            <param name="in_group_subsampled" value="groups_after_subsample_small.groups" ftype="mothur.groups"/>
+            <param name="threshold" value="3"/>
+            <output name="out_fasta" file="recovered.fasta"/>
+            <output name="out_group" file="recovered.groups"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+filter fasta file by group based on number of sequences in the group.
+    ]]></help>
+    <citations>
+    </citations>
+</tool>