Repository 'annotatemyids'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/annotatemyids

Changeset 2:9ab35a2245ce (2018-03-11)
Previous changeset 1:2e1b256f732f (2018-01-03) Next changeset 3:1a125daea0d8 (2018-11-26)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/annotatemyids commit 348750c48bc5e4f9ecad408519cdd2c68ac749ab
modified:
annotateMyIDs.xml
added:
test-data/out_gokegg_dupsrem.tab
b
diff -r 2e1b256f732f -r 9ab35a2245ce annotateMyIDs.xml
--- a/annotateMyIDs.xml Wed Jan 03 16:01:27 2018 -0500
+++ b/annotateMyIDs.xml Sun Mar 11 05:17:23 2018 -0400
[
@@ -1,4 +1,4 @@
-<tool id="annotatemyids" name="annotateMyIDs" version="3.5.0.0">
+<tool id="annotatemyids" name="annotateMyIDs" version="3.5.0.1">
     <description>annotate a generic set of identifiers</description>
     <requirements>
         <requirement type="package" version="3.5.0">bioconductor-org.hs.eg.db</requirement>
@@ -7,7 +7,7 @@
         <requirement type="package" version="3.5.0">bioconductor-org.dr.eg.db</requirement>
     </requirements>
     <version_command><![CDATA[
-echo $(R --version | grep version | grep -v GNU)", org.Hs.eg.db version" $(R --vanilla --slave -e "library(org.Hs.eg.db); cat(sessionInfo()\$otherPkgs\$org.Hs.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Dr.eg.db version" $(R --vanilla --slave -e "library(org.Dr.eg.db); cat(sessionInfo()\$otherPkgs\$org.Dr.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Dm.eg.db version" $(R --vanilla --slave -e "library(org.Dm.eg.db); cat(sessionInfo()\$otherPkgs\$org.Dm.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Mm.eg.db version" $(R --vanilla --slave -e "library(org.Mm.eg.db); cat(sessionInfo()\$otherPkgs\$org.Mm.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+echo $(R --version | grep version | grep -v GNU)", org.Hs.eg.db version" $(R --vanilla --slave -e "library(org.Hs.eg.db); cat(sessionInfo()\$otherPkgs\$org.Hs.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Dr.eg.db version" $(R --vanilla --slave -e "library(org.Dr.eg.db); cat(sessionInfo()\$otherPkgs\$org.Dr.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Dm.eg.db version" $(R --vanilla --slave -e "library(org.Dm.eg.db); cat(sessionInfo()\$otherPkgs\$org.Dm.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Mm.eg.db version" $(R --vanilla --slave -e "library(org.Mm.eg.db); cat(sessionInfo()\$otherPkgs\$org.Mm.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
     ]]></version_command>
     <command detect_errors="exit_code"><![CDATA[
         #if $rscriptOpt:
@@ -27,6 +27,7 @@
 organism <- "${organism}"
 output_cols <- "${output_cols}"
 file_has_header <- ${file_has_header}
+remove_dups <- ${remove_dups}
 
 ids <- as.character(read.table('$id_file', header=file_has_header)[,1])
 
@@ -48,6 +49,11 @@
 
 cols <- unlist(strsplit(output_cols, ","))
 result <- select(db, keys=ids, keytype=id_type, columns=cols)
+
+if(remove_dups) {
+    result <- result[!duplicated(result$${id_type}),]
+}
+
 write.table(result, file='$out_tab', sep="\t", row.names=FALSE, quote=FALSE)
 
     ]]></configfile>
@@ -87,6 +93,7 @@
             <option value="ONTOLOGY">ONTOLOGY</option>
             <option value="PATH">KEGG</option>
         </param>
+        <param name="remove_dups" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Remove duplicates?" help="If this option is set to Yes, only the first occurrence of each input Gene ID will be kept. Default: No" />
         <param name="rscriptOpt" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used to annotate the IDs will be provided as a text file in the output. Default: No" />
     </inputs>
     <outputs>
@@ -118,7 +125,16 @@
             <param name="id_type" value="ENSEMBL"/>
             <param name="organism" value="Hs"/>
             <param name="output_cols" value="ENSEMBL,GO,ONTOLOGY,EVIDENCE" />
-            <output name="out_tab" file="out_gokegg.tab" compare="contains" />
+            <output name="out_tab" file="out_gokegg.tab" />
+        </test>
+        <!-- Ensure duplicate Gene ID removal works -->
+        <test expect_num_outputs="1">
+            <param name="id_file" value="ensembl_ids.tab" ftype="tabular"/>
+            <param name="id_type" value="ENSEMBL"/>
+            <param name="organism" value="Hs"/>
+            <param name="output_cols" value="ENSEMBL,GO,ONTOLOGY,EVIDENCE" />
+            <param name="remove_dups" value="True" />
+            <output name="out_tab" file="out_gokegg_dupsrem.tab" />
         </test>
     </tests>
     <help><![CDATA[
b
diff -r 2e1b256f732f -r 9ab35a2245ce test-data/out_gokegg_dupsrem.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_gokegg_dupsrem.tab Sun Mar 11 05:17:23 2018 -0400
b
@@ -0,0 +1,9 @@
+ENSEMBL GO ONTOLOGY EVIDENCE
+ENSG00000091831 GO:0000122 BP IMP
+ENSG00000082175 GO:0000978 MF IDA
+ENSG00000141736 GO:0000165 BP TAS
+ENSG00000012048 GO:0000151 CC NAS
+ENSG00000139618 GO:0000722 BP IEA
+ENSG00000129514 GO:0000122 BP IEA
+ENSG00000171862 GO:0000079 BP TAS
+ENSG00000141510 GO:0000122 BP IBA