Next changeset 1:a175c35ccdce (2024-10-05) |
Commit message:
planemo upload for repository https://github.com/veg/hivclustering/ commit 7d666f963da2c5e3b17c313526cc6169f3242c3c |
added:
hivclustering.xml macros.xml test-data/hivclustering-in1.csv test-data/hivclustering-in2.csv test-data/hivclustering-out1.json test-data/hivclustering-out2.json |
b |
diff -r 000000000000 -r ef842bb94d1f hivclustering.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hivclustering.xml Thu Jun 14 07:04:38 2018 -0400 |
[ |
<?xml version="1.0"?>
<!--
    Galaxy wrapper around the `hivnetworkcsv` command provided by the
    python-hivclustering package (see the "requirements" macro in macros.xml).

    The tool takes one or more CSV files of inferred pairwise links, passes the
    selected options to `hivnetworkcsv`, and captures the JSON written to
    standard output as the "graph" dataset. When the cluster option is enabled,
    the CSV of cluster assignments is collected as a second dataset.
-->
<tool id="hivclustering" name="Make inferences" version="@VERSION@.0">
    <description>on HIV-1 transmission networks using HIVClustering</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## NOTE(review): 'input' (and 'sequences' below) accept several datasets;
        ## Galaxy renders such a value as one comma-separated string inside the
        ## quotes. Confirm that hivnetworkcsv accepts that form.
        hivnetworkcsv --input '$input' $cluster $multiple_edges $singletons $centralities
        --threshold $threshold --exclude $exclude --triangles $triangles --format $format
        #if $contamination.contaminants != 'ignore':
            --contaminants $contamination.contaminants
            --contaminant-file '$contamination.contaminant_file'
        #end if
        #if $edges.edge_filtering != 'ignore':
            --edge-filtering $edges.edge_filtering
            ## 'sequences' is declared inside the 'edges' conditional, so it has to
            ## be addressed through it; the bare name is undefined at the top level.
            --sequences '$edges.sequences'
        #end if
        ## Optional datasets evaluate as false when nothing was selected.
        #if $uds:
            --uds '$uds'
        #end if
        #if $edi:
            --edi '$edi'
        #end if
        #if $old_edi:
            --old_edi '$old_edi'
        #end if
        #if $resistance:
            --resistance '$resistance'
        #end if
        #if $attributes:
            --attributes '$attributes'
        #end if
        #if $filter:
            --filter '$filter'
        #end if
        --json > hivcluster.json
        ]]>
    </command>
    <inputs>
        <param argument="--input" type="data" format="csv" multiple="True" label="Input file with inferred links"/>
        <!-- Both non-default branches need the same contaminant ID file. -->
        <conditional name="contamination">
            <param argument="--contaminants" type="select" label="How to treat contamination">
                <option value="ignore" selected="True">Do nothing</option>
                <option value="report">Report</option>
                <option value="remove">Remove</option>
            </param>
            <when value="ignore"/>
            <when value="report">
                <param name="contaminant_file" argument="--contaminant-file" type="data" format="tabular" label="Input file with contaminant IDs"/>
            </when>
            <when value="remove">
                <param name="contaminant_file" argument="--contaminant-file" type="data" format="tabular" label="Input file with contaminant IDs"/>
            </when>
        </conditional>
        <!-- Both non-default branches need the alignment used to compute edge support. -->
        <conditional name="edges">
            <param name="edge_filtering" argument="--edge-filtering" type="select" label="Compute edge support and mark edges">
                <option value="ignore" selected="True">Do not mark</option>
                <option value="report">For display</option>
                <option value="remove">For removal</option>
            </param>
            <when value="ignore"/>
            <when value="report">
                <param argument="--sequences" type="data" format="fasta" multiple="True" label="Input multi-sequence alignment"/>
            </when>
            <when value="remove">
                <param argument="--sequences" type="data" format="fasta" multiple="True" label="Input multi-sequence alignment"/>
            </when>
        </conditional>
        <param argument="--uds" type="data" format="csv" optional="True" label="Input file with UDS data"/>
        <param argument="--edi" type="data" format="json" optional="True" label="Input JSON with clinical information"/>
        <param argument="--old_edi" type="data" format="csv" optional="True" label="Legacy EDI dates in CSV format"/>
        <!-- Format aligned with the label, which describes a JSON document. -->
        <param argument="--resistance" type="data" format="json" optional="True" label="Resistance annotation in JSON format"/>
        <param argument="--attributes" type="data" format="csv" optional="True" label="Input CSV with node attributes"/>
        <!-- "txt" is the Galaxy extension for plain text datasets. -->
        <param argument="--filter" type="data" format="txt" optional="True" label="File with IDs to cluster, one per line"/>
        <param argument="--format" type="select" label="Sequence ID format">
            <option value="AEH">AEH</option>
            <option value="LANL">LANL</option>
            <option value="plain">plain</option>
        </param>
        <!-- When checked, the assignments are written to hivcluster.csv in the job directory. -->
        <param argument="--cluster" type="boolean" truevalue="--cluster hivcluster.csv" falsevalue="" label="Output a CSV file with cluster assignments for each sequence"/>
        <param name="multiple_edges" argument="--multiple-edges" type="boolean" truevalue="--multiple-edges" falsevalue="" label="Permit multiple edges to link the same pair of nodes in the network"/>
        <param argument="--singletons" type="boolean" truevalue="--singletons" falsevalue="" label="Include singletons in output"/>
        <!--
            NOTE(review): the label promises a CSV file, yet the flag is passed
            without a file name and no output collects one. Confirm against the
            hivnetworkcsv usage text.
        -->
        <param argument="--centralities" type="boolean" truevalue="--centralities" falsevalue="" label="Output a CSV file with node centralities"/>
        <param argument="--threshold" type="float" value="0.015" label="Only count edges where the distance is less than this threshold"/>
        <param argument="--exclude" type="integer" value="1959" label="Only consider sequences isolated after this year"/>
        <param argument="--triangles" type="integer" value="65536" label="Maximum number of triangles to consider in each filtering pass"/>
    </inputs>
    <outputs>
        <data name="graph" from_work_dir="hivcluster.json" format="hivtrace"/>
        <!-- Only created when the cluster option above is checked. -->
        <data name="clusters" from_work_dir="hivcluster.csv" format="csv" label="${tool.name} on ${on_string}: cluster assignments">
            <filter>cluster</filter>
        </data>
    </outputs>
    <tests>
        <!-- JSON output is always requested by the command, so the tests set no such parameter. -->
        <test expect_num_outputs="1">
            <param name="input" value="hivclustering-in1.csv" ftype="csv"/>
            <param name="format" value="plain"/>
            <param name="threshold" value="0.8"/>
            <output name="graph" ftype="hivtrace">
                <assert_contents>
                    <has_text text="Cluster sizes"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <param name="input" value="hivclustering-in2.csv" ftype="csv"/>
            <param name="format" value="plain"/>
            <param name="threshold" value="0.8"/>
            <output name="graph" ftype="hivtrace">
                <assert_contents>
                    <has_text text="Cluster sizes"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
HIVClustering
-------------

A python library that makes inferences on HIV-1 transmission networks.
    ]]></help>
    <expand macro="citations"/>
</tool>
b |
diff -r 000000000000 -r ef842bb94d1f macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Jun 14 07:04:38 2018 -0400 |
b |
<?xml version="1.0"?>
<!--
    Macros shared with hivclustering.xml, which imports this file and expands
    the "requirements" and "citations" macros defined below.
-->
<macros>
    <!-- Package version; also used as the prefix of the tool version in hivclustering.xml. -->
    <token name="@VERSION@">1.3.1</token>

    <!-- Dependency providing the wrapped command, pinned to the token above. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@VERSION@">python-hivclustering</requirement>
        </requirements>
    </xml>

    <!-- Reference shown with the tool. -->
    <xml name="citations">
        <citations>
            <citation type="bibtex">
                @UNPUBLISHED{spond,
                    author = "Sergei Kosakovsky Pond",
                    title = "HyPhy: Hypothesis Testing using Phylogenies",
                    year = "2000",
                    note = "http://hyphy.org/",
                    url = "http://hyphy.org/"}
            </citation>
        </citations>
    </xml>
</macros>
b |
diff -r 000000000000 -r ef842bb94d1f test-data/hivclustering-in1.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/hivclustering-in1.csv Thu Jun 14 07:04:38 2018 -0400 |
b |
@@ -0,0 +1,8 @@ +ID1,ID2,Distance +B_US_86_JRFL_ACC_U63632,B_US_90_WEAU160_ACC_U21135,0.0408994 +B_FR_83_HXB2_ACC_K03455_5,B_US_83_RF_ACC_M17451,0.045156 +D_CD_83_NDK_ACC_M27323,D_CD_84_84ZR085_ACC_U88822,0.0491974 +B_US_83_RF_ACC_M17451,B_US_86_JRFL_ACC_U63632,0.048328 +B_FR_83_HXB2_ACC_K03455_5,B_US_86_JRFL_ACC_U63632,0.0296218 +D_CD_83_ELI_ACC_K03454_7,D_CD_83_NDK_ACC_M27323,0.0287246 +B_FR_83_HXB2_ACC_K03455_5,B_US_90_WEAU160_ACC_U21135,0.0327566 |
b |
diff -r 000000000000 -r ef842bb94d1f test-data/hivclustering-in2.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/hivclustering-in2.csv Thu Jun 14 07:04:38 2018 -0400 |
b |
@@ -0,0 +1,29 @@ +ID1,ID2,Distance +B_US_86_JRFL_ACC_U63632,B_US_90_WEAU160_ACC_U21135,0.0408994 +B_US_90_WEAU160_ACC_U21135,D_CD_83_ELI_ACC_K03454_7,0.0771856 +B_US_86_JRFL_ACC_U63632,D_CD_83_ELI_ACC_K03454_7,0.0771797 +B_FR_83_HXB2_ACC_K03455_5,B_US_83_RF_ACC_M17451,0.045156 +B_US_83_RF_ACC_M17451,B_US_86_JRFL_ACC_U63632,0.048328 +B_US_90_WEAU160_ACC_U21135,D_CD_83_NDK_ACC_M27323,0.0609097 +B_FR_83_HXB2_ACC_K03455_5,B_US_86_JRFL_ACC_U63632,0.0296218 +B_US_86_JRFL_ACC_U63632,D_CD_83_NDK_ACC_M27323,0.0609044 +B_US_83_RF_ACC_M17451,B_US_90_WEAU160_ACC_U21135,0.0515908 +B_US_90_WEAU160_ACC_U21135,D_CD_84_84ZR085_ACC_U88822,0.0740203 +B_FR_83_HXB2_ACC_K03455_5,B_US_90_WEAU160_ACC_U21135,0.0327566 +B_US_86_JRFL_ACC_U63632,D_CD_84_84ZR085_ACC_U88822,0.0705011 +B_US_83_RF_ACC_M17451,D_CD_83_ELI_ACC_K03454_7,0.0810759 +B_US_90_WEAU160_ACC_U21135,D_UG_94_94UG114_ACC_U88824,0.0890019 +B_FR_83_HXB2_ACC_K03455_5,D_CD_83_ELI_ACC_K03454_7,0.0669206 +B_US_83_RF_ACC_M17451,D_CD_83_NDK_ACC_M27323,0.0661066 +B_US_86_JRFL_ACC_U63632,D_UG_94_94UG114_ACC_U88824,0.0882054 +D_CD_83_ELI_ACC_K03454_7,D_CD_83_NDK_ACC_M27323,0.0287246 +B_FR_83_HXB2_ACC_K03455_5,D_CD_83_NDK_ACC_M27323,0.0592586 +B_US_83_RF_ACC_M17451,D_CD_84_84ZR085_ACC_U88822,0.0769146 +D_CD_83_NDK_ACC_M27323,D_CD_84_84ZR085_ACC_U88822,0.0491974 +D_CD_83_ELI_ACC_K03454_7,D_CD_84_84ZR085_ACC_U88822,0.055948 +B_FR_83_HXB2_ACC_K03455_5,D_CD_84_84ZR085_ACC_U88822,0.0663619 +B_US_83_RF_ACC_M17451,D_UG_94_94UG114_ACC_U88824,0.0955213 +D_CD_83_NDK_ACC_M27323,D_UG_94_94UG114_ACC_U88824,0.0726626 +B_FR_83_HXB2_ACC_K03455_5,D_UG_94_94UG114_ACC_U88824,0.0847988 +D_CD_83_ELI_ACC_K03454_7,D_UG_94_94UG114_ACC_U88824,0.0742033 +D_CD_84_84ZR085_ACC_U88822,D_UG_94_94UG114_ACC_U88824,0.0805088 |
b |
diff -r 000000000000 -r ef842bb94d1f test-data/hivclustering-out1.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/hivclustering-out1.json Thu Jun 14 07:04:38 2018 -0400 |
[ |
@@ -0,0 +1,206 @@ +{ + "Cluster sizes": [ + 4, + 3 + ], + "Degrees": { + "Distribution": [ + 2, + 3, + 2 + ], + "Model": "Waring", + "fitted": [ + 0.5000721527871422, + 0.24996196530120798, + 0.12496295268484243 + ], + "rho": 3287.8967604237114, + "rho CI": [ + 698.4419473417928, + 10000.0 + ] + }, + "Directed Edges": { + "Count": 0, + "Reasons for unresolved directions": { + "Missing dates": 7 + } + }, + "Edges": [ + { + "attributes": [ + "BULK" + ], + "directed": false, + "length": 0.045156, + "removed": false, + "sequences": [ + "B_FR_83_HXB2_ACC_K03455_5", + "B_US_83_RF_ACC_M17451" + ], + "source": 2, + "support": 0.0, + "target": 3 + }, + { + "attributes": [ + "BULK" + ], + "directed": false, + "length": 0.0296218, + "removed": false, + "sequences": [ + "B_FR_83_HXB2_ACC_K03455_5", + "B_US_86_JRFL_ACC_U63632" + ], + "source": 2, + "support": 0.0, + "target": 0 + }, + { + "attributes": [ + "BULK" + ], + "directed": false, + "length": 0.0327566, + "removed": false, + "sequences": [ + "B_FR_83_HXB2_ACC_K03455_5", + "B_US_90_WEAU160_ACC_U21135" + ], + "source": 2, + "support": 0.0, + "target": 1 + }, + { + "attributes": [ + "BULK" + ], + "directed": false, + "length": 0.048328, + "removed": false, + "sequences": [ + "B_US_83_RF_ACC_M17451", + "B_US_86_JRFL_ACC_U63632" + ], + "source": 3, + "support": 0.0, + "target": 0 + }, + { + "attributes": [ + "BULK" + ], + "directed": false, + "length": 0.0408994, + "removed": false, + "sequences": [ + "B_US_86_JRFL_ACC_U63632", + "B_US_90_WEAU160_ACC_U21135" + ], + "source": 0, + "support": 0.0, + "target": 1 + }, + { + "attributes": [ + "BULK" + ], + "directed": false, + "length": 0.0287246, + "removed": false, + "sequences": [ + "D_CD_83_ELI_ACC_K03454_7", + "D_CD_83_NDK_ACC_M27323" + ], + "source": 6, + "support": 0.0, + "target": 4 + }, + { + "attributes": [ + "BULK" + ], + "directed": false, + "length": 0.0491974, + "removed": false, + "sequences": [ + "D_CD_83_NDK_ACC_M27323", + "D_CD_84_84ZR085_ACC_U88822" + ], + 
"source": 4, + "support": 0.0, + "target": 5 + } + ], + "HIV Stages": { + "Unknown": 7 + }, + "Multiple sequences": { + "Followup, days": null, + "Subjects with": 0 + }, + "Network Summary": { + "Clusters": 2, + "Edges": 7, + "Nodes": 7, + "Sequences used to make links": 7 + }, + "Nodes": [ + { + "attributes": [], + "baseline": null, + "cluster": 1, + "edi": null, + "id": "B_FR_83_HXB2_ACC_K03455_5" + }, + { + "attributes": [], + "baseline": null, + "cluster": 1, + "edi": null, + "id": "B_US_83_RF_ACC_M17451" + }, + { + "attributes": [], + "baseline": null, + "cluster": 1, + "edi": null, + "id": "B_US_86_JRFL_ACC_U63632" + }, + { + "attributes": [], + "baseline": null, + "cluster": 1, + "edi": null, + "id": "B_US_90_WEAU160_ACC_U21135" + }, + { + "attributes": [], + "baseline": null, + "cluster": 2, + "edi": null, + "id": "D_CD_83_ELI_ACC_K03454_7" + }, + { + "attributes": [], + "baseline": null, + "cluster": 2, + "edi": null, + "id": "D_CD_83_NDK_ACC_M27323" + }, + { + "attributes": [], + "baseline": null, + "cluster": 2, + "edi": null, + "id": "D_CD_84_84ZR085_ACC_U88822" + } + ], + "Settings": { + "contaminants": null, + "edge-filtering": null, + "threshold": 0.8 + } +} |
b |
diff -r 000000000000 -r ef842bb94d1f test-data/hivclustering-out2.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/hivclustering-out2.json Thu Jun 14 07:04:38 2018 -0400 |
[ |
b'@@ -0,0 +1,527 @@\n+{\n+ "Cluster sizes": [\n+ 8\n+ ],\n+ "Degrees": {\n+ "Distribution": [\n+ 0,\n+ 0,\n+ 0,\n+ 0,\n+ 0,\n+ 0,\n+ 8\n+ ],\n+ "Model": "Negative Binomial",\n+ "fitted": null,\n+ "rho": 0.0,\n+ "rho CI": [\n+ 0.0,\n+ 0.0\n+ ]\n+ },\n+ "Directed Edges": {\n+ "Count": 0,\n+ "Reasons for unresolved directions": {\n+ "Missing dates": 28\n+ }\n+ },\n+ "Edges": [\n+ {\n+ "attributes": [\n+ "BULK"\n+ ],\n+ "directed": false,\n+ "length": 0.045156,\n+ "removed": false,\n+ "sequences": [\n+ "B_FR_83_HXB2_ACC_K03455_5",\n+ "B_US_83_RF_ACC_M17451"\n+ ],\n+ "source": 3,\n+ "support": 0.0,\n+ "target": 4\n+ },\n+ {\n+ "attributes": [\n+ "BULK"\n+ ],\n+ "directed": false,\n+ "length": 0.0296218,\n+ "removed": false,\n+ "sequences": [\n+ "B_FR_83_HXB2_ACC_K03455_5",\n+ "B_US_86_JRFL_ACC_U63632"\n+ ],\n+ "source": 3,\n+ "support": 0.0,\n+ "target": 0\n+ },\n+ {\n+ "attributes": [\n+ "BULK"\n+ ],\n+ "directed": false,\n+ "length": 0.0327566,\n+ "removed": false,\n+ "sequences": [\n+ "B_FR_83_HXB2_ACC_K03455_5",\n+ "B_US_90_WEAU160_ACC_U21135"\n+ ],\n+ "source": 3,\n+ "support": 0.0,\n+ "target": 1\n+ },\n+ {\n+ "attributes": [\n+ "BULK"\n+ ],\n+ "directed": false,\n+ "length": 0.0669206,\n+ "removed": false,\n+ "sequences": [\n+ "B_FR_83_HXB2_ACC_K03455_5",\n+ "D_CD_83_ELI_ACC_K03454_7"\n+ ],\n+ "source": 3,\n+ "support": 0.0,\n+ "target": 2\n+ },\n+ {\n+ "attributes": [\n+ "BULK"\n+ ],\n+ "directed": false,\n+ "length": 0.0592586,\n+ "removed": false,\n+ "sequences": [\n+ "B_FR_83_HXB2_ACC_K03455_5",\n+ "D_CD_83_NDK_ACC_M27323"\n+ ],\n+ "source": 3,\n+ "support": 0.0,\n+ "target": 5\n+ },\n+ {\n+ "attributes": [\n+ "BULK"\n+ ],\n+ "directed": false,\n+ "length": 0.0663619,\n+ "removed": false,\n+ "sequences": [\n+ "B_FR_83_HXB2_ACC_K03455_5",\n+ "D_CD_84_84ZR085_ACC_U88822"\n+ ],\n+ "source": 3,\n+ "support": 0.0,\n+ "target": 6\n+ },\n+ {\n+ "attributes": [\n+ "BULK"\n+ ],\n+ "directed": false,\n+ "length": 0.0847988,\n+ "removed": false,\n+ "sequences": [\n+ 
"B_FR_83_HXB2_ACC_K03455_5",\n+ "D_UG_94_94UG114_ACC_U88824"\n+ ],\n+ "source": 3,\n+ "support": 0.0,\n+ "target": 7\n+ },\n+ {\n+ "attributes": [\n+ "BULK"\n+ ],\n+ "directed": false,\n+ "length": 0.048328,\n+ "removed": false,\n+ "sequences": [\n+ "B_US_83_RF_ACC_M17451",\n+ "B_US_86_JRFL_ACC_U63632"\n+ ],\n+ "source": 4,\n+ "support": 0.0,\n+ "target": 0\n+ },\n+ {\n+ "attributes": [\n+ "BULK"\n+ ],\n+ "directed": false,\n+ "length": 0.0515908,\n+ "removed": false,\n+ "sequences": [\n+ "B_US_'..b'\n+ "support": 0.0,\n+ "target": 5\n+ },\n+ {\n+ "attributes": [\n+ "BULK"\n+ ],\n+ "directed": false,\n+ "length": 0.055948,\n+ "removed": false,\n+ "sequences": [\n+ "D_CD_83_ELI_ACC_K03454_7",\n+ "D_CD_84_84ZR085_ACC_U88822"\n+ ],\n+ "source": 2,\n+ "support": 0.0,\n+ "target": 6\n+ },\n+ {\n+ "attributes": [\n+ "BULK"\n+ ],\n+ "directed": false,\n+ "length": 0.0742033,\n+ "removed": false,\n+ "sequences": [\n+ "D_CD_83_ELI_ACC_K03454_7",\n+ "D_UG_94_94UG114_ACC_U88824"\n+ ],\n+ "source": 2,\n+ "support": 0.0,\n+ "target": 7\n+ },\n+ {\n+ "attributes": [\n+ "BULK"\n+ ],\n+ "directed": false,\n+ "length": 0.0491974,\n+ "removed": false,\n+ "sequences": [\n+ "D_CD_83_NDK_ACC_M27323",\n+ "D_CD_84_84ZR085_ACC_U88822"\n+ ],\n+ "source": 5,\n+ "support": 0.0,\n+ "target": 6\n+ },\n+ {\n+ "attributes": [\n+ "BULK"\n+ ],\n+ "directed": false,\n+ "length": 0.0726626,\n+ "removed": false,\n+ "sequences": [\n+ "D_CD_83_NDK_ACC_M27323",\n+ "D_UG_94_94UG114_ACC_U88824"\n+ ],\n+ "source": 5,\n+ "support": 0.0,\n+ "target": 7\n+ },\n+ {\n+ "attributes": [\n+ "BULK"\n+ ],\n+ "directed": false,\n+ "length": 0.0805088,\n+ "removed": false,\n+ "sequences": [\n+ "D_CD_84_84ZR085_ACC_U88822",\n+ "D_UG_94_94UG114_ACC_U88824"\n+ ],\n+ "source": 6,\n+ "support": 0.0,\n+ "target": 7\n+ }\n+ ],\n+ "HIV Stages": {\n+ "Unknown": 8\n+ },\n+ "Multiple sequences": {\n+ "Followup, days": null,\n+ "Subjects with": 0\n+ },\n+ "Network Summary": {\n+ "Clusters": 1,\n+ "Edges": 28,\n+ "Nodes": 8,\n+ 
"Sequences used to make links": 8\n+ },\n+ "Nodes": [\n+ {\n+ "attributes": [],\n+ "baseline": null,\n+ "cluster": 1,\n+ "edi": null,\n+ "id": "B_FR_83_HXB2_ACC_K03455_5"\n+ },\n+ {\n+ "attributes": [],\n+ "baseline": null,\n+ "cluster": 1,\n+ "edi": null,\n+ "id": "B_US_83_RF_ACC_M17451"\n+ },\n+ {\n+ "attributes": [],\n+ "baseline": null,\n+ "cluster": 1,\n+ "edi": null,\n+ "id": "B_US_86_JRFL_ACC_U63632"\n+ },\n+ {\n+ "attributes": [],\n+ "baseline": null,\n+ "cluster": 1,\n+ "edi": null,\n+ "id": "B_US_90_WEAU160_ACC_U21135"\n+ },\n+ {\n+ "attributes": [],\n+ "baseline": null,\n+ "cluster": 1,\n+ "edi": null,\n+ "id": "D_CD_83_ELI_ACC_K03454_7"\n+ },\n+ {\n+ "attributes": [],\n+ "baseline": null,\n+ "cluster": 1,\n+ "edi": null,\n+ "id": "D_CD_83_NDK_ACC_M27323"\n+ },\n+ {\n+ "attributes": [],\n+ "baseline": null,\n+ "cluster": 1,\n+ "edi": null,\n+ "id": "D_CD_84_84ZR085_ACC_U88822"\n+ },\n+ {\n+ "attributes": [],\n+ "baseline": null,\n+ "cluster": 1,\n+ "edi": null,\n+ "id": "D_UG_94_94UG114_ACC_U88824"\n+ }\n+ ],\n+ "Settings": {\n+ "contaminants": null,\n+ "edge-filtering": null,\n+ "threshold": 0.8\n+ }\n+}\n' |