Mercurial > repos > rnateam > graphclust_postprocessing_no_align
comparison glob_report_no_align.xml @ 0:0a48b2db75e7 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/GraphClust/CollectResultsNoAlign commit 2a6fd70c1bcec36ffdf0bba2ec82489b39cfc84e
| author | rnateam |
|---|---|
| date | Sat, 27 Oct 2018 13:49:00 -0400 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0a48b2db75e7 |
|---|---|
| 1 <tool id="graphclust_glob_report_no_align" name="Graphclust glob_report collect clusters" version="0.5" > | |
| 2 <requirements> <!-- Pinned package dependencies declared for this tool --> | |
| 3 <requirement type="package" version="0.6.0">graphclust-wrappers</requirement> | |
| 4 <requirement type="package" version="0.5">perl-array-utils</requirement> | |
| 5 <requirement type="package" version="0.18.1">scikit-learn</requirement> | |
| 6 <requirement type="package" version="1.8.10">locarna</requirement> | |
| 7 <requirement type="package" version="2.1">rnaz</requirement> | |
| 8 <requirement type="package" version="1.1.2">infernal</requirement> | |
| 9 <requirement type="package" version="2.2.10">viennarna</requirement> | |
| 10 <requirement type="package" version="1.3.30">graphicsmagick</requirement> | |
| 11 <requirement type="package" version="0.6.1">rscape</requirement> | |
| 12 <requirement type="package" version="6.0">unzip</requirement> | |
| 13 </requirements> | |
| 14 <command detect_errors="exit_code"> | |
| 15 <![CDATA[ | |
| 16 unzip '$FASTA' > /dev/null 2>&1 && | |
| 17 mkdir ./CMSEARCH && | |
| 18 mkdir ./MODEL && | |
| 19 #import re | |
| 20 #for $cms_res in $cmsearch_results: | |
| 21 #set $safename_cm = re.sub('[^\w\-_\.]', '_', $cms_res.element_identifier) | |
| 22 ln -f -s '$cms_res' ./CMSEARCH/$safename_cm && ## link each cmsearch result under a sanitized file name | |
| 23 #end for | |
| 24 #for $mods in $model_tree_files: | |
| 25 #set $safename_tr = re.sub('[^\w\-_\.]', '_', $mods.element_identifier) | |
| 26 ln -f -s '$mods' ./MODEL/$safename_tr && ## link each model tree file under a sanitized file name | |
| 27 #end for | |
| 28 | |
| 29 'glob_res.pl' | |
| 30 $merge_cluster_ol | |
| 31 $merge_overlap | |
| 32 $min_cluster_size | |
| 33 $cm_min_bitscore | |
| 34 $cm_max_eval | |
| 35 1 ## cm_bitscore_sig | |
| 36 $partition_type '' | |
| 37 $cut_type | |
| 38 0 ## zero means do not align | |
| 39 #if $iteration_num.iteration_num_selector: | |
| 40 $iteration_num.CI | |
| 41 $final_partition_soft | |
| 42 $final_partition_used_cmsearch | |
| 43 '$combined_cm' | |
| 44 | |
| 45 #end if | |
| 46 | |
| 47 && | |
| 48 python '$__tool_directory__/evaluation.py' FASTA/ RESULTS/ | |
| 49 | |
| 50 #if $cdhit: | |
| 51 && | |
| 52 python '$__tool_directory__/addCdhitseqs.py' '$cdhit' | |
| 53 #end if | |
| 54 ]]> | |
| 55 </command> | |
| 56 <inputs> <!-- Datasets and thresholds; the command template passes them to glob_res.pl as positional arguments --> | |
| 57 <param type="data" name="FASTA" format="zip" help="FASTA.zip from pre-processing step"/> | |
| 58 <param type="data" name="cmsearch_results" format="tabular" multiple="True" | |
| 59 help="Tabular cmsearch results of the candidate clusters from the cmsearch step"/> | |
| 60 <param type="data" name="model_tree_files" format="txt" multiple="True" label="model-tree-stk" | |
| 61 help="model.tree.stk files from pgma_graphclust candidate clustering step"/> | |
| 62 <param name="partition_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Hard partition" | |
| 63 help="Whether to do hard partitioning (no overlap) or soft mode (cluster elements may overlap)"/> | |
| 64 <param name="cut_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Use CM score for cutoff" help="otherwise use E-value"/> | |
| 65 <param name="cm_min_bitscore" type="integer" value="20" size="5" label="cm_min_bitscore" help=""/> | |
| 66 <param name="cm_max_eval" type="float" value="0.001" size="5" label="cm_max_E-val" help=""/> | |
| 67 <param type="data" name="cdhit" format="txt" optional="true" label="CD-HIT output" | |
| 68 help="Optional CD-HIT pre-clustering output to be combined into the final clustering output"/> | |
| 69 <param name="merge_cluster_ol" type="float" value="0.66" size="5" label="merge_cluster_overlap" | |
| 70 help="Overlapping ratio criteria to merge overlapping clusters or keep separate clusters (soft partitioning)"/> | |
| 71 <param name="merge_overlap" type="float" value="0.51" size="5" label="merge_fraction_overlap" | |
| 72 help="Overlapping ratio criteria to merge overlapping sequence fractions from same input sequence"/> | |
| 73 <param name="min_cluster_size" type="integer" value="3" size="5" label="minimum cluster size" | |
| 74 help="Minimum number of elements that can form a cluster. Higher values discard small clusters and may produce larger merged clusters"/> | |
| 75 <!-- <param name="cm_bitscore_sig" type="integer" value="1" size="5" label="cm_bitscore_sig" help=""/> --> | |
| 76 | |
| 77 <conditional name="iteration_num"> <!-- The extra inputs below are only requested, and only passed to glob_res.pl, when the selector is true --> | |
| 78 <param name="iteration_num_selector" type="boolean" checked="false" label="Multiple iterations" help="for single iteration- NO, for multiple-YES"/> | |
| 79 <when value="true"> | |
| 80 <param name="CI" type="integer" value="2" size="5" label="Number of current iteration "/> | |
| 81 <param type="data" name="final_partition_soft" format="txt" /> | |
| 82 <param type="data" name="final_partition_used_cmsearch" format="txt" /> | |
| 83 <param type="data" name="combined_cm" format="txt" /> | |
| 84 </when> | |
| 85 <when value="false" ></when> | |
| 86 </conditional> | |
| 87 | |
| 88 </inputs> | |
| 89 <outputs> <!-- Fixed result files are read from the RESULTS working directory; collections are discovered by file-name pattern --> | |
| 90 <data name="final_stats" format="txt" from_work_dir="RESULTS/cluster.final.stats" label="cluster.final.stats" /> | |
| 91 <data name="tableForEval" format="tabular" from_work_dir="RESULTS/fullTab.tabular" label="tableForEval" /> | |
| 92 <data name="final_soft" format="txt" from_work_dir="RESULTS/partitions/final_partition.soft" label="soft_part" /> | |
| 93 <data name="final_used_cmsearch" format="txt" from_work_dir="RESULTS/partitions/final_partition.used_cmsearch" label="final_partition_used_cmsearch" /> | |
| 94 <data name="evaluation" format="txt" from_work_dir="RESULTS/evaluation.txt" label="evaluation_of_clusters" /> | |
| 95 <data name="combined_cm_out" format="txt" from_work_dir="combined_cm_out" label="combined_cmsearch_output" /> | |
| 96 <collection name="clusters" type="list" label="CLUSTERS-cmsearch"> | |
| 97 <discover_datasets format="txt" pattern="(?P&lt;name&gt;^.*\.all$)" directory="RESULTS" /> | |
| 98 </collection> | |
| 99 <collection name="allFastaSorted" type="list" label="cluster-sequences-sorted"> | |
| 100 <discover_datasets format="fasta" pattern="(?P&lt;name&gt;^.*\.sorted\.fa$)" directory="RESULTS" /> | |
| 101 </collection> | |
| 102 | |
| 103 <collection name="partitions" type="list" label="Partitions"> | |
| 104 <discover_datasets pattern="(?P&lt;name&gt;^.*$)" directory="RESULTS/partitions" /> | |
| 105 </collection> | |
| 106 <data name="RESULTS_zip" format="zip" from_work_dir="RESULTS.zip" label="RESULTS.zip" /> | |
| 107 </outputs> | |
| 108 <tests> <!-- Single-iteration run with the default thresholds, checked against the stored expected files --> | |
| 109 <test> | |
| 110 <param name="FASTA" value="FASTA.zip" ftype="zip"/> | |
| 111 <param name="cmsearch_results" value="1.1.tree,1.2.tree"/> | |
| 112 <param name="model_tree_files" value="1.1.model.tree.fa,1.2.model.tree.fa"/> | |
| 113 <param name="partition_type" value="0"/> | |
| 114 <param name="cut_type" value="0"/> | |
| 115 <conditional name="iteration_num"> | |
| 116 <param name="iteration_num_selector" value="false"/> | |
| 117 </conditional> | |
| 118 <param name="merge_cluster_ol" value="0.66"/> | |
| 119 <param name="merge_overlap" value="0.51"/> | |
| 120 <param name="min_cluster_size" value="3"/> | |
| 121 <param name="cm_min_bitscore" value="20"/> | |
| 122 <param name="cm_max_eval" value="0.001"/> | |
| 123 <!-- <param name="cm_bitscore_sig" value="0"/> --> | |
| 124 <output name="final_stats" file="RESULTS/cluster.final.stats" /> | |
| 125 <output name="combined_cm_out" file="combined_cm_out"/> | |
| 126 <output name="evaluation" file="evaluation1.txt"/> | |
| 127 <output_collection name="clusters" type="list"> | |
| 128 <element name="1.cluster.all" file="RESULTS/1.cluster.all" compare="contains"/> | |
| 129 <element name="2.cluster.all" file="RESULTS/2.cluster.all" compare="contains"/> | |
| 130 | |
| 131 </output_collection> | |
| 132 <output_collection name="partitions"> | |
| 133 <element name="final_overlap.map" file="RESULTS/partitions/final_overlap.map" compare="contains"> | |
| 134 <assert_contents> | |
| 135 <has_text text="1.1 1.1 " /> | |
| 136 <has_text text="1.2 1.2" /> | |
| 137 </assert_contents> | |
| 138 </element> | |
| 139 <element name="final_overlap.matrix" file="RESULTS/partitions/final_overlap.matrix" compare="contains"> | |
| 140 <assert_contents> | |
| 141 <has_text text="MODEL CLASS 0 0" /> | |
| 142 <!--has_text text="1.2" /> | |
| 143 <has_text text="1.1" /--> | |
| 144 </assert_contents> | |
| 145 </element> | |
| 146 <element name="final_partition.hard.best" file="RESULTS/partitions/final_partition.hard.best" /> | |
| 147 <element name="final_partition.hard.merged" file="RESULTS/partitions/final_partition.hard.merged" /> | |
| 148 <element name="final_partition.soft" file="RESULTS/partitions/final_partition.soft" /> | |
| 149 <element name="final_partition.used_cmsearch" file="RESULTS/partitions/final_partition.used_cmsearch" compare="contains"/> | |
| 150 </output_collection> | |
| 151 | |
| 152 <output name="RESULTS_zip" file="RESULTS.zip" ftype="zip" compare="sim_size" delta="20000"/> | |
| 153 | |
| 154 </test> | |
| 155 </tests> | |
| 156 <help> | |
| 157 <![CDATA[ | |
| 158 | |
| 159 **What it does** | |
| 160 | |
| 161 Post-processing. Redundant clusters are merged and instances that belong to multiple clusters | |
| 162 are assigned unambiguously. For every pair of clusters, the relative overlap (i.e. the fraction of | |
| 163 instances that occur in both clusters) is computed and clusters are merged if the overlap exceeds 50%. | |
| 164 Cluster members are finally ranked by their CM bitscore. | |
| 165 | |
| 166 ]]> | |
| 167 </help> <!-- NOTE(review): the help text states a 50% merge threshold, while the merge_cluster_ol input defaults to 0.66 and merge_overlap to 0.51; confirm which value the description should quote --> | |
| 168 <citations> <!-- NOTE(review): single DOI citation on Zenodo; presumably the archived GraphClust tool suite, confirm it resolves to the intended record --> | |
| 169 <citation type="doi">10.5281/zenodo.597695</citation> | |
| 170 </citations> | |
| 171 </tool> |
