Mercurial > repos > vijay > pancancer_classifier
comparison pancancer_classifier.xml @ 0:20b049f5b3f5 draft default tip
"planemo upload for repository http://github.com/nvk747/papaa/galaxy/ commit 954b283ef7f82f59f55476a4b3a230d655187ac1"
author | vijay |
---|---|
date | Wed, 16 Dec 2020 23:32:43 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:20b049f5b3f5 |
---|---|
1 <tool id="pancancer_classifier" name="PAPAA: PanCancer classifier" version="@VERSION@" python_template_version="3.6"> | |
2 <description>classifier for pathway aberrant activity</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements"/> | |
7 <expand macro="stdio"/> | |
8 <version_command> | |
9 <![CDATA['papaa_pancancer_classifier.py' --version]]></version_command> | |
10 <command detect_errors="exit_code"><![CDATA[ | |
11 papaa_pancancer_classifier.py | |
12 #if str($folds) not in ('', 'None'): | |
13 --folds '$folds' | |
14 #end if | |
15 #if str($seed) not in ('', 'None'): | |
16 --seed '$seed' | |
17 #end if | |
18 $drop | |
19 #if str($filter_count) not in ('', 'None'): | |
20 --filter_count '$filter_count' | |
21 #end if | |
22 #if str($filter_prop) not in ('', 'None'): | |
23 --filter_prop '$filter_prop' | |
24 #end if | |
25 #if str($num_features) not in ('', 'None'): | |
26 --num_features '$num_features' | |
27 #end if | |
28 @INPUTS_ALPHAS@ | |
29 @INPUTS_L1_RATIOS@ | |
30 #if $alt_genes and str($alt_genes) != '': | |
31 --alt_genes '$alt_genes' | |
32 #end if | |
33 #if $alt_diseases and str($alt_diseases) != '': | |
34 --alt_diseases '$alt_diseases' | |
35 #end if | |
36 #if str($alt_filter_count) not in ('', 'None'): | |
37 --alt_filter_count '$alt_filter_count' | |
38 #end if | |
39 #if str($alt_filter_prop) not in ('', 'None'): | |
40 --alt_filter_prop '$alt_filter_prop' | |
41 #end if | |
42 --classifier_results 'classifier' | |
43 @INPUT_REMOVE_HYPER@ | |
44 $keep_intermediate | |
45 @INPUTS_BASIC@ | |
46 @INPUTS_GENES_DISEASES@ | |
47 ## NOTE: $drop is emitted once (above); optional numeric options are tested on their string value so an explicit 0 is still passed to the script | |
48 $shuffled | |
49 $shuffled_before_training | |
50 $no_mutation | |
51 #if $drop_x_genes and str($drop_x_genes) != '': | |
52 --drop_x_genes '$drop_x_genes' | |
53 #end if | |
54 $drop_expression | |
55 $drop_covariates | |
56 @INPUTS_COPY_NUMBER_CLASS_FILE_CONDITIONAL@ | |
57 > '${log}' | |
58 ]]> | |
59 </command> | |
60 <inputs> | |
61 <expand macro="inputs_basic" /> | |
62 <expand macro="inputs_genes_diseases" /> | |
63 <param argument="--seed" label="option to set seed" name="seed" optional="true" type="integer" value="1234"/> | |
64 <param argument="--folds" label="Number of cross validation folds to perform" name="folds" optional="true" type="integer" value="5"/> | |
65 <param argument="--drop" checked="false" label="Decision to drop input genes from X matrix" name="drop" type="boolean" truevalue="--drop" falsevalue=""/> | |
66 <expand macro="inputs_copy_number_class_file_conditional" /> | |
67 <param argument="--filter_count" label="Min number of mutations in diseases to include" name="filter_count" optional="true" type="integer" value="15"/> | |
68 <param argument="--filter_prop" label="Min proportion of positives to include disease" name="filter_prop" optional="true" type="float" value="0.05"/> | |
69 <param argument="--num_features" label="Number of MAD genes to include in classifier" name="num_features" optional="true" type="integer" value="8000"/> | |
70 <expand macro="input_alphas" /> | |
71 <expand macro="input_l1_ratios" /> | |
72 <param argument="--alt_genes" label="alternative genes to test performance" name="alt_genes" optional="true" type="text" /> | |
73 <param argument="--alt_diseases" label="The alternative diseases to test performance" name="alt_diseases" optional="true" type="text" /> | |
74 <param argument="--alt_filter_count" label="Min number of mutations in disease to include in alternate" name="alt_filter_count" optional="true" type="integer" value="15"/> | |
75 <param argument="--alt_filter_prop" label="Min proportion of positives to include disease in alternate" name="alt_filter_prop" optional="true" type="float" value="0.05"/> | |
76 <expand macro="input_remove_hyper" /> | |
77 <param argument="--keep_intermediate" checked="false" label="Keep intermediate ROC values for plotting" name="keep_intermediate" type="boolean" truevalue="--keep_intermediate" falsevalue=""/> | |
78 <param argument="--shuffled" checked="false" label="Shuffle the input gene exprs matrix alongside" name="shuffled" type="boolean" truevalue="--shuffled" falsevalue=""/> | |
79 <param argument="--shuffled_before_training" checked="false" label="Shuffle the gene exprs matrix before training" name="shuffled_before_training" type="boolean" truevalue="--shuffled_before_training" falsevalue=""/> | |
80 <param argument="--no_mutation" checked="false" label="Remove mutation data from y matrix" name="no_mutation" type="boolean" truevalue="--no_mutation" falsevalue=""/> | |
81 <param argument="--drop_x_genes" label="Comma separated list of genes to be dropped from X matrix" name="drop_x_genes" optional="true" type="text" value=""/> | |
82 <param argument="--drop_expression" checked="false" label="Decision to drop gene expression values from X" name="drop_expression" type="boolean" truevalue="--drop_expression" falsevalue=""/> | |
83 <param argument="--drop_covariates" checked="false" label="Decision to drop covariate information from X" name="drop_covariates" type="boolean" truevalue="--drop_covariates" falsevalue=""/> | |
84 </inputs> | |
85 <outputs> | |
86 <data format="txt" name="log" label="${tool.name} on ${on_string} (Log)" /> | |
87 <data format="tabular" name="alt_gene_alt_disease_summary" label="${tool.name} on ${on_string} (alt_gene_alt_disease_summary.tsv)" from_work_dir="classifier/alt_gene_alt_disease_summary.tsv"/> | |
88 <data format="csv" name="alt_summary_counts" label="${tool.name} on ${on_string} (alt_summary_counts.csv)" from_work_dir="classifier/alt_summary_counts.csv"/> | |
89 <data format="tabular" name="classifier_coefficients" label="${tool.name} on ${on_string} (classifier_coefficients.tsv)" from_work_dir="classifier/classifier_coefficients.tsv"/> | |
90 <data format="txt" name="classifier_summary" label="${tool.name} on ${on_string} (classifier_summary.txt)" from_work_dir="classifier/classifier_summary.txt"/> | |
91 <data format="tabular" name="pancan_roc_results" label="${tool.name} on ${on_string} (pancan_roc_results.tsv)" from_work_dir="classifier/pancan_roc_results.tsv"/> | |
92 <data format="csv" name="summary_counts" label="${tool.name} on ${on_string} (summary_counts.csv)" from_work_dir="classifier/summary_counts.csv"/> | |
93 <data format="pdf" name="cv_heatmap" label="${tool.name} on ${on_string} (cv_heatmap.pdf)" from_work_dir="classifier/cv_heatmap.pdf"/> | |
94 <collection name="disease_figures" type="list:list" label="Disease classifier figures"> | |
95 <discover_datasets pattern="classifier_(?P<identifier_1>.+)__pred_(?P<identifier_0>.+)\.pdf" format="pdf" directory="classifier/disease" visible="false" /> | |
96 </collection> | |
97 <data format="pdf" name="all_disease_pr" label="${tool.name} on ${on_string} (all_disease_pr.pdf)" from_work_dir="classifier/all_disease_pr.pdf"/> | |
98 <data format="pdf" name="all_disease_roc" label="${tool.name} on ${on_string} (all_disease_roc.pdf)" from_work_dir="classifier/all_disease_roc.pdf"/> | |
99 <data format="pdf" name="alt_gene_alt_disease_aupr_bar" label="${tool.name} on ${on_string} (alt_gene_alt_disease_aupr_bar.pdf)" from_work_dir="classifier/alt_gene_alt_disease_aupr_bar.pdf"/> | |
100 <data format="pdf" name="alt_gene_alt_disease_auroc_bar" label="${tool.name} on ${on_string} (alt_gene_alt_disease_auroc_bar.pdf)" from_work_dir="classifier/alt_gene_alt_disease_auroc_bar.pdf"/> | |
101 <data format="pdf" name="disease_aupr" label="${tool.name} on ${on_string} (disease_aupr.pdf)" from_work_dir="classifier/disease_aupr.pdf"/> | |
102 <data format="pdf" name="disease_auroc" label="${tool.name} on ${on_string} (disease_auroc.pdf)" from_work_dir="classifier/disease_auroc.pdf"/> | |
103 </outputs> | |
104 <tests> | |
105 <test> | |
106 <param name="genes" value="ERBB2,PIK3CA,KRAS,AKT1"/> | |
107 <param name="diseases" value="GBM"/> | |
108 <param name="x_matrix" value="pancan_rnaseq_freeze_t1p.tsv.gz" ftype="tabular"/> | |
109 <param name="filename_mut" value="pancan_mutation_freeze_t1p.tsv.gz" ftype="tabular"/> | |
110 <param name="filename_mut_burden" value="mutation_burden_freeze.tsv" ftype="tabular"/> | |
111 <param name="filename_sample" value="sample_freeze.tsv" ftype="tabular"/> | |
112 <param name="seed" value="1234"/> | |
113 <param name="folds" value="5"/> | |
114 <param name="drop" value="true"/> | |
115 <param name="copy_number" value="true"/> | |
116 <param name="filename_copy_loss" value="copy_number_loss_status_t10p.tsv.gz" ftype="tabular"/> | |
117 <param name="filename_copy_gain" value="copy_number_gain_status_t10p.tsv.gz" ftype="tabular"/> | |
118 <param name="filename_cancer_gene_classification" value="cosmic_cancer_classification.tsv" ftype="tabular"/> | |
119 <param name="filter_count" value="15"/> | |
120 <param name="filter_prop" value="0.05"/> | |
121 <param name="num_features" value="8000"/> | |
122 <param name="alphas" value="0.1,0.13,0.15,0.18,0.2,0.3,0.4,0.6,0.7"/> | |
123 <param name="l1_ratios" value="0.1,0.125,0.15,0.2,0.25,0.3,0.35"/> | |
124 <param name="alt_genes" value="PTEN,PIK3R1,STK11"/> | |
125 <param name="alt_diseases" value="GBM"/> | |
126 <param name="alt_filter_count" value="15"/> | |
127 <param name="alt_filter_prop" value="0.05"/> | |
128 <param name="remove_hyper" value="true"/> | |
129 <param name="keep_intermediate" value="true"/> | |
130 <param name="shuffled" value="true"/> | |
131 <param name="shuffled_before_training" value="false"/> | |
132 <param name="no_mutation" value="false"/> | |
133 <param name="drop_expression" value="false"/> | |
134 <param name="drop_covariates" value="false"/> | |
135 <output name="log" file="Log.txt"/> | |
136 <output name="alt_gene_alt_disease_summary" file="alt_gene_alt_disease_summary.tsv"/> | |
137 <output name="alt_summary_counts" file="alt_summary_counts.csv"/> | |
138 <output name="classifier_coefficients" file="classifier_coefficients.tsv"/> | |
139 <output name="classifier_summary" file="classifier_summary.txt"/> | |
140 <output name="pancan_roc_results"> | |
141 <assert_contents> | |
142 <has_line line="	fpr	tpr	threshold	train_type	disease" /> | |
143 <has_n_columns n="6" /> | |
144 <has_n_lines n="253" /> | |
145 </assert_contents> | |
146 </output> | |
147 <output name="summary_counts" file="summary_counts.csv"/> | |
148 <output name="cv_heatmap" file="cv_heatmap.pdf" compare="sim_size" delta="50"/> | |
149 <output name="all_disease_pr" file="all_disease_pr.pdf" compare="sim_size" delta="50" /> | |
150 <output name="all_disease_roc" file="all_disease_roc.pdf" compare="sim_size" delta="50"/> | |
151 <output name="alt_gene_alt_disease_aupr_bar" file="alt_gene_alt_disease_aupr_bar.pdf" compare="sim_size" delta="50"/> | |
152 <output name="alt_gene_alt_disease_auroc_bar" file="alt_gene_alt_disease_auroc_bar.pdf" compare="sim_size" delta="50"/> | |
153 <output name="disease_aupr" file="disease_aupr.pdf" compare="sim_size" delta="50"/> | |
154 <output name="disease_auroc" file="disease_auroc.pdf" compare="sim_size" delta="50"/> | |
155 </test> | |
156 </tests> | |
157 <help><![CDATA[ | |
158 | |
159 **Pancancer_Aberrant_Pathway_Activity_Analysis scripts/papaa_pancancer_classifier.py:** | |
160 | |
161 **Inputs:** | |
162 --genes comma separated string of HUGO symbols for target genes or targenes_list.csv file | |
163 --diseases comma separated string of disease types/TCGA acronyms for classifier | |
164 default: Auto (will pick diseases from filter args) | |
165 --folds number of cross validation folds | |
166 default: 5 | |
167 --seed value specifies the initial value of the random number seed | |
168 default: 1234 | |
169 --drop drop the input genes from the X matrix | |
170 default: False if flag omitted | |
171 --copy_number optional flag to supplement copy number to define Y | |
172 default: False if flag omitted | |
173 --filter_count int of low count of mutation to include disease | |
174 default: 15 | |
175 --filter_prop float of low proportion of mutated samples per disease | |
176 default: 0.05 | |
177 --num_features int of number of genes to include in classifier | |
178 default: 8000 | |
179 --alphas comma separated string of alphas to test in pipeline default: '0.1,0.15,0.2,0.5,0.8,1' | |
180 --l1_ratios comma separated string of l1 parameters to test | |
181 default: '0,0.1,0.15,0.18,0.2,0.3' | |
182 --alt_genes comma separated string of alternative genes to test | |
183 default: None | |
184 --alt_diseases comma separated string of alternative diseases to test | |
185 default: Auto | |
186 --alt_filter_count int of low count of mutations to include alt_diseases | |
187 default: 15 | |
188 --alt_filter_prop float of low proportion of mutated samples alt_disease | |
189 default: 0.05 | |
190 --classifier_results string of the location to save the classifier results/figures | |
191 default: Auto | |
192 --remove_hyper store_true: remove hypermutated samples | |
193 default: False if flag omitted | |
194 --keep_intermediate store_true: keep intermediate roc curve items | |
195 default: False if flag omitted | |
196 --x_matrix string of which feature matrix to use | |
197 default: raw | |
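198 --shuffled, --shuffled_before_training store_true: shuffle the gene expression matrix (alongside / before training); default: False if flag omitted | |
199 --no_mutation store_true: remove mutation data from the y matrix; default: False if flag omitted | |
200 --drop_x_genes comma separated list of genes to be dropped from the X matrix; default: None | |
201 --drop_expression, --drop_covariates store_true: drop gene expression values / covariate information from X; default: False if flag omitted | |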
202 | |
203 **Outputs:** | |
204 ROC curves, AUROC across diseases, and classifier coefficients ]]> | |
205 </help> | |
206 <expand macro="citations" /> | |
207 </tool> |