Mercurial > repos > vijay > pancancer_within_disease_analysis
view within_disease_analysis.xml @ 0:99d944bfe9b3 draft default tip
"planemo upload for repository http://github.com/nvk747/papaa/galaxy/ commit 954b283ef7f82f59f55476a4b3a230d655187ac1"
author | vijay |
---|---|
date | Wed, 16 Dec 2020 23:31:13 +0000 |
parents | |
children |
line wrap: on
line source
<tool id="pancancer_within_disease_analysis" name="PAPAA: PanCancer within disease analysis" version="@VERSION@">
    <!--
        Galaxy wrapper around papaa_within_disease_analysis.py.
        The script writes its results below the 'classifier' directory; a second
        script (papaa_flatten_classifier_directory.py) is then run on
        'classifier/within_disease' with 'classifier_within' as its output, and
        every output collection is discovered from below 'classifier_within'.
        Shared requirements, stdio handling, command-line tokens and input
        parameter groups come from macros.xml.
    -->
    <description>within disease analysis</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <version_command><![CDATA['papaa_within_disease_analysis.py' --version]]></version_command>
    <command><![CDATA[
        papaa_within_disease_analysis.py
        @INPUTS_ALPHAS@
        @INPUTS_L1_RATIOS@
        @INPUT_REMOVE_HYPER@
        @INPUTS_BASIC@
        @INPUTS_GENES_DISEASES@
        @INPUTS_COPY_NUMBER_CLASS_FILE_PREFIX@
        @INPUTS_COPY_NUMBER_CLASS_FILE@
        --classifier_results 'classifier'
        ## Test the rendered value instead of its truthiness so that an
        ## explicit 0 is still forwarded to the script; only an empty
        ## (unset) optional value skips the option.
        #if str($seed) != '':
            --seed '$seed'
        #end if
        #if str($num_features) != '':
            --num_features '$num_features'
        #end if
        > '${log}'
        && papaa_flatten_classifier_directory.py
            -i 'classifier/within_disease'
            -o 'classifier_within'
    ]]></command>
    <inputs>
        <expand macro="inputs_basic"/>
        <expand macro="inputs_genes_diseases"/>
        <expand macro="inputs_copy_number_class_file"/>
        <expand macro="input_alphas"/>
        <expand macro="input_l1_ratios"/>
        <expand macro="input_remove_hyper"/>
        <param argument="--seed" label="option to set seed" name="seed" optional="true" type="integer" value="1234"/>
        <param argument="--num_features" label="Number of MAD genes to include in classifier" name="num_features" optional="true" type="integer" value="8000"/>
    </inputs>
    <outputs>
        <!-- Standard output of the analysis script (redirected in the command). -->
        <data format="txt" name="log" label="${tool.name} on ${on_string} (Log)"/>
        <!--
            Flat lists: the element identifier is the file name prefix in front
            of the fixed suffix. The named groups must be written with &lt; and
            &gt; because a literal less-than sign is not allowed in an XML
            attribute value.
        -->
        <collection name="classifier_summary" type="list" label="classifier_summary.txt">
            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_classifier_summary\.txt" format="txt" directory="classifier_within/within_disease" visible="false"/>
        </collection>
        <collection name="classifier_coefficients" type="list" label="classifier_coefficients.tsv">
            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_classifier_coefficients\.tsv" format="tabular" directory="classifier_within/within_disease" visible="false"/>
        </collection>
        <collection name="pancan_roc_results" type="list" label="pancan_roc_results.tsv">
            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_pancan_roc_results\.tsv" format="tabular" directory="classifier_within/within_disease" visible="false"/>
        </collection>
        <collection name="summary_counts" type="list" label="summary_counts.csv">
            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_summary_counts\.csv" format="csv" directory="classifier_within/within_disease" visible="false"/>
        </collection>
        <!--
            Nested lists: the outer identifier is the text in front of the first
            underscore of the PDF name, the inner identifier is the remainder.
        -->
        <collection name="within_disease" type="list:list" label="Within disease figures">
            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;.+)\.pdf" format="pdf" directory="classifier_within/within_disease" visible="false"/>
        </collection>
        <!--
            Nested lists built from names of the form
            classifier_<inner>__pred_<outer>.pdf.
        -->
        <collection name="disease_figures" type="list:list" label="Disease classifier figures">
            <discover_datasets pattern="classifier_(?P&lt;identifier_1&gt;.+)__pred_(?P&lt;identifier_0&gt;.+)\.pdf" format="pdf" directory="classifier_within/disease" visible="false"/>
        </collection>
    </outputs>
    <tests>
        <test>
            <param name="genes" value="ERBB2,PIK3CA,KRAS,AKT1"/>
            <param name="diseases" value="GBM"/>
            <param name="x_matrix" value="pancan_rnaseq_freeze_t1p.tsv.gz" ftype="tabular"/>
            <param name="filename_mut" value="pancan_mutation_freeze_t1p.tsv.gz" ftype="tabular"/>
            <param name="filename_mut_burden" value="mutation_burden_freeze.tsv" ftype="tabular"/>
            <param name="filename_sample" value="sample_freeze.tsv" ftype="tabular"/>
            <param name="filename_copy_loss" value="copy_number_loss_status_t10p.tsv.gz" ftype="tabular"/>
            <param name="filename_copy_gain" value="copy_number_gain_status_t10p.tsv.gz" ftype="tabular"/>
            <param name="filename_cancer_gene_classification" value="cosmic_cancer_classification.tsv" ftype="tabular"/>
            <param name="alphas" value="0.1,0.13,0.15,0.18,0.2,0.3,0.4,0.6,0.7"/>
            <param name="l1_ratios" value="0.1,0.125,0.15,0.2,0.25,0.3,0.35"/>
            <param name="seed" value="1234"/>
            <param name="num_features" value="8000"/>
            <param name="remove_hyper" value="true"/>
            <output name="log">
                <assert_contents>
                    <has_line line="['ERBB2', 'PIK3CA', 'KRAS', 'AKT1']"/>
                    <has_line line="GBM"/>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
            <output_collection name="classifier_coefficients" type="list">
                <element name="GBM" file="GBM_108_coef.tabular" ftype="tabular"/>
            </output_collection>
            <output_collection name="classifier_summary" type="list">
                <element name="GBM" file="GBM_111_summary.txt" ftype="txt"/>
            </output_collection>
            <output_collection name="pancan_roc_results" type="list">
                <element name="GBM" ftype="tabular">
                    <assert_contents>
                        <!--
                            Tabs are written as character references: a literal
                            tab inside an attribute value is normalized to a
                            space by the XML parser and would never match the
                            tab-separated header line.
                        -->
                        <has_line line="&#9;fpr&#9;tpr&#9;threshold&#9;train_type&#9;disease"/>
                        <has_n_columns n="6"/>
                        <has_n_lines n="253"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="summary_counts" type="list">
                <element name="GBM" file="GBM_106_summary_counts.csv" ftype="csv"/>
            </output_collection>
            <!-- PDFs are compared by size only (sim_size with a delta of 50). -->
            <output_collection name="within_disease" type="list:list">
                <element name="GBM">
                    <element name="cv_heatmap" file="GBM_cv_heatmap_103.pdf" compare="sim_size" delta="50"/>
                    <element name="all_disease_pr" file="GBM_all_disease_pr_101.pdf" compare="sim_size" delta="50"/>
                    <element name="all_disease_roc" file="GBM_all_disease_roc_102.pdf" compare="sim_size" delta="50"/>
                    <element name="disease_aupr" file="GBM_disease_aupr_104.pdf" compare="sim_size" delta="50"/>
                    <element name="disease_auroc" file="GBM_disease_auroc_105.pdf" compare="sim_size" delta="50"/>
                </element>
            </output_collection>
            <output_collection name="disease_figures" type="list:list">
                <element name="GBM">
                    <element name="pr" file="GBM_pr_99.pdf" compare="sim_size" delta="50"/>
                    <element name="roc" file="GBM_roc_100.pdf" compare="sim_size" delta="50"/>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[

**Pancancer_Aberrant_Pathway_Activity_Analysis scripts/papaa_within_disease_analysis.py:**

**Inputs:**
    --genes                                comma separated string of HUGO symbols for target genes or targenes_list.csv file
    --diseases                             Comma separated diseases list in a file
    --alphas                               The alphas for parameter sweep
    --l1_ratios                            The l1 ratios for parameter sweep
    --remove_hyper                         Remove hypermutated samples
    --classifier_results                   String of location to classification folder extending to individual diseases
    --x_matrix                             Filename of features to use in model
    --filename_mut                         Filename of sample/gene mutations to use in model
    --filename_mut_burden                  Filename of sample mutation burden to use in model
    --filename_sample                      Filename of patient/samples to use in model
    --filename_copy_loss                   Filename of copy number loss
    --filename_copy_gain                   Filename of copy number gain
    --filename_cancer_gene_classification  Filename of cancer gene classification table

**Outputs:**
    ROC curves, AUROC across diseases, and classifier coefficients for individual diseases

    ]]></help>
    <expand macro="citations"/>
</tool>