Mercurial > repos > vijay > pancancer_classifier

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Wed Dec 16 23:32:43 2020 +0000
@@ -0,0 +1,229 @@
+<macros>
+    <token name="@VERSION@">0.1.9</token>
+
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">papaa</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="stdio">
+        <stdio>
+             <exit_code level="fatal" range="1:"/>
+        </stdio>
+    </xml>
+
+    <xml name="inputs_basic">
+        <param argument="--x_matrix" label="Filename of features to use in model" name="x_matrix" optional="true" type="data" format="tabular" help="data/pancan_rnaseq_freeze.tsv.gz"/>
+        <yield/>
+        <param argument="--filename_mut" label="Filename mutations" name="filename_mut" optional="true" type="data" format="tabular" help="data/pancan_mutation_freeze.tsv.gz"/>
+        <param argument="--filename_mut_burden" label="Filename of mutation burden" name="filename_mut_burden" optional="true" type="data" format="tabular" help="data/mutation_burden_freeze.tsv"/>
+        <param argument="--filename_sample" label="Filename of sample" name="filename_sample" optional="true" type="data" format="tabular" help="data/sample_freeze.tsv"/>
+    </xml>
+
+    <token name="@INPUTS_BASIC@"><![CDATA[
+        #if $x_matrix and $x_matrix is not None:
+        --x_matrix '$x_matrix'
+        #end if
+        #if $filename_mut and $filename_mut is not None:
+        --filename_mut '$filename_mut'
+        #end if
+        #if $filename_mut_burden and $filename_mut_burden is not None:
+        --filename_mut_burden '$filename_mut_burden'
+        #end if
+        #if $filename_sample and $filename_sample is not None:
+        --filename_sample '$filename_sample'
+        #end if
+        ]]>
+    </token>
+
+    <xml name="inputs_genes_diseases">
+        <param argument="--genes" label="Comma separated string of HUGO gene symbols" name="genes" optional="False" type="text" value="ERBB2,PIK3CA,KRAS,AKT1"/>
+        <param argument="--diseases" label="Comma sep string of TCGA disease acronyms. If no arguments are passed, filtering will default to options given in --filter_count and --filter_prop." name="diseases" optional="true" type="text" value="BLCA,BRCA,CESC,COAD,ESCA,LUAD,LUSC,OV,PRAD,READ,STAD,UCEC,UCS"/>
+    </xml>
+
+    <token name="@INPUTS_GENES_DISEASES@"><![CDATA[
+        #if $genes and $genes is not None:
+        --genes '$genes'
+        #end if
+        #if $diseases and str($diseases) != '':
+        --diseases '$diseases'
+        #end if
+        ]]>
+    </token>
+
+
+    <xml name="input_filename_raw_mut">
+        <param argument="--filename_raw_mut" label="Filename of raw mut MAF" name="filename_raw_mut" optional="true" type="data" format="tabular" help="data/raw/mc3.v0.2.8.PUBLIC.maf"/>
+    </xml>
+
+    <token name="@INPUT_FILENAME_RAW_MUT@"><![CDATA[
+        #if $filename_raw_mut and $filename_raw_mut is not None:
+        --filename_raw_mut '$filename_raw_mut'
+        #end if
+        ]]>
+    </token>
+
+
+    <xml name="input_filename_burden">
+        <param argument="--filename_burden" label="Burden file" name="filename_burden" optional="true" type="data" format="tabular" help="data/seg_based_scores.tsv"/>
+    </xml>
+
+    <token name="@INPUT_FILENAME_BURDEN@"><![CDATA[
+        #if $filename_burden and $filename_burden is not None:
+        --filename_burden '$filename_burden'
+        #end if
+        ]]>
+    </token>
+
+    <xml name="input_filename_snaptron_samples">
+        <param argument="--sample_file" label="SNAPTRON samples" name="sample_file" optional="true" type="data" format="tabular" help="scripts/snaptron/samples.tsv.gz"/>
+    </xml>
+
+    <token name="@INPUT_FILENAME_SNAPTRON_SAMPLES@"><![CDATA[
+        #if $sample_file and $sample_file is not None:
+        --sample_file '$sample_file'
+        #end if
+        ]]>
+    </token>
+
+
+    <xml name="input_filename_snaptron_junctions">
+        <param argument="--junction_file" label="SNAPTRON junctions" name="junction_file" optional="true" type="data" format="tabular" help="scripts/snaptron/tp53_junctions.txt.gz"/>
+    </xml>
+
+    <token name="@INPUT_FILENAME_SNAPTRON_JUNCTIONS@"><![CDATA[
+        #if $junction_file and $junction_file is not None:
+        --junction_file '$junction_file'
+        #end if
+        ]]>
+    </token>
+
+    <xml name="inputs_copy_number_file">
+        <param argument="--filename_copy_loss" label="File with Copy number loss" name="filename_copy_loss" optional="true" type="data" format="tabular" help="data/copy_number_loss_status.tsv.gz"/>
+        <param argument="--filename_copy_gain" label="File with Copy number gain" name="filename_copy_gain" optional="true" type="data" format="tabular" help="data/copy_number_gain_status.tsv.gz"/>
+    </xml>
+
+    <xml name="inputs_copy_number_class_file">
+        <expand macro="inputs_copy_number_file" />
+        <param argument="--filename_cancer_gene_classification" label="File with cancer gene classification table" name="filename_cancer_gene_classification" optional="true" type="data" format="tabular" help="data/cosmic_cancer_classification.tsv"/>
+    </xml>
+
+    <xml name="inputs_copy_number_file_conditional">
+        <conditional name="copy_number_conditional">
+          <param argument="--copy_number" checked="false" label="Supplement Y matrix with copy number events" name="copy_number" type="boolean" truevalue="--copy_number" falsevalue=""/>
+          <when value="--copy_number">
+            <expand macro="inputs_copy_number_file" />
+          </when>
+          <when value=""/>
+        </conditional>
+    </xml>
+
+    <xml name="inputs_copy_number_class_file_conditional">
+        <conditional name="copy_number_conditional">
+          <param argument="--copy_number" checked="false" label="Supplement Y matrix with copy number events" name="copy_number" type="boolean" truevalue="--copy_number" falsevalue=""/>
+          <when value="--copy_number">
+            <expand macro="inputs_copy_number_class_file" />
+          </when>
+          <when value=""/>
+        </conditional>
+    </xml>
+
+    <token name="@INPUTS_COPY_NUMBER_FILE_PREFIX@"><![CDATA[
+        #set $copy_number_conditional = type('',(object,),{'filename_copy_loss':$filename_copy_loss,'filename_copy_gain':$filename_copy_gain})()
+                ]]>
+    </token>
+
+    <token name="@INPUTS_COPY_NUMBER_FILE@"><![CDATA[
+        #if $copy_number_conditional.filename_copy_loss and $copy_number_conditional.filename_copy_loss is not None:
+        --filename_copy_loss '$copy_number_conditional.filename_copy_loss'
+        #end if
+        #if $copy_number_conditional.filename_copy_gain and $copy_number_conditional.filename_copy_gain is not None:
+        --filename_copy_gain '$copy_number_conditional.filename_copy_gain'
+        #end if
+        ]]>
+    </token>
+
+    <token name="@INPUTS_COPY_NUMBER_FILE_CONDITIONAL@"><![CDATA[
+        #if $copy_number_conditional.copy_number
+        $copy_number_conditional.copy_number
+        #if $copy_number_conditional.filename_copy_loss and $copy_number_conditional.filename_copy_loss is not None:
+        --filename_copy_loss '$copy_number_conditional.filename_copy_loss'
+        #end if
+        #if $copy_number_conditional.filename_copy_gain and $copy_number_conditional.filename_copy_gain is not None:
+        --filename_copy_gain '$copy_number_conditional.filename_copy_gain'
+        #end if
+        ##@INPUTS_COPY_NUMBER_FILE@
+        #end if
+        ]]>
+    </token>
+
+    <token name="@INPUTS_COPY_NUMBER_CLASS_FILE_PREFIX@"><![CDATA[
+        #set $copy_number_conditional = type('',(object,),{'filename_copy_loss':$filename_copy_loss,'filename_copy_gain':$filename_copy_gain,'filename_cancer_gene_classification':$filename_cancer_gene_classification})()
+        ]]>
+    </token>
+
+    <token name="@INPUTS_COPY_NUMBER_CLASS_FILE@"><![CDATA[
+        #if $copy_number_conditional.filename_copy_loss and $copy_number_conditional.filename_copy_loss is not None:
+        --filename_copy_loss '$copy_number_conditional.filename_copy_loss'
+        #end if
+        #if $copy_number_conditional.filename_copy_gain and $copy_number_conditional.filename_copy_gain is not None:
+        --filename_copy_gain '$copy_number_conditional.filename_copy_gain'
+        #end if
+        #if $copy_number_conditional.filename_cancer_gene_classification and $copy_number_conditional.filename_cancer_gene_classification is not None:
+        --filename_cancer_gene_classification '$copy_number_conditional.filename_cancer_gene_classification'
+        #end if
+        ]]>
+    </token>
+
+    <token name="@INPUTS_COPY_NUMBER_CLASS_FILE_CONDITIONAL@"><![CDATA[
+        #if $copy_number_conditional.copy_number
+        $copy_number_conditional.copy_number
+        ##
+        #if $copy_number_conditional.filename_copy_loss and $copy_number_conditional.filename_copy_loss is not None:
+        --filename_copy_loss '$copy_number_conditional.filename_copy_loss'
+        #end if
+        #if $copy_number_conditional.filename_copy_gain and $copy_number_conditional.filename_copy_gain is not None:
+        --filename_copy_gain '$copy_number_conditional.filename_copy_gain'
+        #end if
+        #if $copy_number_conditional.filename_cancer_gene_classification and $copy_number_conditional.filename_cancer_gene_classification is not None:
+        --filename_cancer_gene_classification '$copy_number_conditional.filename_cancer_gene_classification'
+        #end if
+        ##@INPUTS_COPY_NUMBER_CLASS_FILE@
+        #end if
+        ]]>
+    </token>
+
+    <xml name="input_remove_hyper">
+        <param argument="--remove_hyper" checked="false" label="Remove hypermutated samples" name="remove_hyper" type="boolean" truevalue="--remove_hyper" falsevalue=""/>
+    </xml>
+    <token name="@INPUT_REMOVE_HYPER@"><![CDATA[$remove_hyper]]></token>
+
+    <xml name="input_alphas">
+        <param argument="--alphas" label="the alphas for parameter sweep" name="alphas" optional="true" type="text" value="0.1,0.13,0.15,0.18,0.2,0.3,0.4,0.6,0.7"/>
+    </xml>
+
+    <token name="@INPUTS_ALPHAS@"><![CDATA[
+        #if $alphas and $alphas is not None:
+        --alphas '$alphas'
+        #end if
+        ]]>
+    </token>
+
+    <xml name="input_l1_ratios">
+        <param argument="--l1_ratios" label="the l1 ratios for parameter sweep" name="l1_ratios" optional="true" type="text" value="0.1,0.125,0.15,0.2,0.25,0.3,0.35"/>
+    </xml>
+
+    <token name="@INPUTS_L1_RATIOS@"><![CDATA[
+        #if $l1_ratios and $l1_ratios is not None:
+        --l1_ratios '$l1_ratios'
+        #end if
+        ]]>
+    </token>
+    <xml name="citations">
+        <citations>
+            <yield />
+        </citations>
+    </xml>
+</macros>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pancancer_classifier.xml	Wed Dec 16 23:32:43 2020 +0000
@@ -0,0 +1,207 @@
+<tool id="pancancer_classifier" name="PAPAA: PanCancer classifier" version="@VERSION@" python_template_version="3.6">
+    <description>classifier for pathway aberrant activity</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <version_command>
+        <![CDATA['papaa_pancancer_classifier.py' --version]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        papaa_pancancer_classifier.py
+        #if $folds and $folds is not None:
+        --folds '$folds'
+        #end if
+        #if $seed and $seed is not None:
+        --seed '$seed'
+        #end if
+        $drop
+        #if $filter_count and $filter_count is not None:
+        --filter_count '$filter_count'
+        #end if
+        #if $filter_prop and $filter_prop is not None:
+        --filter_prop '$filter_prop'
+        #end if
+        #if $num_features and $num_features is not None:
+        --num_features '$num_features'
+        #end if
+        @INPUTS_ALPHAS@
+        @INPUTS_L1_RATIOS@
+        #if $alt_genes and str($alt_genes) != '':
+        --alt_genes '$alt_genes'
+        #end if
+        #if $alt_diseases and str($alt_diseases) != '':
+        --alt_diseases '$alt_diseases'
+        #end if
+        #if $alt_filter_count and $alt_filter_count is not None:
+        --alt_filter_count '$alt_filter_count'
+        #end if
+        #if $alt_filter_prop and $alt_filter_prop is not None:
+        --alt_filter_prop '$alt_filter_prop'
+        #end if
+        --classifier_results 'classifier'
+        @INPUT_REMOVE_HYPER@
+        $keep_intermediate
+        @INPUTS_BASIC@
+        @INPUTS_GENES_DISEASES@
+        $drop
+        $shuffled
+        $shuffled_before_training
+        $no_mutation
+        #if str($drop_x_genes)
+        --drop_x_genes '$drop_x_genes'
+        #end if
+        $drop_expression
+        $drop_covariates
+        @INPUTS_COPY_NUMBER_CLASS_FILE_CONDITIONAL@
+        > '${log}'
+        ]]>
+    </command>
+    <inputs>
+        <expand macro="inputs_basic" />
+        <expand macro="inputs_genes_diseases" />
+        <param argument="--seed" label="option to set seed" name="seed" optional="true" type="integer" value="1234"/>
+        <param argument="--folds" label="Number of cross validation folds to perform" name="folds" optional="true" type="integer" value="5"/>
+        <param argument="--drop" checked="false" label="Decision to drop input genes from X matrix" name="drop" type="boolean" truevalue="--drop" falsevalue=""/>
+        <expand macro="inputs_copy_number_class_file_conditional" />
+        <param argument="--filter_count" label="Min number of mutations in diseases to include" name="filter_count" optional="true" type="integer" value="15"/>
+        <param argument="--filter_prop" label="Min proportion of positives to include disease" name="filter_prop" optional="true" type="float" value="0.05"/>
+        <param argument="--num_features" label="Number of MAD genes to include in classifier" name="num_features" optional="true" type="integer" value="8000"/>
+        <expand macro="input_alphas" />
+        <expand macro="input_l1_ratios" />
+        <param argument="--alt_genes" label="alternative genes to test performance" name="alt_genes" optional="true" type="text" />
+        <param argument="--alt_diseases" label="The alternative diseases to test performance" name="alt_diseases" optional="true" type="text" />
+        <param argument="--alt_filter_count" label="Min number of mutations in disease to include in alternate" name="alt_filter_count" optional="true" type="integer" value="15"/>
+        <param argument="--alt_filter_prop" label="Min proportion of positives to include disease in alternate" name="alt_filter_prop" optional="true" type="float" value="0.05"/>
+        <expand macro="input_remove_hyper" />
+        <param argument="--keep_intermediate" checked="false" label="Keep intermediate ROC values for plotting" name="keep_intermediate" type="boolean" truevalue="--keep_intermediate" falsevalue=""/>
+        <param argument="--shuffled" checked="false" label="Shuffle the input gene exprs matrix alongside" name="shuffled" type="boolean" truevalue="--shuffled" falsevalue=""/>
+        <param argument="--shuffled_before_training" checked="false" label="Shuffle the gene exprs matrix before training" name="shuffled_before_training" type="boolean" truevalue="--shuffled_before_training" falsevalue=""/>
+        <param argument="--no_mutation" checked="false" label="Remove mutation data from y matrix" name="no_mutation" type="boolean" truevalue="--no_mutation" falsevalue=""/>
+        <param argument="--drop_x_genes" label="Comma separated list of genes to be dropped from X matrix" name="drop_x_genes" optional="true" type="text" value=""/>
+        <param argument="--drop_expression" checked="false" label="Decision to drop gene expression values from X" name="drop_expression" type="boolean" truevalue="--drop_expression" falsevalue=""/>
+        <param argument="--drop_covariates" checked="false" label="Decision to drop covariate information from X" name="drop_covariates" type="boolean" truevalue="--drop_covariates" falsevalue=""/>
+    </inputs>
+    <outputs>
+        <data format="txt" name="log" label="${tool.name} on ${on_string} (Log)" />
+        <data format="tabular" name="alt_gene_alt_disease_summary" label="${tool.name} on ${on_string} (alt_gene_alt_disease_summary.tsv)" from_work_dir="classifier/alt_gene_alt_disease_summary.tsv"/>
+        <data format="csv" name="alt_summary_counts" label="${tool.name} on ${on_string} (alt_summary_counts.csv" from_work_dir="classifier/alt_summary_counts.csv"/>
+        <data format="tabular" name="classifier_coefficients" label="${tool.name} on ${on_string} (classifier_coefficients.tsv)" from_work_dir="classifier/classifier_coefficients.tsv"/>
+        <data format="txt" name="classifier_summary" label="${tool.name} on ${on_string} (classifier_summary.txt)" from_work_dir="classifier/classifier_summary.txt"/>
+        <data format="tabular" name="pancan_roc_results" label="${tool.name} on ${on_string} (pancan_roc_results.tsv)" from_work_dir="classifier/pancan_roc_results.tsv"/>
+        <data format="csv" name="summary_counts" label="${tool.name} on ${on_string} (summary_counts.csv)" from_work_dir="classifier/summary_counts.csv"/>
+        <data format="pdf" name="cv_heatmap" label="${tool.name} on ${on_string} (cv_heatmap.pdf)" from_work_dir="classifier/cv_heatmap.pdf"/>
+        <collection name="disease_figures" type="list:list" label="Disease classifier figures">
+          <discover_datasets pattern="classifier_(?P&lt;identifier_1&gt;.+)__pred_(?P&lt;identifier_0&gt;.+)\.pdf" format="pdf" directory="classifier/disease" visible="false" />
+        </collection>
+        <data format="pdf" name="all_disease_pr" label="${tool.name} on ${on_string} (all_disease_pr.pdf)" from_work_dir="classifier/all_disease_pr.pdf"/>
+        <data format="pdf" name="all_disease_roc" label="${tool.name} on ${on_string} (all_disease_roc.pdf)" from_work_dir="classifier/all_disease_roc.pdf"/>
+        <data format="pdf" name="alt_gene_alt_disease_aupr_bar" label="${tool.name} on ${on_string} (alt_gene_alt_disease_aupr_bar.pdf)" from_work_dir="classifier/alt_gene_alt_disease_aupr_bar.pdf"/>
+        <data format="pdf" name="alt_gene_alt_disease_auroc_bar" label="${tool.name} on ${on_string} (alt_gene_alt_disease_auroc_bar.pdf)" from_work_dir="classifier/alt_gene_alt_disease_auroc_bar.pdf"/>
+        <data format="pdf" name="disease_aupr" label="${tool.name} on ${on_string} (disease_aupr.pdf)" from_work_dir="classifier/disease_aupr.pdf"/>
+        <data format="pdf" name="disease_auroc" label="${tool.name} on ${on_string} (disease_auroc.pdf)" from_work_dir="classifier/disease_auroc.pdf"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="genes" value="ERBB2,PIK3CA,KRAS,AKT1"/>
+            <param name="diseases" value="GBM"/>
+            <param name="x_matrix" value="pancan_rnaseq_freeze_t1p.tsv.gz" ftype="tabular"/>
+            <param name="filename_mut" value="pancan_mutation_freeze_t1p.tsv.gz" ftype="tabular"/>
+            <param name="filename_mut_burden" value="mutation_burden_freeze.tsv" ftype="tabular"/>
+            <param name="filename_sample" value="sample_freeze.tsv" ftype="tabular"/>
+            <param name="seed" value="1234"/>
+            <param name="folds" value="5"/>
+            <param name="drop" value="true"/>
+            <param name="copy_number" value="true"/>
+            <param name="filename_copy_loss" value="copy_number_loss_status_t10p.tsv.gz" ftype="tabular"/>
+            <param name="filename_copy_gain" value="copy_number_gain_status_t10p.tsv.gz" ftype="tabular"/>
+            <param name="filename_cancer_gene_classification" value="cosmic_cancer_classification.tsv" ftype="tabular"/>
+            <param name="filter_count" value="15"/>
+            <param name="filter_prop" value="0.05"/>
+            <param name="num_features" value="8000"/>
+            <param name="alphas" value="0.1,0.13,0.15,0.18,0.2,0.3,0.4,0.6,0.7"/>
+            <param name="l1_ratios" value="0.1,0.125,0.15,0.2,0.25,0.3,0.35"/>
+            <param name="alt_genes" value="PTEN,PIK3R1,STK11"/>
+            <param name="alt_diseases" value="GBM"/>
+            <param name="alt_filter_count" value="15"/>
+            <param name="alt_filter_prop" value="0.05"/>
+            <param name="remove_hyper" value="true"/>
+            <param name="keep_intermediate" value="true"/>
+            <param name="shuffled" value="true"/>
+            <param name="shuffled_before_training" value="false"/>
+            <param name="no_mutation" value="false"/>
+            <param name="drop_expression" value="false"/>
+            <param name="drop_covariates" value="false"/>
+            <output name="log" file="Log.txt"/>
+            <output name="alt_gene_alt_disease_summary" file="alt_gene_alt_disease_summary.tsv"/>
+            <output name="alt_summary_counts" file="alt_summary_counts.csv"/>
+            <output name="classifier_coefficients" file="classifier_coefficients.tsv"/>
+            <output name="classifier_summary" file="classifier_summary.txt"/>
+            <output name="pancan_roc_results">
+              <assert_contents>
+                  <has_line line="&#009;fpr&#009;tpr&#009;threshold&#009;train_type&#009;disease" />
+                  <has_n_columns n="6" />
+                  <has_n_lines n="253" />
+              </assert_contents>
+            </output>
+            <output name="summary_counts" file="summary_counts.csv"/>
+            <output name="cv_heatmap" file="cv_heatmap.pdf" compare="sim_size" delta="50"/>
+            <output name="all_disease_pr" file="all_disease_pr.pdf" compare="sim_size" delta="50" />
+            <output name="all_disease_roc" file="all_disease_roc.pdf" compare="sim_size" delta="50"/>
+            <output name="alt_gene_alt_disease_aupr_bar" file="alt_gene_alt_disease_aupr_bar.pdf" compare="sim_size" delta="50"/>
+            <output name="alt_gene_alt_disease_auroc_bar" file="alt_gene_alt_disease_auroc_bar.pdf" compare="sim_size" delta="50"/>
+            <output name="disease_aupr" file="disease_aupr.pdf" compare="sim_size" delta="50"/>
+            <output name="disease_auroc" file="disease_auroc.pdf" compare="sim_size" delta="50"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+        **Pancancer_Aberrant_Pathway_Activity_Analysis scripts/papaa_pancancer_classifier.py:**
+
+            **Inputs:**
+                --genes     comma separated string of HUGO symbols for target genes or targenes_list.csv file
+                --diseases  comma separated string of disease types/TCGA acronyms for classifier
+                            default: Auto (will pick diseases from filter args)
+                --folds     number of cross validation folds
+                            default: 5
+                --seed  value specifies the initial value of the random number seed
+                        default: 1234
+                --drop  drop the input genes from the X matrix
+                        default: False if flag omitted
+                --copy_number   optional flag to supplement copy number to define Y
+                                default: False if flag omitted
+                --filter_count  int of low count of mutation to include disease
+                                default: 15
+                --filter_prop   float of low proportion of mutated samples per disease
+                                default: 0.05
+                --num_features  int of number of genes to include in classifier
+                                default: 8000
+                --alphas    comma separated string of alphas to test in pipeline default: '0.1,0.15,0.2,0.5,0.8,1'
+                --l1_ratios     comma separated string of l1 parameters to test
+                                default: '0,0.1,0.15,0.18,0.2,0.3'
+                --alt_genes     comma separated string of alternative genes to test
+                                default: None
+                --alt_diseases  comma separated string of alternative diseases to test
+                                default: Auto
+                --alt_filter_count  int of low count of mutations to include alt_diseases
+                                    default: 15
+                --alt_filter_prop   float of low proportion of mutated samples alt_disease
+                                    default: 0.05
+                --classifier_results    string of the location to save the classifier results/figures
+                                        default: Auto
+                --remove_hyper  store_true: remove hypermutated samples
+                                default: False if flag omitted
+                --keep_intermediate     store_true: keep intermediate roc curve items
+                                        default: False if flag omitted
+                --x_matrix  string of which feature matrix to use
+                            default: raw
+                --remove_hyper  store_true: remove hypermutated samples
+                                default: False if flag omitted
+                --keep_intermediate     store_true: keep intermediate roc curve items
+                                        default: False if flag omitted
+
+            **Outputs:**
+            ROC curves, AUROC across diseases, and classifier coefficients  ]]>
+    </help>
+    <expand macro="citations" />
+</tool>