<!--
view within_disease_analysis.xml @ 0:99d944bfe9b3 draft default tip

"planemo upload for repository http://github.com/nvk747/papaa/galaxy/ commit 954b283ef7f82f59f55476a4b3a230d655187ac1"
author vijay
date Wed, 16 Dec 2020 23:31:13 +0000
parents
children
line wrap: on
line source
-->

<tool id="pancancer_within_disease_analysis" name="PAPAA: PanCancer within disease analysis" version="@VERSION@">
  <!-- One-line summary of the tool. -->
  <description>within disease analysis</description>
  <!-- Import macros.xml; the macros expanded in this tool and the tokens
       used in the command are presumably defined there (file not visible
       from here, confirm before renaming anything). -->
  <macros>
    <import>macros.xml</import>
  </macros>
  <!-- Shared dependency and error-handling definitions from the macros. -->
  <expand macro="requirements"/>
  <expand macro="stdio"/>
  <!-- Asks the wrapped script itself for its version string. -->
  <version_command><![CDATA['papaa_within_disease_analysis.py' --version]]></version_command>
  <!-- Command template (Cheetah). Runs the analysis script, sends its
       standard output to the log dataset, then flattens the result
       directory so the output collections can be discovered. -->
  <command><![CDATA[
    ## Argument tokens below are expanded from the imported macros.
    papaa_within_disease_analysis.py
    @INPUTS_ALPHAS@
    @INPUTS_L1_RATIOS@
    @INPUT_REMOVE_HYPER@
    @INPUTS_BASIC@
    @INPUTS_GENES_DISEASES@
    @INPUTS_COPY_NUMBER_CLASS_FILE_PREFIX@
    @INPUTS_COPY_NUMBER_CLASS_FILE@
    --classifier_results 'classifier'
    ## Optional integers: compare the string form against '' so that an
    ## explicit value of 0 is still forwarded (a plain truthiness test
    ## would treat 0 the same as "not set").
    #if str($seed) != '':
    --seed '$seed'
    #end if
    #if str($num_features) != '':
    --num_features '$num_features'
    #end if
    > '${log}'
    && papaa_flatten_classifier_directory.py -i 'classifier/within_disease' -o 'classifier_within'
   ]]>
  </command>
  <inputs>
    <!-- Parameter groups shared through the imported macros. -->
    <expand macro="inputs_basic"/>
    <expand macro="inputs_genes_diseases"/>
    <expand macro="inputs_copy_number_class_file"/>
    <expand macro="input_alphas"/>
    <expand macro="input_l1_ratios"/>
    <expand macro="input_remove_hyper"/>
    <!-- Optional integer parameters specific to this tool. -->
    <param name="seed"
           argument="--seed"
           type="integer"
           optional="true"
           value="1234"
           label="option to set seed"/>
    <param name="num_features"
           argument="--num_features"
           type="integer"
           optional="true"
           value="8000"
           label="Number of MAD genes to include in classifier"/>
  </inputs>
  <outputs>
    <!-- Receives the standard output redirected by the command. -->
    <data name="log" format="txt" label="${tool.name} on ${on_string} (Log)"/>

    <!-- Flat lists: one element per file, named by the captured filename prefix. -->
    <collection name="classifier_summary" type="list" label="classifier_summary.txt">
      <discover_datasets
          directory="classifier_within/within_disease"
          pattern="(?P&lt;identifier_0&gt;.+)_classifier_summary\.txt"
          format="txt"
          visible="false"/>
    </collection>
    <collection name="classifier_coefficients" type="list" label="classifier_coefficients.tsv">
      <discover_datasets
          directory="classifier_within/within_disease"
          pattern="(?P&lt;identifier_0&gt;.+)_classifier_coefficients\.tsv"
          format="tabular"
          visible="false"/>
    </collection>
    <collection name="pancan_roc_results" type="list" label="pancan_roc_results.tsv">
      <discover_datasets
          directory="classifier_within/within_disease"
          pattern="(?P&lt;identifier_0&gt;.+)_pancan_roc_results\.tsv"
          format="tabular"
          visible="false"/>
    </collection>
    <collection name="summary_counts" type="list" label="summary_counts.csv">
      <discover_datasets
          directory="classifier_within/within_disease"
          pattern="(?P&lt;identifier_0&gt;.+)_summary_counts\.csv"
          format="csv"
          visible="false"/>
    </collection>

    <!-- Nested lists of PDF figures: identifier_0 is the outer element,
         identifier_1 the inner one. -->
    <collection name="within_disease" type="list:list" label="Within disease figures">
      <discover_datasets
          directory="classifier_within/within_disease"
          pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;.+)\.pdf"
          format="pdf"
          visible="false"/>
    </collection>
    <collection name="disease_figures" type="list:list" label="Disease classifier figures">
      <discover_datasets
          directory="classifier_within/disease"
          pattern="classifier_(?P&lt;identifier_1&gt;.+)__pred_(?P&lt;identifier_0&gt;.+)\.pdf"
          format="pdf"
          visible="false"/>
    </collection>
  </outputs>
  <tests>
    <!-- One run restricted to GBM with four target genes; checks the log,
         the per-disease tables and the sizes of the generated PDF figures. -->
    <test>
      <!-- Inputs -->
      <param name="genes" value="ERBB2,PIK3CA,KRAS,AKT1"/>
      <param name="diseases" value="GBM"/>
      <param name="x_matrix" value="pancan_rnaseq_freeze_t1p.tsv.gz" ftype="tabular"/>
      <param name="filename_mut" value="pancan_mutation_freeze_t1p.tsv.gz" ftype="tabular"/>
      <param name="filename_mut_burden" value="mutation_burden_freeze.tsv" ftype="tabular"/>
      <param name="filename_sample" value="sample_freeze.tsv" ftype="tabular"/>
      <param name="filename_copy_loss" value="copy_number_loss_status_t10p.tsv.gz" ftype="tabular"/>
      <param name="filename_copy_gain" value="copy_number_gain_status_t10p.tsv.gz" ftype="tabular"/>
      <param name="filename_cancer_gene_classification" value="cosmic_cancer_classification.tsv" ftype="tabular"/>
      <param name="alphas" value="0.1,0.13,0.15,0.18,0.2,0.3,0.4,0.6,0.7"/>
      <param name="l1_ratios" value="0.1,0.125,0.15,0.2,0.25,0.3,0.35"/>
      <param name="seed" value="1234"/>
      <param name="num_features" value="8000"/>
      <param name="remove_hyper" value="true"/>

      <!-- Log: exactly two lines, the gene list and the disease. -->
      <output name="log">
        <assert_contents>
          <has_line line="[&apos;ERBB2&apos;, &apos;PIK3CA&apos;, &apos;KRAS&apos;, &apos;AKT1&apos;]"/>
          <has_line line="GBM"/>
          <has_n_lines n="2"/>
        </assert_contents>
      </output>

      <!-- Flat collections: a single GBM element each. -->
      <output_collection name="classifier_coefficients" type="list">
        <element name="GBM" file="GBM_108_coef.tabular" ftype="tabular"/>
      </output_collection>
      <output_collection name="classifier_summary" type="list">
        <element name="GBM" file="GBM_111_summary.txt" ftype="txt"/>
      </output_collection>
      <output_collection name="pancan_roc_results" type="list">
        <element name="GBM" ftype="tabular">
          <assert_contents>
            <has_line line="&#009;fpr&#009;tpr&#009;threshold&#009;train_type&#009;disease"/>
            <has_n_columns n="6"/>
            <has_n_lines n="253"/>
          </assert_contents>
        </element>
      </output_collection>
      <output_collection name="summary_counts" type="list">
        <element name="GBM" file="GBM_106_summary_counts.csv" ftype="csv"/>
      </output_collection>

      <!-- Nested figure collections: PDFs are compared by size only. -->
      <output_collection name="within_disease" type="list:list">
        <element name="GBM">
          <element name="cv_heatmap" file="GBM_cv_heatmap_103.pdf" compare="sim_size" delta="50"/>
          <element name="all_disease_pr" file="GBM_all_disease_pr_101.pdf" compare="sim_size" delta="50"/>
          <element name="all_disease_roc" file="GBM_all_disease_roc_102.pdf" compare="sim_size" delta="50"/>
          <element name="disease_aupr" file="GBM_disease_aupr_104.pdf" compare="sim_size" delta="50"/>
          <element name="disease_auroc" file="GBM_disease_auroc_105.pdf" compare="sim_size" delta="50"/>
        </element>
      </output_collection>
      <output_collection name="disease_figures" type="list:list">
        <element name="GBM">
          <element name="pr" file="GBM_pr_99.pdf" compare="sim_size" delta="50"/>
          <element name="roc" file="GBM_roc_100.pdf" compare="sim_size" delta="50"/>
        </element>
      </output_collection>
    </test>
  </tests>
  <help><![CDATA[
    **Pancancer_Aberrant_Pathway_Activity_Analysis scripts/papaa_within_disease_analysis.py:**

      **Inputs:**
        --genes   Comma separated string of HUGO symbols for target genes or targenes_list.csv file
        --diseases  Comma separated diseases list in a file
        --alphas  The alphas for parameter sweep
        --l1_ratios   The l1 ratios for parameter sweep
        --remove_hyper  Remove hypermutated samples
        --classifier_results  String of location to classification folder extending to individual diseases
        --x_matrix  Filename of features to use in model
        --filename_mut  Filename of sample/gene mutations to use in model
        --filename_mut_burden   Filename of sample mutation burden to use in model
        --filename_sample   Filename of patient/samples to use in model
        --filename_copy_loss  Filename of copy number loss
        --filename_copy_gain  Filename of copy number gain
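        --filename_cancer_gene_classification   Filename of cancer gene classification table
        --seed  Option to set seed (optional integer; form default 1234)
        --num_features  Number of MAD genes to include in classifier (optional integer; form default 8000)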

      **Outputs:**
        ROC curves, AUROC across diseases, and classifier coefficients for individual diseases ]]>
  </help>
  <expand macro="citations" />
</tool>