Mercurial > repos > jjohnson > seq2hla

<tool id="seq2hla" name="seq2HLA" version="0.1.0">
    <description>HLA genotype and expression from RNA-seq</description>
    <!-- Packages the tool depends on: seq2HLA 2.2 and bowtie 1.0.0. -->
    <requirements>
        <requirement type="package" version="2.2">seq2HLA</requirement>
        <requirement type="package" version="1.0.0">bowtie</requirement>
    </requirements>
    <!-- Job status rule: the range "1:" matches every exit code of 1 or greater. -->
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <!--
        Command template (Cheetah syntax inside the CDATA section).

        1. Runs seq2HLA on the two FASTQ inputs using the run_name prefix and
           appends its standard output to the seq2hla_log dataset. The
           3-prime trim option is added only when the trim parameter is not
           empty; the thread count is taken from GALAXY_SLOTS, defaulting to 1.
        2. Copies the Class I 2-digit and 4-digit genotype files, the Class II
           4-digit genotype file and the ambiguity file into their output
           datasets.
        3. Rewrites each expression file as two tab-separated columns under a
           "#Locus / RPKM" header: sed keeps the text before ": " and the
           number after it joined by "_", then tr turns every "_" into a tab
           (which also splits the "#Locus_RPKM" header line).

        Every step is chained with "and" operators, so a failing step stops
        the remaining steps from running.
    -->
    <command><![CDATA[
        seq2HLA --runName="$run_name" -1 "$fastq_input1" -2 "$fastq_input2"
        #if str($trim) != '':
            --trim3=$trim
        #end if
        -p \${GALAXY_SLOTS:-1} >> "$seq2hla_log"
        && cp -p "${run_name}-ClassI.HLAgenotype2digits" "${c1_genotype2digits}"
        && cp -p "${run_name}-ClassI.HLAgenotype4digits" "${c1_genotype4digits}"
        && echo "#Locus_RPKM" | cat - "${run_name}-ClassI.expression" | sed 's/^\(.*\): \([0-9.]*\).*$/\1_\2/' | tr '_' '\t' > "${c1_expression}"
        && cp -p "${run_name}-ClassII.HLAgenotype4digits" "${c2_genotype4digits}"
        && echo "#Locus_RPKM" | cat - "${run_name}-ClassII.expression" | sed 's/^\(.*\): \([0-9.]*\).*$/\1_\2/' | tr '_' '\t' > "${c2_expression}"
        && cp -p "${run_name}.ambiguity" "${ambiguity}"
    ]]></command>
    <!--
        User-supplied parameters:
          run_name      prefix seq2HLA uses for its result files (also shown in output labels)
          fastq_input1  first (forward) reads, fastqsanger
          fastq_input2  second (reverse) reads, fastqsanger
          trim          optional number of bases to trim from the 3 prime end
    -->
    <inputs>
        <param name="run_name" type="text" value="sample1" label="Name prefix for this analysis">
            <!-- The message lists every character the pattern accepts, including "+". -->
            <validator type="regex" message="Use only letters, digits and the characters _ - + .">^[A-Za-z0-9_\-+.]+$</validator>
        </param>
        <param name="fastq_input1" type="data" format="fastqsanger" label="Select first set of reads" help="Specify dataset with forward reads"/>
        <param name="fastq_input2" type="data" format="fastqsanger" label="Select second set of reads" help="Specify dataset with reverse reads"/>
        <!-- Left empty by default; the command only passes the trim option when a value is given. -->
        <param name="trim" type="integer" value="" min="0" optional="true" label="Trim bases from 3 prime"
               help="trim this many bases from the low-quality end of each read" />
    </inputs>
    <!--
        Output datasets. seq2hla_log receives the program's standard output;
        every other dataset is filled by the command section from the seq2HLA
        result file named in its label.
    -->
    <outputs>
        <data format="txt" name="seq2hla_log" label="${tool.name} on ${on_string}: ${run_name} logfile"/>
        <data format="tabular" name="c1_genotype2digits" label="${tool.name} on ${on_string}: ${run_name}-ClassI.HLAgenotype2digits"/>
        <data format="tabular" name="c1_genotype4digits" label="${tool.name} on ${on_string}: ${run_name}-ClassI.HLAgenotype4digits"/>
        <data format="tabular" name="c2_genotype4digits" label="${tool.name} on ${on_string}: ${run_name}-ClassII.HLAgenotype4digits"/>
        <data format="tabular" name="c1_expression" label="${tool.name} on ${on_string}: ${run_name}-ClassI.expression"/>
        <!-- Declared exactly once: output names must be unique within the tool. -->
        <data format="tabular" name="c2_expression" label="${tool.name} on ${on_string}: ${run_name}-ClassII.expression"/>
        <data format="txt" name="ambiguity" label="${tool.name} on ${on_string}: ${run_name}.ambiguity"/>
    </outputs>
    <!--
        Single functional test: runs the tool with run_name "test" on
        reads1.fastq and reads2.fastq (no trimming value supplied) and checks
        that the Class I 4-digit genotype output contains the text A*24:02.
        The remaining outputs are not asserted.
    -->
    <tests>
        <test>
            <param name="run_name" value="test"/>
            <param name="fastq_input1" ftype="fastqsanger" value="reads1.fastq"/>
            <param name="fastq_input2" ftype="fastqsanger" value="reads2.fastq"/>
            <output name="c1_genotype4digits">
                <assert_contents>
                    <has_text text="A*24:02" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**seq2HLA**  *HLA typing from RNA-Seq sequence reads*

Release: 2.2

seq2HLA_ is an in-silico method, written in Python and R, which takes standard RNA-Seq sequence reads in fastq format
as input, uses a bowtie index comprising all HLA alleles and outputs the most likely HLA class I and class II genotypes (in 4 digit resolution),
a p-value for each call, and the expression of each class.

**Inputs**

    Paired read fastq files with Illumina style IDs.


**Outputs**

    1. <prefix>-ClassI.HLAgenotype2digits => 2 digit result of Class I
    2. <prefix>-ClassII.HLAgenotype2digits => 2 digit result of Class II
    3. <prefix>-ClassI.HLAgenotype4digits => 4 digit result of Class I
    4. <prefix>-ClassII.HLAgenotype4digits => 4 digit result of Class II
    5. <prefix>.ambiguity => reports typing ambiguities (more than one solution for an allele possible)
    6. <prefix>-ClassI.expression => expression of Class I alleles
    7. <prefix>-ClassII.expression => expression of Class II alleles


    ClassI.HLAgenotype4digits

       =======  ========  ===========  ========  ============
        #Locus  Allele 1  Confidence   Allele 2  Confidence
       =======  ========  ===========  ========  ============
        A       A*03:01   0.000510333  A*02:01'  0.0005975604
        B       B*50:01   0.001271273  B*58:02   3.52561e-05
        C       C*04:01   0.06362723   C*06:02   0.04725865
       =======  ========  ===========  ========  ============


    ClassI.expression

       =======  ======
        #Locus   RPKM
       =======  ======
        A        89.59
        B       139.66
        C       184.42
       =======  ======


.. _seq2HLA: https://bitbucket.org/sebastian_boegel/seq2hla

    ]]></help>
    <citations>
        <citation type="doi">10.1186/gm403</citation>
    </citations>
</tool>
author	jjohnson
date	Thu, 22 Dec 2016 09:15:31 -0500
parents
children	6ecaa71a5aa3