view netmhc.xml @ 0:bb25a4e5f211 draft default tip

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/netmhc commit 3bf9a39fe11622806ac6b032ba4fc6139a003580"
author jjohnson
date Tue, 18 Feb 2020 14:48:51 -0500
parents
children
line wrap: on
line source

<tool id="netmhc" name="netMHC" version="4.0.0">
    <description>MHC Binding prediction</description>
    <requirements>
        <requirement type="package" version="4.0">netMHC</requirement>
    </requirements>
    <!-- Exit-code rule matching every status of 1 or higher. Within this tool the command
         template exits 1 when no valid allele was supplied, and the two embedded helper
         scripts exit 4 (wrong argument count) or 3 (conversion error). -->
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <configfiles>
        <configfile name="format_out"><![CDATA[
import sys
import re

## Reformat the plain-text netMHC report into a tab-separated table.
## Usage: python script.py  netMHC_output_tsv output_file
## Exit status: 0 on success, 4 on a wrong argument count, 3 on any conversion error.
## Uses only syntax that is valid under both Python 2.7 and Python 3.
## Comments use a doubled hash so the configfile template engine drops them
## instead of trying to read them as directives.

## Column-header line of the report; every name is captured so it can be re-joined with tabs.
hpat = r'^\s*(pos)\s+(HLA)\s+(peptide)\s+(Core)\s+(Offset)\s+(I_pos)\s+(I_len)\s+(D_pos)\s+(D_len)\s+(iCore)\s+(Identity)\s+(1-log50k.aff.)\s+(Affinity.nM.)\s+(%Rank)\s+(BindLevel)\s*$'
## One prediction row: 14 whitespace-separated columns, then anything, then an optional
## trailing run of the letters S, W, B (the bind level).
epat = r'^\s*(\d+)\s+(\S+)\s+([A-Z]+)\s+([-_A-Z]*)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([A-Z]+)\s+(\S+)\s+([0-9.]+)\s+([0-9.]+)\s+([0-9.]+).*?([SWB]*)$'


def convert_report(in_path, out_path):
  """Copy the header and prediction rows of in_path to out_path, tab-separated.

  Only the first header line is written (prefixed with a hash sign), and only
  when it appears before any prediction row. Lines matching neither pattern
  are dropped. Returns the number of lines written.
  """
  cnt = 0
  ## Output is opened first, as before, so it is created even if the input is unreadable.
  with open(out_path, 'w') as wh:
    with open(in_path, 'r') as fh:
      for line in fh:
        line = line.rstrip()
        if not line:
          continue
        m = re.match(epat, line)
        if m:
          wh.write("%s\n" % '\t'.join([x if x else '' for x in m.groups()]))
          cnt += 1
        elif cnt == 0:
          ## Headers repeated later in the report are skipped because cnt is no longer 0.
          m = re.match(hpat, line)
          if m:
            wh.write("#%s\n" % '\t'.join(m.groups()))
            cnt += 1
  return cnt


def main(argv):
  """Command-line entry point; returns the process exit status."""
  if len(argv) != 3:
    sys.stderr.write("python script.py  netMHC_output_tsv output_file\n")
    return 4
  try:
    convert_report(argv[1], argv[2])
  except Exception as e:
    ## The message must go to stderr; the previous print statement sent it to stdout.
    sys.stderr.write("error: %s\n" % e)
    return 3
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv))
]]>
        </configfile>
        <configfile name="format_tsv"><![CDATA[
#!/usr/bin/env python
import sys

## Reformat the netMHC xls summary (two header rows) into a table with one header row.
## Usage: python script.py netMHC_xls output_file
## Exit status: 0 on success, 4 on a wrong argument count, 3 on any conversion error.
## Uses only syntax that is valid under both Python 2.7 and Python 3.
## Comments use a doubled hash so the configfile template engine drops them
## instead of trying to read them as directives.


def merge_header(alleles, hdr):
  """Combine the allele row and the column-name row into one list of column titles.

  Column i is titled with the allele found at index i - i % 3 of the allele
  row followed by its own name; an empty or missing allele leaves the name
  unchanged.
  """
  cols = []
  for i, name in enumerate(hdr):
    k = i - i % 3
    ## The allele row may be shorter than the column row; treat missing cells as empty
    ## instead of failing with an IndexError.
    allele = alleles[k] if k < len(alleles) else ''
    cols.append(' '.join([allele, name]).strip())
  return cols


def convert_summary(in_path, out_path):
  """Write in_path to out_path with its first two lines merged into one header line.

  The merged header is prefixed with a hash sign; every later line is copied
  through unchanged.
  """
  alleles = []
  ## Output is opened first, as before, so it is created even if the input is unreadable.
  with open(out_path, 'w') as wh:
    with open(in_path, 'r') as fh:
      for n, line in enumerate(fh):
        if n == 0:
          alleles = line.rstrip('\n').split('\t')
        elif n == 1:
          hdr = line.rstrip('\n').split('\t')
          wh.write('#%s\n' % '\t'.join(merge_header(alleles, hdr)))
        else:
          wh.write(line)


def main(argv):
  """Command-line entry point; returns the process exit status."""
  if len(argv) != 3:
    sys.stderr.write("python script.py netMHC_xls output_file\n")
    return 4
  try:
    convert_summary(argv[1], argv[2])
  except Exception as e:
    ## The message must go to stderr; the previous print statement sent it to stdout.
    sys.stderr.write("error: %s\n" % e)
    return 3
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv))
]]>
        </configfile>
    </configfiles>
    <command><![CDATA[
### netMHC -tdir tmp -f OS11Fusion.fa -a 'HLA-A3001,HLA-A0301,HLA-B4201,HLA-B5802,HLA-C0602' -l '8,9,10' -xls -xlsfile OS11Fusion.xls > OS11_netMHC.out
## Allele names this tool accepts. Requested alleles that are not in this list are
## collected as unknown by the template below and are never placed in the netMHC -a option.
## NOTE(review): presumably mirrors the alleles the packaged netMHC release has models
## for - confirm against the netMHC distribution when the requirement version changes.
#set $valid_alleles = [
'BoLA-AW10',
'BoLA-D18.4',
'BoLA-HD6',
'BoLA-JSP.1',
'BoLA-T2C',
'BoLA-T2a',
'BoLA-T2b',
'H-2-Db',
'H-2-Dd',
'H-2-Kb',
'H-2-Kd',
'H-2-Kk',
'H-2-Ld',
'HLA-A0101',
'HLA-A0201',
'HLA-A0202',
'HLA-A0203',
'HLA-A0205',
'HLA-A0206',
'HLA-A0207',
'HLA-A0211',
'HLA-A0212',
'HLA-A0216',
'HLA-A0217',
'HLA-A0219',
'HLA-A0250',
'HLA-A0301',
'HLA-A0302',
'HLA-A0319',
'HLA-A1101',
'HLA-A2301',
'HLA-A2402',
'HLA-A2403',
'HLA-A2501',
'HLA-A2601',
'HLA-A2602',
'HLA-A2603',
'HLA-A2902',
'HLA-A3001',
'HLA-A3002',
'HLA-A3101',
'HLA-A3201',
'HLA-A3207',
'HLA-A3215',
'HLA-A3301',
'HLA-A6601',
'HLA-A6801',
'HLA-A6802',
'HLA-A6823',
'HLA-A6901',
'HLA-A8001',
'HLA-B0702',
'HLA-B0801',
'HLA-B0802',
'HLA-B0803',
'HLA-B1401',
'HLA-B1402',
'HLA-B1501',
'HLA-B1502',
'HLA-B1503',
'HLA-B1509',
'HLA-B1517',
'HLA-B1801',
'HLA-B2705',
'HLA-B2720',
'HLA-B3501',
'HLA-B3503',
'HLA-B3701',
'HLA-B3801',
'HLA-B3901',
'HLA-B4001',
'HLA-B4002',
'HLA-B4013',
'HLA-B4201',
'HLA-B4402',
'HLA-B4403',
'HLA-B4501',
'HLA-B4506',
'HLA-B4601',
'HLA-B4801',
'HLA-B5101',
'HLA-B5301',
'HLA-B5401',
'HLA-B5701',
'HLA-B5703',
'HLA-B5801',
'HLA-B5802',
'HLA-B7301',
'HLA-B8101',
'HLA-B8301',
'HLA-C0303',
'HLA-C0401',
'HLA-C0501',
'HLA-C0602',
'HLA-C0701',
'HLA-C0702',
'HLA-C0802',
'HLA-C1203',
'HLA-C1402',
'HLA-C1502',
'HLA-E0101',
'HLA-E0103',
'Mamu-A01',
'Mamu-A02',
'Mamu-A07',
'Mamu-A11',
'Mamu-A20102',
'Mamu-A2201',
'Mamu-A2601',
'Mamu-A70103',
'Mamu-B01',
'Mamu-B03',
'Mamu-B08',
'Mamu-B1001',
'Mamu-B17',
'Mamu-B3901',
'Mamu-B52',
'Mamu-B6601',
'Mamu-B8301',
'Mamu-B8701',
'Patr-A0101',
'Patr-A0301',
'Patr-A0401',
'Patr-A0701',
'Patr-A0901',
'Patr-B0101',
'Patr-B1301',
'Patr-B2401',
'SLA-10401',
'SLA-10701',
'SLA-20401',
'SLA-30401',
]
        ## Validate the requested alleles, then assemble the netMHC command line.
        ## The re module is used only to neutralise shell metacharacters in rejected names.
        #import re
        #set $allelelist = []
        #set $unknown_alleles = []
        #set $candidates = []
        #if $alleles.allelesrc == 'history':
          ## History dataset: one allele per line; only the first comma-separated
          ## column of each line is read.
          #set $allele_fh = open(str($alleles.allele_file))
          #for $line in $allele_fh:
            #set $fields = $line.strip().split(',')
            #silent $candidates.append($fields[0])
          #end for
          #silent $allele_fh.close()
        #else:
          ## Text entry: every comma- or whitespace-separated token is an allele.
          ## Previously only the token before the first comma was used.
          #silent $candidates.extend(str($alleles.allele_text).replace(',', ' ').split())
        #end if
        #for $candidate in $candidates:
          #set $allele = $candidate.strip()
          #if $allele in $valid_alleles:
            #silent $allelelist.append($allele)
          #else
            ## Rejected names are echoed inside shell double quotes below and may come
            ## from an arbitrary dataset, so every character outside a conservative
            ## set is replaced to prevent command substitution.
            #silent $unknown_alleles.append(re.sub('[^A-Za-z0-9_.:*-]', '_', $allele))
          #end if
        #end for
        #if len($allelelist) < 1
            echo 'No netMHC alleles';
            echo "unknown: $unknown_alleles";
            exit 1;
        #else
            echo "netMHC alleles: $allelelist"
            && echo "unknown alleles: $unknown_alleles"
            && echo "peptide lengths: $lengths"
            #set $alist = ','.join($allelelist)
            && netMHC -tdir tmp -f "$seq_fasta" -a '$alist' $sort
            ## An empty selection renders as the text None; leave the option out in that
            ## case (netMHC presumably applies its own default length - confirm).
            #if str($lengths) not in ('None', ''):
              -l '$lengths'
            #end if
            #if $threshold_sec.rth:
              -rth $threshold_sec.rth
            #end if
            #if $threshold_sec.rlt:
              -rlt $threshold_sec.rlt
            #end if
            -xls -xlsfile results.tsv > results.out
            ## Convert both netMHC result files into the tabular Galaxy outputs.
            && python $format_out results.out  $output
            && python $format_tsv results.tsv $results_tsv
        #end if
    ]]></command>
    <inputs>
        <param name="seq_fasta" type="data" format="fasta" label="Peptide Sequence Fasta"/>
        <!-- Source of the allele names: a history dataset (one allele per line) or text typed
             into the form. The command template checks every name against its list of
             accepted alleles before calling netMHC. -->
        <conditional name="alleles">
           <param name="allelesrc" type="select" label="Alleles">
               <option value="history">From history</option>
               <option value="entry">Entered</option>
           </param>
           <when value="history">
               <!-- The help element is nested inside the param so that it is attached to it,
                    matching the allele_text param below. -->
               <param name="allele_file" type="data" format="txt" label="Alleles file">
                   <help>The dataset should have one allele per line: HLA-A0201</help>
               </param>
           </when>
           <when value="entry">
               <param name="allele_text" type="text" label="Alleles">
                   <help>Enter alleles separated by commas: HLA-A0201,HLA-B0702</help>
                   <validator type="regex" message="IDs separated by commas">^(\S+)(,\S+)*$</validator>
               </param>
           </when>
        </conditional>
        <!-- Peptide lengths to predict (8 to 14, several may be chosen). The command template
             echoes the selection and substitutes it into the netMHC -l option. -->
        <param name="lengths" type="select" multiple="true" label="peptide lengths for prediction">
            <help>Used for any alleles which don't include specified lengths</help>
            <option value="8">8</option>
            <option value="9">9</option>
            <option value="10">10</option>
            <option value="11">11</option>
            <option value="12">12 (unvalidated)</option>
            <option value="13">13 (unvalidated)</option>
            <option value="14">14 (unvalidated)</option>
        </param>
        <!-- Rendered as -s on the netMHC command line when checked, as an empty string otherwise. -->
        <param name="sort" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Sort output on descending affinity"/>
        <!-- Optional %Rank cut-offs. The command template adds -rth / -rlt to the netMHC call
             only when the corresponding value evaluates as true. -->
        <section name="threshold_sec" expanded="false" title="Adjust Thresholds">
            <param name="rth" type="float" value="0.500000" optional="true" label="Threshold for high binding peptides (%Rank)"/>
            <param name="rlt" type="float" value="2.000000" optional="true" label="Threshold for low binding peptides (%Rank)"/>
        </section>
    </inputs>
    <!-- Two tabular datasets, both produced by the helper scripts in the command template:
         "output" is written by the format_out script from results.out (netMHC stdout), and
         "results_tsv" is written by the format_tsv script from results.tsv (the -xlsfile). -->
    <outputs>
       <data name="output" format="tabular" label="${tool.name} on ${on_string} Binding Scores"/>
       <data name="results_tsv" format="tabular" label="${tool.name} on ${on_string} Peptide Summary"/>
    </outputs>
    <help><![CDATA[
**NetMHC**

http://www.cbs.dtu.dk/services/NetMHC/

NetMHC 4.0 predicts binding of peptides to a number of different HLA alleles using artificial neural networks (ANNs). 

ANNs have been trained for 78 different Human MHC (HLA) alleles representing all 12 HLA A and B Supertypes as defined by Lund et al. (2004). Furthermore 41 animal (Monkey, Cattle, Pig, and Mouse) allele predictions are available.

Prediction values are given in nM IC50 values.

Predictions of lengths 8-14: Predictions can be made for lengths between 8 and 14 for all alleles using a novel approximation algorithm based on ANNs trained on 9mer peptides. Probably because of the limited amount of available 10mer data, this method has a better predictive value than ANNs trained on 10mer data.
Predictions of peptides longer than 11 have not been extensively validated!
Caution should be taken for 8mer predictions as some alleles might not bind 8mers to any significant extent.

Strong and weak binding peptides are indicated in the output. In the selection window for HLA alleles, the recommended allele for each HLA supertype is indicated. 

**Inputs**

  A fasta file of peptide sequences in your history
  
  A list of alleles, either entered as comma-separated text or taken from a history dataset with one allele per line

**Outputs**

  **Binding Scores**

  ====   =========   ==========    ========= ======  =====  =====  =====  =====  ==========   =============  =============  ============  =====  =========
  #pos   HLA         peptide       Core      Offset  I_pos  I_len  D_pos  D_len  iCore        Identity       1-log50k(aff)  Affinity(nM)  %Rank  BindLevel
  ====   =========   ==========    ========= ======  =====  =====  =====  =====  ==========   =============  =============  ============  =====  =========
   16    HLA-A3001   HGRWDTNCA     HGRWDTNCA      0      0      0      0      0  HGRWDTNCA    SOGA2_CREB3L1          0.487       257.58    0.90  WB
    1    HLA-A3001   LQNELERLK     LQNELERLK      0      0      0      0      0  LQNELERLK    SOGA2_CREB3L1          0.242      3647.96    6.00
   16    HLA-A3001   HGRWDTNCAP    HGRWTNCAP      0      0      0      4      1  HGRWDTNCAP   SOGA2_CREB3L1          0.185      6739.05    9.50
    6    HLA-C0602   ERLKEMQSM     ERLKEMQSM      0      0      0      0      0  ERLKEMQSM    SOGA2_CREB3L1          0.382       798.43    0.40  SB
   12    HLA-C0602   QSMEHGRWD     QSMEHGRWD      0      0      0      0      0  QSMEHGRWD    SOGA2_CREB3L1          0.229      4177.34    1.50  WB
    3    HLA-C0602   NELERLKEM     NELERLKEM      0      0      0      0      0  NELERLKEM    SOGA2_CREB3L1          0.209      5224.29    1.80  WB
   20    HLA-A3001   DTNCAPSW      DTNCA-PSW      0      5      1      0      0  DTNCAPSW     SOGA2_CREB3L1          0.050     29125.62   60.00
   20    HLA-C0602   DTNCAPSW      DT-NCAPSW      0      2      1      0      0  DTNCAPSW     SOGA2_CREB3L1          0.005     47120.04   90.00
  ====   =========   ==========    ========= ======  =====  =====  =====  =====  ==========   =============  =============  ============  =====  =========



  **Peptide Summary**

  ====  =========  =============  ============  ==============  ==============  ============  ==============  ==============  ===========  =========
  #Pos  Peptide    ID             HLA-A3001 nM  HLA-A3001 Rank  HLA-A3001 Core  HLA-C0602 nM  HLA-C0602 Rank  HLA-C0602 Core  H_Avg_Ranks  N_binders
  ====  =========  =============  ============  ==============  ==============  ============  ==============  ==============  ===========  =========
  16    HGRWDTNCA  SOGA2_CREB3L1         257.6           0.900  HGRWDTNCA            35765.3          25.000  HGRWDTNCA             4.124          1
  20    DTNCAPSW   SOGA2_CREB3L1       29125.6          60.000  DTNCA_PSW            47120.0          90.000  DT_NCAPSW             6.909          0
  ====  =========  =============  ============  ==============  ==============  ============  ==============  ==============  ===========  =========


    ]]></help>
    <citations>
       <citation type="doi">10.1093/nar/gkn202</citation>
       <citation type="doi">10.1093/bioinformatics/btn128</citation>
       <citation type="doi">10.1093/bioinformatics/btn100</citation>
       <citation type="doi">10.1110/ps.0239403</citation>
    </citations>
</tool>