Overview

Namespaces

  • cli_db
    • propel
      • map
      • om
  • cli_import
  • LoggedPDO
  • None
  • PHP
  • webservices
    • cart
    • combisearch
    • details
      • annotations
        • feature
    • graphs
      • barplot
      • genome
    • listing
    • queue

Classes

  • AbstractImporter
  • Importer_Annotations_Dbxref
  • Importer_Annotations_Description
  • Importer_Annotations_EC
  • Importer_Annotations_GO
  • Importer_Annotations_Interpro
  • Importer_Annotations_MapMan
  • Importer_Annotations_Repeatmasker
  • Importer_Differential_Expressions
  • Importer_Expressions
  • Importer_Sequence_Ids
  • Importer_Sequences_FASTA

Interfaces

  • Importer
  • Overview
  • Namespace
  • Class
  • Tree
  1: <?php
  2: 
  3: namespace cli_import;
  4: 
  5: require_once ROOT . 'classes/AbstractImporter.php';
  6: require_once ROOT . 'commands/Importer_Sequence_Ids.php';
  7: require_once ROOT . 'commands/Importer_Sequences_FASTA.php';
  8: 
  9: /**
 10:  * importer for interpro Annotations
 11:  */
 12: class Importer_Annotations_Interpro extends AbstractImporter {
 13: 
 14:     /**
 15:      * Interpro Line RegeX
 16:      * @var RegEx-String
 17:      */
 18:     private static $regex = <<<EOF
 19: {^
 20:    (?<feature>\w+)
 21: [\t]   (?<pepStart>\d+)
 22: [\t]   (?<pepEnd>\d+)
 23: [\t]   (?<pepStrand>[+-])
 24: [\t]   (?<checksum>\w+)
 25: [\t]   (?<length>\d+)
 26: [\t]   (?<analysisMethod>\w+)
 27: [\t]   (?<analysisMatchID>.*?)
 28: (?:[\t]   (?<analysisMatchDescription>.*))?
 29: [\t]   (?<domStart>\d+)
 30: [\t]   (?<domEnd>\d+)
 31: [\t]   (?<eValue>(?:NA|\d+(?:\.\d+)?(?:[Ee][+-]\d+)?))
 32: [\t]   (?<status>[T?])
 33: [\t]   (?<timeexecuted>[\w-]*)
 34: [\t]   (?<interproID>\w*)
 35: [\t]   (?<interproDesc>.*?)
 36: (?:[\t]   (?<interproGOs>.*))?
 37: $}x
 38: EOF;
 39: 
 40:     /**
 41:      * 
 42:      * @inheritDoc
 43:      */
 44:     static function import($options) {
 45: 
 46: #SEQNAME    ?   ?   ?   CRC LENGTH  EVIDENCE    MATCHID MATCHNAME   START   END SCORE   STATUS  DATE    INTERPROID  INTERPRONAME
 47:         /*
 48:          * http://wiki.bioinformatics.ucdavis.edu/index.php/InterProScan#Iprscan_raw_output_explanation
 49:          * 
 50:           ------
 51:           NF00181542      0A5FDCE74AB7C3AD        272     HMMPIR  PIRSF001424     Prephenate dehydratase  1       270     6.5e-141        T       06-Oct-2004         IPR008237       Prephenate dehydratase with ACT region  Molecular Function:prephenate dehydratase activity (GO:0004664), Biological Process:L-phenylalanine biosynthesis (GO:0009094)
 52:           ------
 53: 
 54:           Where: NF00181542:             is the id of the input sequence.
 55:           27A9BBAC0587AB84:       is the crc64 (checksum) of the proteic sequence (supposed to be unique).
 56:           272:                    is the length of the sequence (in AA).
 57:           !HMMPIR:                 is the anaysis method launched.
 58:           !PIRSF001424:            is the database members entry for this match.
 59:           !Prephenate dehydratase: is the database member description for the entry.
 60:           !1:                      is the start of the domain match.
 61:           !270:                    is the end of the domain match.
 62:           6.5e-141:               is the evalue of the match (reported by member database anayling method).
 63:           T:                      is the status of the match (T: true, ?: unknown).
 64:           06-Oct-2004:            is the date of the run.
 65:           IPR008237:              is the corresponding InterPro entry (if iprlookup requested by the user).
 66:           Prephenate dehydratase with ACT region:                           is the description of the InterPro entry.
 67:           Molecular Function:prephenate dehydratase activity (GO:0004664):  is the GO (gene ontology) description for the InterPro entry.
 68: 
 69:          * file format we have differs slightly:
 70:           comp214244_c0_seq2    252 1718    +   75AFB115E637E163    488 FPrintScan  PR00724 CRBOXYPTASEC    150 162 1e-25   T   08-Nov-2012 IPR001563   Peptidase S10, serine carboxypeptidase  Molecular Function: serine-type carboxypeptidase activity (GO:0004185), Biological Process: proteolysis (GO:0006508)
 71:          * 
 72:          * param 2,3 and 4 are inserted: 
 73:          * derived from "comp214244_c0_seq2:2137-2445(+)" in predpep sequence file, which gets stored as comp214244_c0_seq2:2137-2445 in the feature table
 74:          * 
 75:          */
 76: 
 77: 
 78:         $filename = $options['file'];
 79: 
 80:         $interpro_version = $options['interpro_version'];
 81: 
 82:         $lines_total = trim(`wc -l $filename | cut -d' ' -f1`);
 83:         self::setLineCount($lines_total);
 84: 
 85:         global $db;
 86:         $lines_imported = 0;
 87:         $interpro_ids_added = 0;
 88:         $dbxrefs_added = 0;
 89: 
 90:         try {
 91:             $db->beginTransaction();
 92:             $import_prefix_id = Importer_Sequence_Ids::get_import_dbxref();
 93: 
 94:             #shared parameters
 95:             $param_feature_uniq = null;
 96:             $param_feature_domain_name = null;
 97:             $param_feature_domain_uniq = null;
 98:             $param_domain_fmin = null;
 99:             $param_domain_fmax = null;
100:             $param_source_name = null;
101:             $param_evalue = null;
102:             $param_featureprop_type = null;
103:             $param_featureprop_value = null;
104:             $param_accession = null;
105:             $param_dbname = null;
106: 
107:             //statement to create subfeature of type CV_ANNOTATION_INTERPRO
108:             $statement_insert_feature_domain = $db->prepare('INSERT INTO feature (name, uniquename, type_id, organism_id, dbxref_id) VALUES (:feature_domain_name, :feature_domain_unique, :type_id, :organism_id, :dbxref_id)');
109:             $statement_insert_feature_domain->bindValue('type_id', CV_ANNOTATION_INTERPRO, PDO::PARAM_INT);
110:             $statement_insert_feature_domain->bindValue('organism_id', DB_ORGANISM_ID, PDO::PARAM_INT);
111:             $statement_insert_feature_domain->bindValue('dbxref_id', $import_prefix_id, PDO::PARAM_INT);
112:             $statement_insert_feature_domain->bindParam('feature_domain_name', $param_feature_domain_name, PDO::PARAM_STR);
113:             $statement_insert_feature_domain->bindParam('feature_domain_unique', $param_feature_domain_uniq, PDO::PARAM_STR);
114: 
115:             //statement to insert featureloc to link parent feature with newly inserted Domain
116:             $statement_insert_featureloc = $db->prepare(sprintf('INSERT INTO featureloc (fmin, fmax, strand, feature_id, srcfeature_id) VALUES (:fmin, :fmax, :strand, currval(\'feature_feature_id_seq\'), (%s))', 'SELECT feature_id FROM feature WHERE uniquename=:srcfeature_uniquename AND organism_id=:organism  LIMIT 1'));
117:             $statement_insert_featureloc->bindParam('fmin', $param_domain_fmin, PDO::PARAM_INT);
118:             $statement_insert_featureloc->bindParam('fmax', $param_domain_fmax, PDO::PARAM_INT);
119:             $statement_insert_featureloc->bindValue('strand', 1, PDO::PARAM_INT);
120:             $statement_insert_featureloc->bindParam('srcfeature_uniquename', $param_feature_uniq, PDO::PARAM_STR);
121:             $statement_insert_featureloc->bindValue('organism', DB_ORGANISM_ID, PDO::PARAM_INT);
122: 
123:             /**
124:              * link domain feature to analysis with evalue. create analysis if non-existant
125:              */
126:             $statement_insert_analysisfeature = $db->prepare('INSERT INTO analysisfeature (analysis_id, feature_id, significance) VALUES (get_or_insert_analysis(:name, :program, :version, :source) ,currval(\'feature_feature_id_seq\'), :significance)');
127:             $statement_insert_analysisfeature->bindValue('name', 'Interpro Analysis', PDO::PARAM_STR);
128:             $statement_insert_analysisfeature->bindValue('program', 'Interpro', PDO::PARAM_STR);
129:             $statement_insert_analysisfeature->bindValue('version', $interpro_version, PDO::PARAM_STR);
130:             $statement_insert_analysisfeature->bindParam('source', $param_source_name, PDO::PARAM_STR);
131:             $statement_insert_analysisfeature->bindParam('significance', $param_evalue, PDO::PARAM_STR);
132: 
133:             /**
134:              * add textual domain feature annotations
135:              */
136:             $statement_insert_featureprop = $db->prepare('INSERT INTO featureprop (feature_id, type_id, value) VALUES (currval(\'feature_feature_id_seq\'), :type_id, :value)');
137:             $statement_insert_featureprop->bindParam(':type_id', $param_featureprop_type, PDO::PARAM_INT);
138:             $statement_insert_featureprop->bindParam(':value', $param_featureprop_value, PDO::PARAM_STR);
139: 
140:             /**
141:              * link domain to GO
142:              */
143:             $statement_insert_feature_dbxref = $db->prepare('INSERT INTO feature_dbxref (feature_id, dbxref_id) VALUES (currval(\'feature_feature_id_seq\'), get_or_insert_dbxref(:dbname, :accession))');
144:             $statement_insert_feature_dbxref->bindParam('accession', $param_accession, PDO::PARAM_STR);
145:             $statement_insert_feature_dbxref->bindParam('dbname', $param_dbname, PDO::PARAM_STR);
146: 
147:             $file = fopen($filename, 'r');
148:             while (($line = trim(fgets($file))) != false) {
149:                 $match = array();
150:                 // see if line matches RegExp, else skip
151:                 preg_match(self::$regex, $line, $match);
152:                 if (count($match) == 0) {
153:                     self::$log->log(sprintf("line does not match, skipping:\n\t" . $line), PEAR_LOG_NOTICE);
154:                     continue;
155:                 }
156: 
157: 
158:                 // set params for statements
159:                 // available matches, see RegEx
160:                 $param_source_name = $match['analysisMethod'];
161:                 $param_domain_fmin = $match['domStart'];
162:                 $param_domain_fmax = $match['domEnd'];
163:                 $param_evalue = $match['eValue'];
164: 
165:                 //more complex parameters
166:                 $param_feature = Importer_Sequences_FASTA::prepare_predpep_name($match['feature'], $match['pepStart'], $match['pepEnd'], $match['pepStrand']);
167:                 $param_feature_uniq = IMPORT_PREFIX . "_" . $param_feature;
168:                 $param_feature_domain_name = sprintf('%s_%s_%s_%s', $param_feature, $match['analysisMatchID'], $param_domain_fmin, $param_domain_fmax);
169:                 $param_feature_domain_uniq = IMPORT_PREFIX . "_" . $param_feature_domain_name;
170: 
171:                 //insert domain feature
172:                 $statement_insert_feature_domain->execute();
173:                 //link with parent feature
174:                 $statement_insert_featureloc->execute();
175: 
176:                 if ($param_evalue == 'NA')
177:                     $param_evalue = NULL;
178: 
179:                 //link domain to analysis with evalue
180:                 $statement_insert_analysisfeature->execute();
181: 
182:                 //add interpro ID as textual annoation of type CV_INTERPRO_ID
183:                 if ($match['interproID'] != "NULL") {
184:                     $param_featureprop_type = CV_INTERPRO_ID;
185:                     $param_featureprop_value = $match['interproID'];
186: 
187:                     $statement_insert_featureprop->execute();
188:                     $interpro_ids_added++;
189:                 }
190: 
191:                 //add analysis match id as textual annotation of type CV_INTERPRO_ANALYSIS_MATCH_ID
192:                 if ($match['analysisMatchID'] != null) {
193:                     $param_featureprop_type = CV_INTERPRO_ANALYSIS_MATCH_ID;
194:                     $param_featureprop_value = $match['analysisMatchID'];
195:                     $statement_insert_featureprop->execute();
196: 
197:                     //add analysis match description as textual annotation of type CV_INTERPRO_ANALYSIS_MATCH_DESCRIPTION
198:                     if (isset($match['analysisMatchDescription']) && !empty($match['analysisMatchDescription'])) {
199:                         $param_featureprop_type = CV_INTERPRO_ANALYSIS_MATCH_DESCRIPTION;
200:                         $param_featureprop_value = $match['analysisMatchDescription'];
201:                         $statement_insert_featureprop->execute();
202:                     }
203:                 }
204: 
205:                 // if line contains GOs
206:                 if (isset($match['interproGOs']) && $match['interproGOs'] != "NULL") {
207:                     $go_matches = array();
208:                     preg_match_all('/[\s,]*(?<description>.*?)\((?<dbname>\w+):(?<accession>\w+)\)/', $match['interproGOs'], $go_matches);
209:                     //for all GO matches
210:                     for ($i = 0; $i < count($go_matches[0]); $i++) {
211:                         $param_dbname = $go_matches['dbname'][$i];
212:                         $param_accession = $go_matches['accession'][$i];
213:                         // link domain to dbxref
214:                         $statement_insert_feature_dbxref->execute();
215:                         $dbxrefs_added++;
216:                     }
217:                 }
218: 
219:                 self::updateProgress(++$lines_imported);
220:             }
221:             self::preCommitMsg();
222:             if (!$db->commit()) {
223:                 $err = $db->errorInfo();
224:                 throw new ErrorException($err[2], ERRCODE_TRANSACTION_NOT_COMPLETED, 1);
225:             }
226:         } catch (\Exception $error) {
227:             $db->rollback();
228:             throw $error;
229:         }
230:         return array(LINES_IMPORTED => $lines_imported, 'interpro_ids_added' => $interpro_ids_added, 'dbxrefs_added' => $dbxrefs_added);
231:     }
232: 
233:     /**
234:      * @inheritDoc
235:      */
236:     public static function CLI_getCommand(\Console_CommandLine $parser) {
237:         $command = parent::CLI_getCommand($parser);
238:         $command->addOption('interpro_version', array(
239:             'short_name' => '-i',
240:             'long_name' => '--interpro_version',
241:             'description' => 'interpro version'
242:         ));
243:     }
244: 
245:     /**
246:      * @inheritDoc
247:      */
248:     public static function CLI_checkRequiredOpts(\Console_CommandLine_Result $command) {
249:         parent::CLI_checkRequiredOpts($command);
250:         $options = $command->options;
251:         AbstractImporter::dieOnMissingArg($options, 'interpro_version');
252:     }
253: 
254:     /**
255:      * @inheritDoc
256:      */
257:     public static function CLI_commandDescription() {
258:         return "Interpro Output Importer";
259:     }
260: 
261:     /**
262:      * @inheritDoc
263:      */
264:     public static function CLI_commandName() {
265:         return 'annotation_interpro';
266:     }
267: 
268:     /**
269:      * @inheritDoc
270:      */
271:     public static function CLI_longHelp() {
272:         return <<<EOF
273:    
274: \033[0;31mThis import requires a successful Sequence ID Import!\033[0m
275: \033[0;31mThis import requires a successful Sequence FASTA Import!\033[0m
276: EOF;
277:     }
278: 
279: }
280: 
281: ?>
282: 
tbro API documentation generated by ApiGen 2.8.0