1: <?php
2:
3: namespace cli_import;
4:
5: require_once ROOT . 'classes/AbstractImporter.php';
6: require_once ROOT . 'commands/Importer_Sequence_Ids.php';
7: require_once ROOT . 'commands/Importer_Sequences_FASTA.php';
8:
9: 10: 11:
/**
 * Importer for Interpro annotation output.
 *
 * Reads a tab-separated result file line by line and, for every line that
 * matches self::$regex, inserts a domain feature together with its location,
 * analysis link, properties and database cross references.
 */
class Importer_Annotations_Interpro extends AbstractImporter {

    /**
     * Pattern (extended mode, so whitespace in the pattern is ignored) that
     * splits one tab-separated input line into named groups.
     *
     * The groups analysisMatchDescription and interproGOs are optional and
     * are therefore absent from the match array when the column is missing.
     */
    private static $regex = <<<EOF
{^
(?<feature>\w+)
[\t] (?<pepStart>\d+)
[\t] (?<pepEnd>\d+)
[\t] (?<pepStrand>[+-])
[\t] (?<checksum>\w+)
[\t] (?<length>\d+)
[\t] (?<analysisMethod>\w+)
[\t] (?<analysisMatchID>.*?)
(?:[\t] (?<analysisMatchDescription>.*))?
[\t] (?<domStart>\d+)
[\t] (?<domEnd>\d+)
[\t] (?<eValue>(?:NA|\d+(?:\.\d+)?(?:[Ee][+-]\d+)?))
[\t] (?<status>[T?])
[\t] (?<timeexecuted>[\w-]*)
[\t] (?<interproID>\w*)
[\t] (?<interproDesc>.*?)
(?:[\t] (?<interproGOs>.*))?
$}x
EOF;

    /**
     * Imports all matching lines of the given file inside one transaction.
     *
     * Lines that do not match self::$regex are logged and skipped; blank
     * lines are skipped silently. On any exception the transaction is rolled
     * back and the exception is re-thrown.
     *
     * @param array $options reads the keys 'file' (path of the input file)
     *                       and 'interpro_version' (stored as analysis version)
     * @return array counters: LINES_IMPORTED, 'interpro_ids_added', 'dbxrefs_added'
     * @throws \RuntimeException if the input file cannot be opened
     * @throws \ErrorException   if the transaction cannot be committed
     */
    public static function import($options) {
        $filename = $options['file'];
        $interpro_version = $options['interpro_version'];

        // The file name is passed to a shell, so it has to be escaped.
        $lines_total = trim(shell_exec('wc -l ' . escapeshellarg($filename) . " | cut -d' ' -f1"));
        self::setLineCount($lines_total);

        global $db;
        $lines_imported = 0;
        $interpro_ids_added = 0;
        $dbxrefs_added = 0;
        $file = null;

        try {
            $db->beginTransaction();
            $import_prefix_id = Importer_Sequence_Ids::get_import_dbxref();

            // Variables bound by reference to the prepared statements below;
            // they are re-assigned for every line before execute() is called.
            $param_feature_uniq = null;
            $param_feature_domain_name = null;
            $param_feature_domain_uniq = null;
            $param_domain_fmin = null;
            $param_domain_fmax = null;
            $param_source_name = null;
            $param_evalue = null;
            $param_featureprop_type = null;
            $param_featureprop_value = null;
            $param_accession = null;
            $param_dbname = null;

            // Inside this namespace the PDO class must be referenced as \PDO;
            // unqualified class names do not fall back to the global namespace.

            // Domain feature itself.
            $statement_insert_feature_domain = $db->prepare('INSERT INTO feature (name, uniquename, type_id, organism_id, dbxref_id) VALUES (:feature_domain_name, :feature_domain_unique, :type_id, :organism_id, :dbxref_id)');
            $statement_insert_feature_domain->bindValue('type_id', CV_ANNOTATION_INTERPRO, \PDO::PARAM_INT);
            $statement_insert_feature_domain->bindValue('organism_id', DB_ORGANISM_ID, \PDO::PARAM_INT);
            $statement_insert_feature_domain->bindValue('dbxref_id', $import_prefix_id, \PDO::PARAM_INT);
            $statement_insert_feature_domain->bindParam('feature_domain_name', $param_feature_domain_name, \PDO::PARAM_STR);
            $statement_insert_feature_domain->bindParam('feature_domain_unique', $param_feature_domain_uniq, \PDO::PARAM_STR);

            // Location of the domain on its source feature. All following
            // statements refer to the feature inserted last via currval().
            $statement_insert_featureloc = $db->prepare(sprintf('INSERT INTO featureloc (fmin, fmax, strand, feature_id, srcfeature_id) VALUES (:fmin, :fmax, :strand, currval(\'feature_feature_id_seq\'), (%s))', 'SELECT feature_id FROM feature WHERE uniquename=:srcfeature_uniquename AND organism_id=:organism LIMIT 1'));
            $statement_insert_featureloc->bindParam('fmin', $param_domain_fmin, \PDO::PARAM_INT);
            $statement_insert_featureloc->bindParam('fmax', $param_domain_fmax, \PDO::PARAM_INT);
            $statement_insert_featureloc->bindValue('strand', 1, \PDO::PARAM_INT);
            $statement_insert_featureloc->bindParam('srcfeature_uniquename', $param_feature_uniq, \PDO::PARAM_STR);
            $statement_insert_featureloc->bindValue('organism', DB_ORGANISM_ID, \PDO::PARAM_INT);

            // Link between the domain feature and the analysis that produced it.
            $statement_insert_analysisfeature = $db->prepare('INSERT INTO analysisfeature (analysis_id, feature_id, significance) VALUES (get_or_insert_analysis(:name, :program, :version, :source) ,currval(\'feature_feature_id_seq\'), :significance)');
            $statement_insert_analysisfeature->bindValue('name', 'Interpro Analysis', \PDO::PARAM_STR);
            $statement_insert_analysisfeature->bindValue('program', 'Interpro', \PDO::PARAM_STR);
            $statement_insert_analysisfeature->bindValue('version', $interpro_version, \PDO::PARAM_STR);
            $statement_insert_analysisfeature->bindParam('source', $param_source_name, \PDO::PARAM_STR);
            $statement_insert_analysisfeature->bindParam('significance', $param_evalue, \PDO::PARAM_STR);

            // Generic property of the domain feature (type and value vary).
            $statement_insert_featureprop = $db->prepare('INSERT INTO featureprop (feature_id, type_id, value) VALUES (currval(\'feature_feature_id_seq\'), :type_id, :value)');
            $statement_insert_featureprop->bindParam(':type_id', $param_featureprop_type, \PDO::PARAM_INT);
            $statement_insert_featureprop->bindParam(':value', $param_featureprop_value, \PDO::PARAM_STR);

            // Database cross reference of the domain feature.
            $statement_insert_feature_dbxref = $db->prepare('INSERT INTO feature_dbxref (feature_id, dbxref_id) VALUES (currval(\'feature_feature_id_seq\'), get_or_insert_dbxref(:dbname, :accession))');
            $statement_insert_feature_dbxref->bindParam('accession', $param_accession, \PDO::PARAM_STR);
            $statement_insert_feature_dbxref->bindParam('dbname', $param_dbname, \PDO::PARAM_STR);

            $file = fopen($filename, 'r');
            if ($file === false) {
                throw new \RuntimeException(sprintf('could not open file for reading: %s', $filename));
            }

            // Compare the raw fgets() result strictly against false so that
            // only EOF ends the loop; a blank line (or a line consisting of
            // "0") must not abort the import.
            while (($raw_line = fgets($file)) !== false) {
                $line = trim($raw_line);
                if ($line === '') {
                    continue;
                }

                $match = array();
                preg_match(self::$regex, $line, $match);
                if (count($match) == 0) {
                    // The line is passed as an argument, never as the format
                    // string, because it may contain '%' characters.
                    self::$log->log(sprintf("line does not match, skipping:\n\t%s", $line), PEAR_LOG_NOTICE);
                    continue;
                }

                $param_source_name = $match['analysisMethod'];
                $param_domain_fmin = $match['domStart'];
                $param_domain_fmax = $match['domEnd'];
                $param_evalue = $match['eValue'];

                // Names of the source feature and of the new domain feature.
                $param_feature = Importer_Sequences_FASTA::prepare_predpep_name($match['feature'], $match['pepStart'], $match['pepEnd'], $match['pepStrand']);
                $param_feature_uniq = IMPORT_PREFIX . "_" . $param_feature;
                $param_feature_domain_name = sprintf('%s_%s_%s_%s', $param_feature, $match['analysisMatchID'], $param_domain_fmin, $param_domain_fmax);
                $param_feature_domain_uniq = IMPORT_PREFIX . "_" . $param_feature_domain_name;

                $statement_insert_feature_domain->execute();
                $statement_insert_featureloc->execute();

                // 'NA' means "no e-value"; it is stored as SQL NULL.
                if ($param_evalue === 'NA') {
                    $param_evalue = null;
                }
                $statement_insert_analysisfeature->execute();

                // Interpro ID: the regex also accepts an empty column, which
                // is no ID and must neither be stored nor counted.
                if ($match['interproID'] !== '' && $match['interproID'] !== 'NULL') {
                    $param_featureprop_type = CV_INTERPRO_ID;
                    $param_featureprop_value = $match['interproID'];
                    $statement_insert_featureprop->execute();
                    $interpro_ids_added++;
                }

                // Match ID of the analysis and, if present, its description.
                if ($match['analysisMatchID'] !== '') {
                    $param_featureprop_type = CV_INTERPRO_ANALYSIS_MATCH_ID;
                    $param_featureprop_value = $match['analysisMatchID'];
                    $statement_insert_featureprop->execute();

                    if (!empty($match['analysisMatchDescription'])) {
                        $param_featureprop_type = CV_INTERPRO_ANALYSIS_MATCH_DESCRIPTION;
                        $param_featureprop_value = $match['analysisMatchDescription'];
                        $statement_insert_featureprop->execute();
                    }
                }

                // Every "description (DBNAME:ACCESSION)" entry of the last
                // column becomes one database cross reference.
                if (isset($match['interproGOs']) && $match['interproGOs'] !== 'NULL') {
                    $go_matches = array();
                    preg_match_all('/[\s,]*(?<description>.*?)\((?<dbname>\w+):(?<accession>\w+)\)/', $match['interproGOs'], $go_matches);

                    $go_count = count($go_matches[0]);
                    for ($i = 0; $i < $go_count; $i++) {
                        $param_dbname = $go_matches['dbname'][$i];
                        $param_accession = $go_matches['accession'][$i];

                        $statement_insert_feature_dbxref->execute();
                        $dbxrefs_added++;
                    }
                }

                self::updateProgress(++$lines_imported);
            }
            fclose($file);

            self::preCommitMsg();
            if (!$db->commit()) {
                $err = $db->errorInfo();
                throw new \ErrorException($err[2], ERRCODE_TRANSACTION_NOT_COMPLETED, 1);
            }
        } catch (\Exception $error) {
            // Release the file handle if the failure happened while reading.
            if (is_resource($file)) {
                fclose($file);
            }
            $db->rollback();
            throw $error;
        }

        return array(LINES_IMPORTED => $lines_imported, 'interpro_ids_added' => $interpro_ids_added, 'dbxrefs_added' => $dbxrefs_added);
    }

    /**
     * Extends the command created by the parent class with the
     * --interpro_version option.
     *
     * @param \Console_CommandLine $parser parser handed on to the parent
     * @return mixed the command obtained from parent::CLI_getCommand()
     */
    public static function CLI_getCommand(\Console_CommandLine $parser) {
        $command = parent::CLI_getCommand($parser);
        $command->addOption('interpro_version', array(
            'short_name' => '-i',
            'long_name' => '--interpro_version',
            'description' => 'interpro version'
        ));
        return $command;
    }

    /**
     * Runs the parent checks and additionally requires 'interpro_version'.
     *
     * @param \Console_CommandLine_Result $command parsed command whose
     *                                             options are checked
     */
    public static function CLI_checkRequiredOpts(\Console_CommandLine_Result $command) {
        parent::CLI_checkRequiredOpts($command);
        $options = $command->options;
        AbstractImporter::dieOnMissingArg($options, 'interpro_version');
    }

    /**
     * @return string short description of this command
     */
    public static function CLI_commandDescription() {
        return "Interpro Output Importer";
    }

    /**
     * @return string name of this command
     */
    public static function CLI_commandName() {
        return 'annotation_interpro';
    }

    /**
     * @return string help text naming the imports that must have succeeded
     *                before this one (wrapped in ANSI red colour codes)
     */
    public static function CLI_longHelp() {
        return <<<EOF

\033[0;31mThis import requires a successful Sequence ID Import!\033[0m
\033[0;31mThis import requires a successful Sequence FASTA Import!\033[0m
EOF;
    }

}
280:
281: ?>
282: