1: <?php
2:
3: namespace cli_import;
4:
5: require_once ROOT . 'classes/AbstractImporter.php';
6: require_once ROOT . 'commands/Importer_Sequence_Ids.php';
7:
8: 9: 10:
/**
 * Importer for sequence data stored in FASTA files.
 *
 * Two kinds of records are recognised by their header line (see CLI_longHelp()):
 *  - isoform records update seqlen/residues of an already existing feature,
 *  - predicted peptide ("predpep") records insert a new feature plus its
 *    location on the isoform it was predicted from.
 */
class Importer_Sequences_FASTA extends AbstractImporter {

    /**
     * Reads the next FASTA record from an open file handle.
     *
     * Leading empty lines are skipped. The first non-empty line must start with
     * '>' and is returned (trimmed, including the '>') as the description. All
     * following lines up to the next header line or end of file are trimmed and
     * concatenated into the sequence. When a following header line is hit, the
     * handle is rewound to its start so the next call reads it again; the handle
     * therefore has to be seekable.
     *
     * @param resource $fasta_handle open, readable and seekable file handle
     * @return array array($description, $sequence)
     * @throws \ErrorException with message ERR_ILLEGAL_FILE_FORMAT if the first
     *         non-empty line does not start with '>' (this includes the case
     *         that only empty lines are left in the file)
     */
    public static function read_fasta($fasta_handle) {
        $description = '';
        while (empty($description) && !feof($fasta_handle)) {
            // fgets() returns false at EOF; the cast keeps trim() on a string
            $description = trim((string) fgets($fasta_handle));
        }
        if (strpos($description, '>') !== 0) {
            // fully qualified: unqualified class names do not fall back to the
            // global namespace from within namespace cli_import
            throw new \ErrorException(ERR_ILLEGAL_FILE_FORMAT);
        }

        $sequence = '';
        while (!feof($fasta_handle)) {
            // remember where this line starts so a header can be "un-read"
            $pos = ftell($fasta_handle);
            $line = (string) fgets($fasta_handle);
            if (strpos($line, '>') === 0) {
                // next record begins here: rewind and leave it for the next call
                fseek($fasta_handle, $pos, SEEK_SET);
                break;
            }
            $sequence .= trim($line);
        }
        return array($description, $sequence);
    }

    /**
     * Builds the name part of a predicted peptide's uniquename.
     *
     * @param string $isoform_name name of the isoform the peptide lies on
     * @param string|int $left first coordinate as given in the header
     * @param string|int $right second coordinate as given in the header
     * @param string $direction '+' keeps the order "left-right"; any other value
     *        swaps it to "right-left"
     * @return string "<isoform_name>:<a>-<b>"
     */
    public static function prepare_predpep_name($isoform_name, $left, $right, $direction) {
        return $isoform_name . ':' . ($direction == '+' ? "$left-$right" : "$right-$left");
    }

    /**
     * Imports all records of the FASTA file given in $options['file'].
     *
     * The whole import runs inside a single transaction on the global $db
     * connection; on any exception the transaction is rolled back and the
     * exception is rethrown. Records whose header matches neither the predpep
     * nor the isoform pattern are counted as processed but otherwise ignored.
     *
     * @param array $options must contain key 'file' (path of the FASTA file)
     * @return array counters keyed by LINES_IMPORTED, 'isoforms_updated' and
     *         'predpeps_added'
     * @throws \ErrorException if the file cannot be opened, has an illegal
     *         format, or the transaction cannot be committed
     * @throws \Exception anything raised by the database layer or helpers
     */
    public static function import($options) {
        $filename = $options['file'];

        // Open first so an unreadable file fails before the database is touched.
        // Without this check a failed fopen() ends in an endless feof() loop
        // (PHP < 8) or a TypeError (PHP 8).
        $file = fopen($filename, 'r');
        if ($file === false) {
            throw new \ErrorException(sprintf('Unable to open file "%s" for reading', $filename));
        }

        // Number of header lines, used for progress reporting only. The file name
        // comes from the command line, so it is escaped; "--" keeps a name that
        // starts with "-" from being parsed as a grep option.
        $lines_total = (int) trim((string) shell_exec("grep -c '>' -- " . escapeshellarg($filename)));
        self::setLineCount($lines_total);

        global $db;
        $lines_imported = 0;
        $isoforms_updated = 0;
        $predpeps_added = 0;

        // The statements below bind these variables by reference (bindParam),
        // so assigning to them inside the loop changes the executed values.
        $param_isoform_uniq = null;
        $param_isoform_seqlen = null;
        $param_isoform_residues = null;

        $param_predpep_name = null;
        $param_predpep_uniq = null;
        $param_predpep_seqlen = null;
        $param_predpep_residues = null;
        $param_predpep_feature_id = null;
        $param_predpep_fmin = null;
        $param_predpep_fmax = null;
        $param_predpep_strand = null;
        $param_predpep_srcfeature_uniq = null;

        try {
            $db->beginTransaction();
            $import_prefix_id = Importer_Sequence_Ids::get_import_dbxref();

            // isoform: set sequence data on an existing feature
            $statement_update_isoform = $db->prepare('UPDATE feature SET (seqlen, residues) = (:seqlen, :residues) WHERE uniquename=:uniquename AND organism_id=:organism RETURNING feature_id');
            $statement_update_isoform->bindParam('uniquename', $param_isoform_uniq, \PDO::PARAM_STR);
            $statement_update_isoform->bindParam('seqlen', $param_isoform_seqlen, \PDO::PARAM_INT);
            $statement_update_isoform->bindParam('residues', $param_isoform_residues, \PDO::PARAM_STR);
            $statement_update_isoform->bindValue('organism', DB_ORGANISM_ID, \PDO::PARAM_INT);

            // predicted peptide: insert a new feature ...
            $statement_insert_predpep = $db->prepare('INSERT INTO feature (type_id, organism_id, name, uniquename, seqlen, residues, dbxref_id) '
                    . 'VALUES (:type_id, :organism_id, :name, :uniquename, :seqlen, :residues, :dbxref_id) RETURNING feature_id');
            $statement_insert_predpep->bindValue('type_id', CV_PREDPEP, \PDO::PARAM_INT);
            $statement_insert_predpep->bindValue('organism_id', DB_ORGANISM_ID, \PDO::PARAM_INT);
            $statement_insert_predpep->bindParam('name', $param_predpep_name, \PDO::PARAM_STR);
            $statement_insert_predpep->bindParam('uniquename', $param_predpep_uniq, \PDO::PARAM_STR);
            $statement_insert_predpep->bindParam('seqlen', $param_predpep_seqlen, \PDO::PARAM_INT);
            $statement_insert_predpep->bindParam('residues', $param_predpep_residues, \PDO::PARAM_STR);
            $statement_insert_predpep->bindValue('dbxref_id', $import_prefix_id, \PDO::PARAM_INT);

            // ... and its location; the source feature is looked up by uniquename
            $statement_insert_predpep_location = $db->prepare(sprintf('INSERT INTO featureloc (fmin, fmax, strand, feature_id, srcfeature_id) VALUES (:fmin, :fmax, :strand, :feature_id, (%s))', 'SELECT feature_id FROM feature WHERE uniquename=:srcfeature_uniquename LIMIT 1'));
            $statement_insert_predpep_location->bindParam('fmin', $param_predpep_fmin, \PDO::PARAM_INT);
            $statement_insert_predpep_location->bindParam('fmax', $param_predpep_fmax, \PDO::PARAM_INT);
            $statement_insert_predpep_location->bindParam('strand', $param_predpep_strand, \PDO::PARAM_INT);
            $statement_insert_predpep_location->bindParam('feature_id', $param_predpep_feature_id, \PDO::PARAM_INT);
            $statement_insert_predpep_location->bindParam('srcfeature_uniquename', $param_predpep_srcfeature_uniq, \PDO::PARAM_STR);

            while (!feof($file)) {
                list($description, $sequence) = self::read_fasta($file);

                $matches = array();

                // The predpep pattern is tested first: a predpep header would
                // also match the more general isoform pattern.
                if (preg_match('/^>(?<id>[^\s]+) .* (?<name>\w+):(?<from>\d+)-(?<to>\d+)\((?<dir>[+-])\)$/', $description, $matches)) {
                    $param_predpep_name = $matches['id'];
                    $param_predpep_uniq = IMPORT_PREFIX . "_" . self::prepare_predpep_name($matches['name'], $matches['from'], $matches['to'], $matches['dir']);
                    $param_predpep_seqlen = strlen($sequence);
                    $param_predpep_residues = $sequence;

                    $statement_insert_predpep->execute();

                    // RETURNING feature_id: id of the row that was just inserted
                    $param_predpep_feature_id = $statement_insert_predpep->fetchColumn();
                    $param_predpep_srcfeature_uniq = IMPORT_PREFIX . "_" . $matches['name'];
                    // fmin/fmax are stored ordered; the orientation goes into strand
                    $param_predpep_fmin = min((int) $matches['from'], (int) $matches['to']);
                    $param_predpep_fmax = max((int) $matches['from'], (int) $matches['to']);
                    $param_predpep_strand = $matches['dir'] == '+' ? 1 : -1;
                    $statement_insert_predpep_location->execute();
                    $predpeps_added += $statement_insert_predpep->rowCount();
                } else if (preg_match('/^>(?<name>[^\s]+) .*$/', $description, $matches)) {
                    $param_isoform_uniq = IMPORT_PREFIX . "_" . $matches['name'];
                    $param_isoform_seqlen = strlen($sequence);
                    $param_isoform_residues = $sequence;

                    $statement_update_isoform->execute();

                    // stays 0 if no feature with that uniquename exists
                    $isoforms_updated += $statement_update_isoform->rowCount();
                }

                self::updateProgress(++$lines_imported);
            }
            fclose($file);

            self::preCommitMsg();
            if (!$db->commit()) {
                $err = $db->errorInfo();
                throw new \ErrorException($err[2], ERRCODE_TRANSACTION_NOT_COMPLETED, 1);
            }
        } catch (\Exception $error) {
            if (is_resource($file)) {
                fclose($file);
            }
            // Only roll back an open transaction; otherwise rollback() itself
            // throws and hides the original error (e.g. a failed beginTransaction()).
            if ($db->inTransaction()) {
                $db->rollback();
            }
            throw $error;
        }
        return array(LINES_IMPORTED => $lines_imported, 'isoforms_updated' => $isoforms_updated, 'predpeps_added' => $predpeps_added);
    }

    /**
     * Returns the short, human readable description of this command.
     *
     * @return string
     */
    public static function CLI_commandDescription() {
        return "Sequence File Importer";
    }

    /**
     * Returns the name of this command.
     *
     * @return string
     */
    public static function CLI_commandName() {
        return 'sequences_fasta';
    }

    /**
     * Returns the long help text describing the expected header formats.
     * The last line is wrapped in ANSI escape codes (red).
     *
     * @return string
     */
    public static function CLI_longHelp() {
        return <<<EOF

File Format has to be a typical fasta file.
isoform headers have to look like
>comp173079_c0_seq1 <comment>

predpep headers have to look like
>m.1812924 <comments> comp173079_c0_seq1:3-1130(+)

\033[0;31mThis import requires a successful Sequence ID Import for the isoforms that should be imported!\033[0m
EOF;
    }

}
198:
199: ?>
200: