Overview

Namespaces

  • cli_db
    • propel
      • map
      • om
  • cli_import
  • LoggedPDO
  • None
  • PHP
  • webservices
    • cart
    • combisearch
    • details
      • annotations
        • feature
    • graphs
      • barplot
      • genome
    • listing
    • queue

Classes

  • AbstractImporter
  • Importer_Annotations_Dbxref
  • Importer_Annotations_Description
  • Importer_Annotations_EC
  • Importer_Annotations_GO
  • Importer_Annotations_Interpro
  • Importer_Annotations_MapMan
  • Importer_Annotations_Repeatmasker
  • Importer_Differential_Expressions
  • Importer_Expressions
  • Importer_Sequence_Ids
  • Importer_Sequences_FASTA

Interfaces

  • Importer

Navigation

  • Overview
  • Namespace
  • Class
  • Tree

  1: <?php
  2: 
  3: namespace cli_import;
  4: 
  5: require_once ROOT . 'classes/AbstractImporter.php';
  6: require_once ROOT . 'commands/Importer_Sequence_Ids.php';
  7: 
  8: /**
  9:  * importer for fasta files. created predicted peptides.
 10:  */
 11: class Importer_Sequences_FASTA extends AbstractImporter {
 12: 
 13:     /**
 14:      * reads the next fasta sequence from file handle $fasta_handle and returns a list of description and sequence (without whitespace and newlines)
 15:      * @param resource $fasta_handle
 16:      * @return list($description,$sequence)
 17:      * @throws ErrorException with ErrorMsg ERRCODE_ILLEGAL_FILE_FORMAT: next non-empty line has to start with '>'
 18:      */
 19:     static function read_fasta($fasta_handle) {
 20:         $description = '';
 21:         while (empty($description) && !feof($fasta_handle))
 22:             $description = trim(fgets($fasta_handle));
 23:         if (strpos($description, '>') !== 0)
 24:             throw new ErrorException(ERR_ILLEGAL_FILE_FORMAT);
 25: 
 26: 
 27:         $sequence = '';
 28:         while (!feof($fasta_handle)) {
 29:             $pos = ftell($fasta_handle);
 30:             $line = fgets($fasta_handle);
 31:             if (strpos($line, '>') === 0) {
 32:                 fseek($fasta_handle, $pos, SEEK_SET);
 33:                 break;
 34:             }
 35:             $sequence .= trim($line);
 36:         }
 37:         return array($description, $sequence);
 38:     }
 39: 
 40:     /**
 41:      * Converts values to String that would be stored as Name (Suffix of UniqueName) in DB
 42:      * @param string $isoform_name
 43:      * @param int $left
 44:      * @param int $right
 45:      * @param char $direction [+-]
 46:      * @return string
 47:      */
 48:     static function prepare_predpep_name($isoform_name, $left, $right, $direction) {
 49:         return $isoform_name . ':' . ($direction == '+' ? "$left-$right" : "$right-$left");
 50:     }
 51: 
 52:     /**
 53:      * @inheritDoc
 54:      */
 55:     static function import($options) {
 56:         $filename = $options['file'];
 57:         $lines_total = trim(`grep -c '>' $filename`);
 58:         self::setLineCount($lines_total);
 59: 
 60:         global $db;
 61:         $lines_imported = 0;
 62:         $isoforms_updated = 0;
 63:         $predpeps_added = 0;
 64: 
 65:         #pre-initialize variables to bind statement parameters
 66:         $param_isoform_uniq = null;
 67:         $param_isoform_seqlen = null;
 68:         $param_isoform_residues = null;
 69: 
 70:         $param_predpep_name = null;
 71:         $param_predpep_uniq = null;
 72:         $param_predpep_seqlen = null;
 73:         $param_predpep_residues = null;
 74:         $param_predpep_feature_id = null;
 75:         $param_predpep_fmin = null;
 76:         $param_predpep_fmax = null;
 77:         $param_predpep_strand = null;
 78:         $param_predpep_srcfeature_uniq = null;
 79: 
 80:         try {
 81:             $db->beginTransaction();
 82:             $import_prefix_id = Importer_Sequence_Ids::get_import_dbxref();
 83:             # prepare statements
 84:             #
 85:             #insert sequence into existing isoform
 86:             $statement_update_isoform = $db->prepare('UPDATE feature SET (seqlen, residues) = (:seqlen, :residues) WHERE uniquename=:uniquename AND organism_id=:organism RETURNING feature_id');
 87:             $statement_update_isoform->bindParam('uniquename', $param_isoform_uniq, PDO::PARAM_STR);
 88:             $statement_update_isoform->bindParam('seqlen', $param_isoform_seqlen, PDO::PARAM_INT);
 89:             $statement_update_isoform->bindParam('residues', $param_isoform_residues, PDO::PARAM_STR);
 90:             $statement_update_isoform->bindValue('organism', DB_ORGANISM_ID, PDO::PARAM_INT);
 91: 
 92: 
 93:             #create predicted peptide
 94:             $statement_insert_predpep = $db->prepare('INSERT INTO feature  (type_id, organism_id, name, uniquename, seqlen, residues, dbxref_id) '
 95:                     . 'VALUES (:type_id, :organism_id, :name, :uniquename, :seqlen, :residues, :dbxref_id) RETURNING feature_id');
 96:             $statement_insert_predpep->bindValue('type_id', CV_PREDPEP, PDO::PARAM_INT);
 97:             $statement_insert_predpep->bindValue('organism_id', DB_ORGANISM_ID, PDO::PARAM_INT);
 98:             $statement_insert_predpep->bindParam('name', $param_predpep_name, PDO::PARAM_STR);
 99:             $statement_insert_predpep->bindParam('uniquename', $param_predpep_uniq, PDO::PARAM_STR);
100:             $statement_insert_predpep->bindParam('seqlen', $param_predpep_seqlen, PDO::PARAM_INT);
101:             $statement_insert_predpep->bindParam('residues', $param_predpep_residues, PDO::PARAM_STR);
102:             $statement_insert_predpep->bindValue('dbxref_id', $import_prefix_id, PDO::PARAM_INT);
103: 
104:             #link predpep to parent isoform
105:             $statement_insert_predpep_location = $db->prepare(sprintf('INSERT INTO featureloc (fmin, fmax, strand, feature_id, srcfeature_id) VALUES (:fmin, :fmax, :strand, :feature_id, (%s))', 'SELECT feature_id FROM feature WHERE uniquename=:srcfeature_uniquename LIMIT 1'));
106:             $statement_insert_predpep_location->bindParam('fmin', $param_predpep_fmin, PDO::PARAM_INT);
107:             $statement_insert_predpep_location->bindParam('fmax', $param_predpep_fmax, PDO::PARAM_INT);
108:             $statement_insert_predpep_location->bindParam('strand', $param_predpep_strand, PDO::PARAM_INT);
109:             $statement_insert_predpep_location->bindParam('feature_id', $param_predpep_feature_id, PDO::PARAM_INT);
110:             $statement_insert_predpep_location->bindParam('srcfeature_uniquename', $param_predpep_srcfeature_uniq, PDO::PARAM_STR);
111: 
112:             #read file and execute statements
113: 
114:             $file = fopen($filename, 'r');
115:             while (!feof($file)) {
116:                 #read next fasta entry
117:                 list($description, $sequence) = self::read_fasta($file);
118: 
119:                 $matches = array();
120:                 #predicted peptide header like this:
121:                 #>m.1812924 g.1812924  ORF g.1812924 m.1812924 type:5prime_partial len:376 (+) comp224705_c0_seq18:3-1130(+)
122:                 if (preg_match('/^>(?<id>[^\s]+) .* (?<name>\w+):(?<from>\d+)-(?<to>\d+)\((?<dir>[+-])\)$/', $description, $matches)) {
123:                     $param_predpep_name = $matches['id'];
124:                     $param_predpep_uniq = IMPORT_PREFIX . "_" . self::prepare_predpep_name($matches['name'], $matches['from'], $matches['to'], $matches['dir']);
125:                     $param_predpep_seqlen = strlen($sequence);
126:                     $param_predpep_residues = $sequence;
127:                     //create predpep
128:                     $statement_insert_predpep->execute();
129:                     //link to parent feature
130:                     $param_predpep_feature_id = $statement_insert_predpep->fetchColumn();
131:                     $param_predpep_srcfeature_uniq = IMPORT_PREFIX . "_" . $matches['name'];
132:                     $param_predpep_fmin = min($matches['from'], $matches['to']);
133:                     $param_predpep_fmax = max($matches['from'], $matches['to']);
134:                     $param_predpep_strand = $matches['dir'] == '+' ? 1 : -1;
135:                     $statement_insert_predpep_location->execute();
136:                     $predpeps_added+=$statement_insert_predpep->rowCount();
137:                 }
138: 
139:                 #isoform header like this:
140:                 #>comp173079_c0_seq1 len=2161 path=[2139:0-732 2872:733-733 2873:734-1159 3299:1160-1160 3300:1161-1513 3653:1514-1517 3657:1518-2160]
141:                 else if (preg_match('/^>(?<name>[^\s]+) .*$/', $description, $matches)) {
142:                     $param_isoform_uniq = IMPORT_PREFIX . "_" . $matches['name'];
143:                     $param_isoform_seqlen = strlen($sequence);
144:                     $param_isoform_residues = $sequence;
145:                     //update isoform with values
146:                     $statement_update_isoform->execute();
147: 
148:                     $isoforms_updated+=$statement_update_isoform->rowCount();
149:                 }
150: 
151: 
152:                 self::updateProgress(++$lines_imported);
153:             }
154:             self::preCommitMsg();
155:             if (!$db->commit()) {
156:                 $err = $db->errorInfo();
157:                 throw new ErrorException($err[2], ERRCODE_TRANSACTION_NOT_COMPLETED, 1);
158:             }
159:         } catch (\Exception $error) {
160:             $db->rollback();
161:             throw $error;
162:         }
163:         return array(LINES_IMPORTED => $lines_imported, 'isoforms_updated' => $isoforms_updated, 'predpeps_added' => $predpeps_added);
164:     }
165: 
166:     /**
167:      * @inheritDoc
168:      */
169:     public static function CLI_commandDescription() {
170:         return "Sequence File Importer";
171:     }
172: 
173:     /**
174:      * @inheritDoc
175:      */
176:     public static function CLI_commandName() {
177:         return 'sequences_fasta';
178:     }
179: 
180:     /**
181:      * @inheritDoc
182:      */
183:     public static function CLI_longHelp() {
184:         return <<<EOF
185:    
186: File Format has to be a typical fasta file.
187: isoform headers have to look like
188: >comp173079_c0_seq1 <comment>
189: 
190: predpep headers have to look like
191: >m.1812924 <comments> comp173079_c0_seq1:3-1130(+)
192: 
193: \033[0;31mThis import requires a successful Sequence ID Import for the isoforms that should be imported!\033[0m
194: EOF;
195:     }
196: 
197: }
198: 
199: ?>
200: 
tbro API documentation generated by ApiGen 2.8.0