Overview

Namespaces

  • cli_db
    • propel
      • map
      • om
  • cli_import
  • LoggedPDO
  • None
  • PHP
  • webservices
    • cart
    • combisearch
    • details
      • annotations
        • feature
    • graphs
      • barplot
      • genome
    • listing
    • queue

Classes

  • AbstractImporter
  • Importer_Annotations_Dbxref
  • Importer_Annotations_Description
  • Importer_Annotations_EC
  • Importer_Annotations_GO
  • Importer_Annotations_Interpro
  • Importer_Annotations_MapMan
  • Importer_Annotations_Repeatmasker
  • Importer_Differential_Expressions
  • Importer_Expressions
  • Importer_Sequence_Ids
  • Importer_Sequences_FASTA

Interfaces

  • Importer
  • Overview
  • Namespace
  • Class
  • Tree
  1: <?php
  2: 
  3: namespace cli_import;
  4: 
  5: require_once ROOT . 'classes/AbstractImporter.php';
  6: require_once ROOT . 'commands/Importer_Sequence_Ids.php';
  7: 
  8: /**
  9:  * importer for repeatmasker results
 10:  */
 11: class Importer_Annotations_Repeatmasker extends AbstractImporter {
 12: 
 13:     /**
 14:      * @inheritDoc
 15:      */
 16:     static function import($options) {
 17: 
 18:         $filename = $options['file'];
 19:         $lines_total = trim(`wc -l $filename | cut -d' ' -f1`);
 20:         self::setLineCount($lines_total);
 21: 
 22:         global $db;
 23: 
 24:         $regex = <<<EOF
 25: {^ 
 26: \d+[ ]
 27: # 1320     = Smith-Waterman score of the match, usually complexity adjusted
 28: \d+\.\d+[ ]
 29: # 15.6     = % divergence = mismatches/(matches+mismatches) **
 30: \d+\.\d+[ ]
 31: # 6.2      = % of bases opposite a gap in the query sequence (deleted bp)
 32: \d+\.\d+[ ]
 33: # 0.0      = % of bases opposite a gap in the repeat consensus (inserted bp)
 34: (?<name>\w+)[ ]
 35: # HSU08988 = name of query sequence
 36: (?<start>\d+)[ ]
 37: # 6563     = starting position of match in query sequence
 38: (?<end>\d+)[ ]
 39: # 6781     = ending position of match in query sequence
 40: \(\d+\)[ ]
 41: # (22462)  = no. of bases in query sequence past the ending position of match
 42: (?:[C+][ ])?
 43: # C        = match is with the Complement of the repeat consensus sequence
 44: (?<repeat_name>[\w()-?]+)\#
 45: # MER7A    = name of the matching interspersed repeat
 46: (?<repeat_class>[\w()-?]+)
 47: (?:/(?<repeat_family>[\w()-?]+))?[ ]
 48: # DNA/MER2_type = the class of the repeat, in this case a DNA transposon fossil of the MER2 group (see below for list and references)
 49: \(?\d+\)?[ ]
 50: # (0)      = no. of bases in (complement of) the repeat consensus sequence prior to beginning of the match (0 means that the match extended all the way to the end of the repeat consensus sequence)
 51: \(?\d+\)?[ ]
 52: # 337      = starting position of match in repeat consensus sequence
 53: \(?\d+\)?[ ]
 54: # 104      = ending position of match in repeat consensus sequence
 55: \d+
 56: # 20       = unique identifier for individual insertions    
 57: $}x
 58: EOF;
 59: 
 60:         $lines_imported = 0;
 61:         $families_added = 0;
 62: 
 63:         try {
 64:             $db->beginTransaction();
 65:             $import_prefix_id = Importer_Sequence_Ids::get_import_dbxref();
 66: 
 67:             #shared parameters
 68:             $param_name = null;
 69:             $param_uniquename = null;
 70:             $param_cvterm = null;
 71:             $param_value = null;
 72:             $param_fmin = null;
 73:             $param_fmax = null;
 74:             $param_srcfeature_uniq = null;
 75: 
 76:             //adds new feature
 77:             $statement_insert_repeat = $db->prepare('INSERT INTO feature (name, uniquename, type_id, organism_id, dbxref_id) VALUES (:name, :uniquename, :type_id, :organism_id, :dbxref_id)');
 78:             $statement_insert_repeat->bindValue('type_id', CV_ANNOTATION_REPEATMASKER, PDO::PARAM_INT);
 79:             $statement_insert_repeat->bindValue('organism_id', DB_ORGANISM_ID, PDO::PARAM_INT);
 80:             $statement_insert_repeat->bindParam('name', $param_name, PDO::PARAM_STR);
 81:             $statement_insert_repeat->bindParam('uniquename', $param_uniquename, PDO::PARAM_STR);
 82:             $statement_insert_repeat->bindValue('dbxref_id', $import_prefix_id, PDO::PARAM_INT);
 83: 
 84:             //binds repeat feature to parent feature
 85:             $statement_insert_featureloc = $db->prepare(sprintf('INSERT INTO featureloc (fmin, fmax, strand, feature_id, srcfeature_id) VALUES (:fmin, :fmax, :strand, currval(\'feature_feature_id_seq\'), (%s))', 'SELECT feature_id FROM feature WHERE uniquename=:srcfeature_uniquename AND organism_id=:organism LIMIT 1'));
 86:             $statement_insert_featureloc->bindParam('fmin', $param_fmin, PDO::PARAM_INT);
 87:             $statement_insert_featureloc->bindParam('fmax', $param_fmax, PDO::PARAM_INT);
 88:             $statement_insert_featureloc->bindValue('strand', 1, PDO::PARAM_INT);
 89:             $statement_insert_featureloc->bindParam('srcfeature_uniquename', $param_srcfeature_uniq, PDO::PARAM_STR);
 90:             $statement_insert_featureloc->bindValue('organism', DB_ORGANISM_ID, PDO::PARAM_INT);
 91: 
 92:             //adds textual annotation
 93:             $statement_annotate_domain = $db->prepare('INSERT INTO featureprop (feature_id, type_id, value) VALUES (currval(\'feature_feature_id_seq\'), :cvterm, :value)');
 94:             $statement_annotate_domain->bindParam('cvterm', $param_cvterm, PDO::PARAM_INT);
 95:             $statement_annotate_domain->bindParam('value', $param_value, PDO::PARAM_STR);
 96: 
 97:             $file = fopen($filename, 'r');
 98:             while (($line = trim(fgets($file))) != false) {
 99:                 $matches = null;
100: 
101:                 // see if line matches RegExp, else skip
102:                 if (preg_match($regex, $line, $matches) !== 1) {
103:                     self::$log->log(sprintf("line does not match, skipping:\n\t" . $line), PEAR_LOG_NOTICE);
104:                     continue;
105:                 } else {
106:                     $param_name = sprintf("%s(%d-%d):%s#%s(%s)"
107:                             , $matches['name']
108:                             , $matches['start']
109:                             , $matches['end']
110:                             , $matches['repeat_name']
111:                             , $matches['repeat_class']
112:                             , (isset($matches['repeat_family']) ? $matches['repeat_family'] : '')
113:                     );
114:                     $param_uniquename = IMPORT_PREFIX . "_" . $param_name;
115:                     //insert feature
116:                     $statement_insert_repeat->execute();
117: 
118: 
119:                     $param_srcfeature_uniq = IMPORT_PREFIX . "_" . $matches['name'];
120:                     $param_fmin = $matches['start'];
121:                     $param_fmax = $matches['end'];
122:                     //link to parent feature
123:                     $statement_insert_featureloc->execute();
124: 
125:                     //add repeat name annotation
126:                     $param_cvterm = CV_REPEAT_NAME;
127:                     $param_value = $matches['repeat_name'];
128:                     $statement_annotate_domain->execute();
129: 
130:                     //add repeat class annotation
131:                     $param_cvterm = CV_REPEAT_CLASS;
132:                     $param_value = $matches['repeat_class'];
133:                     $statement_annotate_domain->execute();
134: 
135:                     //if existant, add repeat family annotation
136:                     if (!empty($matches['repeat_family'])) {
137:                         $param_cvterm = CV_REPEAT_FAMILY;
138:                         $param_value = $matches['repeat_family'];
139:                         $statement_annotate_domain->execute();
140:                         $families_added++;
141:                     }
142: 
143: 
144:                     self::updateProgress(++$lines_imported);
145:                 }
146:             }
147:             self::preCommitMsg();
148:             if (!$db->commit()) {
149:                 $err = $db->errorInfo();
150:                 throw new ErrorException($err[2], ERRCODE_TRANSACTION_NOT_COMPLETED, 1);
151:             }
152:         } catch (\Exception $error) {
153:             $db->rollback();
154:             throw $error;
155:         }
156:         return array(LINES_IMPORTED => $lines_imported, 'families_added' => $families_added);
157:     }
158: 
159:     /**
160:      * @inheritDoc
161:      */
162:     public static function CLI_commandDescription() {
163:         return "Repeatmasker Output Importer";
164:     }
165: 
166:     /**
167:      * @inheritDoc
168:      */
169:     public static function CLI_commandName() {
170:         return "annotation_repeatmasker";
171:     }
172: 
173:     /**
174:      * @inheritDoc
175:      */
176:     public static function CLI_longHelp() {
177:         return <<<EOF
178: 
179: \033[0;31mThis import requires a successful Sequence ID Import!\033[0m
180: EOF;
181:     }
182: 
183: }
184: 
185: ?>
186: 
tbro API documentation generated by ApiGen 2.8.0