Class Bio::EMBL
In: lib/bio/db/embl/embl.rb
Parent: EMBLDB

Methods

Included Modules

Bio::EMBLDB::Common

External Aliases

oc -> classification
  taxonomy classification

Public Instance methods

returns comment text in the comments (CC) line.

CC Line; comments or notes (>=0)

[Source]

     # File lib/bio/db/embl/embl.rb, line 401
401:   def cc
402:     get('CC').to_s.gsub(/^CC   /, '')
403:   end
comment()

Alias for cc

[Source]

     # File lib/bio/db/embl/embl.rb, line 130
130:   def data_class
131:     id_line('DATA_CLASS')
132:   end

created date. Returns Date object, String or nil.

[Source]

     # File lib/bio/db/embl/embl.rb, line 461
461:   def date_created
462:     parse_date(self.dt['created'])
463:   end

modified date. Returns Date object, String or nil.

[Source]

     # File lib/bio/db/embl/embl.rb, line 456
456:   def date_modified
457:     parse_date(self.dt['updated'])
458:   end

database references (DR). Returns an array of Bio::Sequence::DBLink objects.

[Source]

     # File lib/bio/db/embl/embl.rb, line 511
511:   def dblinks
512:     get('DR').split(/\n/).collect { |x|
513:       Bio::Sequence::DBLink.parse_embl_DR_line(x)
514:     }
515:   end

returns DIVISION in the ID line.

[Source]

     # File lib/bio/db/embl/embl.rb, line 140
140:   def division
141:     id_line('DIVISION')
142:   end

returns contents in the date (DT) line.

where <DT Hash> is:

 {}

keys: ‘created’ and ‘updated’

DT Line; date (2/entry)

[Source]

     # File lib/bio/db/embl/embl.rb, line 182
182:   def dt(key=nil)
183:     unless @data['DT']
184:       tmp = Hash.new
185:       dt_line = self.get('DT').split(/\n/)
186:       tmp['created'] = dt_line[0].sub(/\w{2}   /,'').strip
187:       tmp['updated'] = dt_line[1].sub(/\w{2}   /,'').strip
188:       @data['DT'] = tmp
189:     end
190:     if key
191:       @data['DT'][key]
192:     else
193:       @data['DT']
194:     end
195:   end

iterates on CDS features in the FT lines.

[Source]

     # File lib/bio/db/embl/embl.rb, line 380
380:   def each_cds
381:     ft.each do |cds_feature|
382:       if cds_feature.feature == 'CDS'
383:         yield cds_feature
384:       end
385:     end
386:   end

iterates on gene features in the FT lines.

[Source]

     # File lib/bio/db/embl/embl.rb, line 389
389:   def each_gene
390:     ft.each do |gene_feature|
391:       if gene_feature.feature == 'gene'
392:         yield gene_feature
393:       end
394:     end
395:   end

returns ENTRY_NAME in the ID line.

[Source]

     # File lib/bio/db/embl/embl.rb, line 117
117:   def entry
118:     id_line('ENTRY_NAME')
119:   end
entry_id()

Alias for entry

entry_name()

Alias for entry

entry version number numbered by EMBL

[Source]

     # File lib/bio/db/embl/embl.rb, line 476
476:   def entry_version
477:     parse_release_version(self.dt['updated'])[1]
478:   end
features()

Alias for ft

returns feature table header (String) in the feature header (FH) line.

FH Line; feature table header (0 or 2)

[Source]

     # File lib/bio/db/embl/embl.rb, line 325
325:   def fh
326:     fetch('FH')
327:   end

returns contents in the feature table (FT) lines.

same as features method in bio/db/genbank.rb

FT Line; feature table data (>=0)

[Source]

     # File lib/bio/db/embl/embl.rb, line 336
336:   def ft
337:     unless @data['FT']
338:       ary = Array.new
339:       in_quote = false
340:       @orig['FT'].each_line do |line|
341:         next if line =~ /^FEATURES/
342: 
343:         head = line[0,20].strip  # feature key (source, CDS, ...)
344:         body = line[20,60].chomp # feature value (position, /qualifier=)
345:         if line =~ /^FT {3}(\S+)/
346:           ary.push([ $1, body ]) # [ feature, position, /q="data", ... ]
347:         elsif body =~ /^ \// and not in_quote
348:           ary.last.push(body)    # /q="data..., /q=data, /q
349: 
350:           if body =~ /=" / and body !~ /"$/
351:             in_quote = true
352:           end
353: 
354:         else
355:           ary.last.last << body # ...data..., ...data..."
356: 
357:           if body =~ /"$/
358:             in_quote = false
359:           end
360:         end
361:       end
362: 
363:       ary.map! do |subary|
364:         parse_qualifiers(subary)
365:       end
366: 
367:       @data['FT'] = ary.extend(Bio::Features::BackwardCompatibility)
368:     end
369:     if block_given?
370:       @data['FT'].each do |feature|
371:         yield feature
372:       end
373:     else
374:       @data['FT']
375:     end
376:   end

returns contents in the ID line.

where <ID Hash> is:

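 {'ENTRY_NAME' => String, 'DATA_CLASS' => String, 'MOLECULE_TYPE' => String,
  'DIVISION' => String, 'SEQUENCE_LENGTH' => Int,
  'SEQUENCE_VERSION' => String, 'TOPOLOGY' => String}

'SEQUENCE_VERSION' and 'TOPOLOGY' are set only when the ID line carries an SV field (the Rel 89- format described below).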

ID Line

 "ID  ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP."

DATA_CLASS = [‘standard’]

MOLECULE_TYPE: DNA RNA XXX

Code ( DIVISION )

 EST (ESTs)
 PHG (Bacteriophage)
 FUN (Fungi)
 GSS (Genome survey)
 HTC (High Throughput cDNAs)
 HTG (HTGs)
 HUM (Human)
 INV (Invertebrates)
 ORG (Organelles)
 MAM (Other Mammals)
 VRT (Other Vertebrates)
 PLN (Plants)
 PRO (Prokaryotes)
 ROD (Rodents)
 SYN (Synthetic)
 STS (STSs)
 UNC (Unclassified)
 VRL (Viruses)

Rel 89-

 ID   CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.
 ID   <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP.

  1. Primary accession number
  2. Sequence version number
  3. Topology: ‘circular’ or ‘linear’
  4. Molecule type (see note 1 below)
  5. Data class (see section 3.1)
  6. Taxonomic division (see section 3.2)
  7. Sequence length (see note 2 below)

[Source]

     # File lib/bio/db/embl/embl.rb, line 89
 89:   def id_line(key=nil)
 90:     unless @data['ID']
 91:       tmp = Hash.new
 92:       idline = fetch('ID').split(/; +/)         
 93:       tmp['ENTRY_NAME'], tmp['DATA_CLASS'] = idline.shift.split(/ +/)
 94:       if idline.first =~ /^SV/
 95:         tmp['SEQUENCE_VERSION'] = idline.shift.split(' ').last
 96:         tmp['TOPOLOGY'] = idline.shift
 97:         tmp['MOLECULE_TYPE'] = idline.shift
 98:         tmp['DATA_CLASS'] = idline.shift
 99:       else
100:         tmp['MOLECULE_TYPE'] = idline.shift
101:       end
102:       tmp['DIVISION'] = idline.shift
103:       tmp['SEQUENCE_LENGTH'] = idline.shift.strip.split(' ').first.to_i
104: 
105:       @data['ID'] = tmp
106:     end
107:     
108:     if key
109:       @data['ID'][key]
110:     else
111:       @data['ID']
112:     end
113:   end

returns MOLECULE_TYPE in the ID line.

[Source]

     # File lib/bio/db/embl/embl.rb, line 125
125:   def molecule
126:     id_line('MOLECULE_TYPE')
127:   end
molecule_type()

Alias for molecule

naseq()

Alias for seq

ntseq()

Alias for seq

returns contents in the OS line.

where <OS Hash> is:

 [{'name'=>'Human', 'os'=>'Homo sapiens'},
  {'name'=>'Rat', 'os'=>'Rattus norvegicus'}]

OS Line; organism species (>=1)

  OS   Trifolium repens (white clover)

Typically, OS line shows "Genus species (name)" style:

  OS   Genus species (name)

Other examples:

  OS   uncultured bacterium
  OS   xxxxxx metagenome
  OS   Cloning vector xxxxxxxx

Complicated examples:

  OS   Poeciliopsis gracilis (Poeciliopsis gracilis (Heckel, 1848))
  OS   Etmopterus sp. B Last & Stevens, 1994 (bristled lanternshark)
  OS   Galaxias sp. D (Allibone et al., 1996) (Pool Burn galaxias)
  OS   Sicydiinae sp. 'Keith et al., 2010'
  OS   Acanthopagrus sp. 'Jean & Lee, 2008'
  OS   Gaussia princeps (T. Scott, 1894)
  OS   Rana sp. 8 Hillis & Wilcox, 2005
  OS   Contracaecum rudolphii C D'Amelio et al., 2007
  OS   Partula sp. 'Mt. Marau, Tahiti'
  OS   Leptocephalus sp. 'type II larva' (Smith, 1989)
  OS   Tayloria grandis (D.G.Long) Goffinet & A.J.Shaw, 2002
  OS   Non-A, non-B hepatitis virus
  OS   Canidae (dog, coyote, wolf, fox)
  OS   Salmonella enterica subsp. enterica serovar 4,[5],12:i:-
  OS   Yersinia enterocolitica (type O:5,27)
  OS   Influenza A virus (A/green-winged teal/OH/72/99(H6N1,4))
  OS   Influenza A virus (A/Beijing/352/1989,(highgrowth reassortant NIB26)(H3N2))
  OS   Recombinant Hepatitis C virus H77(5'UTR-NS2)/JFH1_V787A,Q1247L

[Source]

     # File lib/bio/db/embl/embl.rb, line 266
266:   def os(num = nil)
267:     unless @data['OS']
268:       os = Array.new
269:       tmp = fetch('OS')
270:       if /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d]) *\(([\w\d ]+)\)\s*\z/ =~ tmp
271:         org = $1
272:         os.push({'name' => $2, 'os' => $1})
273:       else
274:         os.push({'name' => nil, 'os' => tmp})
275:       end
276:       @data['OS'] = os
277:     end
278:     if num
279:       # EX. "Trifolium repens (white clover)"
280:       "#{@data['OS'][num]['os']} {#data['OS'][num]['name']"
281:     end
282:     @data['OS']
283:   end

release number when created

[Source]

     # File lib/bio/db/embl/embl.rb, line 471
471:   def release_created
472:     parse_release_version(self.dt['created'])[0]
473:   end

release number when last updated

[Source]

     # File lib/bio/db/embl/embl.rb, line 466
466:   def release_modified
467:     parse_release_version(self.dt['updated'])[0]
468:   end

returns the nucleotide sequence in this entry.

@orig[''] as sequence

bb Line; (blanks) sequence data (>=1)

[Source]

     # File lib/bio/db/embl/embl.rb, line 445
445:   def seq
446:     Bio::Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') )
447:   end
seqlen()

Alias for sequence_length

returns SEQUENCE_LENGTH in the ID line.

  • Bio::EMBL#sequence_length -> Integer

[Source]

     # File lib/bio/db/embl/embl.rb, line 146
146:   def sequence_length
147:     id_line('SEQUENCE_LENGTH')
148:   end

species

[Source]

     # File lib/bio/db/embl/embl.rb, line 518
518:   def species
519:     self.fetch('OS')
520:   end

returns sequence header information in the sequence header (SQ) line.

where <SQ Hash> is:

    {'ntlen' => Int, 'other' => Int,
     'a' => Int, 'c' => Int, 'g' => Int, 't' => Int}

SQ Line; sequence header (1/entry)

 SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;

[Source]

     # File lib/bio/db/embl/embl.rb, line 422
422:   def sq(base = nil)
423:     unless @data['SQ']
424:       fetch('SQ') =~ \
425:              /(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/
426:       @data['SQ'] = {'ntlen' => $1.to_i, 'other' => $6.to_i,
427:                      'a' => $2.to_i, 'c' => $3.to_i , 'g' => $4.to_i, 't' => $5.to_i}
428:     else
429:       @data['SQ']
430:     end
431: 
432:     if base
433:       @data['SQ'][base.downcase]
434:     else
435:       @data['SQ']
436:     end
437:   end

returns the version information in the sequence version (SV) line.

SV Line; sequence version (1/entry)

 SV    Accession.Version

[Source]

     # File lib/bio/db/embl/embl.rb, line 162
162:   def sv
163:     if (v = field_fetch('SV').sub(/;/,'')) == ""
164:       [id_line['ENTRY_NAME'], id_line['SEQUENCE_VERSION']].join('.') 
165:     else
166:       v
167:     end  
168:   end

converts the entry to Bio::Sequence object


Arguments: none

Returns: Bio::Sequence object

[Source]

     # File lib/bio/db/embl/embl.rb, line 530
530:   def to_biosequence
531:     Bio::Sequence.adapter(self, Bio::Sequence::Adapter::EMBL)
532:   end

[Source]

     # File lib/bio/db/embl/embl.rb, line 134
134:   def topology
135:     id_line('TOPOLOGY')
136:   end

[Source]

     # File lib/bio/db/embl/embl.rb, line 169
169:   def version
170:     (sv.split(".")[1] || id_line['SEQUENCE_VERSION']).to_i
171:   end

[Validate]