Bio::EnsEMBL::Funcgen::Parsers
ExperimentalSet
Toolbar
Summary
Bio::EnsEMBL::Funcgen::Parsers::Simple
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Funcgen::Parsers::ExperimentalSet);
Description
This is a base class to support simple file format parsers. For simple imports the vendor is
set to the parser type, i.e. the file format. The generic read_and_import_simple_data assumes
a one-line-per-feature format; other formats need their own read_and_import_format_data method,
which will need defining in the results_data config element.
Methods
annotated_feature_adaptor | No description | Code |
count | No description | Code |
counts | No description | Code |
data_set | No description | Code |
dbentry_adaptor | No description | Code |
dbentry_params | No description | Code |
define_sets | No description | Code |
experimental_set_adaptor | No description | Code |
feature_params | No description | Code |
feature_separator | No description | Code |
load_feature_and_xrefs | No description | Code |
new | Description | Code |
read_and_import_data | No description | Code |
set_config | Description | Code |
set_feature_separator | No description | Code |
validate_files | No description | Code |
Methods description
Example : my $self = $class->SUPER::new(@_); Description: Constructor method for Bed class Returntype : Bio::EnsEMBL::Funcgen::Parsers::Simple Exceptions : throws if caller is not Importer Caller : Bio::EnsEMBL::Funcgen::Parsers::Simple Status : at risk |
Example : $self->set_config; Description: Sets attribute dependent config Returntype : None Exceptions : None Caller : Bio::EnsEMBL::Funcgen::Importer Status : at risk |
Methods code
annotated_feature_adaptor | description | prev | next | Top |
# Read-only accessor: returns whatever is stored on the object under the
# 'annotated_feature_adaptor' key (undef if nothing has been stored yet).
sub annotated_feature_adaptor {
    my ($self) = @_;
    return $self->{'annotated_feature_adaptor'};
}
# Increments the tally kept under $self->{'_counts'}{$count_type}, creating
# the entry (starting from 0) on first use. Returns nothing.
sub count {
    my $self       = shift;
    my $count_type = shift;

    # Work through a reference to the slot so it is looked up only once.
    my $slot = \$self->{'_counts'}{$count_type};
    $$slot = 0 unless $$slot;
    $$slot++;

    return;
}
# Returns the hash ref of tallies stored under '_counts' (the structure that
# count() increments); undef if no such entry exists on the object.
sub counts {
    my ($self) = @_;
    return $self->{'_counts'};
}
# Returns the value cached under '_data_set' (assigned by define_sets);
# undef until it has been set.
sub data_set {
    my ($self) = @_;
    return $self->{'_data_set'};
}
# Read-only accessor: returns whatever is stored on the object under the
# 'dbentry_adaptor' key (undef if nothing has been stored yet).
sub dbentry_adaptor {
    my ($self) = @_;
    return $self->{'dbentry_adaptor'};
}
# Returns the value stored under '_dbentry_params' (the list of pending
# DBEntry parameter hashes consumed by load_feature_and_xrefs).
sub dbentry_params {
    my ($self) = @_;
    return $self->{'_dbentry_params'};
}
# Fetches the ExperimentalSet named by experimental_set_name, creating and
# storing it when it does not exist yet, then hands it to
# define_and_validate_sets as the single supporting set of an 'annotated'
# data set. The result is cached under '_data_set' and returned.
sub define_sets {
    my $self = shift;

    my $eset = $self->experimental_set_adaptor->fetch_by_name($self->experimental_set_name);

    unless (defined $eset) {
        # An array (not a hash) keeps the argument order exactly as written.
        my @eset_args = (
            -name         => $self->experimental_set_name(),
            -experiment   => $self->experiment(),
            -feature_type => $self->feature_type(),
            -cell_type    => $self->cell_type(),
            -vendor       => $self->vendor(),
            -format       => $self->format(),
            -analysis     => $self->feature_analysis,
        );

        my $new_eset = Bio::EnsEMBL::Funcgen::ExperimentalSet->new(@eset_args);

        # store() returns an array ref; keep its first element.
        ($eset) = @{ $self->experimental_set_adaptor->store($new_eset) };
    }

    my @dset_args = (
        -dbadaptor       => $self->db,
        -name            => $self->experimental_set_name,
        -feature_type    => $self->feature_type,
        -cell_type       => $self->cell_type,
        -analysis        => $self->feature_analysis,
        -type            => 'annotated',
        -description     => $self->feature_set_description,
        -recovery        => $self->recovery,
        -supporting_sets => [$eset],
    );

    $self->{'_data_set'} = $self->define_and_validate_sets(@dset_args);

    return $self->{'_data_set'};
}
# Read-only accessor: returns whatever is stored on the object under the
# 'experimental_set_adaptor' key (undef if nothing has been stored yet).
sub experimental_set_adaptor {
    my ($self) = @_;
    return $self->{'experimental_set_adaptor'};
}
# Returns the value stored under '_feature_params' (the parameter hash that
# load_feature_and_xrefs passes to the feature constructor).
sub feature_params {
    my ($self) = @_;
    return $self->{'_feature_params'};
}
} |
# Returns the value stored under '_feature_separator' (set through
# set_feature_separator); undef if none has been set.
sub feature_separator {
    my ($self) = @_;
    return $self->{'_feature_separator'};
}
} |
# Builds an AnnotatedFeature from the accumulated feature params, stores it,
# stores every pending DBEntry against the stored feature, and resets both
# parameter buffers ready for the next feature.
#
# When dump_fasta is enabled, the 'sequence' entry is removed from the feature
# params (it is not a constructor argument) and appended, with a generated
# header, to $self->{'_fasta'}.
#
# Returns the stored feature.
sub load_feature_and_xrefs {
    my $self = shift;

    my $params = $self->feature_params;
    my $seq;

    if ($self->dump_fasta) {
        if (exists $params->{'sequence'}) {
            # delete returns the removed value.
            $seq = delete $params->{'sequence'};
        }
        else {
            # feature_params is a plain hash ref, so the label has to be read
            # as a hash element; calling ->display_label on it is fatal.
            # NOTE(review): the exact key used for the label is not visible
            # here; both common spellings are tried - confirm against the
            # parse_line implementations.
            my $label = 'unlabelled feature';
            foreach my $key ('-display_label', '-DISPLAY_LABEL') {
                if (defined $params->{$key}) {
                    $label = $params->{$key};
                    last;
                }
            }
            $self->log('No fasta sequence available for '.$label);
        }
    }

    my $feature = Bio::EnsEMBL::Funcgen::AnnotatedFeature->new(%{$params});
    ($feature) = @{ $self->annotated_feature_adaptor->store($feature) };
    $self->count('stored_features');

    # Only emit a FASTA record when there is a sequence to write; an undefined
    # $seq would otherwise produce a header followed by an empty line.
    if (defined $seq) {
        $self->{'_fasta'} .= $self->generate_fasta_header($feature)."\n$seq\n";
    }

    foreach my $dbentry_hash (@{ $self->{'_dbentry_params'} }) {
        # feature_type is an argument to store(), not to the DBEntry constructor.
        my $ftype   = delete $dbentry_hash->{feature_type};
        my $dbentry = Bio::EnsEMBL::DBEntry->new(%{$dbentry_hash});
        $self->dbentry_adaptor->store($dbentry, $feature->dbID, $ftype, 1);
    }

    # Clear the buffers so the next feature starts from a clean slate.
    $self->{'_feature_params'} = {};
    $self->{'_dbentry_params'} = [];

    return $feature;
}
1; } |
# Constructor. Delegates to the parent constructor, insists that the resulting
# object is a Bio::EnsEMBL::Funcgen::Importer (this class is only meant to be
# used through the Importer), and then initialises the parser state:
#   config          - import configuration for this parser type
#   _feature_params - parameters for the feature currently being built
#   _dbentry_params - pending DBEntry parameter hashes for that feature
#   _counts         - tallies maintained by count() and read by counts()
sub new {
    my $caller = shift;
    my $class  = ref($caller) || $caller;
    my $self   = $class->SUPER::new(@_);

    throw("This is a skeleton class for Bio::EnsEMBL::Importer, should not be used directly")
        if !$self->isa("Bio::EnsEMBL::Funcgen::Importer");

    $self->{'config'} = {
        array_data     => [],
        probe_data     => [],
        norm_method    => undef,
        'results_data' => ["and_import"],
    };

    $self->{'_feature_params'} = {};
    $self->{'_dbentry_params'} = [];

    # The key must be '_counts': that is the slot count() and counts() use.
    # Initialising 'counts' instead left counts() returning undef until the
    # first count() call.
    $self->{'_counts'} = {};

    return $self;
}
} |
# Imports every result file that validate_files flagged as new data.
#
# For each such file: optionally pre-processes it (when the object provides
# pre_process_file), feeds every non-blank, non-comment line to parse_line,
# calls process_params, writes the accumulated FASTA buffer when dump_fasta is
# enabled, and marks the matching ExperimentalSubset as IMPORTED. Finally the
# product FeatureSet is marked IMPORTED if it is not already.
#
# Returns nothing.
sub read_and_import_data {
    my $self = shift;

    $self->log("Reading and importing ".$self->vendor()." data");

    my $dset       = $self->define_sets;
    my $fset       = $dset->product_FeatureSet;
    my ($eset)     = @{ $dset->get_supporting_sets };
    my ($new_data) = $self->validate_files;

    # $filepath aliases the element of result_files, so the chomp (and any
    # pre_process_file substitution) is written back to that list.
    foreach my $filepath (@{ $self->result_files() }) {
        chomp $filepath;
        (my $filename = $filepath) =~ s/.*\///;

        next if !$new_data->{$filepath};

        $filepath = $self->pre_process_file($filepath) if $self->can('pre_process_file');
        $self->log_header('Reading '.$self->vendor." file:\t".$filepath);

        my $fh = open_file($filepath);

        # Only build the FASTA path when it is needed, so an unset EFG_DATA
        # does not matter for imports that do not dump sequences.
        $self->{'_fasta'} = '';
        my $f_out;
        if ($self->dump_fasta) {
            my $fasta_file = $ENV{'EFG_DATA'}."/fastas/".$self->experiment->name().'.'.$filename.'.fasta';
            $self->backup_file($fasta_file);
            $f_out = open_file($fasta_file, '>');
        }

        # Stream the file instead of slurping it into memory.
        while (defined(my $line = <$fh>)) {
            $line =~ s/\r*\n//;
            next if $line =~ /^\#/;
            next if $line =~ /^$/;
            $self->parse_line($line);
        }
        close($fh);

        $self->process_params;

        if ($f_out) {
            print $f_out $self->{'_fasta'};
            close($f_out);
        }

        # counts() may still be undef/empty if nothing was tallied.
        my $counts     = $self->counts || {};
        my $n_features = defined $counts->{'features'} ? $counts->{'features'} : 0;

        $self->log('Finished importing '.$n_features.' '.
                   $fset->name." features from:\t$filepath");
        $self->log("Counts:\n".Data::Dumper::Dumper($self->{'_counts'}));

        my $sub_set = $eset->get_subset_by_name($filename);
        $sub_set->adaptor->store_status('IMPORTED', $sub_set);
    }

    if (!$fset->has_status('IMPORTED')) {
        $fset->adaptor->store_status('IMPORTED', $fset);
    }

    $self->log("No new data, skipping result parse") if ! grep /1/, values %{$new_data};
    $self->log("Finished parsing and importing results");

    return;
}
# Validates the import configuration and caches the adaptors this parser uses.
# Throws when no ExperimentalSet name or no feature analysis has been defined.
# Returns nothing.
sub set_config {
    my ($self) = @_;

    if (!defined $self->experimental_set_name()) {
        throw('Must provide an ExperimentalSet name for a '.uc($self->vendor).' import');
    }

    throw('Must define a -feature_analysis parameter for '.uc($self->vendor).' imports')
        unless defined $self->feature_analysis;

    $self->{'config'}{'norm_method'} = undef;

    # Object key => name of the DBAdaptor getter that supplies it.
    my @adaptor_getters = (
        [ 'annotated_feature_adaptor', 'get_AnnotatedFeatureAdaptor' ],
        [ 'dbentry_adaptor',           'get_DBEntryAdaptor'          ],
        [ 'experimental_set_adaptor',  'get_ExperimentalSetAdaptor'  ],
    );

    foreach my $pair (@adaptor_getters) {
        my ($key, $getter) = @{$pair};
        $self->{$key} = $self->db->$getter();
    }

    return;
}
# Stores the separator under '_feature_separator' and returns it.
# Throws when the separator is undefined or the empty string.
sub set_feature_separator {
    my $self      = shift;
    my $separator = shift;

    unless (defined $separator && $separator ne '') {
        throw('Must provide a valid feature separator');
    }

    return $self->{'_feature_separator'} = $separator;
}
validate_files | description | prev | next | Top |
# Works out which result files still need importing.
#
# If no result files were supplied, they are discovered by listing the input
# directory for files named "<name>*.<vendor>". Each file is then matched
# against the ExperimentalSubsets of the supporting ExperimentalSet:
#   - no subset yet           -> a new subset is stored, file flagged as new
#   - subset already IMPORTED -> file flagged as not new
#   - subset partially loaded -> with recovery on, the whole FeatureSet and
#                                ExperimentalSet are rolled back; otherwise
#                                an exception is thrown
#
# Returns a hash ref of file path => 1 (import) / 0 (skip).
sub validate_files {
    my $self = shift;

    if (! @{$self->result_files()}) {
        my $list   = "ls ".$self->get_dir('input').'/'.$self->name().'*.'.lc($self->vendor);
        my @rfiles = `$list`;
        $self->result_files(\@rfiles);
    }

    if (scalar(@{$self->result_files()}) > 1) {
        warn('Found more than one '.$self->vendor." file:\n".
             join("\n", @{$self->result_files()})."\nThe Simple parser does not yet handle replicates.".
             " We need to resolve how we are going handle replicates with random cluster IDs");
    }

    my %new_data;
    my $recover_unimported = 0;
    my ($eset) = @{$self->data_set->get_supporting_sets};

    # $filepath aliases the list element, so the chomp is written back.
    foreach my $filepath (@{$self->result_files}) {
        chomp $filepath;
        (my $filename = $filepath) =~ s/.*\///;

        $self->log('Found '.$self->vendor." file\t$filename");

        my $sub_set = $eset->get_subset_by_name($filename);

        if ($sub_set) {

            if ($recover_unimported) {
                $new_data{$filepath} = 1;
                next;
            }

            if ($sub_set->has_status('IMPORTED')) {
                $new_data{$filepath} = 0;
                $self->log("ExperimentalSubset(${filename}) has already been imported");
            }
            else {
                $self->log("Found partially imported ExperimentalSubset(${filename})");
                $recover_unimported  = 1;
                $new_data{$filepath} = 1;

                if ($self->recovery) {
                    $self->log("Rolling back results for ExperimentalSubset:\t".$filename);
                    warn "Cannot yet rollback for just an ExperimentalSubset, rolling back entire set\n";
                    warn "WARNING:: This may be deleting previously imported data which you are not re-importing..list?!!!\n";
                    $self->rollback_FeatureSet($self->data_set->product_FeatureSet);
                    $self->rollback_ExperimentalSet($eset);
                    last;
                }
                else {
                    throw("Found partially imported ExperimentalSubSet:\t$filepath\n".
                          "You must specify -recover to perform a full roll back for this ExperimentalSet:\t".$eset->name);
                }
            }
        }
        else {
            $self->log("Found new ExperimentalSubset(${filename})");
            $new_data{$filepath} = 1;
            $sub_set = $eset->add_new_subset($filename);
            $self->experimental_set_adaptor->store_ExperimentalSubsets([$sub_set]);
        }
    }

    # After a full rollback every existing subset has to be re-imported.
    if ($recover_unimported) {

        # The rest of %new_data (and its consumer) is keyed by file path,
        # whereas subsets are named after the bare file name. Map names back
        # to paths so that files previously flagged 0 are flipped to 1 rather
        # than shadowed by a separate name-keyed entry.
        my %path_for;
        foreach my $path (@{$self->result_files}) {
            my $clean_path = $path;    # copy: do not modify the list here
            chomp $clean_path;
            (my $name = $clean_path) =~ s/.*\///;
            $path_for{$name} = $clean_path;
        }

        foreach my $esset (@{$eset->get_subsets}) {
            my $name = $esset->name;
            # Subsets with no matching result file keep the name as their key.
            my $key  = exists $path_for{$name} ? $path_for{$name} : $name;
            $new_data{$key} = 1;
            $eset->adaptor->revoke_states($esset);
        }
    }

    return (\%new_data);
}
General documentation
This module was created by Nathan Johnson.