sub new
{ my $caller = shift;
my $class = ref($caller) || $caller;
my $self = $class->SUPER::new();
throw("This is a skeleton class for Bio::EnsEMBL::Importer, should not be used directly")
if(! $self->isa("Bio::EnsEMBL::Funcgen::Importer"));
$self->{'config'} =
{(
array_data => [], probe_data => [], results_data => ["and_import_bed"],
norm_method => undef,
protocols => {()},
)};
return $self; } |
sub read_and_import_bed_data
{ my $self = shift;
$self->log("Reading and importing ".$self->vendor()." data");
my (@header, @data, @design_ids, @lines);
my ($anal, $fh, $file);
my $eset_adaptor = $self->db->get_ExperimentalSetAdaptor();
my $af_adaptor = $self->db->get_AnnotatedFeatureAdaptor();
my $fset_adaptor = $self->db->get_FeatureSetAdaptor();
my $dset_adaptor = $self->db->get_DataSetAdaptor();
my $new_data = 0;
my $eset = $eset_adaptor->fetch_by_name($self->experimental_set_name());
if(! defined $eset){
$eset = Bio::EnsEMBL::Funcgen::ExperimentalSet->new(
-name => $self->experimental_set_name(),
-experiment => $self->experiment(),
-feature_type => $self->feature_type(),
-cell_type => $self->cell_type(),
-vendor => $self->vendor(),
-format => $self->format(),
-analysis => $self->feature_analysis,
);
($eset) = @{$eset_adaptor->store($eset)};
}
my $fset = $fset_adaptor->fetch_by_name($self->experiment->name());
if(! defined $fset){
$fset = Bio::EnsEMBL::Funcgen::FeatureSet->new(
-name => $self->experiment->name(),
-feature_type => $self->feature_type(),
-cell_type => $self->cell_type(),
-type => 'annotated',
-analysis => $self->feature_analysis,
);
($fset) = @{$fset_adaptor->store($fset)};
}
my $dset = $dset_adaptor->fetch_by_name($self->experiment->name());
if(! defined $dset){
$dset = Bio::EnsEMBL::Funcgen::DataSet->new(
-name => $self->experiment->name(),
-supporting_sets => [$eset],
-feature_set => $fset,
-displayable => 1,
-supporting_set_type => 'experimental',
);
($dset) = @{$dset_adaptor->store($dset)};
}
if (! @{$self->result_files()}) {
my $list = "ls ".$self->input_dir().'/'.$self->name().'*.bed';
my @rfiles = `$list`;
$self->result_files(\@rfiles);
}
if (scalar(@{$self->result_files()}) >1) {
warn("Found more than one bed file:\n".
join("\n", @{$self->result_files()})."\nBed does not yet handle replicates.".
" We need to resolve how we are going handle replicates with random cluster IDs");
}
my %new_data;
my $roll_back = 0;
foreach my $filepath(@{$self->result_files()}) {
chomp $filepath;
my $filename;
($filename = $filepath) =~ s/.*\///;
my $sub_set;
$self->log("Found bed file\t$filename");
if($sub_set = $eset->get_subset_by_name($filename)){
if ($sub_set->has_status('IMPORTED')){
$self->log("ExperimentalSubset(${filename}) has already been imported");
}
else {
$self->log("Found partially imported ExperimentalSubset(${filename})");
$roll_back = 1;
if ($self->recovery() && $roll_back) {
$self->log("Rolling back results for ExperimentalSubset:\t".$filename);
warn "Cannot yet rollback for just an ExperimentalSubset, rolling back entire set\n";
$self->rollback_FeatureSet($fset);
$self->rollback_ExperimentalSet($eset);
last;
}
elsif($roll_back){
throw("Found partially imported ExperimentalSubSet:\t$filepath\n".
"You must specify -recover to perform a full roll back for this ExperimentalSet:\t".$eset->name);
}
}
}
else{
$self->log("Found new ExperimentalSubset(${filename})");
$new_data{$filepath} = undef;
$sub_set = $eset->add_new_subset($filename);
$eset_adaptor->store_ExperimentalSubsets([$sub_set]);
}
}
foreach my $filepath(@{$self->result_files()}) {
chomp $filepath;
my $filename;
my $count = 0;
($filename = $filepath) =~ s/.*\///;
if($roll_back || exists $new_data{$filepath}){
$self->log("Reading bed file:\t".$filename);
my $fh = open_file($filepath);
my @lines = <$fh>;
close($fh);
my ($line, $f_out);
my $fasta = '';
my $fasta_file = $ENV{'EFG_DATA'}."/fastas/".$self->experiment->name().'.'.$filename.'.fasta';
if($self->dump_fasta()){
$self->backup_file($fasta_file);
$f_out = open_file($fasta_file, '>');
}
foreach my $line (@lines) {
$line =~ s/\r*\n//o;
next if $line =~ /^\#/;
next if $line =~ /^$/;
next unless $line =~ /^chr/i;
my ($chr, $start, $end, $pid, $score) = split/\t/o, $line;
if($self->ucsc_coords){
$start +=1;
$end +=1;
}
if(! $self->cache_slice($chr)){
warn "Skipping AnnotatedFeature import, cound non standard chromosome: $chr";
}
else{
my $feature = Bio::EnsEMBL::Funcgen::AnnotatedFeature->new
(
-START => $start,
-END => $end,
-STRAND => 1,
-SLICE => $self->cache_slice($chr),
-ANALYSIS => $fset->analysis,
-SCORE => $score,
-DISPLAY_LABEL => $pid,
-FEATURE_SET => $fset,
);
$af_adaptor->store($feature);
$count++;
if ($self->dump_fasta()){
$fasta .= '>'.$pid."\n".$self->cache_slice($chr)->sub_Slice($start, $end, 1)->seq()."\n";
}
}
}
if ($self->dump_fasta()){
print $f_out $fasta;
close($f_out);
}
$self->log("Finished importing $count features:\t$filepath");
my $sub_set = $eset->get_subset_by_name($filename);
$sub_set->adaptor->store_status('IMPORTED', $sub_set);
}
}
$self->log("No new data, skipping result parse") if ! keys %new_data && ! $roll_back;
$self->log("Finished parsing and importing results");
return;
}
1; } |
This module was created by Stefan Graf.