# Constructor for the Sanger parser.
#
# Delegates object creation to the parent class, refuses to continue unless
# the resulting object is a Bio::EnsEMBL::Funcgen::Importer, and then installs
# the parser configuration under $self->{'config'}.
#
# The 'probe_data' and 'results_data' entries match the suffixes of the two
# read_*_data methods defined in this file (read_array_probe_data and
# read_and_import_result_data); presumably the Importer builds the method
# names from them -- TODO confirm against the Importer.
#
# Arguments : none beyond the invocant (class name or existing object)
# Returns   : the new object
# Throws    : if the object is not a Bio::EnsEMBL::Funcgen::Importer
sub new {
    my ($caller) = @_;

    # Allow both Class->new() and $object->new().
    my $class = ref($caller) || $caller;
    my $self  = $class->SUPER::new();

    if (! $self->isa("Bio::EnsEMBL::Funcgen::Importer")) {
        throw("This is a skeleton class for Bio::EnsEMBL::Importer, should not be used directly");
    }

    my %config = (
        array_data   => [],
        probe_data   => ["array_probe"],
        results_data => ["and_import_result"],
        norm_method  => undef,
    );
    $self->{'config'} = \%config;

    return $self;
}
# Registers every SANGER result file as an ExperimentalChip (plus its two
# dummy channels) and bulk-loads the per-probe ratios into the "result" table.
#
# Steps, as implemented below:
#   1. Build the list of result files; when result_files() is empty the list
#      is taken from an `ls` of input_dir().
#   2. For each file, derive the chip unique id from the file name (directory
#      part and everything from the first dot onwards removed), then fetch or
#      store the ExperimentalChip and its DUMMY_TOTAL / DUMMY_EXPERIMENTAL
#      channels.
#   3. For each ExperimentalChip of the experiment that does not yet have the
#      IMPORTED_SangerPCR status: roll back old results when recovering,
#      write a tab-delimited file into the "norm" directory and load it with
#      load_table_data().
#
# Arguments : none (works on the importer state held by $self)
# Returns   : nothing
# Throws    : if a chip or channel already exists and recovery() is false,
#             if the probe cache cannot be obtained, or if the generated
#             result file cannot be closed cleanly.
sub read_and_import_result_data {
    my $self = shift;

    $self->log("Reading ".$self->vendor()." result data (".localtime().")");

    my (%chip_files, %roll_back);
    my $ec_adaptor   = $self->db->get_ExperimentalChipAdaptor();
    my $chan_adaptor = $self->db->get_ChannelAdaptor();
    my $analysis     = $self->db->get_AnalysisAdaptor->fetch_by_logic_name("SangerPCR");
    my $array        = ${$self->arrays()}[0];

    # No explicit file list given: use whatever matches in the input directory.
    if (! @{$self->result_files()}) {
        my $list   = "ls ".$self->input_dir().'/[0-9]*-[0-9a-zA-Z]*\.all\.*';
        my @rfiles = `$list`;
        $self->result_files(\@rfiles);
    }

    foreach my $file (@{$self->result_files()}) {
        # $file aliases the stored list element, so this also strips the
        # newline from the entry kept in result_files().
        chomp $file;

        # Chip id = file name without directory and without anything after
        # the first dot.
        (my $chip_uid = $file) =~ s/.*\///;
        $chip_uid =~ s/\..*//;

        $self->log("Found SANGER results file for $chip_uid:\t$file");
        $chip_files{$chip_uid} = $file;

        my $echip = $ec_adaptor->fetch_by_unique_id_vendor($chip_uid, 'SANGER');

        if ($echip) {
            if (! $self->recovery()) {
                # Fixed: the accessor was misspelled "unqiue_id", so this
                # error path died with a "Can't locate object method" error.
                throw("ExperimentalChip(".$echip->unique_id().") already exists in the database\nMaybe you want to recover?");
            }

            # Remember pre-existing chips so their results are rolled back
            # before re-import.
            $roll_back{$echip->dbID()} = 1;
        } else {
            $echip = Bio::EnsEMBL::Funcgen::ExperimentalChip->new
              (
               -EXPERIMENT_ID => $self->experiment->dbID(),
               -ARRAY_CHIP_ID => $self->arrays->[0]->get_ArrayChip_by_design_id($array->name())->dbID(),
               -UNIQUE_ID     => $chip_uid,
              );
            ($echip) = @{$ec_adaptor->store($echip)};
            $self->experiment->add_ExperimentalChip($echip);
        }

        # Each chip gets two placeholder channels; existing ones are only
        # tolerated in recovery mode.
        foreach my $type ('DUMMY_TOTAL', 'DUMMY_EXPERIMENTAL') {
            my $channel = $chan_adaptor->fetch_by_type_experimental_chip_id($type, $echip->dbID());

            if ($channel) {
                if (! $self->recovery()) {
                    throw("Channel(".$echip->unique_id().":$type) already exists in the database\nMaybe you want to recover?");
                }
            } else {
                $channel = Bio::EnsEMBL::Funcgen::Channel->new
                  (
                   -EXPERIMENTAL_CHIP_ID => $echip->dbID(),
                   -TYPE                 => $type,
                  );
                ($channel) = @{$chan_adaptor->store($channel)};
            }
        }
    }

    my $rset = $self->get_import_ResultSet($analysis, 'experimental_chip');

    if ($rset) {
        foreach my $echip (@{$self->experiment->get_ExperimentalChips()}) {
            my $chip_uid = $echip->unique_id();

            if ($echip->has_status('IMPORTED_SangerPCR', $echip)) {
                $self->log("ExperimentalChip(".$chip_uid.") has already been imported");
                next;
            }

            my $cc_id = $rset->get_chip_channel_id($echip->dbID());

            if ($self->recovery() && $roll_back{$echip->dbID()}) {
                $self->log("Rolling back results for ExperimentalChip:\t".$chip_uid);
                $self->rollback_results($cc_id);
            }

            $self->log("Reading SANGER result file for ".$chip_uid.":\t".$chip_files{$chip_uid});
            $self->get_probe_cache_by_Array($array) || throw('Failed to reset probe cache handle');

            # Sort the raw lines on field 5 (the probe name, see the split
            # below). Each key is extracted once instead of re-splitting both
            # lines in every comparison; the comparison itself is unchanged.
            # Presumably the order has to match the probe cache -- TODO confirm.
            my $fh    = open_file($chip_files{$chip_uid});
            my @lines = map  { $_->[1] }
                        sort { $a->[0] cmp $b->[0] }
                        map  { my $key = (split /\t|\:/, $_)[5]; [$key, $_] }
                        <$fh>;
            close($fh);

            my $rfile_path = $self->get_dir("norm")."/result.SangerPCR.".$chip_uid.".txt";
            my $rfile      = open_file($rfile_path, '>');

            foreach my $line (@lines) {
                $line =~ s/\r*\n//;

                # Fields are separated by tab or colon: 3 = ratio, 5 = probe name.
                my ($ratio, undef, $pid) = (split /\t|\:/, $line)[3..5];
                $ratio = '\N' if $ratio eq 'NA';

                # Scalar assignment keeps the lookup in scalar context, as in
                # the original string concatenation.
                my $probe_id = $self->get_probe_id_by_name_Array($pid, $array);

                # Column order: \N, probe id, ratio, chip_channel id, \N, \N.
                print {$rfile} join("\t", '\N', $probe_id, $ratio, $cc_id, '\N', '\N')."\n";
            }

            # A failed close means buffered rows may be missing; do not load
            # a truncated file.
            close($rfile) or throw("Failed to close result file $rfile_path: $!");

            $self->log("Importing:\t$rfile_path");
            $self->db->load_table_data("result", $rfile_path);
            $self->log("Finished importing:\t$rfile_path");
            $echip->adaptor->set_status('IMPORTED_SangerPCR', $echip);
        }
    } else {
        $self->log("No new data, skipping result parse");
    }

    $self->log("Finished reading and importing ".$self->vendor()." result data (".localtime().")");
    return;
}
1; } |
# Stores the Sanger PCR Array / ArrayChip and imports its probes and probe
# features from an array description file.
#
# Steps, as implemented below:
#   1. Store the Array and a single ArrayChip (named after the array) and
#      register the array with the importer.
#   2. Check the ArrayChip status: 'IMPORTED' means the probes are skipped,
#      'IMPORTED_CS_<coord system dbID>' means the features are skipped.
#      When a status is missing and recovery() is true, the corresponding
#      partial import is rolled back first.
#   3. Unless the features are already imported: locate the array file (when
#      none was given), parse it as GFF, and store one Probe (only when the
#      probes are not yet imported) and one ProbeFeature per line.
#   4. Set the status flags and, after a probe import, call
#      resolve_probe_data().
#
# Arguments : $array_file - optional path; defaults to $self->array_file(),
#             and failing that to a single *gff / *adf match in the input dir
# Returns   : nothing
# Throws    : if no input directory or array file can be determined, if more
#             than one candidate file is found, or if the file is not GFF
#             (ADF is recognised but not supported).
sub read_array_probe_data {
    my ($self, $array_file) = @_;

    warn("Remove hard coding for Sanger array import, and accomodate adf format");
    $array_file ||= $self->array_file();

    my ($imported, $fimported);
    my $oa_adaptor = $self->db->get_ArrayAdaptor();
    my $op_adaptor = $self->db->get_ProbeAdaptor();
    my $of_adaptor = $self->db->get_ProbeFeatureAdaptor();
    my $ac_adaptor = $self->db->get_ArrayChipAdaptor();

    # The value is not used below. The lookup is kept because it dies here
    # (method call on the fetch result) when no SangerPCR analysis comes
    # back, so removing it would change when this method fails.
    my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name("SangerPCR")->dbID();

    my $array = Bio::EnsEMBL::Funcgen::Array->new
      (
       -NAME        => $self->array_name(),
       -FORMAT      => uc($self->format()),
       -VENDOR      => uc($self->vendor()),
       -TYPE        => 'PCR',
       -DESCRIPTION => "Sanger ENCODE PCR array 3.1.1",
      );
    ($array) = @{$oa_adaptor->store($array)};

    # One ArrayChip per array; name and design id are both the array name.
    my $array_chip = Bio::EnsEMBL::Funcgen::ArrayChip->new(
        -NAME      => $array->name(),
        -DESIGN_ID => $array->name(),
        -ARRAY_ID  => $array->dbID(),
    );
    ($array_chip) = @{$ac_adaptor->store($array_chip)};
    $array->add_ArrayChip($array_chip);
    $self->add_Array($array);

    my $dnadb_cs = $self->db->dnadb->get_CoordSystemAdaptor->fetch_by_name('chromosome');
    my $fg_cs    = $self->db->get_FGCoordSystemAdaptor->validate_and_store_coord_system($dnadb_cs);

    # Probe import status.
    if ($array_chip->has_status('IMPORTED')) {
        $imported = 1;
        $self->log("Skipping ArrayChip probe import (".$array_chip->name().") already fully imported");

        # Probe ids are looked up through the cache below; retry with the
        # extra flag when the plain call fails (presumably this forces the
        # cache to be generated -- TODO confirm).
        if (! $self->get_probe_cache_by_Array($array)) {
            $self->get_probe_cache_by_Array($array, 1);
        }
    } elsif ($self->recovery()) {
        $self->log("Rolling back partially imported ArrayChip:\t".$array_chip->name());
        $self->db->rollback_ArrayChip($array_chip);
    }

    # Feature import status for the current coordinate system.
    if ($array_chip->has_status('IMPORTED_CS_'.$fg_cs->dbID())) {
        $fimported = 1;
        $self->log("Skipping ArrayChip feature import (".$array_chip->name().") already fully imported for ".$self->data_version());
    } elsif ($self->recovery()) {
        $self->log("Rolling back partially imported ArrayChip features:\t".$array_chip->name());
        $self->db->rollback_ArrayChip_features($array_chip, $fg_cs);
    }

    if (! $fimported) {

        if (! $array_file) {
            my $input_dir = $self->get_dir('input');

            if (! defined $input_dir) {
                throw("No input_dir defined, if you are running in a non Experiment context please use -array_file");
            }

            # A suffix only counts when exactly one file matches it. stderr
            # is discarded, so only real paths are captured.
            my @candidates;

            for my $suffix ("gff", "adf") {
                my $pattern = $input_dir."/".$self->array_name()."*".$suffix;
                my @matches = `ls $pattern 2>/dev/null`;

                # Fixed: `ls` output ends in a newline, which previously
                # stayed in the file name and in error messages.
                chomp @matches;

                push @candidates, $matches[0] if scalar(@matches) == 1;
            }

            # Fixed: candidates are collected separately, so a defined but
            # false $array_file (e.g. '') no longer triggers a bogus
            # "more than one" error on the first match.
            if (scalar(@candidates) > 1) {
                throw("Found more than one array file : $candidates[0]\t$candidates[1]\nSpecify one with -array_file");
            }

            throw("Cannot find array file. Specify one with -array_file") if (! @candidates);
            $array_file = $candidates[0];
        }

        my $array_file_format;

        if ($array_file =~ /gff/i) {
            $array_file_format = "GFF";
        } elsif ($array_file =~ /adf/i) {
            $array_file_format = "ADF";
            throw("Does not yet accomodate Sanger adf format");
        } else {
            throw("Could not determine array file format: $array_file");
        }

        my $fanal = $self->db->get_AnalysisAdaptor->fetch_by_logic_name(($array_file_format eq "ADF") ? "VendorMap" : "LiftOver");
        $self->log("Parsing ".$self->vendor()." array data (".localtime().")");

        # Sort the raw lines on field 8 (the reporter id, see the split
        # below). Each key is extracted once instead of re-splitting both
        # lines in every comparison; the comparison itself is unchanged.
        my $fh    = open_file($array_file);
        my @lines = map  { $_->[1] }
                    sort { $a->[0] cmp $b->[0] }
                    map  { my $key = (split /\t|\;/, $_)[8]; [$key, $_] }
                    <$fh>;
        close($fh);

        my $ac_dbid = $array->get_ArrayChip_by_design_id($array->name())->dbID();

        foreach my $line (@lines) {
            $line =~ s/\r*\n//;

            # Fields are separated by tab or semicolon.
            my ($chr, undef, undef, $start, $end, undef, $strand, undef, $pid) = split /\t|\;/, $line;

            # NOTE(review): both ends are shifted by one for UCSC input --
            # confirm this is the intended conversion.
            if ($self->ucsc_coords) {
                $start += 1;
                $end   += 1;
            }

            $pid =~ s/reporter_id=//;
            $chr =~ s/chr//;
            $strand = ($strand eq "+") ? 0 : 1;

            my $slice = $self->cache_slice($chr);

            if (! $slice) {
                warn("-- Skipping non standard probe (${pid}) with location:\t${chr}:${start}-${end}\n");
                next;
            }

            # Resolve the probe id in scalar context, so that an empty return
            # cannot shift the named arguments passed to ProbeFeature->new.
            my $probe_id;

            if ($imported) {
                $probe_id = $self->get_probe_id_by_name_Array($pid, $array);
            } else {
                # NOTE(review): length is end - start with no +1 -- confirm
                # against the coordinate convention of the input file.
                my $probe = Bio::EnsEMBL::Funcgen::Probe->new(
                    -NAME          => $pid,
                    -LENGTH        => ($end - $start),
                    -ARRAY         => $array,
                    -ARRAY_CHIP_ID => $ac_dbid,
                    -CLASS         => 'EXPERIMENTAL',
                );
                ($probe) = @{$op_adaptor->store($probe)};
                $probe_id = $probe->dbID();
            }

            my $feature = Bio::EnsEMBL::Funcgen::ProbeFeature->new(
                -START         => $start,
                -END           => $end,
                -STRAND        => $strand,
                -SLICE         => $slice,
                -ANALYSIS      => $fanal,
                -MISMATCHCOUNT => 0,
                -PROBE_ID      => $probe_id,
            );
            $of_adaptor->store($feature);
        }

        $array_chip->adaptor->set_status('IMPORTED_CS_'.$fg_cs->dbID(), $array_chip);
        $self->log("ArrayChip:\t".$array_chip->design_id()." has been IMPORTED_CS_".$fg_cs->dbID());
    }

    if (! $imported) {
        $array_chip->adaptor->set_status('IMPORTED', $array_chip);
        $self->log("ArrayChip:\t".$array_chip->design_id()." has been IMPORTED");
        $self->resolve_probe_data();
    }

    $self->log("Finished parsing ".$self->vendor()." array/probe data (".localtime().")");
    return;
}
This module was written by Nathan Johnson.