Source code for frontier.IO.BamcheckReader

__author__ = "Sam Nicholls <sn8@sanger.ac.uk>"
__copyright__ = "Copyright (c) Sam Nicholls"
__version__ = "0.1.2"
__maintainer__ = "Sam Nicholls <sam@samnicholls.net>"

import os

from frontier.IO.AbstractReader import AbstractReader

def tidy_key(key):
    """Sanitize summary number key.

    Surrounding whitespace is removed, a single terminating colon is dropped
    if one is present, and every space, "." and "_" is replaced with "-".

    Args:
        key: Raw key text as it appears in the summary record,
            e.g. ``"raw total sequences:"``.

    Returns:
        The sanitized key, e.g. ``"raw-total-sequences"``.
    """
    # Strip first: once spaces have been turned into dashes there is no
    # surrounding whitespace left to strip, only stray leading/trailing "-".
    key = key.strip()

    # Only remove the terminator when it is really there, so a key without a
    # trailing colon does not silently lose its last character.
    if key.endswith(":"):
        key = key[:-1]

    for separator in (" ", ".", "_"):
        key = key.replace(separator, "-")
    return key
class BamcheckReader(AbstractReader):
    """Wraps a file handler and provides access to bamcheckr'd file contents.

    "SN" records are collected in ``self.summary`` (keyed by the sanitized
    name) and "ID" records are appended to the three lists held by
    ``self.indel``.
    """

    def __init__(self, filepath, CLASSES=None, auto_close=True):
        """Initialise the structures for storing data and construct the reader.

        Args:
            filepath: Path of the bamcheckr'd file. The basename up to the
                first "." becomes the value returned by ``get_id``.
            CLASSES: Forwarded unchanged to ``AbstractReader.__init__``.
            auto_close: Forwarded unchanged to ``AbstractReader.__init__``.
        """
        self._id = os.path.basename(filepath).split(".")[0]

        # Storage is created before the parent constructor is invoked.
        # NOTE(review): presumably AbstractReader.__init__ may start feeding
        # lines to process_line -- confirm before reordering.
        self.summary = SummaryNumbers()
        self.indel = IndelDistribution()

        # NOTE(review): the meaning of the trailing 0 is defined by
        # AbstractReader, which is not visible here -- confirm.
        super(BamcheckReader, self).__init__(filepath, CLASSES, auto_close, 0)

    def process_line(self, line):
        """Process a record of the bamcheckr'd file.

        Lines starting with "#" and empty lines are ignored. Records are
        tab-separated and dispatched on their first field:

        * ``SN``: field 1 is the key (sanitized with ``tidy_key``), field 2
          the value, stored as ``float`` when it parses as one and as the raw
          string otherwise.
        * ``ID``: fields 1-3 are parsed as integers and appended to
          ``indel.lengths``, ``indel.inserts`` and ``indel.deletes``.

        Any other record type is ignored.

        Args:
            line: One line of the file.

        Raises:
            Exception: If an "SN" key is seen again with a different value.
        """
        # startswith() instead of line[0]: an empty line is then skipped
        # rather than raising IndexError.
        if line.startswith("#"):
            # Skip comments
            return

        fields = line.split("\t")
        if fields[0] == "SN":
            name = tidy_key(fields[1])
            try:
                value = float(fields[2])
            except ValueError:
                # Non-numeric summary values are kept as text.
                value = fields[2]

            # Check whether key already exists in summary
            if name in self.summary:
                print("[NOTE] Duplicate key for %s found in %s" % (name, self.handler.name))

                # Check whether the duplicate value is equal to the current
                if self.summary[name] != value:
                    raise Exception("[FAIL] Duplicate differing key for %s found in %s" % (name, self.handler.name))
                # An identical duplicate is harmless; keep the first entry.
                return
            self.summary[name] = value

        elif fields[0] == "ID":
            self.indel.lengths.append(int(fields[1]))
            self.indel.inserts.append(int(fields[2]))
            self.indel.deletes.append(int(fields[3]))

    def get_id(self):
        """Return the identifier derived from the file name at construction."""
        return self._id

    def get_data(self):
        """Return read summary data."""
        return self.summary
class SummaryNumbers(dict):
    """Dictionary of summary numbers with case-insensitive key lookup."""

    def __init__(self, *args):
        # The positional arguments are handed over as ONE sequence, so each
        # argument must itself be a (key, value) pair.
        super(SummaryNumbers, self).__init__(args)

    def search(self, query):
        """Return the keys that contain ``query``, ignoring case.

        Args:
            query: Substring to look for in each key.

        Returns:
            A list of matching keys, in the dictionary's iteration order.
        """
        return [name for name in self if query.lower() in name.lower()]
class IndelDistribution(object):
    """Container for three parallel lists: lengths, inserts and deletes."""

    def __init__(self):
        # Three independent, initially empty lists.
        self.lengths, self.inserts, self.deletes = [], [], []

    @staticmethod
    def _tally(counts):
        """Add up every entry of ``counts`` (0 when it is empty)."""
        return sum(counts)

    def total_inserts(self):
        """Return the sum of all recorded insert counts."""
        return self._tally(self.inserts)

    def total_deletes(self):
        """Return the sum of all recorded delete counts."""
        return self._tally(self.deletes)