# Source code for plbenchmark.targets

"""
targets.py
Functions and classes for handling the target data.
"""

import os
import yaml
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx

from . import __path__, ligands, edges, utils


# Default data directory: the "sample_data" folder shipped inside the package.
data_path = os.path.abspath(os.path.join(__path__[0], "sample_data"))

# Load the index of available targets once at import time. The context manager
# guarantees the handle is closed even if parsing targets.yml fails. The name
# `file` is kept so the module still exposes the same global as before.
with open(os.path.join(data_path, "targets.yml")) as file:
    target_dict = yaml.full_load(file)


def set_data_dir(path=os.path.abspath(os.path.join(__path__[0], "sample_data"))):
    """
    Sets the data directory and reloads the target index from it.

    ``targets.yml`` is read from the given directory; only if that succeeds are
    the module-level ``data_path`` and ``target_dict`` replaced.

    :param path: string with path to data directory
    :return: None
    """
    global data_path, target_dict

    new_data_path = os.path.abspath(path)

    # Load first, assign afterwards: if targets.yml is missing or malformed the
    # exception propagates and the previous data_path/target_dict pair stays
    # consistent instead of pointing at a directory that was never loaded.
    # NOTE(review): yaml.full_load is less restrictive than yaml.safe_load;
    # consider safe_load if this may be pointed at an untrusted directory.
    with open(os.path.join(new_data_path, "targets.yml")) as stream:
        new_target_dict = yaml.full_load(stream)

    data_path = new_data_path
    target_dict = new_target_dict
def get_target_dir(target):
    """
    Looks up the directory name registered for a target.

    :param target: string with target name
    :return: string with directory name
    :raises ValueError: if the target is not listed in ``target_dict``
    """
    # Guard clause: unknown targets are rejected up front.
    if target not in target_dict:
        raise ValueError(f"Directory for target {target} not found.")

    entry = target_dict[target]
    return entry["dir"]
def get_target_data_path(target):
    """
    Builds the path of the ``00_data`` directory of a target.

    :param target: string with target name
    :return: string with the path ``<data_path>/<target dir>/00_data/``
        (the trailing separator comes from joining with an empty component)
    :raises ValueError: if the target is not listed in ``target_dict``
    """
    # Guard clause: unknown targets are rejected up front.
    if target not in target_dict:
        raise ValueError(f"Path for target {target} not found.")

    target_dir = target_dict[target]["dir"]
    return os.path.join(data_path, target_dir, "00_data", "")
class Target:
    """
    Class to store the data of one target.
    """

    def __init__(self, name: str):
        """
        Store and convert the data of one target in a :py:class:`pandas.Series`.

        :param name: string with target name
        :return: None
        """
        self._name = name
        path = get_target_data_path(self._name)
        # Context manager so the handle is closed even if parsing fails.
        with open(os.path.join(path, "target.yml")) as stream:
            data = yaml.full_load(stream)
        self._data = pd.Series(data)
        # The attributes below are filled lazily by the corresponding getters.
        self.ligand_data = None
        self.html_data = None
        self._ligands = None
        self._edges = None

    def get_name(self):
        """
        Access the name of the target.

        :return: name as a string
        """
        return self._name

    def get_ligand_set(self):
        """
        Get :py:class:`~plbenchmark.ligands.ligandSet` associated with the target

        The ligand set is created on first access and cached afterwards.

        :return: :py:class:`plbenchmark.ligands.ligandSet` object
        """
        if self._ligands is None:
            self._ligands = ligands.LigandSet(self._name)
        return self._ligands

    def add_ligand_data(self):
        """
        Computes summary data of the ligands and stores it in ``ligand_data``.

        The number of ligands and the maximum, minimum and standard deviation
        of the affinities (in kcal/mole, rounded to one decimal) are stored as
        a :py:class:`pandas.Series`.

        :return: None
        """
        lgs = self.get_ligand_set()
        self.ligand_data = pd.Series({"numLigands": len(lgs)})

        # Collect the plain magnitudes in kcal/mole for the statistics below.
        affinities = []
        for _, item in lgs.items():
            affinities.append(
                item._data[("DerivedMeasurement", "value")].to("kcal/mole").magnitude
            )

        self.ligand_data["maxDG"] = round(
            max(affinities) * utils.unit("kcal / mole"), 1
        )
        self.ligand_data["minDG"] = round(
            min(affinities) * utils.unit("kcal / mole"), 1
        )
        # calculation of the standard deviation
        std = np.std(affinities)
        self.ligand_data["std(DG)"] = round(std * utils.unit("kcal / mole"), 1)

    def get_ligand_data(self):
        """
        Access the ligand summary data, computing it on first use.

        :return: :py:class:`pandas.Series` with the ligand summary data
        """
        if self.ligand_data is None:
            self.add_ligand_data()
        return self.ligand_data

    def get_ligand_set_dataframe(self, columns=None):
        """
        Get :py:class:`~plbenchmark.ligands.ligandSet` associated with the target in a :py:class:`pandas.DataFrame`

        :param columns: :py:class:`list` of columns which should be returned in the :py:class:`pandas.DataFrame`
        :return: :py:class:`pandas.DataFrame`
        """
        return self.get_ligand_set().get_dataframe(columns)

    def get_ligand_set_html(self, columns=None):
        """
        Get :py:class:`~plbenchmark.ligands.ligandSet` associated with the target in a html string

        :param columns: list of columns which should be returned
        :return: html string
        """
        return self.get_ligand_set().get_html(columns)

    def get_edge_set(self):
        """
        Get :py:class:`~plbenchmark:edges:edgeSet` associated with the target

        The edge set is created on first access and cached afterwards.

        :return: :py:class:`plbenchmark:edges:edgeSet` object
        """
        if self._edges is None:
            self._edges = edges.EdgeSet(self._name)
        return self._edges

    def get_edge_set_dataframe(self, columns=None):
        """
        Get :py:class:`~plbenchmark:edges:edgeSet` associated with the target as a :py:class:`pandas.DataFrame`

        :param columns: list of columns which should be returned in the :py:class:`pandas.DataFrame`
        :return: :py:class:`pandas.DataFrame`
        """
        return self.get_edge_set().get_dataframe(columns)

    def get_edge_set_html(self, columns=None):
        """
        Get :py:class:`~plbenchmark:edges:edgeSet` associated with the target in a html string

        :param columns: :py:class:`list` of columns which should be returned
        :return: html string
        """
        return self.get_edge_set().get_html(columns)

    def get_dataframe(self, columns=None):
        """
        Access the target data as a :py:class:`pandas.Series`

        The primary target data, the ligand summary data and the html data are
        concatenated in that order.

        :param columns: :py:class:`list` of entries which should be returned
        :return: :py:class:`pandas.Series`
        """
        # pd.concat replaces Series.append, which was removed in pandas 2.0.
        df = pd.concat([self._data, self.get_ligand_data(), self.get_html_data()])
        if columns:
            return df[columns]
        return df

    def get_html_data(self):
        """
        Access the html data, generating it on first use.

        :return: the content of ``html_data``
        """
        if self.html_data is None:
            # NOTE(review): find_links() is not defined in the visible part of
            # this class -- confirm it exists, otherwise this call raises
            # AttributeError.
            self.find_links()
        return self.html_data

    def get_graph(self):
        """
        Get a graph representation of the ligand perturbations associated with the target in a :py:class:`matplotlib.figure`

        Ligands are the nodes (drawn as their images), edges of the edge set
        connect them. Node names are the second ``_``-separated token of the
        ligand names.

        :return: :py:class:`matplotlib.figure`
        """
        graph = nx.Graph()
        for key, item in self.get_ligand_set().items():
            graph.add_node(key.split("_")[1], image=item.get_image())
        graph.add_edges_from(
            [
                [item["ligand_a"].split("_")[1], item["ligand_b"].split("_")[1]]
                for _, item in self.get_edge_set().get_dict().items()
            ]
        )

        pos = nx.circular_layout(graph)
        fig = plt.figure(figsize=(60, 40))
        ax = fig.gca()
        # Fully transparent nodes: they only reserve space for the images.
        nx.draw(graph, pos, node_size=35000, ax=ax, node_color=[[1, 1, 1, 0]])

        trans = ax.transData.transform
        trans2 = fig.transFigure.inverted().transform
        imsize = 0.075  # this is the image size
        for n in graph.nodes():
            (x, y) = pos[n]
            xx, yy = trans((x, y))  # data -> display coordinates
            xa, ya = trans2((xx, yy))  # display -> figure coordinates
            img = graph.nodes[n]["image"]
            # Small transparent axes centred on the node to hold the image.
            a = plt.axes(
                [xa - imsize / 2.0, ya - imsize / 2.0, imsize, imsize],
                fc=(1, 1, 1, 0.0),
            )
            a.set_xticks([])
            a.set_yticks([])
            for side in ("right", "top", "bottom", "left"):
                a.spines[side].set_visible(False)
            a.imshow(img, alpha=1)
            a.set_aspect("equal")
            a.axis("off")
        return fig
class TargetSet(dict):
    """
    Class inherited from dict to store all available targets in plbenchmark.
    """

    def __init__(self, *arg, **kw):
        """
        Initializes the :py:class:`~targets.targetSet` class

        After the base class is initialized, one
        :py:class:`~plbenchmark.targets.target` is created and stored for every
        name in the module-level ``target_dict``.

        :param arg: arguments for :py:class:`dict` (base class)
        :param kw: keywords for :py:class:`dict` (base class)
        """
        super().__init__(*arg, **kw)
        for name in target_dict.keys():
            target = Target(name)
            self[target.get_name()] = target
        # Cache for the DataFrame built by get_dataframe().
        self._df = None

    def __eq__(self, other):
        """
        Two sets are equal if their dict content and their cached DataFrames agree.

        :param other: object to compare with
        :return: bool
        """
        if not isinstance(other, TargetSet):
            return False
        if not dict.__eq__(self, other):
            return False
        mine, theirs = self._df, other._df
        # ``==`` on DataFrames is element-wise and has no truth value, so the
        # caches are compared explicitly: both unset, or both set and equal.
        if mine is None or theirs is None:
            return mine is theirs
        return bool(mine.equals(theirs))

    def __ne__(self, other):
        """
        Negation of :py:meth:`__eq__`.

        :param other: object to compare with
        :return: bool
        """
        return not self.__eq__(other)

    def get_target(self, name):
        """
        Accesses one target of the targetSet

        :param name: string name of the target
        :return: :py:class:`plbenchmark.targets.target` class
        :raises ValueError: if ``name`` is not part of the set
        """
        if name in self:
            return self[name]
        raise ValueError(f"Target {name} not part of set.")

    def get_dataframe(self, columns=None):
        """
        Convert targetSet class to :py:class:`pandas.DataFrame`

        The DataFrame is built once (one row per target) and cached.

        :param columns: :py:class:`list` of columns which should be returned in the :py:class:`pandas.DataFrame`
        :return: :py:class:`pandas.DataFrame`
        :raises ValueError: if a requested column is not in the DataFrame
        """
        if self._df is None:
            rows = []
            for key in self.keys():
                self[key].add_ligand_data()
                self[key].find_links()
                rows.append(self[key].get_dataframe())
            self._df = pd.DataFrame(rows)

        if columns is None:
            return self._df

        # Report the first requested column that does not exist.
        known = list(self._df.columns)
        for item in columns:
            if item not in known:
                raise ValueError(
                    f"Column {item} is not known and cannot be generated."
                )
        return self._df[columns]

    def get_html(self, columns=None):
        """
        Access the :py:class:`~plbenchmark:targets:targetSet` as a HTML string

        The placeholders ``REP1``, ``REP2`` and ``REP3`` in the rendered table
        are turned into the parts of an ``<a>`` tag, and literal ``\\n``
        sequences into ``<br>``.

        :param columns: :py:class:`list` of columns which should be returned in the :py:class:`pandas.DataFrame`
        :return: HTML string
        """
        df = self.get_dataframe(columns=columns)
        html_string = df.to_html()
        html_string = html_string.replace("REP1", '<a target="_blank" href="')
        html_string = html_string.replace("REP2", '">')
        html_string = html_string.replace("REP3", "</a>")
        html_string = html_string.replace("\\n", "<br>")
        return html_string

    def get_names(self):
        """
        Get a list of available target names

        :return: :py:class:`list` of strings
        """
        return list(self)