"""
targets.py
Functions and classes for handling the target data.
"""
import os
import yaml
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx
from . import __path__, ligands, edges, utils
# Default data directory: the ``sample_data`` folder shipped inside the package.
data_path = os.path.abspath(os.path.join(__path__[0], "sample_data"))

# Index of all known targets, parsed from ``targets.yml`` in the data directory.
# The context manager closes the handle even if YAML parsing raises.
# NOTE(review): ``yaml.full_load`` can construct more than plain data types;
# consider ``yaml.safe_load`` if ``targets.yml`` may come from an untrusted source.
with open(os.path.join(data_path, "targets.yml")) as file:
    target_dict = yaml.full_load(file)
def set_data_dir(path=os.path.abspath(os.path.join(__path__[0], "sample_data"))):
    """
    Set the data directory and reload the target index from it.

    The file ``targets.yml`` inside the directory is parsed and stored in the
    module-level ``target_dict``; the absolute directory path is stored in the
    module-level ``data_path``. Both globals are only replaced after the file
    has been read successfully, so a failed call leaves the previous state intact.

    :param path: string with path to data directory (defaults to the ``sample_data`` directory of the package)
    :return: None
    :raises OSError: if ``targets.yml`` cannot be opened in the given directory
    """
    global data_path, target_dict
    new_data_path = os.path.abspath(path)
    # NOTE(review): ``yaml.full_load`` can construct more than plain data types;
    # consider ``yaml.safe_load`` if the data directory may be untrusted.
    with open(os.path.join(new_data_path, "targets.yml")) as file:
        new_target_dict = yaml.full_load(file)
    # Commit both globals together so they never describe different directories.
    data_path = new_data_path
    target_dict = new_target_dict
def get_target_dir(target):
    """
    Gets the directory name of the target

    The name is looked up under the ``"dir"`` key of the target's entry in the
    module-level ``target_dict``.

    :param target: string with target name
    :return: string with directory name
    :raises ValueError: if the target is not a key of ``target_dict``
    """
    if target not in target_dict:
        raise ValueError(f"Directory for target {target} not found.")
    return target_dict[target]["dir"]
def get_target_data_path(target):
    """
    Gets the file path of the target data

    The path is built from the module-level ``data_path``, the target's
    ``"dir"`` entry in ``target_dict`` and the fixed sub-directory ``00_data``.

    :param target: string with target name
    :return: string with the path of the target's ``00_data`` directory, ending with a path separator
    :raises ValueError: if the target is not a key of ``target_dict``
    """
    if target not in target_dict:
        raise ValueError(f"Path for target {target} not found.")
    # The trailing empty component makes os.path.join append a path separator.
    return os.path.join(data_path, target_dict[target]["dir"], "00_data", "")
class Target:
    """
    Class to store the data of one target.
    """

    def __init__(self, name: str):
        """
        Store and convert the data of one target in a :py:class:`pandas.Series`.

        The data is read from ``target.yml`` in the directory returned by
        :py:func:`get_target_data_path`.

        :param name: string with target name
        :return: None
        """
        self._name = name
        path = get_target_data_path(self._name)
        # The context manager closes the handle even if YAML parsing raises.
        with open(os.path.join(path, "target.yml")) as file:
            data = yaml.full_load(file)
        self._data = pd.Series(data)
        # The attributes below are filled lazily by the corresponding methods.
        self.ligand_data = None
        self.html_data = None
        self._ligands = None
        self._edges = None

    def get_name(self):
        """
        Access the name of the target.

        :return: name as a string
        """
        return self._name

    def get_ligand_set(self):
        """
        Get :py:class:`~plbenchmark.ligands.ligandSet` associated with the target

        The ligand set is created on first access and reused afterwards.

        :return: :py:class:`plbenchmark.ligands.ligandSet` object
        """
        if self._ligands is None:
            self._ligands = ligands.LigandSet(self._name)
        return self._ligands

    def add_ligand_data(self):
        """
        Adds summary data of the ligands to :py:class:`plbenchmark.targets.target`.

        The number of ligands and the maximum, minimum and standard deviation of
        the affinities (in kcal/mole, rounded to one decimal) are stored in
        ``self.ligand_data``.

        :return: None
        """
        lgs = self.get_ligand_set()
        self.ligand_data = pd.Series({"numLigands": len(lgs)})
        # Plain magnitudes in kcal/mole so that max/min/std operate on numbers.
        affinities = [
            ligand._data[("DerivedMeasurement", "value")].to("kcal/mole").magnitude
            for _, ligand in lgs.items()
        ]
        self.ligand_data["maxDG"] = round(
            max(affinities) * utils.unit("kcal / mole"), 1
        )
        self.ligand_data["minDG"] = round(
            min(affinities) * utils.unit("kcal / mole"), 1
        )
        # calculation of the standard deviation (numpy default, i.e. ddof=0)
        std = np.std(affinities)
        self.ligand_data["std(DG)"] = round(std * utils.unit("kcal / mole"), 1)

    def get_ligand_data(self):
        """
        Access the ligand summary data, computing it on first access.

        :return: :py:class:`pandas.Series` stored in ``self.ligand_data``
        """
        if self.ligand_data is None:
            self.add_ligand_data()
        return self.ligand_data

    def get_ligand_set_dataframe(self, columns=None):
        """
        Get :py:class:`~plbenchmark.ligands.ligandSet` associated with the target in a :py:class:`pandas.DataFrame`

        :param columns: :py:class:`list` of columns which should be returned in the :py:class:`pandas.DataFrame`
        :return: :py:class:`pandas.DataFrame`
        """
        return self.get_ligand_set().get_dataframe(columns)

    def get_ligand_set_html(self, columns=None):
        """
        Get :py:class:`~plbenchmark.ligands.ligandSet` associated with the target in a html string

        :param columns: list of columns which should be returned
        :return: html string
        """
        return self.get_ligand_set().get_html(columns)

    def get_edge_set(self):
        """
        Get :py:class:`~plbenchmark:edges:edgeSet` associated with the target

        The edge set is created on first access and reused afterwards.

        :return: :py:class:`plbenchmark:edges:edgeSet` object
        """
        if self._edges is None:
            self._edges = edges.EdgeSet(self._name)
        return self._edges

    def get_edge_set_dataframe(self, columns=None):
        """
        Get :py:class:`~plbenchmark:edges:edgeSet` associated with the target as a :py:class:`pandas.DataFrame`

        :param columns: list of columns which should be returned in the :py:class:`pandas.DataFrame`
        :return: result of the edge set's ``get_dataframe`` method
        """
        return self.get_edge_set().get_dataframe(columns)

    def get_edge_set_html(self, columns=None):
        """
        Get :py:class:`~plbenchmark:edges:edgeSet` associated with the target in a html string

        :param columns: :py:class:`list` of columns which should be returned
        :return: html string
        """
        return self.get_edge_set().get_html(columns)

    def get_dataframe(self, columns=None):
        """
        Access the target data combined with the ligand summary and the link data

        :param columns: :py:class:`list` of entries which should be returned; all entries are returned if empty or None
        :return: :py:class:`pandas.Series` (one row of the :py:class:`pandas.DataFrame` built by the target set)
        """
        # pd.concat replaces Series.append, which was removed in pandas 2.0.
        df = pd.concat([self._data, self.get_ligand_data(), self.get_html_data()])
        if columns:
            return df[columns]
        return df

    def find_links(self):
        """
        Processes primary data to have links in the html string of the target data

        Results are stored in ``self.html_data``: one entry per key of the
        ``references`` mapping and, if a ``pdb`` entry exists, a ``pdblinks`` entry.

        :return: None
        """
        self.html_data = pd.Series(dtype=object)
        if "references" in list(self._data.index):
            refs = self._data["references"]
            for key, item in refs.items():
                if item is None:
                    continue
                links = [
                    utils.find_doi_url(doi) for doi in item if str(doi) != "nan"
                ]
                # Raw string: a literal backslash followed by "n", which
                # TargetSet.get_html later turns into "<br>".
                self.html_data[key] = (r"\n").join(links)
        if "pdb" in list(self._data.index):
            pdb = self._data["pdb"]
            if pdb is None:
                self.html_data["pdblinks"] = ""
            else:
                # Comma separated ids are passed on separated by spaces.
                self.html_data["pdblinks"] = utils.find_pdb_url(
                    " ".join(pdb.split(","))
                )

    def get_html_data(self):
        """
        Access the link data, computing it on first access.

        :return: :py:class:`pandas.Series` stored in ``self.html_data``
        """
        if self.html_data is None:
            self.find_links()
        return self.html_data

    def get_graph(self):
        """
        Get a graph representation of the ligand perturbations associated with the target in a :py:class:`matplotlib.figure`

        Ligands are the nodes (drawn as their images on a circular layout) and
        the edges of the edge set connect them.

        :return: :py:class:`matplotlib.figure`
        """
        graph = nx.Graph()
        # Node labels are the second "_"-separated part of the ligand keys.
        for key, item in self.get_ligand_set().items():
            graph.add_node(key.split("_")[1], image=item.get_image())
        graph.add_edges_from(
            [
                [item["ligand_a"].split("_")[1], item["ligand_b"].split("_")[1]]
                for _, item in self.get_edge_set().get_dict().items()
            ]
        )
        pos = nx.circular_layout(graph)
        fig = plt.figure(figsize=(60, 40))
        ax = fig.gca()
        # Fully transparent nodes: the ligand images are drawn on top instead.
        nx.draw(graph, pos, node_size=35000, ax=ax, node_color=[[1, 1, 1, 0]])
        data_to_display = ax.transData.transform
        display_to_figure = fig.transFigure.inverted().transform
        imsize = 0.075  # edge length of each image as a fraction of the figure
        for n in graph.nodes():
            (x, y) = pos[n]
            xx, yy = data_to_display((x, y))  # display (pixel) coordinates
            xa, ya = display_to_figure((xx, yy))  # figure-fraction coordinates
            img = graph.nodes[n]["image"]
            # Small inset axes centred on the node position; plt.axes adds it to
            # the current figure, which is the one created above.
            a = plt.axes(
                [xa - imsize / 2.0, ya - imsize / 2.0, imsize, imsize],
                fc=(1, 1, 1, 0.0),
            )
            a.set_xticks([])
            a.set_yticks([])
            for side in ("right", "top", "bottom", "left"):
                a.spines[side].set_visible(False)
            a.imshow(img, alpha=1)
            a.set_aspect("equal")
            a.axis("off")
        return fig
class TargetSet(dict):
    """
    Class inherited from dict to store all available targets in plbenchmark.
    """

    def __init__(self, *arg, **kw):
        """
        Initializes the :py:class:`~targets.targetSet` class

        One :py:class:`Target` is created and stored for every key of the
        module-level ``target_dict``.

        :param arg: arguments for :py:class:`dict` (base class)
        :param kw: keywords for :py:class:`dict` (base class)
        """
        super().__init__(*arg, **kw)
        for name in target_dict.keys():
            target = Target(name)
            self[target.get_name()] = target
        # Cache for the DataFrame built by get_dataframe().
        self._df = None

    def __eq__(self, other):
        """
        Compare the stored targets and the cached DataFrames of two target sets.

        :param other: object to compare with
        :return: bool, always False if ``other`` is not a :py:class:`TargetSet`
        """
        if not isinstance(other, TargetSet):
            return False
        if not dict.__eq__(self, other):
            return False
        # ``==`` on DataFrames is element-wise and has no truth value, so the
        # cached frames are compared explicitly.
        if self._df is None or other._df is None:
            return self._df is None and other._df is None
        return bool(self._df.equals(other._df))

    def __ne__(self, other):
        """
        Negation of :py:meth:`__eq__`.

        :param other: object to compare with
        :return: bool, always True if ``other`` is not a :py:class:`TargetSet`
        """
        return not self.__eq__(other)

    def get_target(self, name):
        """
        Accesses one target of the targetSet

        :param name: string name of the target
        :return: :py:class:`plbenchmark.targets.target` class
        :raises ValueError: if ``name`` is not part of the set
        """
        if name not in self:
            raise ValueError(f"Target {name} not part of set.")
        return self[name]

    def get_dataframe(self, columns=None):
        """
        Convert targetSet class to :py:class:`pandas.DataFrame`

        The DataFrame is built on the first call (one row per target) and
        cached for later calls.

        :param columns: :py:class:`list` of columns which should be returned in the :py:class:`pandas.DataFrame`
        :return: :py:class:`pandas.DataFrame`
        :raises ValueError: if a requested column is not part of the DataFrame
        """
        if self._df is None:
            rows = []
            for target in self.values():
                # Recompute ligand and link data before collecting the row.
                target.add_ligand_data()
                target.find_links()
                rows.append(target.get_dataframe())
            self._df = pd.DataFrame(rows)
        if columns is None:
            return self._df
        known = list(self._df.columns)
        for item in columns:
            if item not in known:
                raise ValueError(
                    f"Column {item} is not known and cannot be generated."
                )
        return self._df[columns]

    def get_html(self, columns=None):
        """
        Access the :py:class:`~plbenchmark:targets:targetSet` as a HTML string

        The placeholders ``REP1``, ``REP2`` and ``REP3`` in the table are
        replaced by the opening, middle and closing part of an HTML link, and a
        literal backslash followed by ``n`` is replaced by ``<br>``.

        :param columns: :py:class:`list` of columns which should be returned in the :py:class:`pandas.DataFrame`
        :return: HTML string
        """
        df = self.get_dataframe(columns=columns)
        html_string = df.to_html()
        replacements = (
            ("REP1", '<a target="_blank" href="'),
            ("REP2", '">'),
            ("REP3", "</a>"),
            ("\\n", "<br>"),
        )
        for placeholder, markup in replacements:
            html_string = html_string.replace(placeholder, markup)
        return html_string

    def get_names(self):
        """
        Get a list of available target names

        :return: :py:class:`list` of strings
        """
        return list(self)