from typing import List, Union
from os import path
import ntpath
import json
import pandas as pd
import numpy as np
from metrics.feature.continuous_feature import ContinuousFeature
from metrics.feature.discrete_feature import DiscreteFeature
from metrics.feature.feature import Feature
class Simulation:
    """
    Container for simulated tumor object data.

    The constructor parses the file name into its components and then reads the
    simulation file once to collect the available time points and the radius.

    Attributes
    ----------
    file :
        Path and file name for the simulation file.
    path :
        Directory to the folder of simulation file.
    key :
        File name without extension or seed.
    seed :
        Seed of the simulation.
    extension :
        Extension of the simulation file.
    timepoints :
        Time point(s) (in days) in the simulation file.
    max_radius :
        Maximum radius of the simulation.
    """

    def __init__(self, simulation_file: str):
        self.file = simulation_file
        self.path: str = ""
        self.key: str = ""
        self.seed: int = 0
        self.extension: str = ""
        self.timepoints: List[float] = []
        self.max_radius: int = 0

        # Order matters: parse_config() opens a file whose name is built from
        # the attributes that parse_file() fills in.
        self.parse_file()
        self.parse_config()

    def __str__(self) -> str:
        attributes = [
            ("file", self.file),
            ("path", self.path),
            ("key", self.key),
            ("seed", self.seed),
            ("timepoints", self.timepoints),
            ("max_radius", self.max_radius),
        ]
        # Pad every attribute name to 10 characters so the "=" signs line up.
        attribute_strings = [f"{name:10} = {value}" for name, value in attributes]
        return "SIMULATION\n\t" + "\n\t".join(attribute_strings)

    def load_simulation(self, suffix: str = "") -> dict:
        """
        Load simulation file into memory.

        The file is looked up at
        ``{path}/{key}{suffix}/{key}_{seed:02}{suffix}{extension}``, i.e. inside
        a sub-folder of ``path`` named after the key (plus suffix).

        Parameters
        ----------
        suffix :
            Suffix of the file.

        Returns
        -------
        :
            Loaded simulation file.
        """
        file_name = (
            f"{self.path}/{self.key}{suffix}/{self.key}_{self.seed:02}{suffix}{self.extension}"
        )

        with open(file_name, "r", encoding="utf-8") as json_file:
            return json.load(json_file)

    def parse_file(self) -> None:
        """
        Parse out attributes from file name.

        The base name is expected to look like ``{key}_{seed}{extension}`` with a
        two-digit seed and, optionally, one extra dotted suffix before the
        extension. For example ``dir/KEY_07.json`` and ``dir/KEY_07.PARAM.json``
        both give path ``dir``, key ``KEY``, seed ``7`` and extension ``.json``.

        Raises
        ------
        ValueError
            If the two characters in the seed position are not an integer.
        """
        # ntpath splits on both "/" and "\\" separators.
        self.path = ntpath.dirname(self.file)
        base = ntpath.basename(self.file)

        stem, self.extension = path.splitext(base)
        # A second splitext drops an optional dotted suffix such as ".PARAM".
        key_and_seed = path.splitext(stem)[0]

        self.seed = int(key_and_seed[-2:])
        # Drop the two seed digits and the separator character before them.
        self.key = key_and_seed[:-3]

    def parse_config(self) -> None:
        """
        Parse out attributes from loaded simulation file.

        Reads the ``time`` entry of every item under ``timepoints`` and the
        ``config -> size -> radius`` entry.
        """
        loaded_simulation = self.load_simulation()
        self.timepoints = [tp["time"] for tp in loaded_simulation["timepoints"]]
        self.max_radius = loaded_simulation["config"]["size"]["radius"]

    def parse_timepoint(self, timepoint: float) -> pd.DataFrame:
        """
        Parse data from simulation.

        Produces one row per cell at the requested time point, combining the
        main simulation file with the matching ``.PARAM`` file. Columns follow
        the order of :meth:`get_feature_list`.

        Parameters
        ----------
        timepoint :
            Time point to parse simulation.

        Returns
        -------
        :
            Dataframe with simulation data.

        Raises
        ------
        ValueError
            If the time point is not one of ``self.timepoints``.
        """
        # Validate before touching the disk so a bad request fails fast instead
        # of parsing two JSON files first.
        if timepoint not in self.timepoints:
            raise ValueError("The timepoint not included in simulation file.")
        time_index = self.timepoints.index(timepoint)

        loaded_simulation = self.load_simulation()
        loaded_param_simulation = self.load_simulation(suffix=".PARAM")

        sim_timepoint = loaded_simulation["timepoints"][time_index]["cells"]
        param_timepoint = loaded_param_simulation["timepoints"][time_index]["cells"]

        parsed_data = []

        # Both files are walked in lockstep; the location stored in the PARAM
        # file is ignored and the one from the main file is used.
        for (location, cells), (_, param_cells) in zip(sim_timepoint, param_timepoint):
            u = int(location[0])
            v = int(location[1])
            w = int(location[2])
            z = int(location[3])
            szudzik_coordinate = self.get_szudzik_pair(u, v)

            for cell, param_cell in zip(cells, param_cells):
                population = cell[1]
                state = cell[2]
                position = cell[3]
                volume = np.round(cell[4])
                # cell[5] is averaged into a single rounded cycle value.
                cycle = np.round(np.mean(cell[5]))

                # Per-cell parameters are read from fixed positions of entry 4.
                cell_parameters = param_cell[4]
                max_height = cell_parameters[3]
                meta_pref = cell_parameters[8]
                migra_threshold = cell_parameters[9]

                parsed_data.append(
                    [
                        self.key,
                        self.seed,
                        timepoint,
                        szudzik_coordinate,
                        u,
                        v,
                        w,
                        z,
                        position,
                        str(population),
                        str(state),
                        volume,
                        cycle,
                        max_height,
                        meta_pref,
                        migra_threshold,
                    ]
                )

        columns = [feature.name for feature in self.get_feature_list()]
        return pd.DataFrame(parsed_data, columns=columns)

    @staticmethod
    def get_szudzik_pair(u: int, v: int) -> float:
        """
        Convert positions with positive or negative UV coordinates into a coordinate ID with
        signed Szudzik pairing function.

        A pairing function on a set associates each pair of numbers with a unique number through
        mathematical functions.

        Each signed coordinate is first mapped to a non-negative integer
        (``2 * x`` when ``x >= 0``, ``-2 * x - 1`` otherwise). The paired value is
        then multiplied by 0.5, so the result is a float and can be a
        half-integer (for example ``(0, -1)`` gives ``0.5``).

        Parameters
        ----------
        u :
            U coordinate of the position.
        v :
            V coordinate of the position.

        Returns
        -------
        :
            The unique ID of the position coordinate.
        """
        new_u = 2 * u if u >= 0 else (-2 * u) - 1
        new_v = 2 * v if v >= 0 else (-2 * v) - 1

        if new_u >= new_v:
            return (new_u**2 + new_u + new_v) * 0.5
        return (new_v**2 + new_u) * 0.5

    @staticmethod
    def get_feature_list() -> "List[Feature]":
        """
        Return a list of valid Feature objects.

        The order of the list defines the column order of the dataframe built by
        :meth:`parse_timepoint`.

        Returns
        -------
        :
            List of Feature objects.
        """
        return [
            Feature("key", "TEXT", False),
            Feature("seed", "INTEGER", False),
            Feature("time", "REAL", False),
            Feature("coordinate", "INTEGER", False),
            Feature("u", "INTEGER", False),
            Feature("v", "INTEGER", False),
            Feature("w", "INTEGER", False),
            Feature("z", "INTEGER", False),
            Feature("p", "INTEGER", False),
            DiscreteFeature("population", "TEXT", False),
            DiscreteFeature("state", "TEXT", False),
            ContinuousFeature("volume", "REAL", False),
            ContinuousFeature("cycle", "REAL", True),
            ContinuousFeature("max_height", "REAL", False),
            ContinuousFeature("meta_pref", "REAL", False),
            ContinuousFeature("migra_threshold", "REAL", False),
        ]

    @staticmethod
    def get_feature_object(feature_name: str) -> "Union[ContinuousFeature, DiscreteFeature]":
        """
        Return feature object valid for statistics calculation.

        Parameters
        ----------
        feature_name :
            Name of feature.

        Returns
        -------
        :
            Feature object.

        Raises
        ------
        ValueError
            If no feature has the given name, or if the first feature with that
            name is neither a ContinuousFeature nor a DiscreteFeature.
        """
        for feature in Simulation.get_feature_list():
            if feature.name != feature_name:
                continue
            if isinstance(feature, (ContinuousFeature, DiscreteFeature)):
                return feature
            raise ValueError("Feature is not valid for statistics calculation.")

        raise ValueError("Feature does not exist.")