Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 55 additions & 27 deletions tools/flatmap_diagnostic.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ class digestive_43 Anatomical Map Anatomical map class
import subprocess
import tempfile
from urllib.parse import urlparse
from mapmaker.knowledgebase.sckan import NODE_TYPE_BY_PHENOTYPE
import ast

#===============================================================================

Expand Down Expand Up @@ -195,7 +197,7 @@ def __init__(self, manifest_file, sckan_knowledge):
# check class, id, type
self.__svg_tags = {'id': {}, 'class': {}}
for svg_source in self.__manifest['sources']:
tree = etree.parse(manifest_file.with_name(svg_source['href']))
tree = etree.parse(manifest_file.parent / svg_source["href"])
root = tree.getroot()
ns = {'svg': 'http://www.w3.org/2000/svg'}
parent_map = {c: p for p in root.iter() for c in p}
Expand All @@ -220,24 +222,29 @@ def __init__(self, manifest_file, sckan_knowledge):


# anatomical_map info
with open(manifest_file.with_name(self.__manifest['anatomicalMap']), 'r') as fp:
self.__anatomical_map = json.load(fp)
if (anatomical_file:=self.__manifest.get('anatomicalMap')):
with open(manifest_file.with_name(anatomical_file), 'r') as fp:
self.__anatomical_map = json.load(fp)
else:
self.__anatomical_map = dict()

# property info
with open(manifest_file.with_name(self.__manifest['properties']), 'r') as fp:
self.__properties = json.load(fp)
self.__properties['points'] = [
p
for network in self.__properties.get('networks', [])
for nt in ['centrelines', 'no-centrelines'] if nt in network
for c in network[nt]
for point in c.get('connects', [])
for p in point.split('/')
]
self.__properties['networks'] = {
c['id']: c
for network in self.__properties.get('networks', [])
for nt in ['centrelines', 'no-centrelines'] if nt in network
for c in network[nt]
}
self.__properties['points'] = [
p
for network in self.__properties.get('networks', {}).values()
for point in network['connects']
for p in point.split('/')
]
self.__properties['terms'] = defaultdict(list)
self.__properties['a_classes'] = defaultdict(list)
for feature_id, feature in self.__properties.get('features', {}).items():
Expand Down Expand Up @@ -276,10 +283,10 @@ def __is_node_or_term_in_property_or_proxy(self, node_or_term):
if isinstance(node_or_term, str):
return node_or_term in self.__properties['terms'] or node_or_term in self.__have_proxies
elif isinstance(node_or_term, tuple):
for term in [node_or_term[0]] + list(node_or_term[1]):
if term not in self.__properties['terms'] and term not in self.__have_proxies:
return False
return True
terms_to_check = [node_or_term[0]] + list(node_or_term[1])
props_terms = self.__properties['terms']
proxies = self.__have_proxies
return all(term in props_terms or term in proxies for term in terms_to_check)

def __get_term_by_id(self, id):
if id in (dt:=self.__properties['features']) or id in (dt:=self.__properties['networks']):
Expand Down Expand Up @@ -342,11 +349,12 @@ def __anatomical_map_validation(self):
def __properties_validation(self):
# validating classes in properties
for k, v in self.__properties['classes'].items():
if (k in self.__anatomical_map or v.get('models')) and (k in self.__svg_tags['class'] or k in self.__properties['a_classes']):
continue
if k not in self.__svg_tags['class'] and k not in self.__properties['a_classes']:
in_anat_or_models = k in self.__anatomical_map or v.get('models')
in_svg_or_props = k in self.__svg_tags['class'] or k in self.__properties['a_classes']

if not in_svg_or_props:
self.__record_issue('class', k, 'Properties', 'Properties class not in SVG and property features')
elif k not in self.__anatomical_map and not v.get('models'):
elif not in_anat_or_models:
self.__record_issue('class', k, 'Properties', 'Properties class not in anatomical map and has no models')

# validating anatomical classes in properties
Expand All @@ -368,10 +376,11 @@ def __properties_validation(self):
for k in self.__properties['features']:
if k not in self.__svg_tags['id']:
self.__record_issue('id', k, 'Properties', 'Properties feature id not in SVG')
if not self.__properties['features'][k].get('models') and self.__anatomical_map.get(self.__properties['features'][k].get('class'), {}).get('term') is None:
feature = self.__properties['features'][k]
if not feature.get('models') and self.__anatomical_map.get(feature.get('class'), {}).get('term') is None:
self.__record_issue('id', k, 'Properties', 'Properties feature id has no models and its class has no term in anatomical map')

def analize(self):
def analyze(self):
self.__svg_validation()
self.__alias_validation()
self.__proxy_validation()
Expand Down Expand Up @@ -415,12 +424,12 @@ def analyse_flatmap_source(manifest_files, sckan_knowledge, output_dir='.'):
# Path to the file
file_path = tmp_path / file_relative
sv = SourceValidation(file_path, sckan_knowledge)
sv.analize()
sv.analyze()
svs.append(sv)
else:
# local file
sv = SourceValidation(manifest_file, sckan_knowledge)
sv.analize()
sv.analyze()
svs.append(sv)

sources = [sv.get_manifest_id() for sv in svs]
Expand All @@ -442,7 +451,7 @@ def analyse_flatmap_source(manifest_files, sckan_knowledge, output_dir='.'):

def identify_missing_nodes(sckan_knowledge, log_data_list, map_type, g_rdf, output_dir='.'):
# group and aggregate paths
missing_paths = {log_data['path'] for log_data in log_data_list if log_data.get('msg') == 'Path is not rendered due to partial rendering'}
missing_paths = {log_data['path'] for log_data in log_data_list if log_data.get('msg') == 'Path suppressed: source/destination nodes are not in the connectivity graph'}
missing_nodes = defaultdict(dict)
for log_data in log_data_list:
if log_data.get('msg') == 'Cannot find feature for connectivity node':
Expand All @@ -454,6 +463,14 @@ def identify_missing_nodes(sckan_knowledge, log_data_list, map_type, g_rdf, outp
}
if log_data.get('map'):
missing_nodes[node_key]['map'] += (log_data['map'],)
elif (missings:=log_data.get('missing')):
for node_key in ast.literal_eval(missings):
missing_nodes[node_key] = {
'name': '/'.join(sckan_knowledge['terms'].get(term) for term in [node_key[0]]+list(node_key[1])),
'map': tuple()
}
if log_data.get('map'):
missing_nodes[node_key]['map'] += (log_data['map'],)

path_dicts = {}
for path_kn in sckan_knowledge['knowledge']:
Expand All @@ -462,9 +479,17 @@ def identify_missing_nodes(sckan_knowledge, log_data_list, map_type, g_rdf, outp
G = nx.Graph()
G.add_edges_from(conn)
degrees = dict(G.degree()) # type: ignore
min_degree = min(degrees.values())

node_phenotypes = path_kn.get('node-phenotypes', {})
path_somas_axons = set(
(node[0], tuple(node[1]))
for phenotype, node_type in NODE_TYPE_BY_PHENOTYPE.items()
if node_type in ('source', 'destination')
for node in node_phenotypes.get(phenotype, [])
)

path_dicts[path_kn['id']] = {
'terminals': set([node for node, deg in degrees.items() if deg == min_degree]),
'terminals': path_somas_axons,
'nodes': set(degrees.keys()),
'phenotypes': path_kn.get('phenotypes')
}
Expand Down Expand Up @@ -594,7 +619,8 @@ def loading_sources(generated_folders):

# Standardize log entries, add 'map' field and convert 'node' lists to tuples
last_idx_version = max(i for i, line in enumerate(log_data) if 'Using knowledge source' in line.get('msg', ''))
for log_entry in log_data[last_idx_version:]:
log_data = log_data[last_idx_version:]
for log_entry in log_data:
log_entry['map'] = map_name
if 'node' in log_entry and isinstance(log_entry['node'], list):
log_entry['node'] = (log_entry['node'][0], tuple(log_entry['node'][1]))
Expand Down Expand Up @@ -623,14 +649,16 @@ def loading_sources(generated_folders):
sckan_version=sckan_version,
store_directory=Path(store_directory).parent
)
sckan_knowledge = {'source': sckan_version, 'knowledge': [], 'nodes': set(), 'terms': set()}
sckan_knowledge = {'source': sckan_version, 'knowledge': [], 'nodes': set(), 'terms': dict()}
for path in store.connectivity_paths():
path_kn = store.entity_knowledge(path)
sckan_knowledge['knowledge'].append(path_kn)
for edge in path_kn['connectivity']:
for node in edge:
sckan_knowledge['nodes'].add((node[0], tuple(node[1])))
sckan_knowledge['terms'].update([node[0], *node[1]])
for term in [node[0], *node[1]]:
if term not in sckan_knowledge['terms']:
sckan_knowledge['terms'][term] = store.entity_knowledge(term).get('label', term)
store.close()


Expand Down
Loading