diff --git a/tools/flatmap_diagnostic.py b/tools/flatmap_diagnostic.py index 2caf8c68..7aa650ea 100755 --- a/tools/flatmap_diagnostic.py +++ b/tools/flatmap_diagnostic.py @@ -140,6 +140,8 @@ class digestive_43 Anatomical Map Anatomical map class import subprocess import tempfile from urllib.parse import urlparse +from mapmaker.knowledgebase.sckan import NODE_TYPE_BY_PHENOTYPE +import ast #=============================================================================== @@ -195,7 +197,7 @@ def __init__(self, manifest_file, sckan_knowledge): # check class, id, type self.__svg_tags = {'id': {}, 'class': {}} for svg_source in self.__manifest['sources']: - tree = etree.parse(manifest_file.with_name(svg_source['href'])) + tree = etree.parse(manifest_file.parent / svg_source["href"]) root = tree.getroot() ns = {'svg': 'http://www.w3.org/2000/svg'} parent_map = {c: p for p in root.iter() for c in p} @@ -220,24 +222,29 @@ def __init__(self, manifest_file, sckan_knowledge): # anatomical_map info - with open(manifest_file.with_name(self.__manifest['anatomicalMap']), 'r') as fp: - self.__anatomical_map = json.load(fp) + if (anatomical_file:=self.__manifest.get('anatomicalMap')): + with open(manifest_file.with_name(anatomical_file), 'r') as fp: + self.__anatomical_map = json.load(fp) + else: + self.__anatomical_map = dict() # property info with open(manifest_file.with_name(self.__manifest['properties']), 'r') as fp: self.__properties = json.load(fp) + self.__properties['points'] = [ + p + for network in self.__properties.get('networks', []) + for nt in ['centrelines', 'no-centrelines'] if nt in network + for c in network[nt] + for point in c.get('connects', []) + for p in point.split('/') + ] self.__properties['networks'] = { c['id']: c for network in self.__properties.get('networks', []) for nt in ['centrelines', 'no-centrelines'] if nt in network for c in network[nt] } - self.__properties['points'] = [ - p - for network in self.__properties.get('networks', {}).values() - for point in network['connects'] - for p in point.split('/') - ] self.__properties['terms'] = defaultdict(list) self.__properties['a_classes'] = defaultdict(list) for feature_id, feature in self.__properties.get('features', {}).items(): @@ -276,10 +283,10 @@ def __is_node_or_term_in_property_or_proxy(self, node_or_term): if isinstance(node_or_term, str): return node_or_term in self.__properties['terms'] or node_or_term in self.__have_proxies elif isinstance(node_or_term, tuple): - for term in [node_or_term[0]] + list(node_or_term[1]): - if term not in self.__properties['terms'] and term not in self.__have_proxies: - return False - return True + terms_to_check = [node_or_term[0]] + list(node_or_term[1]) + props_terms = self.__properties['terms'] + proxies = self.__have_proxies + return all(term in props_terms or term in proxies for term in terms_to_check) def __get_term_by_id(self, id): if id in (dt:=self.__properties['features']) or id in (dt:=self.__properties['networks']): @@ -342,11 +349,12 @@ def __anatomical_map_validation(self): def __properties_validation(self): # validating classes in properties for k, v in self.__properties['classes'].items(): - if (k in self.__anatomical_map or v.get('models')) and (k in self.__svg_tags['class'] or k in self.__properties['a_classes']): - continue - if k not in self.__svg_tags['class'] and k not in self.__properties['a_classes']: + in_anat_or_models = k in self.__anatomical_map or v.get('models') + in_svg_or_props = k in self.__svg_tags['class'] or k in self.__properties['a_classes'] + + if not in_svg_or_props: self.__record_issue('class', k, 'Properties', 'Properties class not in SVG and property features') - elif k not in self.__anatomical_map and not v.get('models'): + elif not in_anat_or_models: self.__record_issue('class', k, 'Properties', 'Properties class not in anatomical map and has no models') # validating anatomical classes in properties @@ -368,10 +376,11 @@ def __properties_validation(self): for k in self.__properties['features']: if k not in self.__svg_tags['id']: self.__record_issue('id', k, 'Properties', 'Properties feature id not in SVG') - if not self.__properties['features'][k].get('models') and self.__anatomical_map.get(self.__properties['features'][k].get('class'), {}).get('term') is None: + feature = self.__properties['features'][k] + if not feature.get('models') and self.__anatomical_map.get(feature.get('class'), {}).get('term') is None: self.__record_issue('id', k, 'Properties', 'Properties feature id has no models and its class has no term in anatomical map') - def analize(self): + def analyze(self): self.__svg_validation() self.__alias_validation() self.__proxy_validation() @@ -415,12 +424,12 @@ def analyse_flatmap_source(manifest_files, sckan_knowledge, output_dir='.'): # Path to the file file_path = tmp_path / file_relative sv = SourceValidation(file_path, sckan_knowledge) - sv.analize() + sv.analyze() svs.append(sv) else: # local file sv = SourceValidation(manifest_file, sckan_knowledge) - sv.analize() + sv.analyze() svs.append(sv) sources = [sv.get_manifest_id() for sv in svs] @@ -442,7 +451,7 @@ def analyse_flatmap_source(manifest_files, sckan_knowledge, output_dir='.'): def identify_missing_nodes(sckan_knowledge, log_data_list, map_type, g_rdf, output_dir='.'): # group and aggregate paths - missing_paths = {log_data['path'] for log_data in log_data_list if log_data.get('msg') == 'Path is not rendered due to partial rendering'} + missing_paths = {log_data['path'] for log_data in log_data_list if log_data.get('msg') == 'Path suppressed: source/destination nodes are not in the connectivity graph'} missing_nodes = defaultdict(dict) for log_data in log_data_list: if log_data.get('msg') == 'Cannot find feature for connectivity node': @@ -454,6 +463,14 @@ def identify_missing_nodes(sckan_knowledge, log_data_list, map_type, g_rdf, outp } if log_data.get('map'): missing_nodes[node_key]['map'] += (log_data['map'],) + elif (missings:=log_data.get('missing')): + for node_key in ast.literal_eval(missings): + missing_nodes[node_key] = { + 'name': '/'.join(sckan_knowledge['terms'].get(term) for term in [node_key[0]]+list(node_key[1])), + 'map': tuple() + } + if log_data.get('map'): + missing_nodes[node_key]['map'] += (log_data['map'],) path_dicts = {} for path_kn in sckan_knowledge['knowledge']: @@ -462,9 +479,17 @@ def identify_missing_nodes(sckan_knowledge, log_data_list, map_type, g_rdf, outp G = nx.Graph() G.add_edges_from(conn) degrees = dict(G.degree()) # type: ignore - min_degree = min(degrees.values()) + + node_phenotypes = path_kn.get('node-phenotypes', {}) + path_somas_axons = set( + (node[0], tuple(node[1])) + for phenotype, node_type in NODE_TYPE_BY_PHENOTYPE.items() + if node_type in ('source', 'destination') + for node in node_phenotypes.get(phenotype, []) + ) + path_dicts[path_kn['id']] = { - 'terminals': set([node for node, deg in degrees.items() if deg == min_degree]), + 'terminals': path_somas_axons, 'nodes': set(degrees.keys()), 'phenotypes': path_kn.get('phenotypes') } @@ -594,7 +619,8 @@ def loading_sources(generated_folders): # Standardize log entries, add 'map' field and convert 'node' lists to tuples last_idx_version = max(i for i, line in enumerate(log_data) if 'Using knowledge source' in line.get('msg', '')) - for log_entry in log_data[last_idx_version:]: + log_data = log_data[last_idx_version:] + for log_entry in log_data: log_entry['map'] = map_name if 'node' in log_entry and isinstance(log_entry['node'], list): log_entry['node'] = (log_entry['node'][0], tuple(log_entry['node'][1])) @@ -623,14 +649,16 @@ def loading_sources(generated_folders): sckan_version=sckan_version, store_directory=Path(store_directory).parent ) - sckan_knowledge = {'source': sckan_version, 'knowledge': [], 'nodes': set(), 'terms': set()} + sckan_knowledge = {'source': sckan_version, 'knowledge': [], 'nodes': set(), 'terms': dict()} for path in store.connectivity_paths(): path_kn = store.entity_knowledge(path) sckan_knowledge['knowledge'].append(path_kn) for edge in path_kn['connectivity']: for node in edge: sckan_knowledge['nodes'].add((node[0], tuple(node[1]))) - sckan_knowledge['terms'].update([node[0], *node[1]]) + for term in [node[0], *node[1]]: + if term not in sckan_knowledge['terms']: + sckan_knowledge['terms'][term] = store.entity_knowledge(term).get('label', term) store.close()