network

network contains functions to arrange and analyze glycans in the context of networks. In such a network, each node represents a glycan and edges represent, for instance, their connection via a biosynthetic step. Because glycowork treats glycans as molecular graphs, these networks are hierarchical graphs: the network itself is one graph, and each node within the network is also a graph. network contains the following modules:

biosynthesis

constructing and analyzing biosynthetic glycan networks


construct_network


def construct_network(
    glycans:list, # List of glycans
    allowed_ptms:frozenset=frozenset({'3P', '6P', 'OS', '3S', '9Ac', 'OAc', '4Ac', '1P', '6S', 'OP'}), # Set of allowed PTMs
    edge_type:str='monolink', # Edge label type: monolink/monosaccharide/enzyme
    permitted_roots:frozenset[str] | None=None, # Allowed root nodes
    abundances:list=[], # Glycan abundances in the same order as glycans; default:empty
)->DiGraph: # Biosynthetic network

Construct glycan biosynthetic network

glycans = ["Gal(b1-4)Glc-ol", "GlcNAc(b1-3)Gal(b1-4)Glc-ol",
           "GlcNAc6S(b1-3)Gal(b1-4)Glc-ol",
           "Gal(b1-4)GlcNAc(b1-3)Gal(b1-4)Glc-ol", "Fuc(a1-2)Gal(b1-4)Glc-ol",
          "Neu5Ac(a2-3)Gal(b1-4)GlcNAc(b1-3)[Gal(b1-3)GlcNAc(b1-6)]Gal(b1-4)Glc-ol"]
network = construct_network(glycans)
network.nodes()
NodeView(('Neu5Ac(a2-3)Gal(b1-4)GlcNAc(b1-3)[Gal(b1-3)GlcNAc(b1-6)]Gal(b1-4)Glc-ol', 'Gal(b1-4)GlcNAc(b1-3)Gal(b1-4)Glc-ol', 'GlcNAc6S(b1-3)Gal(b1-4)Glc-ol', 'GlcNAc(b1-3)Gal(b1-4)Glc-ol', 'Fuc(a1-2)Gal(b1-4)Glc-ol', 'Gal(b1-4)Glc-ol', 'Gal(b1-4)GlcNAc(b1-3)[Gal(b1-3)GlcNAc(b1-6)]Gal(b1-4)Glc-ol', 'Neu5Ac(a2-3)Gal(b1-4)GlcNAc(b1-3)[GlcNAc(b1-6)]Gal(b1-4)Glc-ol', 'Gal(b1-4)GlcNAc(b1-3)[GlcNAc(b1-6)]Gal(b1-4)Glc-ol', 'Neu5Ac(a2-3)Gal(b1-4)GlcNAc(b1-3)Gal(b1-4)Glc-ol', 'GlcNAc(b1-3)[GlcNAc(b1-6)]Gal(b1-4)Glc-ol', 'GlcNAc(b1-6)Gal(b1-4)Glc-ol', 'Gal(b1-3)GlcNAc(b1-6)[GlcNAc(b1-3)]Gal(b1-4)Glc-ol', 'Gal(b1-3)GlcNAc(b1-6)Gal(b1-4)Glc-ol'))

plot_network


def plot_network(
    network:DiGraph, # Biosynthetic network
    plot_format:str='hierarchical', # Layout type: hierarchical/pydot2/kamada_kawai/spring
    edge_label_draw:bool=True, # Whether to draw edge labels
    lfc_dict:dict[str, float] | None=None, # Enzyme:log2FC mapping for edge width
)->None: # Displays plot

Visualize biosynthetic network

plot_network(network)
Loading BokehJS ...
figure(
id = 'p1004', …)

infer_network


def infer_network(
    network:DiGraph, # Network to infer
    network_species:str, # Source species
    species_list:list, # Species to compare against
    network_dic:dict, # Species:network mapping
)->Graph: # Network with inferred nodes

Replace virtual nodes observed in other species


retrieve_inferred_nodes


def retrieve_inferred_nodes(
    network:DiGraph, # Network with inferred nodes
    species:str | None=None, # Source species if multiple
)->list[str] | dict[str, list[str]]: # Inferred nodes list or dict

Get inferred virtual nodes from network


update_network


def update_network(
    network_in:DiGraph, # Input network
    edge_list:list, # List of edges to add
    edge_labels:list[str] | None=None, # Labels for new edges
    node_labels:dict[str, int] | None=None, # Node virtual status (0: observed, 1: virtual)
)->DiGraph: # Updated network

Update network with new edges and labels


trace_diamonds


def trace_diamonds(
    network:DiGraph, # Biosynthetic network
    species_list:list, # Species to compare against
    network_dic:dict, # Species:network mapping
    threshold:float=0.0, # Cutoff threshold
    nb_intermediates:int=2, # Number of intermediate nodes; has to be a multiple of 2
    mode:str='presence', # Analysis mode: presence/abundance
)->DataFrame: # Path analysis results, with proportion (0-1) of how often glycan has been experimentally observed in this path (or average abundance)

Analyze diamond motif (A->B,A->C,B->D,C->D) path preferences using evolutionary data


evoprune_network


def evoprune_network(
    network:DiGraph, # Biosynthetic network
    network_dic:dict[str, networkx.classes.digraph.DiGraph] | None=None, # Species:network mapping
    species_list:list[str] | None=None, # Species to compare against
    node_attr:str='abundance', # Node attribute to use for pruning
    threshold:float=0.01, # Cutoff threshold
    nb_intermediates:int=2, # Number of intermediate nodes; has to be a multiple of 2
    mode:str='presence', # Analysis mode: presence/abundance
)->DiGraph: # Evolutionarily pruned network (with virtual node probability as a new node attribute)

Prune network using evolutionary path preferences

plot_network(evoprune_network(network))
Loading BokehJS ...
figure(
id = 'p1187', …)

highlight_network


def highlight_network(
    network:DiGraph, # Biosynthetic network
    highlight:str, # What to highlight: motif/species/abundance/conservation
    motif:str | None=None, # Motif to highlight; highlight=motif
    abundance_df:pandas.DataFrame | None=None, # Glycan abundance data; highlight=abundance
    glycan_col:str='glycan', # Glycan column name; highlight=abundance
    intensity_col:str='rel_intensity', # Intensity column name; highlight=abundance
    conservation_df:pandas.DataFrame | None=None, # Species-glycan data; highlight=conservation
    network_dic:dict[str, networkx.classes.digraph.DiGraph] | None=None, # Species:network mapping; highlight=conservation/species
    species:str | None=None, # Species to highlight; highlight=species
)->DiGraph: # Network with highlight attributes ('origin' (motif/species) or 'abundance' (abundance/conservation) node attribute)

Add visual highlighting to network nodes, to be used in plot_network


export_network


def export_network(
    network:DiGraph, # Biosynthetic network
    filepath:str, # Output path prefix; file description and file type are appended to it
    other_node_attributes:list[str] | None=None, # Additional attributes for extraction
)->None: # Saves network files (edge list/labels + node IDs and labels)

Export network to Cytoscape/Gephi compatible files


get_maximum_flow


def get_maximum_flow(
    network:DiGraph, # Biosynthetic network
    source:str='Gal(b1-4)Glc-ol', # Source node
    sinks:list[str] | None=None, # Target nodes; default:all terminal nodes
)->dict: # Flow results; sink: {maximum flow value, flow path dictionary}

Estimate maximum flow and flow paths between source and sinks


get_max_flow_path


def get_max_flow_path(
    network:DiGraph, # Biosynthetic network
    flow_dict:dict, # Flow dictionary as returned by get_maximum_flow
    sink:str, # Target node
    source:str='Gal(b1-4)Glc-ol', # Source node
)->list: # Path edge list

Get path giving maximum flow value


get_reaction_flow


def get_reaction_flow(
    network:DiGraph, # Biosynthetic network
    res:dict, # Flow results as returned by get_maximum_flow
    aggregate:str | None=None, # Aggregation: sum/mean/None
)->dict[str, list[float]] | dict[str, float]: # Reaction flows (reaction: flow)

Get aggregated flows by reaction type


get_differential_biosynthesis


def get_differential_biosynthesis(
    df:pandas.DataFrame | str, # Glycan abundance data (first column: glycan sequences)
    group1:list, # First group column indices/names (or time points in longitudinal analysis)
    group2:list[str | int] | None=None, # Second group column indices/names (or time points in longitudinal analysis)
    analysis:str='reaction', # Type: reaction/flow
    paired:bool=False, # Whether samples are paired
    longitudinal:bool=False, # Whether to perform longitudinal analysis
    id_column:str='ID', # Sample ID column for longitudinal analysis in the ID-style of participant_time_replicate
)->DataFrame: # Differential analysis results (differential flow features and statistics OR reaction changes over time)

Compare biosynthetic patterns between conditions/timepoints

get_differential_biosynthesis(human_skin_O_PMC5871710_BCC, [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39],
                              [2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40], paired = True)
You're working with an alpha of 0.044390023979542614 that has been adjusted for your sample size of 40.
Mean abundance Log2FC p-val corr p-val significant Effect size
Glycan
Fuc(a1-2) 2.173742 -0.665850 0.002498 0.014987 True -0.778493
Neu5Ac(a2-6) 5.143191 -0.496132 0.007521 0.020831 True -0.668654
Gal(b1-3) 11.802605 -0.199873 0.012880 0.020831 True -0.613748
Neu5Ac(a2-8) 2.894141 -0.464140 0.013887 0.020831 True -0.605967
OS 2.249050 -0.521844 0.019236 0.023084 True -0.571950
Gal(b1-3/4) 9.231010 -0.131545 0.050540 0.050540 False -0.466800
6S 4.485673 0.302445 0.081933 0.070228 False 0.410724
Neu5Ac(a2-3/6/8) 6.634273 -0.125647 0.187364 0.140523 False -0.305830
Neu5Ac(a2-3) 11.865486 0.093563 0.419533 0.242468 False 0.184506
GlcNAc(b1-6) 6.659414 -0.002146 0.444526 0.242468 False -0.174598
Gal(b1-4) 6.659414 -0.002146 0.444526 0.242468 False -0.174598

get_biosynthetic_coherence


def get_biosynthetic_coherence(
    df:DataFrame, # Glycan abundances (glycans as index or first column, samples as columns)
    group1:list, # First group column names
    group2:list, # Second group column names
    network:networkx.classes.digraph.DiGraph | None=None, # Pre-built network; built from df if not provided
    paired:bool=False, # Whether samples are paired
)->DataFrame: # Test results with group means, difference, t-statistic, p-value, and Cohen's d

Test whether biosynthetic coherence differs between two conditions using per-sample variance-weighted R²


extend_network


def extend_network(
    network:DiGraph, # Biosynthetic network
    steps:int=1, # Number of extension steps; default:1 (becomes max_steps when auto_steps is True)
    to_extend:str | dict[str, int] | list[str]='all', # Nodes to extend (all, specific leaf node, target composition)
    strict_context:bool=False, # Whether to use network only to derive allowed reaction products; default:False
    auto_steps:bool=False, # Infer minimum steps to reach target composition; converts steps into max_steps when to_extend is a composition
)->tuple: # (Extended network, New glycans), optionally minimum number of steps from auto_steps

Extend biosynthetic network physiologically

new_network, new_glycans = extend_network(network, strict_context = True)
len(new_glycans)
20

evolution

investigating evolutionary relationships of glycans


distance_from_embeddings


def distance_from_embeddings(
    df:DataFrame, # DataFrame with glycans (rows) and taxonomic info (columns)
    embeddings:DataFrame, # DataFrame with glycans (rows) and embeddings (columns) (e.g., from glycans_to_emb)
    cut_off:int=10, # Minimum glycans per rank to be included; default:10
    rank:str='Species', # Taxonomic rank for grouping; default:Species
    averaging:str='median', # How to average embeddings: median/mean
)->DataFrame: # Rank x rank distance matrix

Calculate cosine distance matrix from learned embeddings


distance_from_metric


def distance_from_metric(
    df:DataFrame, # DataFrame with glycans (rows) and taxonomic info (columns)
    networks:list, # List of networkx networks
    metric:str='Jaccard', # Distance metric to use
    cut_off:int=10, # Minimum glycans per rank to be included; default:10
    rank:str='Species', # Taxonomic rank for grouping; default:Species
)->DataFrame: # Rank x rank distance matrix

Calculate distance matrix between networks using provided metric


dendrogram_from_distance


def dendrogram_from_distance(
    dm:DataFrame, # Rank x rank distance matrix (e.g., from distance_from_embeddings)
    ylabel:str='Mammalia', # Y-axis label
    filepath:str='', # Path to save plot including filename
)->None: # Displays or saves dendrogram plot

Plot dendrogram from distance matrix


check_conservation


def check_conservation(
    glycan:str, # Glycan or motif in IUPAC-condensed format
    df:DataFrame, # DataFrame with glycans (rows) and taxonomic levels (columns)
    network_dic:dict[str, networkx.classes.graph.Graph] | None=None, # Species:biosynthetic network mapping
    rank:str='Order', # Taxonomic level to assess
    threshold:int=5, # Minimum glycans per species to be included
    motif:bool=False, # Whether glycan is a motif vs sequence
)->dict: # Taxonomic group-to-conservation mapping

Estimate evolutionary conservation of glycans via biosynthetic networks


get_communities


def get_communities(
    network_list:list, # List of undirected biosynthetic networks
    label_list:list[str] | None=None, # Labels for community names, running_number + _ + label_list[k] for network_list[k]; default:range(len(network_list))
)->dict: # Community-to-glycan list mapping

Find communities for each graph in list of graphs