Skip to content

save

save_cli(format, filename, timestamp)

Source code in roc/jupyter/save.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
@click.command()
@click.option(
    "-f",
    "--format",
    default="gexf",
    type=click.Choice(
        [
            "gexf",
            "gml",
            "dot",
            "graphml",
            "json-node-link",
            "json-adj",
            "cytoscape",
            "pajek",
            "matrix-market",
            "adj-list",
            "multi-adj-list",
            "edge-list",
        ],
        case_sensitive=False,
    ),
)
@click.option("--timestamp/--no-timestamp", is_flag=True, default=True)
@click.argument("filename", nargs=1, type=click.Path(), default="graph", required=False)
def save_cli(format: str, filename: str, timestamp: bool) -> None:
    ids = Node.all_ids()
    print(f"Saving {len(ids)} nodes...")  # noqa: T201
    start_time = time.time()

    # tqdm options: https://github.com/tqdm/tqdm?tab=readme-ov-file#parameters
    with tqdm(total=len(ids), desc="Nodes", unit="node", ncols=80, colour="blue") as pbar:

        def progress_update(n: Node) -> bool:
            pbar.update(1)
            return True

        G = GraphDB.to_networkx(node_ids=ids, filter=progress_update)

    # format timestamp
    if timestamp:
        # time format: https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
        timestr = datetime.now().strftime("%Y.%m.%d-%H.%M.%S")
        filename = f"{filename}-{timestr}"

    print(f"Writing graph to '{filename}'...")  # noqa: T201
    match format:
        case "gexf":
            nx.write_gexf(G, f"{filename}.gexf")
        case "gml":
            nx.write_gml(G, f"{filename}.gml")
        case "dot":
            # XXX: pydot uses the 'name' attribute internally, so rename ours if it exists
            for n in G.nodes(data=True):
                if "name" in n[1]:
                    n[1]["nme"] = n[1]["name"]
                    del n[1]["name"]
            write_dot(G, f"{filename}.dot")
        case "graphml":
            nx.write_graphml(G, f"{filename}.graphml")
        # case "json-tree":
        #     with open(f"{filename}.tree.json", "w", encoding="utf8") as f:
        #         json.dump(nx.tree_data(G), f)
        case "json-node-link":
            with open(f"{filename}.node-link.json", "w", encoding="utf8") as f:
                json.dump(nx.node_link_data(G), f)
        case "json-adj":
            with open(f"{filename}.adj.json", "w", encoding="utf8") as f:
                json.dump(nx.adjacency_data(G), f)
        case "cytoscape":
            with open(f"{filename}.cytoscape.json", "w", encoding="utf8") as f:
                json.dump(nx.cytoscape_data(G), f)
        case "pajek":
            nx.write_pajek(G, f"{filename}.pajek")
        case "matrix-market":
            np_graph = nx.to_numpy_array(G)
            sp.io.mmwrite(f"{filename}.mm", np_graph)
        case "adj-list":
            nx.write_adjlist(G, f"{filename}.adjlist")
        case "multi-adj-list":
            nx.write_multiline_adjlist(G, f"{filename}.madjlist")
        case "edge-list":
            nx.write_edgelist(G, f"{filename}.edges")

    end_time = time.time()

    nc = Node.get_cache()
    ec = Edge.get_cache()
    assert len(nc) == len(ids)
    print(  # noqa: T201
        f"Saved {len(ids)} nodes and {len(ec)} edges. Elapsed time: {timedelta(seconds=(end_time-start_time))}"
    )