# Source code for torch_geometric.nn.model_hub

import os.path as osp
from pathlib import Path
from typing import Any, Dict, Optional, Union

import torch

from torch_geometric.io import fs

try:
    from huggingface_hub import ModelHubMixin, hf_hub_download
except ImportError:
    ModelHubMixin = object
    hf_hub_download = None

# Name of a JSON configuration file (presumably the one written next to the
# weights by the HuggingFace mixin -- not referenced directly in this module).
CONFIG_NAME = 'config.json'
# Value reported as `library_name` on generated model cards.
MODEL_HUB_ORGANIZATION = 'pytorch_geometric'
# File name under which the model's `state_dict` is stored and looked up.
MODEL_WEIGHTS_NAME = 'model.pth'
# Tags attached to every generated model card.
TAGS = ['graph-machine-learning']


class PyGModelHubMixin(ModelHubMixin):
    r"""A mixin for saving and loading models to the
    `Huggingface Model Hub <https://huggingface.co/docs/hub/index>`_.

    .. code-block:: python

       from torch_geometric.datasets import Planetoid
       from torch_geometric.nn import Node2Vec
       from torch_geometric.nn.model_hub import PyGModelHubMixin

       # Load the graph the model operates on:
       dataset = Planetoid(root='/tmp/Cora', name='Cora')
       data = dataset[0]

       # Define your class with the mixin:
       class N2V(Node2Vec, PyGModelHubMixin):
           def __init__(self, model_name, dataset_name, model_kwargs):
               Node2Vec.__init__(self, **model_kwargs)
               PyGModelHubMixin.__init__(self, model_name,
                   dataset_name, model_kwargs)

       # Instantiate your model:
       n2v = N2V(model_name='node2vec',
           dataset_name='Cora', model_kwargs=dict(
           edge_index=data.edge_index, embedding_dim=128,
           walk_length=20, context_size=10, walks_per_node=10,
           num_negative_samples=1, p=1, q=1, sparse=True))

       # Train the model:
       ...

       # Push to the HuggingFace hub:
       repo_id = ...  # your repo id
       n2v.save_pretrained(
           local_file_path,
           push_to_hub=True,
           repo_id=repo_id,
       )

       # Load the model for inference:
       # The required arguments are the repo id/local folder, and any model
       # initialisation arguments that are not native python types (e.g
       # Node2Vec requires the edge_index argument which is not stored in the
       # model hub).
       model = N2V.from_pretrained(
           repo_id,
           model_name='node2vec',
           dataset_name='Cora',
           edge_index=data.edge_index,
       )

    Args:
        model_name (str): Name of the model.
        dataset_name (str): Name of the dataset the model was trained against.
        model_kwargs (Dict[str, Any]): The arguments to initialise the model.
    """
    def __init__(self, model_name: str, dataset_name: str,
                 model_kwargs: Dict[str, Any]) -> None:
        ModelHubMixin.__init__(self)

        # Huggingface Hub API only accepts saving the config as a dict.
        # If the model is instantiated with non-native python types
        # such as torch Tensors (node2vec being an example), we have to remove
        # these as they are not json serialisable.
        # `isinstance` (rather than an exact `type(...)` match) also keeps
        # `bool` flags such as `sparse=True`, which are JSON serialisable and
        # would otherwise be silently lost when the model is reloaded.
        self.model_config = {
            k: v
            for k, v in model_kwargs.items()
            if isinstance(v, (str, int, float, bool))
        }
        self.model_name = model_name
        self.dataset_name = dataset_name

    def construct_model_card(self, model_name: str, dataset_name: str) -> Any:
        r"""Build a HuggingFace model card describing this model.

        Args:
            model_name (str): Name of the model, stored in the card metadata.
            dataset_name (str): Name of the dataset, stored in the card
                metadata.

        Returns:
            The :class:`huggingface_hub.ModelCard` created from the default
            template.
        """
        # Imported lazily so that the module can still be imported when
        # `huggingface_hub` is not installed.
        from huggingface_hub import ModelCard, ModelCardData
        card_data = ModelCardData(
            language='en',
            license='mit',
            library_name=MODEL_HUB_ORGANIZATION,
            tags=TAGS,
            datasets=dataset_name,
            model_name=model_name,
        )
        card = ModelCard.from_template(card_data)
        return card

    def _save_pretrained(self, save_directory: Union[Path, str]) -> None:
        r"""Write the model's :obj:`state_dict` into :obj:`save_directory`
        under the file name :obj:`MODEL_WEIGHTS_NAME`.
        """
        path = osp.join(save_directory, MODEL_WEIGHTS_NAME)
        # Wrapped models (anything exposing a `module` attribute) are
        # unwrapped so that the underlying module's weights are stored.
        model_to_save = self.module if hasattr(self, 'module') else self
        torch.save(model_to_save.state_dict(), path)

    def save_pretrained(self, save_directory: Union[str, Path],
                        push_to_hub: bool = False,
                        repo_id: Optional[str] = None, **kwargs):
        r"""Save a trained model to a local directory or to the HuggingFace
        model hub.

        Args:
            save_directory (str): The directory where weights are saved.
            push_to_hub (bool, optional): If :obj:`True`, push the model to the
                HuggingFace model hub. (default: :obj:`False`)
            repo_id (str, optional): The repository name in the hub.
                If not provided will default to the name of
                :obj:`save_directory` in your namespace. (default: :obj:`None`)
            **kwargs: Additional keyword arguments passed to
                :meth:`huggingface_hub.ModelHubMixin.save_pretrained`.
        """
        config = self.model_config
        # due to way huggingface hub handles the loading/saving of models,
        # the model config can end up in one of the items in the kwargs
        # this has to be removed to prevent a duplication of arguments to
        # ModelHubMixin.save_pretrained
        kwargs.pop('config', None)

        super().save_pretrained(
            save_directory=save_directory,
            config=config,
            push_to_hub=push_to_hub,
            repo_id=repo_id,
            **kwargs,
        )

        # The model card is only needed when publishing, so it is not built
        # for purely local saves.
        if push_to_hub:
            if repo_id is None:
                # Apply the documented default (the name of `save_directory`)
                # so that the card is pushed to a valid repository.
                repo_id = Path(save_directory).name
            model_card = self.construct_model_card(self.model_name,
                                                   self.dataset_name)
            model_card.push_to_hub(repo_id)

    @classmethod
    def _from_pretrained(
        cls,
        model_id,
        revision,
        cache_dir,
        force_download,
        proxies,
        resume_download,
        local_files_only,
        token,
        dataset_name='',
        model_name='',
        map_location='cpu',
        strict=False,
        **model_kwargs,
    ):
        r"""Instantiate the model and load its weights from a local directory
        or from the HuggingFace hub.

        If :obj:`model_id` is an existing directory, the weights are read from
        it; otherwise they are downloaded via :obj:`hf_hub_download`.
        A :obj:`config` entry found in :obj:`model_kwargs` is merged into the
        remaining keyword arguments (its values take precedence) before the
        model is constructed. The returned model is in evaluation mode.
        """
        map_location = torch.device(map_location)

        if osp.isdir(model_id):
            model_file = osp.join(model_id, MODEL_WEIGHTS_NAME)
        else:
            model_file = hf_hub_download(
                repo_id=model_id,
                filename=MODEL_WEIGHTS_NAME,
                revision=revision,
                cache_dir=cache_dir,
                force_download=force_download,
                proxies=proxies,
                resume_download=resume_download,
                token=token,
                local_files_only=local_files_only,
            )

        config = model_kwargs.pop('config', None)
        if config is not None:
            model_kwargs = {**model_kwargs, **config}

        # Positional order must match `__init__(model_name, dataset_name,
        # model_kwargs)`; passing them swapped would store the dataset name
        # as the model name and vice versa.
        model = cls(model_name, dataset_name, model_kwargs)

        state_dict = fs.torch_load(model_file, map_location=map_location)
        model.load_state_dict(state_dict, strict=strict)
        model.eval()

        return model

    @classmethod
    def from_pretrained(
        cls,
        pretrained_model_name_or_path: str,
        force_download: bool = False,
        resume_download: bool = False,
        proxies: Optional[Dict] = None,
        token: Optional[Union[str, bool]] = None,
        cache_dir: Optional[str] = None,
        local_files_only: bool = False,
        **model_kwargs,
    ) -> Any:
        r"""Downloads and instantiates a model from the HuggingFace hub.

        Args:
            pretrained_model_name_or_path (str): Can be either:

                - The :obj:`model_id` of a pretrained model hosted inside the
                  HuggingFace hub.

                - You can add a :obj:`revision` by appending :obj:`@` at the
                  end of :obj:`model_id` to load a specific model version.

                - A path to a directory containing the saved model weights.

                - :obj:`None` if you are both providing the configuration
                  :obj:`config` and state dictionary :obj:`state_dict`.

            force_download (bool, optional): Whether to force the
                (re-)download of the model weights and configuration files,
                overriding the cached versions if they exist.
                (default: :obj:`False`)
            resume_download (bool, optional): Whether to delete incompletely
                received files. Will attempt to resume the download if such a
                file exists. (default: :obj:`False`)
            proxies (Dict[str, str], optional): A dictionary of proxy servers
                to use by protocol or endpoint, *e.g.*,
                :obj:`{'http': 'foo.bar:3128', 'http://host': 'foo.bar:4012'}`.
                The proxies are used on each request. (default: :obj:`None`)
            token (str or bool, optional): The token to use as HTTP bearer
                authorization for remote files. If set to :obj:`True`, will use
                the token generated when running :obj:`transformers-cli login`
                (stored in :obj:`~/.huggingface`). It is **required** if you
                want to use a private model. (default: :obj:`None`)
            cache_dir (str, optional): The path to a directory in which a
                downloaded model configuration should be cached if the
                standard cache should not be used. (default: :obj:`None`)
            local_files_only (bool, optional): Whether to only look at local
                files, *i.e.* do not try to download the model.
                (default: :obj:`False`)
            **model_kwargs: Additional keyword arguments passed to the
                model during initialization.
        """
        # NOTE(review): the token is forwarded under the legacy
        # `use_auth_token` keyword; confirm that the supported
        # `huggingface_hub` versions still translate it to `token`.
        return super().from_pretrained(
            pretrained_model_name_or_path,
            force_download=force_download,
            resume_download=resume_download,
            proxies=proxies,
            use_auth_token=token,
            cache_dir=cache_dir,
            local_files_only=local_files_only,
            **model_kwargs,
        )