Source code for invenio_records_rest.serializers.citeproc

# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2016-2018 CERN.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""CSL Citation Formatter serializer for records."""

import json
import re

from citeproc import (
    Citation,
    CitationItem,
    CitationStylesBibliography,
    CitationStylesStyle,
    formatter,
)
from citeproc.source.bibtex import BibTeX
from citeproc.source.json import CiteProcJSON
from flask import has_request_context, request
from webargs import fields
from webargs.flaskparser import FlaskParser

from ..errors import StyleNotFoundRESTError

# ``citeproc_styles`` is treated as an optional dependency: if importing it
# fails for any reason, only a warning is emitted at import time and the names
# ``get_style_filepath`` / ``StyleNotFoundError`` are left undefined in this
# module.
try:
    from citeproc_styles import get_style_filepath
    from citeproc_styles.errors import StyleNotFoundError
except Exception:
    import warnings

    warnings.warn(
        "citeproc_styles not found. " "Please install to enable Citeproc Serialization."
    )


class CiteprocSerializer(object):
    """CSL Citation Formatter serializer for records.

    In order to produce a formatted citation of a record through citeproc-py,
    we need a CSL-JSON or BibTeX serialized version of it. Since there may be
    already an implementation of such a serializer, it can be passed in the
    constructor of this class. This serializer has to implement a `serialize`
    method that returns the CSL-JSON/BibTeX result.
    """

    _default_style = "harvard1"
    """The `citeproc-py` library supports by default the 'harvard1' style."""

    _default_locale = "en-US"
    """Locale used when neither the request nor the caller provides one."""

    _user_args = {
        "style": fields.Str(missing=_default_style),
        "locale": fields.Str(missing=_default_locale),
    }
    """Arguments for the webargs parser."""

    _valid_formats = ("csl", "bibtex")
    """Supported formats by citeproc-py."""

    def __init__(self, serializer, record_format="csl"):
        """Initialize the inner record serializer.

        :param serializer: Serializer object that does the record
            serialization to a format that `citeproc-py` can process (CSL-JSON
            or BibTeX). The object has to implement a `serialize` method that
            matches the signature of the `serialize` method of this class.
        :param record_format: Format that the serializer produces. Must be one
            of the values in ``_valid_formats``.
        :raises AssertionError: If ``record_format`` is not supported or the
            serializer has no callable ``serialize`` attribute.
        """
        # Validation intentionally stays assert-based so that the exception
        # type raised on bad arguments (AssertionError) does not change.
        assert record_format in self._valid_formats, (
            "record_format must be one of {0!r}".format(self._valid_formats)
        )
        assert getattr(serializer, "serialize", None), (
            "serializer must provide a 'serialize' method"
        )
        assert callable(getattr(serializer, "serialize")), (
            "serializer.serialize must be callable"
        )

        self.serializer = serializer
        self.record_format = record_format

    @classmethod
    def _get_args(cls, **kwargs):
        """Parse style and locale.

        Argument location precedence: kwargs > view_args > query

        :param kwargs: May contain ``style`` and/or ``locale``; any other key
            is ignored.
        :returns: A dict with the keys ``style`` (the value returned by
            ``get_style_filepath`` for the lower-cased style name) and
            ``locale``.
        :raises StyleNotFoundRESTError: If the style is unknown and there is
            an active request context.
        :raises StyleNotFoundError: If the style is unknown and there is no
            request context.
        """
        # Start from the class defaults, then layer request values and
        # finally explicit keyword arguments on top.
        csl_args = {"style": cls._default_style, "locale": cls._default_locale}

        if has_request_context():
            parser = FlaskParser(locations=("view_args", "query"))
            csl_args.update(parser.parse(cls._user_args, request))

        csl_args.update({k: kwargs[k] for k in ("style", "locale") if k in kwargs})

        try:
            csl_args["style"] = get_style_filepath(csl_args["style"].lower())
        except StyleNotFoundError:
            if has_request_context():
                # On failure ``csl_args["style"]`` still holds the requested
                # style name, which is what gets reported to the client.
                raise StyleNotFoundRESTError(csl_args["style"])
            raise
        return csl_args

    def _get_source(self, data):
        """Get source data object for citeproc-py.

        :param data: Serialized record, as produced by the inner serializer
            (a CSL-JSON string for ``"csl"``, BibTeX data for ``"bibtex"``).
        :returns: A ``CiteProcJSON`` or ``BibTeX`` source object.
        :raises ValueError: If ``self.record_format`` is not a supported
            format (e.g. it was modified after construction).
        """
        if self.record_format == "csl":
            return CiteProcJSON([json.loads(data)])
        elif self.record_format == "bibtex":
            return BibTeX(data)
        # Fail loudly instead of implicitly returning ``None``.
        raise ValueError(
            "Unsupported record format: {0!r}".format(self.record_format)
        )

    def _clean_result(self, text):
        """Collapse repeated whitespace and periods, and escape apostrophes.

        :param text: Formatted citation text.
        :returns: The text with every run of two or more whitespace
            characters replaced by a single space, every run of two or more
            periods replaced by a single period, and every ``'`` prefixed
            with a backslash.
        """
        text = re.sub(r"\s\s+", " ", text)
        text = re.sub(r"\.\.+", ".", text)
        text = text.replace("'", "\\'")
        return text

    def serialize(self, pid, record, links_factory=None, **kwargs):
        """Serialize a single record.

        :param pid: Persistent identifier instance.
        :param record: Record instance.
        :param links_factory: Factory function for record links.
        :param kwargs: Optional ``style`` and ``locale`` overrides, forwarded
            to ``_get_args``.
        :returns: The cleaned-up text of the first bibliography entry
            generated for the record.
        """
        data = self.serializer.serialize(pid, record, links_factory)
        source = self._get_source(data)
        style = CitationStylesStyle(validate=False, **self._get_args(**kwargs))
        bib = CitationStylesBibliography(style, source, formatter.plain)
        # The citation item is looked up by the PID value.
        citation = Citation([CitationItem(pid.pid_value)])
        bib.register(citation)

        return self._clean_result("".join(bib.bibliography()[0]))