Source code for bids2cite.bids2cite

"""
Add a datacite to your BIDS dataset.

details on the format of datacite for GIN: https://gin.g-node.org/G-Node/Info/wiki/DOIfile
"""

from __future__ import annotations

import json
import logging
import sys
from argparse import ArgumentParser, HelpFormatter
from pathlib import Path
from typing import Any

import ruamel.yaml
from cffconvert.cli.create_citation import create_citation
from cffconvert.cli.validate_or_write_output import validate_or_write_output
from rich import print
from rich.prompt import Prompt
from rich_argparse import RichHelpFormatter

from bids2cite._authors import authors_for_citation, authors_for_desc, update_authors
from bids2cite._license import supported_licenses, update_license
from bids2cite._references import (
    references_for_citation,
    references_for_datacite,
    update_references,
)
from bids2cite._utils import (
    bids2cite_log,
    default_log_level,
    log_levels,
    print_ordered_list,
    prompt_format,
)
from bids2cite._version import __version__

# Shared YAML emitter: bids2cite() dumps datacite.yml and CITATION.cff with it.
yaml = ruamel.yaml.YAML()
# Indentation settings applied to everything written through this emitter.
yaml.indent(mapping=2, sequence=4, offset=2)

# Module-level logger used by the _update_* helpers of this module.
# _cli() and bids2cite() bind their own local ``log`` via bids2cite_log().
log = logging.getLogger("bids2datacite")


def _update_bidsignore(bids_dir: Path) -> None:
    """Update the .bidsignore file."""
    log.info("updating .bidsignore")
    bidsignore = bids_dir / ".bidsignore"
    if not bidsignore.exists():
        with bidsignore.open("w") as f:
            f.write("datacite.yml")
    else:
        with bidsignore.open("r") as f:
            content = f.read()
        if "datacite.yml" not in content:
            with bidsignore.open("a") as f:
                f.write("datacite.yml")


def _update_description(description: str | None = None, skip_prompt: bool = False) -> str:
    """Update the description of the dataset."""
    log.info("update description")
    if description not in [None, ""]:
        description = description
    elif not skip_prompt:
        description = Prompt.ask(
            prompt_format("\nPlease enter a description for the dataset")
        )
        print()
    if description is None:
        description = ""
    return description


def _update_keywords(
    keywords: list[Any] | None = None, skip_prompt: bool = False
) -> list[str]:
    """Update the keywords of the dataset."""
    log.info("updating keywords")

    if keywords is None:
        keywords = []

    if not skip_prompt:
        add_keyword = "yes"
        while add_keyword == "yes":
            print_ordered_list(msg="Current keywords:", items=keywords)
            add_keyword = Prompt.ask(
                prompt_format("Do you want to add more keywords?"),
                default="yes",
                choices=["yes", "no"],
            )
            if add_keyword != "yes":
                break
            new_keywords = Prompt.ask(
                prompt_format(
                    """Please enter keywords separated by comma
(for example: 'keyword1, keyword2')"""
                )
            )
            tmp = new_keywords.strip().split(",")
            for keyword in tmp:
                keywords.append(keyword.strip())

    return keywords


def _update_funding(ds_desc: dict[str, Any], skip_prompt: bool = False) -> list[str]:
    """Update the funding of the dataset."""
    log.info("update funding")

    funding = []
    if "Funding" in ds_desc and ds_desc["Funding"] not in [None, []]:
        funding = ds_desc["Funding"]

    if skip_prompt:
        return funding

    add_funding = "yes"
    while add_funding == "yes":
        print_ordered_list(msg="Current fundings:", items=funding)
        add_funding = Prompt.ask(
            prompt_format("Do you want to add more funding?"),
            default="yes",
            choices=["yes", "no"],
        )
        print()
        if add_funding != "yes":
            break

        grant = Prompt.ask(
            prompt_format("Please enter a funding (for example: 'EU, EU.12345')")
        )
        funding.append(grant)
    return funding


def _cli(argv: Any = sys.argv) -> None:
    """Parse the command line, validate the options and call ``bids2cite``.

    Parameters
    ----------
    argv : sequence of str
        Full argument vector; the first item (program name) is skipped.

    Notes
    -----
    Exits with status 1 when the requested license or output format is not
    supported. An authors file that does not exist is ignored, with a warning.
    """
    log = bids2cite_log(name="bids2datacite")

    parser = _common_parser(formatter_class=RichHelpFormatter)

    args = parser.parse_args(argv[1:])

    # https://stackoverflow.com/a/53293042/14223310
    levels = log_levels()
    log_level = levels.index(default_log_level())
    # Each "-v" flag contributes one adjustment; clamp the result so it stays
    # a valid index into the list of levels.
    for adjustment in args.log_level or ():
        log_level = min(len(levels) - 1, max(log_level + adjustment, 0))
    log.setLevel(levels[log_level])

    # Drop blank entries so that "a,,b" or a trailing comma does not produce
    # an empty keyword.
    raw_keywords = args.keywords.split(",") if args.keywords else []
    keywords = [x.strip() for x in raw_keywords if x.strip()]

    authors_file = None
    if args.authors_file not in ["", None]:
        authors_file = Path(args.authors_file)
        if not authors_file.exists():
            # Tell the user instead of silently dropping the option.
            log.warning(f"Authors file not found, ignoring it: {authors_file}")
            authors_file = None

    licenses = supported_licenses()
    licenses_choices = list(licenses.keys())
    if args.license and args.license not in licenses_choices:
        log.error(
            f"""License '{args.license}' not supported.
        Supported types are {licenses_choices}"""
        )
        sys.exit(1)

    if args.output_format not in ["datacite", "citation"]:
        log.error(
            f"""Format '{args.output_format}' not supported.
        Supported types are 'datacite' and 'citation'"""
        )
        sys.exit(1)

    bids2cite(
        bids_dir=Path(args.bids_dir).resolve(),
        output_format=args.output_format,
        description=args.description,
        keywords=keywords,
        license=args.license,
        skip_prompt=args.skip_prompt,
        authors_file=authors_file,
    )


def bids2cite(
    bids_dir: Path,
    output_format: str,
    description: str | None = None,
    keywords: list[str] | None = None,
    license: str | None = None,
    skip_prompt: bool = False,
    authors_file: Path | None = None,
) -> None:
    """Create a citation file for a BIDS dataset.

    Reads ``dataset_description.json`` from ``bids_dir``, completes its
    metadata (description, authors, references, funding, license, keywords)
    and writes the results into ``bids_dir/derivatives/bids2cite``:

    - an updated ``dataset_description.json`` (always),
    - ``datacite.yml`` when ``output_format`` is ``"datacite"``,
    - ``CITATION.cff`` when ``output_format`` is ``"citation"``; that file is
      then loaded and validated through cffconvert.

    ``datacite.yml`` is also added to the ``.bidsignore`` of the dataset.

    Parameters
    ----------
    bids_dir : Path
        Root folder of the BIDS dataset.
    output_format : str
        ``"datacite"`` or ``"citation"``. Any other value only updates
        ``dataset_description.json``.
    description : str or None
        Description of the dataset.
    keywords : list of str or None
        Keywords to add to the citation file.
    license : str or None
        When not ``None``, overrides the ``License`` of the dataset
        description before the license is resolved.
    skip_prompt : bool
        Passed to every update step to disable the interactive prompts.
    authors_file : Path or None
        Passed to ``update_authors``.

    Notes
    -----
    Exits with status 1 when ``dataset_description.json`` is missing.
    """
    log = bids2cite_log(name="bids2datacite")

    log.info(f"bids_dir: {bids_dir}")

    output_dir = bids_dir / "derivatives" / "bids2cite"
    output_dir.mkdir(exist_ok=True, parents=True)

    ds_descr_file = bids_dir / "dataset_description.json"
    if not ds_descr_file.exists():
        log.error(f"dataset_description.json not found in {bids_dir}")
        sys.exit(1)

    # Read as UTF-8 explicitly (the outputs below are written as UTF-8 too):
    # the platform default encoding can garble non-ASCII names.
    with ds_descr_file.open(encoding="utf-8") as f:
        ds_desc: dict[str, Any] = json.load(f)

    description = _update_description(description, skip_prompt)
    authors = update_authors(ds_desc, skip_prompt, authors_file)
    references = update_references(ds_desc, skip_prompt)
    funding = _update_funding(ds_desc, skip_prompt)
    if license is not None:
        ds_desc["License"] = license
    license_name, license_url = update_license(
        bids_dir, output_dir, ds_desc, skip_prompt
    )
    keywords = _update_keywords(keywords, skip_prompt)

    _update_bidsignore(bids_dir)

    # --- dataset_description.json ---
    ds_desc["Authors"] = authors_for_desc(authors)
    ds_desc["ReferencesAndLinks"] = references_for_datacite(references)
    ds_desc["Funding"] = funding
    ds_desc["License"] = license_name

    output_file = output_dir / "dataset_description.json"
    log.info(f"updating {output_file}")
    with output_file.open("w", encoding="utf-8") as f:
        json.dump(ds_desc, f, indent=4)

    # --- datacite.yml ---
    if output_format == "datacite":
        # Key order is the order in which the entries are dumped.
        datacite: dict[str, Any] = {
            "authors": authors,
            "title": ds_desc["Name"],
            "description": description,
            "keywords": keywords,
            "license": {"name": license_name, "url": license_url},
            "resourcetype": "Dataset",
            "references": references,
            "templateversion": 1.2,
            "funding": funding,
        }

        datacite_file = output_dir / "datacite.yml"
        log.info(f"creating {datacite_file}")
        with datacite_file.open("w", encoding="utf-8") as f:
            yaml.dump(datacite, f)

    # --- CITATION.cff ---
    if output_format == "citation":
        title = ds_desc["Name"]
        citation: dict[str, Any] = {
            "authors": authors_for_citation(authors),
            "title": title,
            # An empty description is replaced by a "TODO" placeholder.
            "message": "TODO" if description == "" else description,
            "license": license_name,
            "type": "dataset",
            "identifiers": references_for_citation(references),
            "cff-version": "1.2.0",
        }
        # "keywords" is only written when there is at least one keyword.
        if keywords not in [None, []]:
            citation["keywords"] = keywords

        citation_file = output_dir / "CITATION.cff"
        log.info(f"creating {citation_file}")
        with citation_file.open("w", encoding="utf-8") as f:
            yaml.dump(citation, f)

        # Load the file back through cffconvert and validate it.
        cff_citation = create_citation(infile=citation_file, url=None)
        validate_or_write_output(
            outfile=None, outputformat=None, validate_only=True, citation=cff_citation
        )
def _common_parser(
    formatter_class: type[HelpFormatter] = HelpFormatter,
) -> ArgumentParser:
    """Build the argument parser of the command line interface.

    ``formatter_class`` is handed to ``ArgumentParser`` and controls how the
    help message is rendered.
    """
    # NOTE(review): the epilog text looks truncated (its link target seems to
    # be missing) - confirm against the project documentation.
    parser = ArgumentParser(
        description="BIDS app to create citation file for your BIDS dataset.",
        epilog=""" For a more readable version of this help section, see the online ". """,
        formatter_class=formatter_class,
    )
    add = parser.add_argument

    add(
        "bids_dir",
        help=""" The directory with the input dataset formatted according to the BIDS standard. """,
    )
    add(
        "-o",
        "--output-format",
        default="datacite",
        help="""Choose the output format between 'citation' for CITATION.cff and 'datacite' for datacite.yml.""",
    )
    add(
        "-d",
        "--description",
        default="",
        help="Description to add to the dataset.",
    )
    add(
        "-k",
        "--keywords",
        default="",
        help="List of key words separated by commas to add to the citation file.",
    )

    # The supported license names are only listed in the help text here.
    licenses_choices = list(supported_licenses().keys())
    add(
        "-l",
        "--license",
        default=None,
        help=f"License to add to choose from: {licenses_choices}",
    )
    add(
        "-s",
        "--skip-prompt",
        action="store_true",
        help="If you do not want to use the prompt interface.",
    )
    add(
        "--authors-file",
        default="",
        help=""".tsv file containing list of potential new authors with the columns: first_name, last_name, ORCID (optional), affiliation (optional)""",
    )
    # Every occurrence of the flag appends -1 to ``args.log_level``.
    add(
        "--verbose",
        "-v",
        dest="log_level",
        action="append_const",
        const=-1,
    )
    add(
        "--version",
        action="version",
        version=f"{__version__}",
        help="show program's version number and exit",
    )

    return parser