在python中引用bazel数据文件的正确方法是什么?

Yin*_*ong 3 python bazel

假设我有以下BUILD文件

py_library(
  name = "foo",
  # py_library has no `src` attribute; source files are listed in `srcs`.
  srcs = ["foo.py"],
  # Files listed here are made available to the code at run time.
  data = ["//bar:data.json"],
)
Run Code Online (Sandbox Code Playgroud)

我应该如何在 foo.py 文件中引用 data.json?我想要类似下面这样的写法,其中的 some_path 应该用什么?

# `some_path` is the unknown this question is about: the base directory
# under which "bar/data.json" can be found when foo.py runs.
with open(os.path.join(some_path, "bar/data.json"), 'r') as fp:
    data = json.load(fp)
Run Code Online (Sandbox Code Playgroud)

我在网上找不到太多关于 *.runfiles 的通用文档——任何指点都将不胜感激!

Lau*_*ent 6

简短回答:os.path.dirname(__file__)

这是完整的示例:

$ ls
bar/  BUILD  foo.py  WORKSPACE

$ cat BUILD
py_binary(
    name = "foo",
    srcs = ["foo.py"],
    data = ["//bar:data.json"],
)

$ cat foo.py
import json
import os

ws = os.path.dirname(__file__)
with open(os.path.join(ws, "bar/data.json"), 'r') as fp:
  print(json.load(fp))

$ cat bar/BUILD
exports_files(["data.json"])

$ bazel run :foo
Run Code Online (Sandbox Code Playgroud)

编辑:当您的包位于子目录中时,这种方法效果不佳。此时您可能需要多次调用 os.path.dirname,逐级向上回到工作区根目录。


小智 5

下面这个函数在我所知的所有情况下,都应该能返回任意 py_binary 的 runfiles 根目录路径:

import os
import re
import sys

def find_runfiles():
    """Return the path of the ``.runfiles`` tree for the running program.

    Two candidates are derived from the absolute form of ``sys.argv[0]``:

    1. ``<argv0>.runfiles`` -- returned if it exists as a directory.
    2. The longest leading part of ``<argv0>`` that ends in a path
       component named ``*.runfiles`` (its existence is not checked).

    Symlinks are not resolved. This is only useful when the program is
    _not_ run from a zip file.

    Returns:
        The runfiles directory path as a string.

    Raises:
        RuntimeError: if neither candidate applies.
    """
    stub_filename = os.path.abspath(sys.argv[0])

    # Case 1: the runfiles tree sits right next to the launched file.
    module_space = stub_filename + '.runfiles'
    if os.path.isdir(module_space):
        return module_space

    # Case 2: the launched file lives inside a runfiles tree. Require a path
    # separator (or end of string) right after ".runfiles" so that a sibling
    # such as "foo.runfiles_manifest" is not mistaken for the tree itself.
    runfiles_pattern = r"(.*\.runfiles)(?:[/\\]|$)"
    matchobj = re.match(runfiles_pattern, stub_filename)
    if matchobj:
        return matchobj.group(1)

    raise RuntimeError('Cannot find .runfiles directory for %s' %
                       sys.argv[0])
Run Code Online (Sandbox Code Playgroud)

对于您问题中的示例,您可以像这样使用它:

# "name_of_workspace" is a placeholder -- presumably the workspace name
# declared in your WORKSPACE file; confirm against your runfiles layout.
with open(os.path.join(find_runfiles(), "name_of_workspace/bar/data.json"), 'r') as fp:
    data = json.load(fp)
Run Code Online (Sandbox Code Playgroud)

请注意,如果您构建的是 Python 应用程序的压缩可执行文件(例如使用 subpar),那么这个函数就帮不上忙了;这种情况下您需要更多的代码。下一个片段包含 get_resource_filename() 和 get_resource_directory(),它们同时适用于常规的 py_binary 和 .par 二进制文件:

import atexit
import os
import re
import shutil
import sys
import tempfile
import zipfile


 def get_resource_filename(path):
    zip_path = get_zip_path(sys.modules.get("__main__").__file__)
    if zip_path:
        tmpdir = tempfile.mkdtemp()
        atexit.register(lambda: shutil.rmtree(tmpdir, ignore_errors=True))
        zf = BetterZipFile(zip_path)
        zf.extract(member=path, path=tmpdir)
        return os.path.join(tmpdir, path)
    elif os.path.exists(path):
        return path
    else:
        path_in_runfiles = os.path.join(find_runfiles(), path)
        if os.path.exists(path_in_runfiles):
            return path_in_runfiles
        else:
            raise ResourceNotFoundError


def get_resource_directory(path):
    """Find or extract an entire subtree and return its location.

    Lookup order:

    1. If the ``__main__`` module was loaded from inside a zip archive, every
       member at or below ``path`` is extracted into a fresh temporary
       directory (removed at interpreter exit).
    2. If ``path`` exists as given, it is returned unchanged.
    3. Otherwise ``path`` is looked up below the runfiles tree.

    Args:
        path: Directory path, relative to the archive root / runfiles root.

    Returns:
        A path string pointing at the directory on disk.

    Raises:
        ResourceNotFoundError: if the subtree cannot be found in the
            archive, on disk, or in the runfiles tree.
        RuntimeError: if no runfiles tree can be located.
    """
    # __main__ has no __file__ in an interactive session; treat that as
    # "not running from a zip" instead of failing with AttributeError.
    main_file = getattr(sys.modules.get("__main__"), "__file__", None)
    zip_path = get_zip_path(main_file) if main_file else None
    if zip_path:
        # Match on whole path components: a bare startswith(path) would let
        # "data" also select "database/..." or "data.json".
        root = path.rstrip("/")
        prefix = root + "/" if root else ""
        with BetterZipFile(zip_path) as zf:
            members = [fn for fn in zf.namelist()
                       if fn == root or fn.startswith(prefix)]
            if not members:
                raise ResourceNotFoundError(
                    'Cannot find resource %s in %s' % (path, zip_path))
            tmpdir = tempfile.mkdtemp()
            atexit.register(lambda: shutil.rmtree(tmpdir, ignore_errors=True))
            # Extract one member at a time: Python 3's ZipFile.extractall()
            # does not call extract(), so the permission-preserving override
            # would otherwise be skipped.
            for fn in members:
                zf.extract(fn, tmpdir)
        return os.path.join(tmpdir, path)

    if os.path.exists(path):
        return path

    path_in_runfiles = os.path.join(find_runfiles(), path)
    if os.path.exists(path_in_runfiles):
        return path_in_runfiles

    raise ResourceNotFoundError('Cannot find resource %s' % path)


def get_zip_path(path):
    """If path is inside a zip file, return the zip file's path.

    Walks from ``path`` up through its parent directories and returns the
    first one that is a zip archive.

    Args:
        path: A filesystem path (absolute or relative); may be empty or None.

    Returns:
        The path of the enclosing zip file, or None if there is none.
    """
    while path:
        if zipfile.is_zipfile(path):
            return path
        parent = os.path.dirname(path)
        # dirname() is a fixed point at the filesystem root ("/" or "C:\\").
        # Stopping there, and on the empty string a relative path reduces
        # to, guarantees termination.
        if parent == path:
            return None
        path = parent
    return None


class ResourceNotFoundError(RuntimeError):
    """Raised when a requested resource cannot be located."""

def find_runfiles():
    """Return the path of the ``.runfiles`` tree for the running program.

    Two candidates are derived from the absolute form of ``sys.argv[0]``:

    1. ``<argv0>.runfiles`` -- returned if it exists as a directory.
    2. The longest leading part of ``<argv0>`` that ends in a path
       component named ``*.runfiles`` (its existence is not checked).

    Symlinks are not resolved. This is only useful when the program is
    _not_ run from a zip file.

    Returns:
        The runfiles directory path as a string.

    Raises:
        RuntimeError: if neither candidate applies.
    """
    stub_filename = os.path.abspath(sys.argv[0])

    # Case 1: the runfiles tree sits right next to the launched file.
    module_space = stub_filename + '.runfiles'
    if os.path.isdir(module_space):
        return module_space

    # Case 2: the launched file lives inside a runfiles tree. Require a path
    # separator (or end of string) right after ".runfiles" so that a sibling
    # such as "foo.runfiles_manifest" is not mistaken for the tree itself.
    runfiles_pattern = r"(.*\.runfiles)(?:[/\\]|$)"
    matchobj = re.match(runfiles_pattern, stub_filename)
    if matchobj:
        return matchobj.group(1)

    raise RuntimeError('Cannot find .runfiles directory for %s' %
                       sys.argv[0])


class BetterZipFile(zipfile.ZipFile):
    """Shim around ZipFile that preserves permissions on extract.

    Only ``extract()`` is overridden; the permission bits are taken from the
    upper 16 bits of the member's ``external_attr``.
    """

    def extract(self, member, path=None, pwd=None):
        """Extract ``member`` and apply its stored Unix permission bits.

        Args:
            member: Archive member name or ``zipfile.ZipInfo`` object.
            path: Target directory; defaults to the current working directory.
            pwd: Password for encrypted members, if any.

        Returns:
            The path of the extracted file or directory.

        Raises:
            KeyError: if ``member`` is a name not present in the archive.
        """
        if not isinstance(member, zipfile.ZipInfo):
            member = self.getinfo(member)

        # Delegate to the public base-class method (it also handles the
        # path=None default) rather than the private _extract_member().
        ret_val = super().extract(member, path, pwd)

        # Keep only the permission bits. Archives written without Unix
        # attributes store 0 here; applying that would make the extracted
        # file inaccessible, so leave the default permissions in that case.
        mode = (member.external_attr >> 16) & 0o7777
        if mode:
            os.chmod(ret_val, mode)
        return ret_val
Run Code Online (Sandbox Code Playgroud)

使用第二个代码片段,您的示例将如下所示:

# "name_of_workspace" is a placeholder -- presumably the workspace name
# declared in your WORKSPACE file; confirm against your runfiles layout.
with open(get_resource_filename("name_of_workspace/bar/data.json"), 'r') as fp:
    data = json.load(fp)
Run Code Online (Sandbox Code Playgroud)