phs_v1.0.1.0/build/scripts/util/md5_check.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import difflib
import hashlib
import itertools
import json
import os
import zipfile
from .pycache import pycache_enabled
from .pycache import pycache

# When set and a difference is detected, a diff of what changed is printed.
PRINT_EXPLANATIONS = int(os.environ.get('PRINT_BUILD_EXPLANATIONS', 0))

# An escape hatch that causes all targets to be rebuilt.
_FORCE_REBUILD = int(os.environ.get('FORCE_REBUILD', 0))


def get_new_metadata(input_strings, input_paths):
    new_metadata = _Metadata()
    new_metadata.add_strings(input_strings)

    for path in input_paths:
        if _is_zip_file(path):
            entries = _extract_zip_entries(path)
            new_metadata.add_zip_file(path, entries)
        else:
            new_metadata.add_file(path, _md5_for_path(path))
    return new_metadata


def get_old_metadata(record_path):
    old_metadata = None
    if os.path.exists(record_path):
        with open(record_path, 'r') as jsonfile:
            try:
                old_metadata = _Metadata.from_file(jsonfile)
            except:  # noqa: E722 pylint: disable=bare-except
                pass
    return old_metadata


def print_explanations(record_path, changes):
    if PRINT_EXPLANATIONS:
        print('=' * 80)
        print('Target is stale: %s' % record_path)
        print(changes.describe_difference())
        print('=' * 80)


def call_and_record_if_stale(
        function,  # pylint: disable=invalid-name
        record_path=None,
        input_paths=None,
        input_strings=None,
        output_paths=None,
        force=False,
        pass_changes=False):
    """Calls function if outputs are stale.

    Outputs are considered stale if:
    - any output_paths are missing, or
    - the contents of any file within input_paths has changed, or
    - the contents of input_strings has changed.

    To debug which files are out-of-date, set the environment variable:
        PRINT_MD5_DIFFS=1

    Args:
      function: The function to call.
      record_path: Path to record metadata.
        Defaults to output_paths[0] + '.md5.stamp'
      input_paths: List of paths to calculate a md5 sum on.
      input_strings: List of strings to record verbatim.
      output_paths: List of output paths.
      force: Whether to treat outputs as missing regardless of whether they
        actually are.
      pass_changes: Whether to pass a Changes instance to |function|.
    """
    assert record_path or output_paths
    input_paths = input_paths or []
    input_strings = input_strings or []
    output_paths = output_paths or []

    new_metadata = get_new_metadata(input_strings, input_paths)
    force = force or _FORCE_REBUILD
    missing_outputs = [
        x for x in output_paths if force or not os.path.exists(x)
    ]

    if pycache_enabled:
        # Input strings, input files and outputs names together compose
        # cache manifest, which is the only identifier of a python action.
        manifest = '-'.join(
            [new_metadata.strings_md5(),
             new_metadata.files_md5()] + sorted(output_paths))
        record_path = pycache.get_manifest_path('{}.manifest'.format(manifest))
        old_metadata = get_old_metadata(record_path)
    else:
        record_path = record_path or output_paths[0] + '.md5.stamp'
        # When outputs are missing, don't bother gathering change information.
        if not missing_outputs:
            old_metadata = get_old_metadata(record_path)
        else:
            old_metadata = None

    changes = Changes(old_metadata, new_metadata, force, missing_outputs)
    if not changes.has_changes():
        if not pycache_enabled:
            return
        if pycache_enabled and pycache.retrieve(output_paths, prefix=manifest):
            return

    print_explanations(record_path, changes)

    args = (changes, ) if pass_changes else ()
    function(*args)
    if pycache_enabled:
        try:
            pycache.report_cache_stat('cache_miss')
        except:  # noqa: E722 pylint: disable=bare-except
            pass
        pycache.save(output_paths, prefix=manifest)

    with open(record_path, 'w') as record:
        new_metadata.to_file(record)


class Changes(object):
    """Provides and API for querying what changed between runs."""
    def __init__(self, old_metadata, new_metadata, force, missing_outputs):
        self.old_metadata = old_metadata
        self.new_metadata = new_metadata
        self.force = force
        self.missing_outputs = missing_outputs

    def _get_old_tag(self, path, subpath=None):
        return self.old_metadata and self.old_metadata.get_tag(path, subpath)

    def has_changes(self):
        """Returns whether any changes exist."""
        return (
            self.force or not self.old_metadata or
            self.old_metadata.strings_md5() != self.new_metadata.strings_md5()
            or self.old_metadata.files_md5() != self.new_metadata.files_md5())

    def added_or_modified_only(self):
        """Returns whether the only changes were from added or modified (sub)files.

        No missing outputs, no removed paths/subpaths.
        """
        if (self.force or not self.old_metadata
                or self.old_metadata.strings_md5() !=
                self.new_metadata.strings_md5()):
            return False
        if any(self.iter_removed_paths()):
            return False
        for path in self.iter_modified_paths():
            if any(self.iter_removed_subpaths(path)):
                return False
        return True

    def iter_all_paths(self):
        """Generator for paths."""
        return self.new_metadata.iter_paths()

    def iter_all_subpaths(self, path):
        """Generator for subpaths."""
        return self.new_metadata.iter_subpaths(path)

    def iter_added_paths(self):
        """Generator for paths that were added."""
        for path in self.new_metadata.iter_paths():
            if self._get_old_tag(path) is None:
                yield path

    def iter_added_subpaths(self, path):
        """Generator for paths that were added within the given zip file."""
        for subpath in self.new_metadata.iter_subpaths(path):
            if self._get_old_tag(path, subpath) is None:
                yield subpath

    def iter_removed_paths(self):
        """Generator for paths that were removed."""
        if self.old_metadata:
            for path in self.old_metadata.iter_paths():
                if self.new_metadata.get_tag(path) is None:
                    yield path

    def iter_removed_subpaths(self, path):
        """Generator for paths that were removed within the given zip file."""
        if self.old_metadata:
            for subpath in self.old_metadata.iter_subpaths(path):
                if self.new_metadata.get_tag(path, subpath) is None:
                    yield subpath

    def iter_modified_paths(self):
        """Generator for paths whose contents have changed."""
        for path in self.new_metadata.iter_paths():
            old_tag = self._get_old_tag(path)
            new_tag = self.new_metadata.get_tag(path)
            if old_tag is not None and old_tag != new_tag:
                yield path

    def iter_modified_subpaths(self, path):
        """Generator for paths within a zip file whose contents have changed."""
        for subpath in self.new_metadata.iter_subpaths(path):
            old_tag = self._get_old_tag(path, subpath)
            new_tag = self.new_metadata.get_tag(path, subpath)
            if old_tag is not None and old_tag != new_tag:
                yield subpath

    def iter_changed_paths(self):
        """Generator for all changed paths (added/removed/modified)."""
        return itertools.chain(self.iter_removed_paths(),
                               self.iter_modified_paths(),
                               self.iter_added_paths())

    def iter_changed_subpaths(self, path):
        """Generator for paths within a zip that were added/removed/modified."""
        return itertools.chain(self.iter_removed_subpaths(path),
                               self.iter_modified_subpaths(path),
                               self.iter_added_subpaths(path))

    def describe_difference(self):
        """Returns a human-readable description of what changed."""
        if self.force:
            return 'force=True'
        elif self.old_metadata is None:
            return 'Previous stamp file not found.'

        if self.old_metadata.strings_md5() != self.new_metadata.strings_md5():
            ndiff = difflib.ndiff(self.old_metadata.get_strings(),
                                  self.new_metadata.get_strings())
            changed = [s for s in ndiff if not s.startswith(' ')]
            return 'Input strings changed:\n  ' + '\n  '.join(changed)

        if self.old_metadata.files_md5() == self.new_metadata.files_md5():
            return "There's no difference."

        lines = []
        lines.extend('Added: {}'.format(p for p in self.iter_added_paths()))
        lines.extend('Removed: {}'.format(p
                                          for p in self.iter_removed_paths()))
        for path in self.iter_modified_paths():
            lines.append('Modified: {}'.format(path))
            lines.extend('  -> Subpath added: {}'.format(
                p for p in self.iter_added_subpaths(path)))
            lines.extend('  -> Subpath removed: {}'.format(
                p for p in self.iter_removed_subpaths(path)))
            lines.extend('  -> Subpath modified: {}'.format(
                p for p in self.iter_modified_subpaths(path)))
        if lines:
            return 'Input files changed:\n  {}'.format('\n  '.join(lines))

        if self.missing_outputs:
            return 'Outputs do not exist:\n  {}'.format('\n  '.join(
                self.missing_outputs))

        return 'I have no idea what changed (there is a bug).'


class _Metadata(object):
    """Data model for tracking change metadata."""
    def __init__(self):
        self._files_md5 = None
        self._strings_md5 = None
        self._files = []
        self._strings = []
        # Map of (path, subpath) -> entry. Created upon first call to _get_entry().
        self._file_map = None

    @classmethod
    def from_file(cls, fileobj):
        """Returns a _Metadata initialized from a file object."""
        ret = cls()
        obj = json.load(fileobj)
        ret._files_md5 = obj['files-md5']
        ret._strings_md5 = obj['strings-md5']
        ret._files = obj['input-files']
        ret._strings = obj['input-strings']
        return ret

    def to_file(self, fileobj):
        """Serializes metadata to the given file object."""
        obj = {
            "files-md5": self.files_md5(),
            "strings-md5": self.strings_md5(),
            "input-files": self._files,
            "input-strings": self._strings,
        }
        json.dump(obj, fileobj, indent=2, sort_keys=True)

    def _assert_not_queried(self):
        assert self._files_md5 is None
        assert self._strings_md5 is None
        assert self._file_map is None

    def add_strings(self, values):
        self._assert_not_queried()
        self._strings.extend(str(v) for v in values)

    def add_file(self, path, tag):
        """Adds metadata for a non-zip file.

        Args:
          path: Path to the file.
          tag: A short string representative of the file contents.
        """
        self._assert_not_queried()
        self._files.append({
            'path': path,
            'tag': tag,
        })

    def add_zip_file(self, path, entries):
        """Adds metadata for a zip file.

        Args:
          path: Path to the file.
          entries: List of (subpath, tag) tuples for entries within the zip.
        """
        self._assert_not_queried()
        tag = _compute_inline_md5(
            itertools.chain((e[0] for e in entries), (e[1] for e in entries)))
        self._files.append({
            'path':
            path,
            'tag':
            tag,
            'entries': [{
                "path": e[0],
                "tag": e[1]
            } for e in entries],
        })

    def get_strings(self):
        """Returns the list of input strings."""
        return self._strings

    def files_md5(self):
        """Lazily computes and returns the aggregate md5 of input files."""
        if self._files_md5 is None:
            # Omit paths from md5 since temporary files have random names.
            self._files_md5 = _compute_inline_md5(
                self.get_tag(p) for p in sorted(self.iter_paths()))
        return self._files_md5

    def strings_md5(self):
        """Lazily computes and returns the aggregate md5 of input strings."""
        if self._strings_md5 is None:
            self._strings_md5 = _compute_inline_md5(self._strings)
        return self._strings_md5

    def _get_entry(self, path, subpath=None):
        """Returns the JSON entry for the given path / subpath."""
        if self._file_map is None:
            self._file_map = {}
            for entry in self._files:
                self._file_map[(entry['path'], None)] = entry
                for subentry in entry.get('entries', ()):
                    self._file_map[(entry['path'],
                                    subentry['path'])] = subentry
        return self._file_map.get((path, subpath))

    def get_tag(self, path, subpath=None):
        """Returns the tag for the given path / subpath."""
        ret = self._get_entry(path, subpath)
        return ret and ret['tag']

    def iter_paths(self):
        """Returns a generator for all top-level paths."""
        return (e['path'] for e in self._files)

    def iter_subpaths(self, path):
        """Returns a generator for all subpaths in the given zip.

        If the given path is not a zip file or doesn't exist, returns an empty
        iterable.
        """
        outer_entry = self._get_entry(path)
        if not outer_entry:
            return ()
        subentries = outer_entry.get('entries', [])
        return (entry['path'] for entry in subentries)


def _update_md5_for_file(md5, path, block_size=2**16):
    # record md5 of linkto for dead link.
    if os.path.islink(path):
        linkto = os.readlink(path)
        if not os.path.exists(linkto):
            md5.update(linkto.encode())
            return

    with open(path, 'rb') as infile:
        while True:
            data = infile.read(block_size)
            if not data:
                break
            md5.update(data)


def _update_md5_for_directory(md5, dir_path):
    for root, _, files in os.walk(dir_path):
        for f in files:
            _update_md5_for_file(md5, os.path.join(root, f))


def _md5_for_path(path):
    md5 = hashlib.md5()
    if os.path.isdir(path):
        _update_md5_for_directory(md5, path)
    else:
        _update_md5_for_file(md5, path)
    return md5.hexdigest()


def _compute_inline_md5(iterable):
    """Computes the md5 of the concatenated parameters."""
    md5 = hashlib.md5()
    for item in iterable:
        md5.update(str(item).encode())
    return md5.hexdigest()


def _is_zip_file(path):
    """Returns whether to treat the given file as a zip file."""
    return path[-4:] in ('.zip')


def _extract_zip_entries(path):
    """Returns a list of (path, CRC32) of all files within |path|."""
    entries = []
    with zipfile.ZipFile(path) as zip_file:
        for zip_info in zip_file.infolist():
            # Skip directories and empty files.
            if zip_info.CRC:
                entries.append(
                    (zip_info.filename, zip_info.CRC + zip_info.compress_type))
    return entries
提交 #1 2024-09-27 19:16:49 +08:00			`#!/usr/bin/env python`
			`# -- coding: utf-8 --`
			`# Copyright 2013 The Chromium Authors. All rights reserved.`
			`# Use of this source code is governed by a BSD-style license that can be`
			`# found in the LICENSE file.`

			`import difflib`
			`import hashlib`
			`import itertools`
			`import json`
			`import os`
			`import zipfile`
			`from .pycache import pycache_enabled`
			`from .pycache import pycache`

			`# When set and a difference is detected, a diff of what changed is printed.`
			`PRINT_EXPLANATIONS = int(os.environ.get('PRINT_BUILD_EXPLANATIONS', 0))`

			`# An escape hatch that causes all targets to be rebuilt.`
			`_FORCE_REBUILD = int(os.environ.get('FORCE_REBUILD', 0))`


			`def get_new_metadata(input_strings, input_paths):`
			`new_metadata = _Metadata()`
			`new_metadata.add_strings(input_strings)`

			`for path in input_paths:`
			`if _is_zip_file(path):`
			`entries = _extract_zip_entries(path)`
			`new_metadata.add_zip_file(path, entries)`
			`else:`
			`new_metadata.add_file(path, _md5_for_path(path))`
			`return new_metadata`


			`def get_old_metadata(record_path):`
			`old_metadata = None`
			`if os.path.exists(record_path):`
			`with open(record_path, 'r') as jsonfile:`
			`try:`
			`old_metadata = _Metadata.from_file(jsonfile)`
			`except: # noqa: E722 pylint: disable=bare-except`
			`pass`
			`return old_metadata`


			`def print_explanations(record_path, changes):`
			`if PRINT_EXPLANATIONS:`
			`print('=' * 80)`
			`print('Target is stale: %s' % record_path)`
			`print(changes.describe_difference())`
			`print('=' * 80)`


			`def call_and_record_if_stale(`
			`function, # pylint: disable=invalid-name`
			`record_path=None,`
			`input_paths=None,`
			`input_strings=None,`
			`output_paths=None,`
			`force=False,`
			`pass_changes=False):`
			`"""Calls function if outputs are stale.`

			`Outputs are considered stale if:`
			`- any output_paths are missing, or`
			`- the contents of any file within input_paths has changed, or`
			`- the contents of input_strings has changed.`

			`To debug which files are out-of-date, set the environment variable:`
			`PRINT_MD5_DIFFS=1`

			`Args:`
			`function: The function to call.`
			`record_path: Path to record metadata.`
			`Defaults to output_paths[0] + '.md5.stamp'`
			`input_paths: List of paths to calculate a md5 sum on.`
			`input_strings: List of strings to record verbatim.`
			`output_paths: List of output paths.`
			`force: Whether to treat outputs as missing regardless of whether they`
			`actually are.`
			`pass_changes: Whether to pass a Changes instance to \|function\|.`
			`"""`
			`assert record_path or output_paths`
			`input_paths = input_paths or []`
			`input_strings = input_strings or []`
			`output_paths = output_paths or []`

			`new_metadata = get_new_metadata(input_strings, input_paths)`
			`force = force or _FORCE_REBUILD`
			`missing_outputs = [`
			`x for x in output_paths if force or not os.path.exists(x)`
			`]`

			`if pycache_enabled:`
			`# Input strings, input files and outputs names together compose`
			`# cache manifest, which is the only identifier of a python action.`
			`manifest = '-'.join(`
			`[new_metadata.strings_md5(),`
			`new_metadata.files_md5()] + sorted(output_paths))`
			`record_path = pycache.get_manifest_path('{}.manifest'.format(manifest))`
			`old_metadata = get_old_metadata(record_path)`
			`else:`
			`record_path = record_path or output_paths[0] + '.md5.stamp'`
			`# When outputs are missing, don't bother gathering change information.`
			`if not missing_outputs:`
			`old_metadata = get_old_metadata(record_path)`
			`else:`
			`old_metadata = None`

			`changes = Changes(old_metadata, new_metadata, force, missing_outputs)`
			`if not changes.has_changes():`
			`if not pycache_enabled:`
			`return`
			`if pycache_enabled and pycache.retrieve(output_paths, prefix=manifest):`
			`return`

			`print_explanations(record_path, changes)`

			`args = (changes, ) if pass_changes else ()`
			`function(*args)`
			`if pycache_enabled:`
			`try:`
			`pycache.report_cache_stat('cache_miss')`
			`except: # noqa: E722 pylint: disable=bare-except`
			`pass`
			`pycache.save(output_paths, prefix=manifest)`

			`with open(record_path, 'w') as record:`
			`new_metadata.to_file(record)`


			`class Changes(object):`
			`"""Provides and API for querying what changed between runs."""`
			`def __init__(self, old_metadata, new_metadata, force, missing_outputs):`
			`self.old_metadata = old_metadata`
			`self.new_metadata = new_metadata`
			`self.force = force`
			`self.missing_outputs = missing_outputs`

			`def _get_old_tag(self, path, subpath=None):`
			`return self.old_metadata and self.old_metadata.get_tag(path, subpath)`

			`def has_changes(self):`
			`"""Returns whether any changes exist."""`
			`return (`
			`self.force or not self.old_metadata or`
			`self.old_metadata.strings_md5() != self.new_metadata.strings_md5()`
			`or self.old_metadata.files_md5() != self.new_metadata.files_md5())`

			`def added_or_modified_only(self):`
			`"""Returns whether the only changes were from added or modified (sub)files.`

			`No missing outputs, no removed paths/subpaths.`
			`"""`
			`if (self.force or not self.old_metadata`
			`or self.old_metadata.strings_md5() !=`
			`self.new_metadata.strings_md5()):`
			`return False`
			`if any(self.iter_removed_paths()):`
			`return False`
			`for path in self.iter_modified_paths():`
			`if any(self.iter_removed_subpaths(path)):`
			`return False`
			`return True`

			`def iter_all_paths(self):`
			`"""Generator for paths."""`
			`return self.new_metadata.iter_paths()`

			`def iter_all_subpaths(self, path):`
			`"""Generator for subpaths."""`
			`return self.new_metadata.iter_subpaths(path)`

			`def iter_added_paths(self):`
			`"""Generator for paths that were added."""`
			`for path in self.new_metadata.iter_paths():`
			`if self._get_old_tag(path) is None:`
			`yield path`

			`def iter_added_subpaths(self, path):`
			`"""Generator for paths that were added within the given zip file."""`
			`for subpath in self.new_metadata.iter_subpaths(path):`
			`if self._get_old_tag(path, subpath) is None:`
			`yield subpath`

			`def iter_removed_paths(self):`
			`"""Generator for paths that were removed."""`
			`if self.old_metadata:`
			`for path in self.old_metadata.iter_paths():`
			`if self.new_metadata.get_tag(path) is None:`
			`yield path`

			`def iter_removed_subpaths(self, path):`
			`"""Generator for paths that were removed within the given zip file."""`
			`if self.old_metadata:`
			`for subpath in self.old_metadata.iter_subpaths(path):`
			`if self.new_metadata.get_tag(path, subpath) is None:`
			`yield subpath`

			`def iter_modified_paths(self):`
			`"""Generator for paths whose contents have changed."""`
			`for path in self.new_metadata.iter_paths():`
			`old_tag = self._get_old_tag(path)`
			`new_tag = self.new_metadata.get_tag(path)`
			`if old_tag is not None and old_tag != new_tag:`
			`yield path`

			`def iter_modified_subpaths(self, path):`
			`"""Generator for paths within a zip file whose contents have changed."""`
			`for subpath in self.new_metadata.iter_subpaths(path):`
			`old_tag = self._get_old_tag(path, subpath)`
			`new_tag = self.new_metadata.get_tag(path, subpath)`
			`if old_tag is not None and old_tag != new_tag:`
			`yield subpath`

			`def iter_changed_paths(self):`
			`"""Generator for all changed paths (added/removed/modified)."""`
			`return itertools.chain(self.iter_removed_paths(),`
			`self.iter_modified_paths(),`
			`self.iter_added_paths())`

			`def iter_changed_subpaths(self, path):`
			`"""Generator for paths within a zip that were added/removed/modified."""`
			`return itertools.chain(self.iter_removed_subpaths(path),`
			`self.iter_modified_subpaths(path),`
			`self.iter_added_subpaths(path))`

			`def describe_difference(self):`
			`"""Returns a human-readable description of what changed."""`
			`if self.force:`
			`return 'force=True'`
			`elif self.old_metadata is None:`
			`return 'Previous stamp file not found.'`

			`if self.old_metadata.strings_md5() != self.new_metadata.strings_md5():`
			`ndiff = difflib.ndiff(self.old_metadata.get_strings(),`
			`self.new_metadata.get_strings())`
			`changed = [s for s in ndiff if not s.startswith(' ')]`
			`return 'Input strings changed:\n ' + '\n '.join(changed)`

			`if self.old_metadata.files_md5() == self.new_metadata.files_md5():`
			`return "There's no difference."`

			`lines = []`
			`lines.extend('Added: {}'.format(p for p in self.iter_added_paths()))`
			`lines.extend('Removed: {}'.format(p`
			`for p in self.iter_removed_paths()))`
			`for path in self.iter_modified_paths():`
			`lines.append('Modified: {}'.format(path))`
			`lines.extend(' -> Subpath added: {}'.format(`
			`p for p in self.iter_added_subpaths(path)))`
			`lines.extend(' -> Subpath removed: {}'.format(`
			`p for p in self.iter_removed_subpaths(path)))`
			`lines.extend(' -> Subpath modified: {}'.format(`
			`p for p in self.iter_modified_subpaths(path)))`
			`if lines:`
			`return 'Input files changed:\n {}'.format('\n '.join(lines))`

			`if self.missing_outputs:`
			`return 'Outputs do not exist:\n {}'.format('\n '.join(`
			`self.missing_outputs))`

			`return 'I have no idea what changed (there is a bug).'`


			`class _Metadata(object):`
			`"""Data model for tracking change metadata."""`
			`def __init__(self):`
			`self._files_md5 = None`
			`self._strings_md5 = None`
			`self._files = []`
			`self._strings = []`
			`# Map of (path, subpath) -> entry. Created upon first call to _get_entry().`
			`self._file_map = None`

			`@classmethod`
			`def from_file(cls, fileobj):`
			`"""Returns a _Metadata initialized from a file object."""`
			`ret = cls()`
			`obj = json.load(fileobj)`
			`ret._files_md5 = obj['files-md5']`
			`ret._strings_md5 = obj['strings-md5']`
			`ret._files = obj['input-files']`
			`ret._strings = obj['input-strings']`
			`return ret`

			`def to_file(self, fileobj):`
			`"""Serializes metadata to the given file object."""`
			`obj = {`
			`"files-md5": self.files_md5(),`
			`"strings-md5": self.strings_md5(),`
			`"input-files": self._files,`
			`"input-strings": self._strings,`
			`}`
			`json.dump(obj, fileobj, indent=2, sort_keys=True)`

			`def _assert_not_queried(self):`
			`assert self._files_md5 is None`
			`assert self._strings_md5 is None`
			`assert self._file_map is None`

			`def add_strings(self, values):`
			`self._assert_not_queried()`
			`self._strings.extend(str(v) for v in values)`

			`def add_file(self, path, tag):`
			`"""Adds metadata for a non-zip file.`

			`Args:`
			`path: Path to the file.`
			`tag: A short string representative of the file contents.`
			`"""`
			`self._assert_not_queried()`
			`self._files.append({`
			`'path': path,`
			`'tag': tag,`
			`})`

			`def add_zip_file(self, path, entries):`
			`"""Adds metadata for a zip file.`

			`Args:`
			`path: Path to the file.`
			`entries: List of (subpath, tag) tuples for entries within the zip.`
			`"""`
			`self._assert_not_queried()`
			`tag = _compute_inline_md5(`
			`itertools.chain((e[0] for e in entries), (e[1] for e in entries)))`
			`self._files.append({`
			`'path':`
			`path,`
			`'tag':`
			`tag,`
			`'entries': [{`
			`"path": e[0],`
			`"tag": e[1]`
			`} for e in entries],`
			`})`

			`def get_strings(self):`
			`"""Returns the list of input strings."""`
			`return self._strings`

			`def files_md5(self):`
			`"""Lazily computes and returns the aggregate md5 of input files."""`
			`if self._files_md5 is None:`
			`# Omit paths from md5 since temporary files have random names.`
			`self._files_md5 = _compute_inline_md5(`
			`self.get_tag(p) for p in sorted(self.iter_paths()))`
			`return self._files_md5`

			`def strings_md5(self):`
			`"""Lazily computes and returns the aggregate md5 of input strings."""`
			`if self._strings_md5 is None:`
			`self._strings_md5 = _compute_inline_md5(self._strings)`
			`return self._strings_md5`

			`def _get_entry(self, path, subpath=None):`
			`"""Returns the JSON entry for the given path / subpath."""`
			`if self._file_map is None:`
			`self._file_map = {}`
			`for entry in self._files:`
			`self._file_map[(entry['path'], None)] = entry`
			`for subentry in entry.get('entries', ()):`
			`self._file_map[(entry['path'],`
			`subentry['path'])] = subentry`
			`return self._file_map.get((path, subpath))`

			`def get_tag(self, path, subpath=None):`
			`"""Returns the tag for the given path / subpath."""`
			`ret = self._get_entry(path, subpath)`
			`return ret and ret['tag']`

			`def iter_paths(self):`
			`"""Returns a generator for all top-level paths."""`
			`return (e['path'] for e in self._files)`

			`def iter_subpaths(self, path):`
			`"""Returns a generator for all subpaths in the given zip.`

			`If the given path is not a zip file or doesn't exist, returns an empty`
			`iterable.`
			`"""`
			`outer_entry = self._get_entry(path)`
			`if not outer_entry:`
			`return ()`
			`subentries = outer_entry.get('entries', [])`
			`return (entry['path'] for entry in subentries)`


			`def _update_md5_for_file(md5, path, block_size=2**16):`
			`# record md5 of linkto for dead link.`
			`if os.path.islink(path):`
			`linkto = os.readlink(path)`
			`if not os.path.exists(linkto):`
			`md5.update(linkto.encode())`
			`return`

			`with open(path, 'rb') as infile:`
			`while True:`
			`data = infile.read(block_size)`
			`if not data:`
			`break`
			`md5.update(data)`


			`def _update_md5_for_directory(md5, dir_path):`
			`for root, _, files in os.walk(dir_path):`
			`for f in files:`
			`_update_md5_for_file(md5, os.path.join(root, f))`


			`def _md5_for_path(path):`
			`md5 = hashlib.md5()`
			`if os.path.isdir(path):`
			`_update_md5_for_directory(md5, path)`
			`else:`
			`_update_md5_for_file(md5, path)`
			`return md5.hexdigest()`


			`def _compute_inline_md5(iterable):`
			`"""Computes the md5 of the concatenated parameters."""`
			`md5 = hashlib.md5()`
			`for item in iterable:`
			`md5.update(str(item).encode())`
			`return md5.hexdigest()`


			`def _is_zip_file(path):`
			`"""Returns whether to treat the given file as a zip file."""`
			`return path[-4:] in ('.zip')`


			`def _extract_zip_entries(path):`
			`"""Returns a list of (path, CRC32) of all files within \|path\|."""`
			`entries = []`
			`with zipfile.ZipFile(path) as zip_file:`
			`for zip_info in zip_file.infolist():`
			`# Skip directories and empty files.`
			`if zip_info.CRC:`
			`entries.append(`
			`(zip_info.filename, zip_info.CRC + zip_info.compress_type))`
			`return entries`