# MIT License
#
# Copyright (c) 2018-2020 Tskit Developers
# Copyright (c) 2017 University of Oxford
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
Tree sequence IO via the tables API.
"""
import base64
import datetime
import itertools
import json
import sys
import warnings
from typing import Any
from typing import Tuple

import attr
import numpy as np

import _tskit
import tskit
import tskit.metadata as metadata
import tskit.provenance as provenance
import tskit.util as util
from tskit import UNKNOWN_TIME

attr_options = {"slots": True, "frozen": True, "auto_attribs": True}


@attr.s(eq=False, **attr_options)
class IndividualTableRow:
    flags: int
    location: np.ndarray
    metadata: bytes

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return False
        else:
            return all(
                (
                    self.flags == other.flags,
                    np.array_equal(self.location, other.location),
                    self.metadata == other.metadata,
                )
            )

    def __ne__(self, other):
        return not self.__eq__(other)


@attr.s(**attr_options)
class NodeTableRow:
    flags: int
    time: float
    population: int
    individual: int
    metadata: bytes


@attr.s(**attr_options)
class EdgeTableRow:
    left: float
    right: float
    parent: int
    child: int
    metadata: bytes


@attr.s(**attr_options)
class MigrationTableRow:
    left: float
    right: float
    node: int
    source: int
    dest: int
    time: float
    metadata: bytes


@attr.s(**attr_options)
class SiteTableRow:
    position: float
    ancestral_state: str
    metadata: bytes


@attr.s(eq=False, **attr_options)
class MutationTableRow:
    site: int
    node: int
    derived_state: str
    parent: int
    metadata: bytes
    time: float

    def __eq__(self, other):
        return (
            isinstance(other, MutationTableRow)
            and self.site == other.site
            and self.node == other.node
            and self.derived_state == other.derived_state
            and self.parent == other.parent
            and self.metadata == other.metadata
            and (
                self.time == other.time
                or (
                    util.is_unknown_time(self.time) and util.is_unknown_time(other.time)
                )
            )
        )


@attr.s(**attr_options)
class PopulationTableRow:
    metadata: bytes


@attr.s(**attr_options)
class ProvenanceTableRow:
    timestamp: str
    record: str


@attr.s(**attr_options)
class TableCollectionIndexes:
    edge_insertion_order: np.ndarray = attr.ib(default=None)
    edge_removal_order: np.ndarray = attr.ib(default=None)

    def asdict(self):
        return attr.asdict(self, filter=lambda k, v: v is not None)

    @property
    def nbytes(self):
        return self.edge_insertion_order.nbytes + self.edge_removal_order.nbytes


def keep_with_offset(keep, data, offset):
    """
    Used when filtering _offset columns in tables
    """
    # We need the astype here for 32 bit machines
    lens = np.diff(offset).astype(np.int32)
    return (
        data[np.repeat(keep, lens)],
        np.concatenate(
            [
                np.array([0], dtype=offset.dtype),
                np.cumsum(lens[keep], dtype=offset.dtype),
            ]
        ),
    )
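
# Example (illustrative sketch, not part of the library): keep rows 0 and 2 of
# a ragged column holding the values [b"ab", b"c", b"def"]. Row j of a ragged
# column occupies data[offset[j]:offset[j + 1]].
#
#     keep = np.array([True, False, True])
#     data = np.frombuffer(b"abcdef", dtype=np.int8)
#     offset = np.array([0, 2, 3, 6], dtype=np.uint32)
#     new_data, new_offset = keep_with_offset(keep, data, offset)
#     # new_data decodes to b"abdef"; new_offset is [0, 2, 5]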


class BaseTable:
    """
    Superclass of high-level tables. Not intended for direct instantiation.
    """

    # The list of columns in the table. Must be set by subclasses.
    column_names = []

    def __init__(self, ll_table, row_class, **kwargs):
        self.ll_table = ll_table
        self.row_class = row_class
        super().__init__(**kwargs)

    def _check_required_args(self, **kwargs):
        for k, v in kwargs.items():
            if v is None:
                raise TypeError(f"{k} is required")

    @property
    def num_rows(self):
        return self.ll_table.num_rows

    @property
    def max_rows(self):
        return self.ll_table.max_rows

    @property
    def max_rows_increment(self):
        return self.ll_table.max_rows_increment

    @property
    def nbytes(self) -> int:
        """
        Returns the total number of bytes required to store the data
        in this table. Note that this may not be equal to
        the actual memory footprint.
        """
        # It's not ideal that we run asdict() here to do this as we're
        # currently creating copies of the column arrays, so it would
        # be more efficient to have dedicated low-level methods. However,
        # if we do have read-only views on the underlying memory for the
        # column arrays then this will be a perfectly good way of
        # computing the nbytes values and the overhead minimal.
        d = self.asdict()
        nbytes = 0
        # Some tables don't have a metadata_schema
        metadata_schema = d.pop("metadata_schema", None)
        if metadata_schema is not None:
            nbytes += len(metadata_schema.encode())
        nbytes += sum(col.nbytes for col in d.values())
        return nbytes

    def equals(self, other, ignore_metadata=False):
        """
        Returns True if `self` and `other` are equal. By default, two tables
        are considered equal if their columns and metadata schemas are
        byte-for-byte identical.

        :param other: Another table instance
        :param bool ignore_metadata: If True exclude metadata and metadata schemas
            from the comparison.
        :return: True if other is equal to this table; False otherwise.
        :rtype: bool
        """
        # Note: most tables support ignore_metadata; we can override equals
        # for those that don't.
        ret = False
        if type(other) is type(self):
            ret = bool(
                self.ll_table.equals(other.ll_table, ignore_metadata=ignore_metadata)
            )
        return ret

    def __eq__(self, other):
        return self.equals(other)

    def __len__(self):
        return self.num_rows

    def __getattr__(self, name):
        if name in self.column_names:
            return getattr(self.ll_table, name)
        else:
            raise AttributeError(
                f"{self.__class__.__name__} object has no attribute {name}"
            )

    def __setattr__(self, name, value):
        if name in self.column_names:
            d = self.asdict()
            d[name] = value
            self.set_columns(**d)
        else:
            object.__setattr__(self, name, value)

    def __getitem__(self, index):
        """
        Return the specified row of this table, decoding metadata if it is present.
        Supports negative indexing, e.g. ``table[-5]``.

        :param int index: the zero-based index of the desired row
        """
        if index < 0:
            index += len(self)
        if index < 0 or index >= len(self):
            raise IndexError("Index out of bounds")
        row = self.ll_table.get_row(index)
        try:
            row = self.decode_row(row)
        except AttributeError:
            # This means the class returns the low-level row unchanged.
            pass
        return self.row_class(*row)
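
    # Example (illustrative, assuming ``t`` is any populated table): rows
    # support negative indexing like Python lists.
    #
    #     last = t[-1]       # same row as t[len(t) - 1]
    #     t[len(t)]          # raises IndexError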

    def clear(self):
        """
        Deletes all rows in this table.
        """
        self.ll_table.clear()

    def reset(self):
        # Deprecated alias for clear
        self.clear()

    def truncate(self, num_rows):
        """
        Truncates this table so that only the first ``num_rows`` rows are retained.

        :param int num_rows: The number of rows to retain in this table.
        """
        return self.ll_table.truncate(num_rows)

    # Pickle support
    def __getstate__(self):
        return self.asdict()

    # Unpickle support
    def __setstate__(self, state):
        self.__init__()
        self.set_columns(**state)

    def copy(self):
        """
        Returns a deep copy of this table.
        """
        copy = self.__class__()
        copy.set_columns(**self.asdict())
        return copy

    def asdict(self):
        """
        Returns a dictionary mapping the names of the columns in this table
        to the corresponding numpy arrays.
        """
        ret = {col: getattr(self, col) for col in self.column_names}
        # Not all tables have metadata
        try:
            ret["metadata_schema"] = str(self.metadata_schema)
        except AttributeError:
            pass
        return ret

    def set_columns(self, **kwargs):
        """
        Sets the values for each column in this :class:`Table` using
        values provided in numpy arrays. Overwrites any data currently stored in
        the table.
        """
        raise NotImplementedError()

    def __str__(self):
        headers, rows = self._text_header_and_rows()
        return "\n".join("\t".join(row) for row in [headers] + rows)

    def _repr_html_(self):
        """
        Called by jupyter notebooks to render tables
        """
        headers, rows = self._text_header_and_rows(limit=40)
        headers = "".join(f"<th>{header}</th>" for header in headers)
        rows = (
            f"<td><em>... skipped {row[11:]} rows ...</em></td>"
            if "__skipped__" in row
            else "".join(f"<td>{cell}</td>" for cell in row)
            for row in rows
        )
        rows = "".join(f"<tr>{row}</tr>\n" for row in rows)
        return f"""
            <div>
                <style scoped="">
                    .tskit-table tbody tr th:only-of-type {{vertical-align: middle;}}
                    .tskit-table tbody tr th {{vertical-align: top;}}
                    .tskit-table tbody td {{text-align: right;}}
                </style>
                <table border="1" class="tskit-table">
                    <thead>
                        <tr>
                            {headers}
                        </tr>
                    </thead>
                    <tbody>
                        {rows}
                    </tbody>
                </table>
            </div>
        """


class MetadataMixin:
    """
    Mixin class for tables that have a metadata column.
    """

    def __init__(self):
        self.metadata_column_index = list(
            attr.fields_dict(self.row_class).keys()
        ).index("metadata")
        self._update_metadata_schema_cache_from_ll()

    def packset_metadata(self, metadatas):
        """
        Packs the specified list of metadata values and updates the ``metadata``
        and ``metadata_offset`` columns. The length of the metadatas array
        must be equal to the number of rows in the table.

        :param list metadatas: A list of metadata bytes values.
        """
        packed, offset = util.pack_bytes(metadatas)
        d = self.asdict()
        d["metadata"] = packed
        d["metadata_offset"] = offset
        self.set_columns(**d)
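
    # Example (illustrative sketch, assuming ``t`` is a table using the default
    # raw-bytes metadata schema): bulk-pack one metadata value per row.
    #
    #     t.packset_metadata([b"a", b"", b"xyz"])
    #     # t.metadata is now the int8 view of b"axyz" and
    #     # t.metadata_offset is [0, 1, 1, 4].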

    @property
    def metadata_schema(self) -> metadata.MetadataSchema:
        """
        The :class:`tskit.MetadataSchema` for this table.
        """
        return self._metadata_schema_cache

    @metadata_schema.setter
    def metadata_schema(self, schema: metadata.MetadataSchema) -> None:
        self.ll_table.metadata_schema = str(schema)
        self._update_metadata_schema_cache_from_ll()

    def decode_row(self, row: Tuple[Any]) -> Tuple:
        return (
            row[: self.metadata_column_index]
            + (self._metadata_schema_cache.decode_row(row[self.metadata_column_index]),)
            + row[self.metadata_column_index + 1 :]
        )

    def _update_metadata_schema_cache_from_ll(self) -> None:
        self._metadata_schema_cache = metadata.parse_metadata_schema(
            self.ll_table.metadata_schema
        )


class IndividualTable(BaseTable, MetadataMixin):
    """
    A table defining the individuals in a tree sequence. Note that although
    each Individual has associated nodes, reference to these is not stored in
    the individual table, but rather reference to the individual is stored for
    each node in the :class:`NodeTable`. This is similar to the way in which
    the relationship between sites and mutations is modelled.

    :warning: The numpy arrays returned by table attribute accesses are **copies**
        of the underlying data. In particular, this means that you cannot edit
        the values in the columns by updating the attribute arrays.

        **NOTE:** this behaviour may change in future.

    :ivar flags: The array of flags values.
    :vartype flags: numpy.ndarray, dtype=np.uint32
    :ivar location: The flattened array of floating point location values. See
        :ref:`sec_encoding_ragged_columns` for more details.
    :vartype location: numpy.ndarray, dtype=np.float64
    :ivar location_offset: The array of offsets into the location column. See
        :ref:`sec_encoding_ragged_columns` for more details.
    :vartype location_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata: The flattened array of binary metadata values. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata: numpy.ndarray, dtype=np.int8
    :ivar metadata_offset: The array of offsets into the metadata column. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata_schema: The metadata schema for this table's metadata column
    :vartype metadata_schema: tskit.MetadataSchema
    """

    column_names = [
        "flags",
        "location",
        "location_offset",
        "metadata",
        "metadata_offset",
    ]

    def __init__(self, max_rows_increment=0, ll_table=None):
        if ll_table is None:
            ll_table = _tskit.IndividualTable(max_rows_increment=max_rows_increment)
        super().__init__(ll_table, IndividualTableRow)

    def _text_header_and_rows(self, limit=None):
        flags = self.flags
        location = util.unpack_arrays(self.location, self.location_offset)
        metadata = util.unpack_bytes(self.metadata, self.metadata_offset)
        headers = ("id", "flags", "location", "metadata")
        rows = []
        if limit is None or self.num_rows <= limit:
            indexes = range(self.num_rows)
        else:
            indexes = itertools.chain(
                range(limit // 2),
                [-1],
                range(self.num_rows - (limit - (limit // 2)), self.num_rows),
            )
        for j in indexes:
            if j == -1:
                rows.append(f"__skipped__{self.num_rows-limit}")
            else:
                md = base64.b64encode(metadata[j]).decode("utf8")
                location_str = ",".join(map(str, location[j]))
                rows.append(
                    "{}\t{}\t{}\t{}".format(j, flags[j], location_str, md).split("\t")
                )
        return headers, rows

    def add_row(self, flags=0, location=None, metadata=None):
        """
        Adds a new row to this :class:`IndividualTable` and returns the ID of the
        corresponding individual. Metadata, if specified, will be validated and
        encoded according to the table's
        :attr:`metadata_schema<tskit.IndividualTable.metadata_schema>`.

        :param int flags: The bitwise flags for the new individual.
        :param array-like location: A list of numeric values or one-dimensional
            numpy array describing the location of this individual. If not
            specified or None, a zero-dimensional location is stored.
        :param object metadata: Any object that is valid metadata for the table's
            schema.
        :return: The ID of the newly added individual.
        :rtype: int
        """
        metadata = self.metadata_schema.validate_and_encode_row(metadata)
        return self.ll_table.add_row(flags=flags, location=location, metadata=metadata)
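
    # Example (illustrative, assuming the default raw-bytes metadata schema):
    #
    #     t = tskit.IndividualTable()
    #     iid = t.add_row(flags=0, location=[0.5, 1.5], metadata=b"name")
    #     t[iid].location        # -> array([0.5, 1.5])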

    def set_columns(
        self,
        flags=None,
        location=None,
        location_offset=None,
        metadata=None,
        metadata_offset=None,
        metadata_schema=None,
    ):
        """
        Sets the values for each column in this :class:`IndividualTable` using the
        values in the specified arrays. Overwrites any data currently stored in
        the table.

        The ``flags`` array is mandatory and defines the number of individuals
        the table will contain.
        The ``location`` and ``location_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns`.
        The ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns`.
        See :ref:`sec_tables_api_binary_columns` for more information and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.

        :param flags: The bitwise flags for each individual. Required.
        :type flags: numpy.ndarray, dtype=np.uint32
        :param location: The flattened location array. Must be specified along
            with ``location_offset``. If not specified or None, an empty location
            value is stored for each individual.
        :type location: numpy.ndarray, dtype=np.float64
        :param location_offset: The offsets into the ``location`` array.
        :type location_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each individual.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata_schema: The encoded metadata schema.
        """
        self._check_required_args(flags=flags)
        self.ll_table.set_columns(
            dict(
                flags=flags,
                location=location,
                location_offset=location_offset,
                metadata=metadata,
                metadata_offset=metadata_offset,
                metadata_schema=metadata_schema,
            )
        )

    def append_columns(
        self,
        flags=None,
        location=None,
        location_offset=None,
        metadata=None,
        metadata_offset=None,
    ):
        """
        Appends the specified arrays to the end of the columns in this
        :class:`IndividualTable`. This allows many new rows to be added at once.

        The ``flags`` array is mandatory and defines the number of extra
        individuals to add to the table.
        The ``location`` and ``location_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns`.
        The ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns`.
        See :ref:`sec_tables_api_binary_columns` for more information and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.

        :param flags: The bitwise flags for each individual. Required.
        :type flags: numpy.ndarray, dtype=np.uint32
        :param location: The flattened location array. Must be specified along
            with ``location_offset``. If not specified or None, an empty location
            value is stored for each individual.
        :type location: numpy.ndarray, dtype=np.float64
        :param location_offset: The offsets into the ``location`` array.
        :type location_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each individual.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        """
        self._check_required_args(flags=flags)
        self.ll_table.append_columns(
            dict(
                flags=flags,
                location=location,
                location_offset=location_offset,
                metadata=metadata,
                metadata_offset=metadata_offset,
            )
        )

    def packset_location(self, locations):
        """
        Packs the specified list of location values and updates the
        ``location`` and ``location_offset`` columns. The length of the
        locations array must be equal to the number of rows in the table.

        :param list locations: A list of locations interpreted as numpy float64
            arrays.
        """
        packed, offset = util.pack_arrays(locations)
        d = self.asdict()
        d["location"] = packed
        d["location_offset"] = offset
        self.set_columns(**d)
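
    # Example (illustrative sketch): store a location for every individual in
    # one call; rows may have different lengths, including empty.
    #
    #     t.packset_location([[0.0, 1.0], [2.5], []])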


class NodeTable(BaseTable, MetadataMixin):
    """
    A table defining the nodes in a tree sequence. See the
    :ref:`definitions <sec_node_table_definition>` for details on the columns
    in this table and the
    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>` section
    for the properties needed for a node table to be a part of a valid tree
    sequence.

    :warning: The numpy arrays returned by table attribute accesses are **copies**
        of the underlying data. In particular, this means that you cannot edit
        the values in the columns by updating the attribute arrays.

        **NOTE:** this behaviour may change in future.

    :ivar time: The array of time values.
    :vartype time: numpy.ndarray, dtype=np.float64
    :ivar flags: The array of flags values.
    :vartype flags: numpy.ndarray, dtype=np.uint32
    :ivar population: The array of population IDs.
    :vartype population: numpy.ndarray, dtype=np.int32
    :ivar individual: The array of individual IDs that each node belongs to.
    :vartype individual: numpy.ndarray, dtype=np.int32
    :ivar metadata: The flattened array of binary metadata values. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata: numpy.ndarray, dtype=np.int8
    :ivar metadata_offset: The array of offsets into the metadata column. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata_schema: The metadata schema for this table's metadata column
    :vartype metadata_schema: tskit.MetadataSchema
    """

    column_names = [
        "time",
        "flags",
        "population",
        "individual",
        "metadata",
        "metadata_offset",
    ]

    def __init__(self, max_rows_increment=0, ll_table=None):
        if ll_table is None:
            ll_table = _tskit.NodeTable(max_rows_increment=max_rows_increment)
        super().__init__(ll_table, NodeTableRow)

    def _text_header_and_rows(self, limit=None):
        time = self.time
        flags = self.flags
        population = self.population
        individual = self.individual
        metadata = util.unpack_bytes(self.metadata, self.metadata_offset)
        headers = ("id", "flags", "population", "individual", "time", "metadata")
        rows = []
        if limit is None or self.num_rows <= limit:
            indexes = range(self.num_rows)
        else:
            indexes = itertools.chain(
                range(limit // 2),
                [-1],
                range(self.num_rows - (limit - (limit // 2)), self.num_rows),
            )
        for j in indexes:
            if j == -1:
                rows.append(f"__skipped__{self.num_rows-limit}")
            else:
                md = base64.b64encode(metadata[j]).decode("utf8")
                rows.append(
                    "{}\t{}\t{}\t{}\t{:.14f}\t{}".format(
                        j, flags[j], population[j], individual[j], time[j], md
                    ).split("\t")
                )
        return headers, rows

    def add_row(self, flags=0, time=0, population=-1, individual=-1, metadata=None):
        """
        Adds a new row to this :class:`NodeTable` and returns the ID of the
        corresponding node. Metadata, if specified, will be validated and encoded
        according to the table's
        :attr:`metadata_schema<tskit.NodeTable.metadata_schema>`.

        :param int flags: The bitwise flags for the new node.
        :param float time: The birth time for the new node.
        :param int population: The ID of the population in which the new node
            was born. Defaults to :data:`tskit.NULL`.
        :param int individual: The ID of the individual in which the new node
            was born. Defaults to :data:`tskit.NULL`.
        :param object metadata: Any object that is valid metadata for the table's
            schema.
        :return: The ID of the newly added node.
        :rtype: int
        """
        metadata = self.metadata_schema.validate_and_encode_row(metadata)
        return self.ll_table.add_row(flags, time, population, individual, metadata)
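
    # Example (illustrative): add a sample node at time 0 and an ancestral
    # node at time 1.
    #
    #     t = tskit.NodeTable()
    #     a = t.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
    #     b = t.add_row(flags=0, time=1.0)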

    def set_columns(
        self,
        flags=None,
        time=None,
        population=None,
        individual=None,
        metadata=None,
        metadata_offset=None,
        metadata_schema=None,
    ):
        """
        Sets the values for each column in this :class:`NodeTable` using the
        values in the specified arrays. Overwrites any data currently stored in
        the table.

        The ``flags``, ``time`` and ``population`` arrays must all be of the same
        length, which is equal to the number of nodes the table will contain.
        The ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns`.
        See :ref:`sec_tables_api_binary_columns` for more information and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.

        :param flags: The bitwise flags for each node. Required.
        :type flags: numpy.ndarray, dtype=np.uint32
        :param time: The time values for each node. Required.
        :type time: numpy.ndarray, dtype=np.float64
        :param population: The population values for each node. If not specified
            or None, the :data:`tskit.NULL` value is stored for each node.
        :type population: numpy.ndarray, dtype=np.int32
        :param individual: The individual values for each node. If not specified
            or None, the :data:`tskit.NULL` value is stored for each node.
        :type individual: numpy.ndarray, dtype=np.int32
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each node.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata_schema: The encoded metadata schema.
        """
        self._check_required_args(flags=flags, time=time)
        self.ll_table.set_columns(
            dict(
                flags=flags,
                time=time,
                population=population,
                individual=individual,
                metadata=metadata,
                metadata_offset=metadata_offset,
                metadata_schema=metadata_schema,
            )
        )
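
    # Example (illustrative sketch): bulk-build a two-node table, preparing the
    # ragged metadata column with tskit.pack_bytes.
    #
    #     metadata, metadata_offset = tskit.pack_bytes([b"A", b"B"])
    #     t.set_columns(
    #         flags=np.zeros(2, dtype=np.uint32),
    #         time=np.array([0.0, 1.0]),
    #         metadata=metadata,
    #         metadata_offset=metadata_offset,
    #     )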

    def append_columns(
        self,
        flags=None,
        time=None,
        population=None,
        individual=None,
        metadata=None,
        metadata_offset=None,
    ):
        """
        Appends the specified arrays to the end of the columns in this
        :class:`NodeTable`. This allows many new rows to be added at once.

        The ``flags``, ``time`` and ``population`` arrays must all be of the same
        length, which is equal to the number of nodes that will be added to the
        table.
        The ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns`.
        See :ref:`sec_tables_api_binary_columns` for more information and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.

        :param flags: The bitwise flags for each node. Required.
        :type flags: numpy.ndarray, dtype=np.uint32
        :param time: The time values for each node. Required.
        :type time: numpy.ndarray, dtype=np.float64
        :param population: The population values for each node. If not specified
            or None, the :data:`tskit.NULL` value is stored for each node.
        :type population: numpy.ndarray, dtype=np.int32
        :param individual: The individual values for each node. If not specified
            or None, the :data:`tskit.NULL` value is stored for each node.
        :type individual: numpy.ndarray, dtype=np.int32
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each node.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        """
        self._check_required_args(flags=flags, time=time)
        self.ll_table.append_columns(
            dict(
                flags=flags,
                time=time,
                population=population,
                individual=individual,
                metadata=metadata,
                metadata_offset=metadata_offset,
                metadata_schema=None,
            )
        )


class EdgeTable(BaseTable, MetadataMixin):
    """
    A table defining the edges in a tree sequence. See the
    :ref:`definitions <sec_edge_table_definition>` for details on the columns
    in this table and the
    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>` section
    for the properties needed for an edge table to be a part of a valid tree
    sequence.

    :warning: The numpy arrays returned by table attribute accesses are **copies**
        of the underlying data. In particular, this means that you cannot edit
        the values in the columns by updating the attribute arrays.

        **NOTE:** this behaviour may change in future.

    :ivar left: The array of left coordinates.
    :vartype left: numpy.ndarray, dtype=np.float64
    :ivar right: The array of right coordinates.
    :vartype right: numpy.ndarray, dtype=np.float64
    :ivar parent: The array of parent node IDs.
    :vartype parent: numpy.ndarray, dtype=np.int32
    :ivar child: The array of child node IDs.
    :vartype child: numpy.ndarray, dtype=np.int32
    :ivar metadata: The flattened array of binary metadata values. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata: numpy.ndarray, dtype=np.int8
    :ivar metadata_offset: The array of offsets into the metadata column. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata_schema: The metadata schema for this table's metadata column
    :vartype metadata_schema: tskit.MetadataSchema
    """

    column_names = [
        "left",
        "right",
        "parent",
        "child",
        "metadata",
        "metadata_offset",
    ]

    def __init__(self, max_rows_increment=0, ll_table=None):
        if ll_table is None:
            ll_table = _tskit.EdgeTable(max_rows_increment=max_rows_increment)
        super().__init__(ll_table, EdgeTableRow)

    def _text_header_and_rows(self, limit=None):
        left = self.left
        right = self.right
        parent = self.parent
        child = self.child
        metadata = util.unpack_bytes(self.metadata, self.metadata_offset)
        headers = ("id", "left\t", "right\t", "parent", "child", "metadata")
        rows = []
        if limit is None or self.num_rows <= limit:
            indexes = range(self.num_rows)
        else:
            indexes = itertools.chain(
                range(limit // 2),
                [-1],
                range(self.num_rows - (limit - (limit // 2)), self.num_rows),
            )
        for j in indexes:
            if j == -1:
                rows.append(f"__skipped__{self.num_rows-limit}")
            else:
                md = base64.b64encode(metadata[j]).decode("utf8")
                rows.append(
                    "{}\t{:.8f}\t{:.8f}\t{}\t{}\t{}".format(
                        j, left[j], right[j], parent[j], child[j], md
                    ).split("\t")
                )
        return headers, rows

    def add_row(self, left, right, parent, child, metadata=None):
        """
        Adds a new row to this :class:`EdgeTable` and returns the ID of the
        corresponding edge. Metadata, if specified, will be validated and encoded
        according to the table's
        :attr:`metadata_schema<tskit.EdgeTable.metadata_schema>`.

        :param float left: The left coordinate (inclusive).
        :param float right: The right coordinate (exclusive).
        :param int parent: The ID of the parent node.
        :param int child: The ID of the child node.
        :param object metadata: Any object that is valid metadata for the table's
            schema.
        :return: The ID of the newly added edge.
        :rtype: int
        """
        metadata = self.metadata_schema.validate_and_encode_row(metadata)
        return self.ll_table.add_row(left, right, parent, child, metadata)

    def set_columns(
        self,
        left=None,
        right=None,
        parent=None,
        child=None,
        metadata=None,
        metadata_offset=None,
        metadata_schema=None,
    ):
        """
        Sets the values for each column in this :class:`EdgeTable` using the
        values in the specified arrays. Overwrites any data currently stored in
        the table.

        The ``left``, ``right``, ``parent`` and ``child`` parameters are
        mandatory, and must be numpy arrays of the same length (which is equal
        to the number of edges the table will contain).
        The ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns`.
        See :ref:`sec_tables_api_binary_columns` for more information and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.

        :param left: The left coordinates (inclusive).
        :type left: numpy.ndarray, dtype=np.float64
        :param right: The right coordinates (exclusive).
        :type right: numpy.ndarray, dtype=np.float64
        :param parent: The parent node IDs.
        :type parent: numpy.ndarray, dtype=np.int32
        :param child: The child node IDs.
        :type child: numpy.ndarray, dtype=np.int32
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each edge.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata_schema: The encoded metadata schema.
        """
        self._check_required_args(left=left, right=right, parent=parent, child=child)
        self.ll_table.set_columns(
            dict(
                left=left,
                right=right,
                parent=parent,
                child=child,
                metadata=metadata,
                metadata_offset=metadata_offset,
                metadata_schema=metadata_schema,
            )
        )

    def append_columns(
        self, left, right, parent, child, metadata=None, metadata_offset=None
    ):
        """
        Appends the specified arrays to the end of the columns of this
        :class:`EdgeTable`. This allows many new rows to be added at once.

        The ``left``, ``right``, ``parent`` and ``child`` parameters are
        mandatory, and must be numpy arrays of the same length (which is equal
        to the number of additional edges to add to the table).
        The ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns`.
        See :ref:`sec_tables_api_binary_columns` for more information and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.

        :param left: The left coordinates (inclusive).
        :type left: numpy.ndarray, dtype=np.float64
        :param right: The right coordinates (exclusive).
        :type right: numpy.ndarray, dtype=np.float64
        :param parent: The parent node IDs.
        :type parent: numpy.ndarray, dtype=np.int32
        :param child: The child node IDs.
        :type child: numpy.ndarray, dtype=np.int32
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each edge.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        """
        self.ll_table.append_columns(
            dict(
                left=left,
                right=right,
                parent=parent,
                child=child,
                metadata=metadata,
                metadata_offset=metadata_offset,
            )
        )

    def squash(self):
        """
        Sorts, then condenses the table into the smallest possible number of rows
        by combining any adjacent edges. A pair of edges is said to be `adjacent`
        if they have the same parent and child nodes, and if the left coordinate
        of one of the edges is equal to the right coordinate of the other edge.
        The ``squash`` method modifies an :class:`EdgeTable` in place so that
        any set of adjacent edges is replaced by a single edge. The new edge
        will have the same parent and child node, a left coordinate equal to
        the smallest left coordinate in the set, and a right coordinate equal
        to the largest right coordinate in the set. The new edge table will be
        sorted in the canonical order (P, C, L, R).
        """
        self.ll_table.squash()
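
    # Example (illustrative): two abutting edges with the same parent and child
    # are merged into a single edge by squash().
    #
    #     t = tskit.EdgeTable()
    #     t.add_row(left=0, right=5, parent=2, child=0)
    #     t.add_row(left=5, right=10, parent=2, child=0)
    #     t.squash()
    #     # t now holds the single edge (left=0, right=10, parent=2, child=0)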


class MigrationTable(BaseTable, MetadataMixin):
    """
    A table defining the migrations in a tree sequence. See the
    :ref:`definitions <sec_migration_table_definition>` for details on the columns
    in this table and the
    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>` section
    for the properties needed for a migration table to be a part of a valid tree
    sequence.

    :warning: The numpy arrays returned by table attribute accesses are **copies**
        of the underlying data. In particular, this means that you cannot edit
        the values in the columns by updating the attribute arrays.

        **NOTE:** this behaviour may change in future.

    :ivar left: The array of left coordinates.
    :vartype left: numpy.ndarray, dtype=np.float64
    :ivar right: The array of right coordinates.
    :vartype right: numpy.ndarray, dtype=np.float64
    :ivar node: The array of node IDs.
    :vartype node: numpy.ndarray, dtype=np.int32
    :ivar source: The array of source population IDs.
    :vartype source: numpy.ndarray, dtype=np.int32
    :ivar dest: The array of destination population IDs.
    :vartype dest: numpy.ndarray, dtype=np.int32
    :ivar time: The array of time values.
    :vartype time: numpy.ndarray, dtype=np.float64
    :ivar metadata: The flattened array of binary metadata values. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata: numpy.ndarray, dtype=np.int8
    :ivar metadata_offset: The array of offsets into the metadata column. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata_schema: The metadata schema for this table's metadata column
    :vartype metadata_schema: tskit.MetadataSchema
    """

    column_names = [
        "left",
        "right",
        "node",
        "source",
        "dest",
        "time",
        "metadata",
        "metadata_offset",
    ]

    def __init__(self, max_rows_increment=0, ll_table=None):
        if ll_table is None:
            ll_table = _tskit.MigrationTable(max_rows_increment=max_rows_increment)
        super().__init__(ll_table, MigrationTableRow)

    def _text_header_and_rows(self, limit=None):
        left = self.left
        right = self.right
        node = self.node
        source = self.source
        dest = self.dest
        time = self.time
        metadata = util.unpack_bytes(self.metadata, self.metadata_offset)
        headers = ("id", "left", "right", "node", "source", "dest", "time", "metadata")
        rows = []
        if limit is None or self.num_rows <= limit:
            indexes = range(self.num_rows)
        else:
            indexes = itertools.chain(
                range(limit // 2),
                [-1],
                range(self.num_rows - (limit - (limit // 2)), self.num_rows),
            )
        for j in indexes:
            if j == -1:
                rows.append(f"__skipped__{self.num_rows-limit}")
            else:
                md = base64.b64encode(metadata[j]).decode("utf8")
                rows.append(
                    "{}\t{:.8f}\t{:.8f}\t{}\t{}\t{}\t{:.8f}\t{}".format(
                        j, left[j], right[j], node[j], source[j], dest[j], time[j], md
                    ).split("\t")
                )
        return headers, rows

    def add_row(self, left, right, node, source, dest, time, metadata=None):
        """
        Adds a new row to this :class:`MigrationTable` and returns the ID of the
        corresponding migration. Metadata, if specified, will be validated and
        encoded according to the table's
        :attr:`metadata_schema<tskit.MigrationTable.metadata_schema>`.

        :param float left: The left coordinate (inclusive).
        :param float right: The right coordinate (exclusive).
        :param int node: The node ID.
        :param int source: The ID of the source population.
        :param int dest: The ID of the destination population.
        :param float time: The time of the migration event.
        :param object metadata: Any object that is valid metadata for the table's
            schema.
        :return: The ID of the newly added migration.
        :rtype: int
        """
        metadata = self.metadata_schema.validate_and_encode_row(metadata)
        return self.ll_table.add_row(left, right, node, source, dest, time, metadata)

    def set_columns(
        self,
        left=None,
        right=None,
        node=None,
        source=None,
        dest=None,
        time=None,
        metadata=None,
        metadata_offset=None,
        metadata_schema=None,
    ):
        """
        Sets the values for each column in this :class:`MigrationTable` using the
        values in the specified arrays. Overwrites any data currently stored in
        the table.

        All parameters except ``metadata`` and ``metadata_offset`` are mandatory,
        and must be numpy arrays of the same length (which is equal to the
        number of migrations the table will contain).
        The ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns`.
        See :ref:`sec_tables_api_binary_columns` for more information and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.

        :param left: The left coordinates (inclusive).
        :type left: numpy.ndarray, dtype=np.float64
        :param right: The right coordinates (exclusive).
        :type right: numpy.ndarray, dtype=np.float64
        :param node: The node IDs.
        :type node: numpy.ndarray, dtype=np.int32
        :param source: The source population IDs.
        :type source: numpy.ndarray, dtype=np.int32
        :param dest: The destination population IDs.
        :type dest: numpy.ndarray, dtype=np.int32
        :param time: The time of each migration.
        :type time: numpy.ndarray, dtype=np.float64
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each migration.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata_schema: The encoded metadata schema.
        """
        self._check_required_args(
            left=left, right=right, node=node, source=source, dest=dest, time=time
        )
        self.ll_table.set_columns(
            dict(
                left=left,
                right=right,
                node=node,
                source=source,
                dest=dest,
                time=time,
                metadata=metadata,
                metadata_offset=metadata_offset,
                metadata_schema=metadata_schema,
            )
        )

    def append_columns(
        self,
        left,
        right,
        node,
        source,
        dest,
        time,
        metadata=None,
        metadata_offset=None,
    ):
        """
        Appends the specified arrays to the end of the columns of this
        :class:`MigrationTable`. This allows many new rows to be added at once.

        All parameters except ``metadata`` and ``metadata_offset`` are mandatory,
        and must be numpy arrays of the same length (which is equal to the
        number of additional migrations to add to the table).
        The ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns`.
        See :ref:`sec_tables_api_binary_columns` for more information and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.

        :param left: The left coordinates (inclusive).
        :type left: numpy.ndarray, dtype=np.float64
        :param right: The right coordinates (exclusive).
        :type right: numpy.ndarray, dtype=np.float64
        :param node: The node IDs.
        :type node: numpy.ndarray, dtype=np.int32
        :param source: The source population IDs.
        :type source: numpy.ndarray, dtype=np.int32
        :param dest: The destination population IDs.
        :type dest: numpy.ndarray, dtype=np.int32
        :param time: The time of each migration.
        :type time: numpy.ndarray, dtype=np.float64
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each migration.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        """
        self.ll_table.append_columns(
            dict(
                left=left,
                right=right,
                node=node,
                source=source,
                dest=dest,
                time=time,
                metadata=metadata,
                metadata_offset=metadata_offset,
            )
        )


class SiteTable(BaseTable, MetadataMixin):
    """
    A table defining the sites in a tree sequence. See the
    :ref:`definitions <sec_site_table_definition>` for details on the columns
    in this table and the
    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>` section
    for the properties needed for a site table to be a part of a valid tree
    sequence.

    :warning: The numpy arrays returned by table attribute accesses are **copies**
        of the underlying data. In particular, this means that you cannot edit
        the values in the columns by updating the attribute arrays.

        **NOTE:** this behaviour may change in future.

    :ivar position: The array of site position coordinates.
    :vartype position: numpy.ndarray, dtype=np.float64
    :ivar ancestral_state: The flattened array of ancestral state strings. See
        :ref:`sec_tables_api_text_columns` for more details.
    :vartype ancestral_state: numpy.ndarray, dtype=np.int8
    :ivar ancestral_state_offset: The offsets of rows in the ancestral_state
        array. See :ref:`sec_tables_api_text_columns` for more details.
    :vartype ancestral_state_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata: The flattened array of binary metadata values. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata: numpy.ndarray, dtype=np.int8
    :ivar metadata_offset: The array of offsets into the metadata column. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata_schema: The metadata schema for this table's metadata column
    :vartype metadata_schema: tskit.MetadataSchema
    """

    column_names = [
        "position",
        "ancestral_state",
        "ancestral_state_offset",
        "metadata",
        "metadata_offset",
    ]

    def __init__(self, max_rows_increment=0, ll_table=None):
        if ll_table is None:
            ll_table = _tskit.SiteTable(max_rows_increment=max_rows_increment)
        super().__init__(ll_table, SiteTableRow)

    def _text_header_and_rows(self, limit=None):
        position = self.position
        ancestral_state = util.unpack_strings(
            self.ancestral_state, self.ancestral_state_offset
        )
        metadata = util.unpack_bytes(self.metadata, self.metadata_offset)
        headers = ("id", "position", "ancestral_state", "metadata")
        rows = []
        if limit is None or self.num_rows <= limit:
            indexes = range(self.num_rows)
        else:
            indexes = itertools.chain(
                range(limit // 2),
                [-1],
                range(self.num_rows - (limit - (limit // 2)), self.num_rows),
            )
        for j in indexes:
            if j == -1:
                rows.append(f"__skipped__{self.num_rows-limit}")
            else:
                md = base64.b64encode(metadata[j]).decode("utf8")
                rows.append(
                    "{}\t{:.8f}\t{}\t{}".format(
                        j, position[j], ancestral_state[j], md
                    ).split("\t")
                )
        return headers, rows

    def add_row(self, position, ancestral_state, metadata=None):
        """
        Adds a new row to this :class:`SiteTable` and returns the ID of the
        corresponding site. Metadata, if specified, will be validated and encoded
        according to the table's
        :attr:`metadata_schema<tskit.SiteTable.metadata_schema>`.

        :param float position: The position of this site in genome coordinates.
        :param str ancestral_state: The state of this site at the root of the tree.
        :param object metadata: Any object that is valid metadata for the table's
            schema.
        :return: The ID of the newly added site.
        :rtype: int
        """
        metadata = self.metadata_schema.validate_and_encode_row(metadata)
        return self.ll_table.add_row(position, ancestral_state, metadata)

    def set_columns(
        self,
        position=None,
        ancestral_state=None,
        ancestral_state_offset=None,
        metadata=None,
        metadata_offset=None,
        metadata_schema=None,
    ):
        """
        Sets the values for each column in this :class:`SiteTable` using the
        values in the specified arrays. Overwrites any data currently stored in
        the table.

        The ``position``, ``ancestral_state`` and ``ancestral_state_offset``
        parameters are mandatory, and must be 1D numpy arrays. The length
        of the ``position`` array determines the number of rows in the table.
        The ``ancestral_state`` and ``ancestral_state_offset`` parameters must
        be supplied together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_text_columns` for more information). The
        ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_binary_columns` for more information) and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare
        metadata.

        :param position: The position of each site in genome coordinates.
        :type position: numpy.ndarray, dtype=np.float64
        :param ancestral_state: The flattened ancestral_state array. Required.
        :type ancestral_state: numpy.ndarray, dtype=np.int8
        :param ancestral_state_offset: The offsets into the ``ancestral_state``
            array.
        :type ancestral_state_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each site.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata_schema: The encoded metadata schema.
        """
        self._check_required_args(
            position=position,
            ancestral_state=ancestral_state,
            ancestral_state_offset=ancestral_state_offset,
        )
        self.ll_table.set_columns(
            dict(
                position=position,
                ancestral_state=ancestral_state,
                ancestral_state_offset=ancestral_state_offset,
                metadata=metadata,
                metadata_offset=metadata_offset,
                metadata_schema=metadata_schema,
            )
        )

    def append_columns(
        self,
        position,
        ancestral_state,
        ancestral_state_offset,
        metadata=None,
        metadata_offset=None,
    ):
        """
        Appends the specified arrays to the end of the columns of this
        :class:`SiteTable`. This allows many new rows to be added at once.

        The ``position``, ``ancestral_state`` and ``ancestral_state_offset``
        parameters are mandatory, and must be 1D numpy arrays. The length
        of the ``position`` array determines the number of additional rows
        to add to the table.
        The ``ancestral_state`` and ``ancestral_state_offset`` parameters must
        be supplied together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_text_columns` for more information). The
        ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_binary_columns` for more information) and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare
        metadata.

        :param position: The position of each site in genome coordinates.
        :type position: numpy.ndarray, dtype=np.float64
        :param ancestral_state: The flattened ancestral_state array. Required.
        :type ancestral_state: numpy.ndarray, dtype=np.int8
        :param ancestral_state_offset: The offsets into the ``ancestral_state``
            array.
        :type ancestral_state_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each site.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        """
        self.ll_table.append_columns(
            dict(
                position=position,
                ancestral_state=ancestral_state,
                ancestral_state_offset=ancestral_state_offset,
                metadata=metadata,
                metadata_offset=metadata_offset,
            )
        )

    def packset_ancestral_state(self, ancestral_states):
        """
        Packs the specified list of ancestral_state values and updates the
        ``ancestral_state`` and ``ancestral_state_offset`` columns. The length
        of the ancestral_states array must be equal to the number of rows in
        the table.

        :param list(str) ancestral_states: A list of string ancestral state
            values.
        """
        packed, offset = util.pack_strings(ancestral_states)
        d = self.asdict()
        d["ancestral_state"] = packed
        d["ancestral_state_offset"] = offset
        self.set_columns(**d)
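
    # Example (illustrative sketch): set the ancestral state of every site in
    # one call; the list length must equal the number of rows.
    #
    #     t.packset_ancestral_state(["A", "C", "T"])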


class MutationTable(BaseTable, MetadataMixin):
    """
    A table defining the mutations in a tree sequence. See the
    :ref:`definitions <sec_mutation_table_definition>` for details on the columns
    in this table and the
    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>` section
    for the properties needed for a mutation table to be a part of a valid tree
    sequence.

    :warning: The numpy arrays returned by table attribute accesses are **copies**
        of the underlying data. In particular, this means that you cannot edit
        the values in the columns by updating the attribute arrays.

        **NOTE:** this behaviour may change in future.

    :ivar site: The array of site IDs.
    :vartype site: numpy.ndarray, dtype=np.int32
    :ivar node: The array of node IDs.
    :vartype node: numpy.ndarray, dtype=np.int32
    :ivar time: The array of time values.
    :vartype time: numpy.ndarray, dtype=np.float64
    :ivar derived_state: The flattened array of derived state strings. See
        :ref:`sec_tables_api_text_columns` for more details.
    :vartype derived_state: numpy.ndarray, dtype=np.int8
    :ivar derived_state_offset: The offsets of rows in the derived_state array.
        See :ref:`sec_tables_api_text_columns` for more details.
    :vartype derived_state_offset: numpy.ndarray, dtype=np.uint32
    :ivar parent: The array of parent mutation IDs.
    :vartype parent: numpy.ndarray, dtype=np.int32
    :ivar metadata: The flattened array of binary metadata values. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata: numpy.ndarray, dtype=np.int8
    :ivar metadata_offset: The array of offsets into the metadata column. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata_schema: The metadata schema for this table's metadata column
    :vartype metadata_schema: tskit.MetadataSchema
    """

    column_names = [
        "site",
        "node",
        "time",
        "derived_state",
        "derived_state_offset",
        "parent",
        "metadata",
        "metadata_offset",
    ]

    def __init__(self, max_rows_increment=0, ll_table=None):
        if ll_table is None:
            ll_table = _tskit.MutationTable(max_rows_increment=max_rows_increment)
        super().__init__(ll_table, MutationTableRow)

    def _text_header_and_rows(self, limit=None):
        site = self.site
        node = self.node
        parent = self.parent
        time = self.time
        derived_state = util.unpack_strings(
            self.derived_state, self.derived_state_offset
        )
        metadata = util.unpack_bytes(self.metadata, self.metadata_offset)
        headers = ("id", "site", "node", "time", "derived_state", "parent", "metadata")
        rows = []
        if limit is None or self.num_rows <= limit:
            indexes = range(self.num_rows)
        else:
            indexes = itertools.chain(
                range(limit // 2),
                [-1],
                range(self.num_rows - (limit - (limit // 2)), self.num_rows),
            )
        for j in indexes:
            if j == -1:
                rows.append(f"__skipped__{self.num_rows-limit}")
            else:
                md = base64.b64encode(metadata[j]).decode("utf8")
                rows.append(
                    "{}\t{}\t{}\t{}\t{}\t{}\t{}".format(
                        j, site[j], node[j], time[j], derived_state[j], parent[j], md
                    ).split("\t")
                )
        return headers, rows

    def add_row(self, site, node, derived_state, parent=-1, metadata=None, time=None):
        """
        Adds a new row to this :class:`MutationTable` and returns the ID of the
        corresponding mutation. Metadata, if specified, will be validated and
        encoded according to the table's
        :attr:`metadata_schema<tskit.MutationTable.metadata_schema>`.

        :param int site: The ID of the site that this mutation occurs at.
        :param int node: The ID of the first node inheriting this mutation.
        :param str derived_state: The state of the site at this mutation's node.
        :param int parent: The ID of the parent mutation. If not specified,
            defaults to :data:`tskit.NULL`.
        :param object metadata: Any object that is valid metadata for the table's
            schema.
        :param float time: The occurrence time for the new mutation. If not
            specified, defaults to ``UNKNOWN_TIME``, indicating the time is
            unknown.
        :return: The ID of the newly added mutation.
        :rtype: int
        """
        metadata = self.metadata_schema.validate_and_encode_row(metadata)
        return self.ll_table.add_row(
            site,
            node,
            derived_state,
            parent,
            metadata,
            UNKNOWN_TIME if time is None else time,
        )
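
    # Example (illustrative): mutation times default to tskit.UNKNOWN_TIME,
    # which compares unequal to everything (including itself), so test it with
    # tskit.is_unknown_time() rather than ==.
    #
    #     t = tskit.MutationTable()
    #     mid = t.add_row(site=0, node=0, derived_state="T")
    #     tskit.is_unknown_time(t[mid].time)   # -> True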

    def set_columns(
        self,
        site=None,
        node=None,
        time=None,
        derived_state=None,
        derived_state_offset=None,
        parent=None,
        metadata=None,
        metadata_offset=None,
        metadata_schema=None,
    ):
        """
        Sets the values for each column in this :class:`MutationTable` using the
        values in the specified arrays. Overwrites any data currently stored in
        the table.

        The ``site``, ``node``, ``derived_state`` and ``derived_state_offset``
        parameters are mandatory, and must be 1D numpy arrays. The ``site`` and
        ``node`` (also ``parent`` and ``time``, if supplied) arrays must be of
        equal length, and determine the number of rows in the table.
        The ``derived_state`` and ``derived_state_offset`` parameters must be
        supplied together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_text_columns` for more information). The
        ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_binary_columns` for more information) and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare
        metadata.

        :param site: The ID of the site each mutation occurs at.
        :type site: numpy.ndarray, dtype=np.int32
        :param node: The ID of the node each mutation is associated with.
        :type node: numpy.ndarray, dtype=np.int32
        :param time: The time values for each mutation.
        :type time: numpy.ndarray, dtype=np.float64
        :param derived_state: The flattened derived_state array. Required.
        :type derived_state: numpy.ndarray, dtype=np.int8
        :param derived_state_offset: The offsets into the ``derived_state`` array.
        :type derived_state_offset: numpy.ndarray, dtype=np.uint32.
        :param parent: The ID of the parent mutation for each mutation.
        :type parent: numpy.ndarray, dtype=np.int32
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each mutation.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata_schema: The encoded metadata schema.
        """
        self._check_required_args(
            site=site,
            node=node,
            derived_state=derived_state,
            derived_state_offset=derived_state_offset,
        )
        self.ll_table.set_columns(
            dict(
                site=site,
                node=node,
                parent=parent,
                time=time,
                derived_state=derived_state,
                derived_state_offset=derived_state_offset,
                metadata=metadata,
                metadata_offset=metadata_offset,
                metadata_schema=metadata_schema,
            )
        )
[docs] def append_columns( self, site, node, derived_state, derived_state_offset, parent=None, time=None, metadata=None, metadata_offset=None, ): """ Appends the specified arrays to the end of the columns of this :class:`MutationTable`. This allows many new rows to be added at once. The ``site``, ``node``, ``derived_state`` and ``derived_state_offset`` parameters are mandatory, and must be 1D numpy arrays. The ``site`` and ``node`` (also ``time`` and ``parent``, if supplied) arrays must be of equal length, and determine the number of additional rows to add to the table. The ``derived_state`` and ``derived_state_offset`` parameters must be supplied together, and meet the requirements for :ref:`sec_encoding_ragged_columns` (see :ref:`sec_tables_api_text_columns` for more information). The ``metadata`` and ``metadata_offset`` parameters must be supplied together, and meet the requirements for :ref:`sec_encoding_ragged_columns` (see :ref:`sec_tables_api_binary_columns` for more information) and :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata. :param site: The ID of the site each mutation occurs at. :type site: numpy.ndarray, dtype=np.int32 :param node: The ID of the node each mutation is associated with. :type node: numpy.ndarray, dtype=np.int32 :param time: The time values for each mutation. :type time: numpy.ndarray, dtype=np.float64 :param derived_state: The flattened derived_state array. Required. :type derived_state: numpy.ndarray, dtype=np.int8 :param derived_state_offset: The offsets into the ``derived_state`` array. :type derived_state_offset: numpy.ndarray, dtype=np.uint32. :param parent: The ID of the parent mutation for each mutation. :type parent: numpy.ndarray, dtype=np.int32 :param metadata: The flattened metadata array. Must be specified along with ``metadata_offset``. If not specified or None, an empty metadata value is stored for each node. :type metadata: numpy.ndarray, dtype=np.int8 :param metadata_offset: The offsets into the ``metadata`` array. :type metadata_offset: numpy.ndarray, dtype=np.uint32. """ self.ll_table.append_columns( dict( site=site, node=node, time=time, parent=parent, derived_state=derived_state, derived_state_offset=derived_state_offset, metadata=metadata, metadata_offset=metadata_offset, ) )
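# A sketch of bulk-loading mutations via the columns API, using
# ``tskit.pack_strings`` to build the ragged ``derived_state`` columns.
# Illustrative only: assumes a hypothetical ``tables`` collection that
# already contains sites 0 and 1 and node 0.
#
#     import numpy as np
#     import tskit
#
#     derived_state, derived_state_offset = tskit.pack_strings(["T", "G"])
#     tables.mutations.set_columns(
#         site=np.array([0, 1], dtype=np.int32),
#         node=np.array([0, 0], dtype=np.int32),
#         derived_state=derived_state,
#         derived_state_offset=derived_state_offset,
#     )
#
# The same arrays can be passed to ``append_columns`` to add rows without
# overwriting the existing table contents.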
[docs] def packset_derived_state(self, derived_states): """ Packs the specified list of derived_state values and updates the ``derived_state`` and ``derived_state_offset`` columns. The length of the derived_states array must be equal to the number of rows in the table. :param list(str) derived_states: A list of string derived state values. """ packed, offset = util.pack_strings(derived_states) d = self.asdict() d["derived_state"] = packed d["derived_state_offset"] = offset self.set_columns(**d)
[docs]class PopulationTable(BaseTable, MetadataMixin): """ A table defining the populations referred to in a tree sequence. The PopulationTable stores metadata for populations that may be referred to in the NodeTable and MigrationTable. Note that although nodes may be associated with populations, this association is stored in the :class:`NodeTable`: only metadata on each population is stored in the population table. :warning: The numpy arrays returned by table attribute accesses are **copies** of the underlying data. In particular, this means that you cannot edit the values in the columns by updating the attribute arrays. **NOTE:** this behaviour may change in future. :ivar metadata: The flattened array of binary metadata values. See :ref:`sec_tables_api_binary_columns` for more details. :vartype metadata: numpy.ndarray, dtype=np.int8 :ivar metadata_offset: The array of offsets into the metadata column. See :ref:`sec_tables_api_binary_columns` for more details. :vartype metadata_offset: numpy.ndarray, dtype=np.uint32 :ivar metadata_schema: The metadata schema for this table's metadata column :vartype metadata_schema: tskit.MetadataSchema """ column_names = ["metadata", "metadata_offset"] def __init__(self, max_rows_increment=0, ll_table=None): if ll_table is None: ll_table = _tskit.PopulationTable(max_rows_increment=max_rows_increment) super().__init__(ll_table, PopulationTableRow)
[docs] def add_row(self, metadata=None): """ Adds a new row to this :class:`PopulationTable` and returns the ID of the corresponding population. Metadata, if specified, will be validated and encoded according to the table's :attr:`metadata_schema<tskit.PopulationTable.metadata_schema>`. :param object metadata: Any object that is valid metadata for the table's schema. :return: The ID of the newly added population. :rtype: int """ metadata = self.metadata_schema.validate_and_encode_row(metadata) return self.ll_table.add_row(metadata=metadata)
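# A sketch of adding a population with structured metadata, assuming the
# JSON metadata codec; the schema and payload are illustrative:
#
#     import tskit
#
#     tables = tskit.TableCollection(sequence_length=1)
#     tables.populations.metadata_schema = tskit.MetadataSchema({"codec": "json"})
#     pop = tables.populations.add_row(metadata={"name": "pop0", "size": 100})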
def _text_header_and_rows(self, limit=None): metadata = util.unpack_bytes(self.metadata, self.metadata_offset) headers = ("id", "metadata") rows = [] if limit is None or self.num_rows <= limit: indexes = range(self.num_rows) else: indexes = itertools.chain( range(limit // 2), [-1], range(self.num_rows - (limit - (limit // 2)), self.num_rows), ) for j in indexes: if j == -1: rows.append(f"__skipped__{self.num_rows-limit}") else: md = base64.b64encode(metadata[j]).decode("utf8") rows.append((str(j), str(md))) return headers, rows
[docs] def set_columns(self, metadata=None, metadata_offset=None, metadata_schema=None): """ Sets the values for each column in this :class:`PopulationTable` using the values in the specified arrays. Overwrites any data currently stored in the table. The ``metadata`` and ``metadata_offset`` parameters must be supplied together, and meet the requirements for :ref:`sec_encoding_ragged_columns` (see :ref:`sec_tables_api_binary_columns` for more information) and :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata. :param metadata: The flattened metadata array. Must be specified along with ``metadata_offset``. If not specified or None, an empty metadata value is stored for each population. :type metadata: numpy.ndarray, dtype=np.int8 :param metadata_offset: The offsets into the ``metadata`` array. :type metadata_offset: numpy.ndarray, dtype=np.uint32. :param metadata_schema: The encoded metadata schema. """ self.ll_table.set_columns( dict( metadata=metadata, metadata_offset=metadata_offset, metadata_schema=metadata_schema, ) )
[docs] def append_columns(self, metadata=None, metadata_offset=None): """ Appends the specified arrays to the end of the columns of this :class:`PopulationTable`. This allows many new rows to be added at once. The ``metadata`` and ``metadata_offset`` parameters must be supplied together, and meet the requirements for :ref:`sec_encoding_ragged_columns` (see :ref:`sec_tables_api_binary_columns` for more information) and :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata. :param metadata: The flattened metadata array. Must be specified along with ``metadata_offset``. If not specified or None, an empty metadata value is stored for each population. :type metadata: numpy.ndarray, dtype=np.int8 :param metadata_offset: The offsets into the ``metadata`` array. :type metadata_offset: numpy.ndarray, dtype=np.uint32. """ self.ll_table.append_columns( dict(metadata=metadata, metadata_offset=metadata_offset) )
[docs]class ProvenanceTable(BaseTable): """ A table recording the provenance (i.e., history) of this table, so that the origin of the underlying data and sequence of subsequent operations can be traced. Each row contains a "record" string (recommended format: JSON) and a timestamp. .. todo:: The format of the `record` field will be more precisely specified in the future. :ivar record: The flattened array containing the record strings. See :ref:`sec_tables_api_text_columns` for more details. :vartype record: numpy.ndarray, dtype=np.int8 :ivar record_offset: The array of offsets into the record column. See :ref:`sec_tables_api_text_columns` for more details. :vartype record_offset: numpy.ndarray, dtype=np.uint32 :ivar timestamp: The flattened array containing the timestamp strings. See :ref:`sec_tables_api_text_columns` for more details. :vartype timestamp: numpy.ndarray, dtype=np.int8 :ivar timestamp_offset: The array of offsets into the timestamp column. See :ref:`sec_tables_api_text_columns` for more details. :vartype timestamp_offset: numpy.ndarray, dtype=np.uint32 """ column_names = ["record", "record_offset", "timestamp", "timestamp_offset"] def __init__(self, max_rows_increment=0, ll_table=None): if ll_table is None: ll_table = _tskit.ProvenanceTable(max_rows_increment=max_rows_increment) super().__init__(ll_table, ProvenanceTableRow)
[docs] def equals(self, other, ignore_timestamps=False): """ Returns True if `self` and `other` are equal. By default, two provenance tables are considered equal if their columns are byte-for-byte identical. :param other: Another provenance table instance :param bool ignore_timestamps: If True, exclude the timestamp column from the comparison. :return: True if other is equal to this provenance table; False otherwise. :rtype: bool """ ret = False if type(other) is type(self): ret = bool( self.ll_table.equals( other.ll_table, ignore_timestamps=ignore_timestamps ) ) return ret
[docs] def add_row(self, record, timestamp=None): """ Adds a new row to this ProvenanceTable consisting of the specified record and timestamp. If timestamp is not specified, it is automatically generated from the current time. :param str record: A provenance record, describing the parameters and environment used to generate the current set of tables. :param str timestamp: A string timestamp. This should be in ISO8601 form. """ if timestamp is None: timestamp = datetime.datetime.now().isoformat() # Note that the order of the positional arguments has been reversed # from the low-level module, which is a bit confusing. However, we # want the default behaviour here to be to add a row to the table at # the current time as simply as possible. return self.ll_table.add_row(record=record, timestamp=timestamp)
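# A sketch of recording provenance; the record contents are hypothetical
# and the timestamp is generated automatically:
#
#     import json
#
#     record = json.dumps({"program": "my_script", "parameters": {"seed": 42}})
#     tables.provenances.add_row(record=record)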
[docs] def set_columns( self, timestamp=None, timestamp_offset=None, record=None, record_offset=None ): """ Sets the values for each column in this :class:`ProvenanceTable` using the values in the specified arrays. Overwrites any data currently stored in the table. The ``timestamp`` and ``timestamp_offset`` parameters must be supplied together, and meet the requirements for :ref:`sec_encoding_ragged_columns` (see :ref:`sec_tables_api_text_columns` for more information). Likewise for the ``record`` and ``record_offset`` columns. :param timestamp: The flattened timestamp array. Must be specified along with ``timestamp_offset``. If not specified or None, an empty timestamp value is stored for each row. :type timestamp: numpy.ndarray, dtype=np.int8 :param timestamp_offset: The offsets into the ``timestamp`` array. :type timestamp_offset: numpy.ndarray, dtype=np.uint32. :param record: The flattened record array. Must be specified along with ``record_offset``. If not specified or None, an empty record value is stored for each row. :type record: numpy.ndarray, dtype=np.int8 :param record_offset: The offsets into the ``record`` array. :type record_offset: numpy.ndarray, dtype=np.uint32. """ self.ll_table.set_columns( dict( timestamp=timestamp, timestamp_offset=timestamp_offset, record=record, record_offset=record_offset, ) )
[docs] def append_columns( self, timestamp=None, timestamp_offset=None, record=None, record_offset=None ): """ Appends the specified arrays to the end of the columns of this :class:`ProvenanceTable`. This allows many new rows to be added at once. The ``timestamp`` and ``timestamp_offset`` parameters must be supplied together, and meet the requirements for :ref:`sec_encoding_ragged_columns` (see :ref:`sec_tables_api_text_columns` for more information). Likewise for the ``record`` and ``record_offset`` columns. :param timestamp: The flattened timestamp array. Must be specified along with ``timestamp_offset``. If not specified or None, an empty timestamp value is stored for each row. :type timestamp: numpy.ndarray, dtype=np.int8 :param timestamp_offset: The offsets into the ``timestamp`` array. :type timestamp_offset: numpy.ndarray, dtype=np.uint32. :param record: The flattened record array. Must be specified along with ``record_offset``. If not specified or None, an empty record value is stored for each row. :type record: numpy.ndarray, dtype=np.int8 :param record_offset: The offsets into the ``record`` array. :type record_offset: numpy.ndarray, dtype=np.uint32. """ self.ll_table.append_columns( dict( timestamp=timestamp, timestamp_offset=timestamp_offset, record=record, record_offset=record_offset, ) )
def _text_header_and_rows(self, limit=None): timestamp = util.unpack_strings(self.timestamp, self.timestamp_offset) record = util.unpack_strings(self.record, self.record_offset) headers = ("id", "timestamp", "record") rows = [] if limit is None or self.num_rows <= limit: indexes = range(self.num_rows) else: indexes = itertools.chain( range(limit // 2), [-1], range(self.num_rows - (limit - (limit // 2)), self.num_rows), ) for j in indexes: if j == -1: rows.append(f"__skipped__{self.num_rows-limit}") else: rows.append((str(j), str(timestamp[j]), str(record[j]))) return headers, rows
[docs] def packset_record(self, records): """ Packs the specified list of record values and updates the ``record`` and ``record_offset`` columns. The length of the records array must be equal to the number of rows in the table. :param list(str) records: A list of string record values. """ packed, offset = util.pack_strings(records) d = self.asdict() d["record"] = packed d["record_offset"] = offset self.set_columns(**d)
[docs] def packset_timestamp(self, timestamps): """ Packs the specified list of timestamp values and updates the ``timestamp`` and ``timestamp_offset`` columns. The length of the timestamps array must be equal to the number of rows in the table. :param list(str) timestamps: A list of string timestamp values. """ packed, offset = util.pack_strings(timestamps) d = self.asdict() d["timestamp"] = packed d["timestamp_offset"] = offset self.set_columns(**d)
[docs]class TableCollection: """ A collection of mutable tables defining a tree sequence. See the :ref:`sec_data_model` section for definitions of the various tables and how together they define a :class:`TreeSequence`. Arbitrary data can be stored in a TableCollection, but there are certain :ref:`requirements <sec_valid_tree_sequence_requirements>` that must be satisfied for these tables to be interpreted as a tree sequence. To obtain an immutable :class:`TreeSequence` instance corresponding to the current state of a ``TableCollection``, please use the :meth:`.tree_sequence` method. :ivar individuals: The individual table. :vartype individuals: IndividualTable :ivar nodes: The node table. :vartype nodes: NodeTable :ivar edges: The edge table. :vartype edges: EdgeTable :ivar migrations: The migration table. :vartype migrations: MigrationTable :ivar sites: The site table. :vartype sites: SiteTable :ivar mutations: The mutation table. :vartype mutations: MutationTable :ivar populations: The population table. :vartype populations: PopulationTable :ivar provenances: The provenance table. :vartype provenances: ProvenanceTable :ivar indexes: The edge insertion and removal indexes. :ivar sequence_length: The sequence length defining the coordinate space. :vartype sequence_length: float :ivar file_uuid: The UUID for the file this TableCollection is derived from, or None if not derived from a file. :vartype file_uuid: str """ def __init__(self, sequence_length=0): self._ll_tables = _tskit.TableCollection(sequence_length) @property def individuals(self): return IndividualTable(ll_table=self._ll_tables.individuals) @property def nodes(self): return NodeTable(ll_table=self._ll_tables.nodes) @property def edges(self): return EdgeTable(ll_table=self._ll_tables.edges) @property def migrations(self): return MigrationTable(ll_table=self._ll_tables.migrations) @property def sites(self): return SiteTable(ll_table=self._ll_tables.sites) @property def mutations(self): return MutationTable(ll_table=self._ll_tables.mutations) @property def populations(self): return PopulationTable(ll_table=self._ll_tables.populations) @property def provenances(self): return ProvenanceTable(ll_table=self._ll_tables.provenances) @property def indexes(self): indexes = self._ll_tables.indexes return TableCollectionIndexes(**indexes) @indexes.setter def indexes(self, indexes): self._ll_tables.indexes = indexes.asdict() @property def sequence_length(self): return self._ll_tables.sequence_length @sequence_length.setter def sequence_length(self, sequence_length): self._ll_tables.sequence_length = sequence_length @property def file_uuid(self): return self._ll_tables.file_uuid @property def metadata_schema(self) -> metadata.MetadataSchema: """ The :class:`tskit.MetadataSchema` for this TableCollection. """ return metadata.parse_metadata_schema(self._ll_tables.metadata_schema) @metadata_schema.setter def metadata_schema(self, schema: metadata.MetadataSchema) -> None: # Check the schema is a valid schema instance by roundtripping it. metadata.parse_metadata_schema(str(schema)) self._ll_tables.metadata_schema = str(schema) @property def metadata(self) -> Any: """ The decoded metadata for this TableCollection. 
""" return self.metadata_schema.decode_row(self._ll_tables.metadata) @metadata.setter def metadata(self, metadata: Any) -> None: self._ll_tables.metadata = self.metadata_schema.validate_and_encode_row( metadata ) @property def metadata_bytes(self) -> Any: """ The raw bytes of metadata for this TableCollection """ return self._ll_tables.metadata
[docs] def asdict(self): """ Returns a dictionary representation of this TableCollection. Note: the semantics of this method changed at tskit 0.1.0. Previously a map of table names to the tables themselves was returned. """ ret = { "encoding_version": (1, 2), "sequence_length": self.sequence_length, "metadata_schema": str(self.metadata_schema), "metadata": self.metadata_schema.encode_row(self.metadata), "individuals": self.individuals.asdict(), "nodes": self.nodes.asdict(), "edges": self.edges.asdict(), "migrations": self.migrations.asdict(), "sites": self.sites.asdict(), "mutations": self.mutations.asdict(), "populations": self.populations.asdict(), "provenances": self.provenances.asdict(), "indexes": self.indexes.asdict(), } return ret
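# A sketch of the dict round trip, using the ``fromdict`` classmethod
# defined later in this class (an empty collection is used purely for
# illustration):
#
#     import tskit
#
#     tables = tskit.TableCollection(sequence_length=1)
#     restored = tskit.TableCollection.fromdict(tables.asdict())
#     assert tables == restored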
@property def name_map(self): """ Returns a dictionary mapping table names to the corresponding table instances. For example, the returned dictionary will contain the key "edges" that maps to an :class:`.EdgeTable` instance. """ return { "edges": self.edges, "individuals": self.individuals, "migrations": self.migrations, "mutations": self.mutations, "nodes": self.nodes, "populations": self.populations, "provenances": self.provenances, "sites": self.sites, } @property def nbytes(self) -> int: """ Returns the total number of bytes required to store the data in this table collection. Note that this may not be equal to the actual memory footprint. """ return sum( ( 8, # sequence_length takes 8 bytes len(self.metadata_bytes), len(str(self.metadata_schema).encode()), self.indexes.nbytes, sum(table.nbytes for table in self.name_map.values()), ) ) def __banner(self, title): width = 60 line = "#" * width title_line = f"# {title}" title_line += " " * (width - len(title_line) - 1) title_line += "#" return line + "\n" + title_line + "\n" + line + "\n" def __str__(self): s = self.__banner("Individuals") s += str(self.individuals) + "\n" s += self.__banner("Nodes") s += str(self.nodes) + "\n" s += self.__banner("Edges") s += str(self.edges) + "\n" s += self.__banner("Sites") s += str(self.sites) + "\n" s += self.__banner("Mutations") s += str(self.mutations) + "\n" s += self.__banner("Migrations") s += str(self.migrations) + "\n" s += self.__banner("Populations") s += str(self.populations) + "\n" s += self.__banner("Provenances") s += str(self.provenances) return s
[docs] def equals( self, other, *, ignore_metadata=False, ignore_ts_metadata=False, ignore_provenance=False, ignore_timestamps=False, ): """ Returns True if `self` and `other` are equal. By default, two table collections are considered equal if their - ``sequence_length`` properties are identical; - top-level tree sequence metadata and metadata schemas are byte-wise identical; - constituent tables are byte-wise identical. Some of the requirements in this definition can be relaxed using the parameters, which can be used to remove certain parts of the data model from the comparison. Table indexes are not considered in the equality comparison. :param TableCollection other: Another table collection. :param bool ignore_metadata: If True *all* metadata and metadata schemas will be excluded from the comparison. This includes the top-level tree sequence and constituent table metadata (default=False). :param bool ignore_ts_metadata: If True the top-level tree sequence metadata and metadata schemas will be excluded from the comparison. If ``ignore_metadata`` is True, this parameter has no effect. :param bool ignore_provenance: If True the provenance tables are not included in the comparison. :param bool ignore_timestamps: If True the provenance timestamp column is ignored in the comparison. If ``ignore_provenance`` is True, this parameter has no effect. :return: True if other is equal to this table collection; False otherwise. :rtype: bool """ ret = False if type(other) is type(self): ret = bool( self._ll_tables.equals( other._ll_tables, ignore_metadata=bool(ignore_metadata), ignore_ts_metadata=bool(ignore_ts_metadata), ignore_provenance=bool(ignore_provenance), ignore_timestamps=bool(ignore_timestamps), ) ) return ret
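# A sketch of relaxed comparison: two collections differing only in
# provenance compare equal once provenance is excluded (values
# illustrative):
#
#     import tskit
#
#     t1 = tskit.TableCollection(sequence_length=1)
#     t2 = tskit.TableCollection(sequence_length=1)
#     t2.provenances.add_row(record="{}")
#     assert not t1.equals(t2)
#     assert t1.equals(t2, ignore_provenance=True)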
def __eq__(self, other): return self.equals(other) def __getstate__(self): return self.asdict() @classmethod def load(cls, file_or_path): file, local_file = util.convert_file_like_to_open_file(file_or_path, "rb") ll_tc = _tskit.TableCollection(1) try: ll_tc.load(file) finally: if local_file: file.close() tc = TableCollection(1) tc._ll_tables = ll_tc return tc
[docs] def dump(self, file_or_path): """ Writes the table collection to the specified path or file object. :param str file_or_path: The file object or path to write the TreeSequence to. """ file, local_file = util.convert_file_like_to_open_file(file_or_path, "wb") try: self._ll_tables.dump(file) finally: if local_file: file.close()
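# A sketch of a file round trip via ``dump`` and the ``load`` classmethod
# above; ``tables`` is assumed to be a valid table collection and
# "tables.trees" is a hypothetical path:
#
#     tables.dump("tables.trees")
#     loaded = tskit.TableCollection.load("tables.trees")
#     assert tables == loaded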
# Unpickle support def __setstate__(self, state): self.__init__(state["sequence_length"]) self.metadata_schema = tskit.parse_metadata_schema(state["metadata_schema"]) self.metadata = self.metadata_schema.decode_row(state["metadata"]) self.individuals.set_columns(**state["individuals"]) self.nodes.set_columns(**state["nodes"]) self.edges.set_columns(**state["edges"]) self.migrations.set_columns(**state["migrations"]) self.sites.set_columns(**state["sites"]) self.mutations.set_columns(**state["mutations"]) self.populations.set_columns(**state["populations"]) self.provenances.set_columns(**state["provenances"]) @classmethod def fromdict(cls, tables_dict): tables = cls(tables_dict["sequence_length"]) try: tables.metadata_schema = tskit.parse_metadata_schema( tables_dict["metadata_schema"] ) except KeyError: pass try: tables.metadata = tables.metadata_schema.decode_row(tables_dict["metadata"]) except KeyError: pass tables.individuals.set_columns(**tables_dict["individuals"]) tables.nodes.set_columns(**tables_dict["nodes"]) tables.edges.set_columns(**tables_dict["edges"]) tables.migrations.set_columns(**tables_dict["migrations"]) tables.sites.set_columns(**tables_dict["sites"]) tables.mutations.set_columns(**tables_dict["mutations"]) tables.populations.set_columns(**tables_dict["populations"]) tables.provenances.set_columns(**tables_dict["provenances"]) # Indexes must be last as otherwise the check for their consistency will fail try: tables.indexes = TableCollectionIndexes(**tables_dict["indexes"]) except KeyError: pass return tables
[docs] def copy(self): """ Returns a deep copy of this TableCollection. :return: A deep copy of this TableCollection. :rtype: .TableCollection """ return TableCollection.fromdict(self.asdict())
[docs] def tree_sequence(self): """ Returns a :class:`TreeSequence` instance with the structure defined by the tables in this :class:`TableCollection`. If the table collection is not in canonical form (i.e., does not meet sorting requirements) or cannot be interpreted as a tree sequence an exception is raised. The :meth:`.sort` method may be used to ensure that input sorting requirements are met. If the table collection does not have indexes they will be built. :return: A :class:`TreeSequence` instance reflecting the structures defined in this set of tables. :rtype: .TreeSequence """ if not self.has_index(): self.build_index() return tskit.TreeSequence.load_tables(self)
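# A sketch of building the smallest viable tree sequence from scratch:
# two sample nodes coalescing in a single root (values illustrative):
#
#     import tskit
#
#     tables = tskit.TableCollection(sequence_length=10)
#     a = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
#     b = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
#     root = tables.nodes.add_row(time=1)
#     tables.edges.add_row(left=0, right=10, parent=root, child=a)
#     tables.edges.add_row(left=0, right=10, parent=root, child=b)
#     tables.sort()                # satisfy the sorting requirements
#     ts = tables.tree_sequence()  # builds the index if necessary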
[docs] def simplify( self, samples=None, *, reduce_to_site_topology=False, filter_populations=True, filter_individuals=True, filter_sites=True, keep_unary=False, keep_input_roots=False, record_provenance=True, filter_zero_mutation_sites=None, # Deprecated alias for filter_sites ): """ Simplifies the tables in place to retain only the information necessary to reconstruct the tree sequence describing the given ``samples``. This will change the IDs of the nodes, so that the node ``samples[k]`` will have ID ``k`` in the result. The resulting NodeTable will have only the first ``len(samples)`` nodes marked as samples. The mapping from node IDs in the current set of tables to their equivalent values in the simplified tables is also returned as a numpy array. If an array ``a`` is returned by this function and ``u`` is the ID of a node in the input table, then ``a[u]`` is the ID of this node in the output table. For any node ``u`` that is not mapped into the output tables, this mapping will equal ``-1``. Tables operated on by this function must: be sorted (see :meth:`TableCollection.sort`), have children be born strictly after their parents, and the intervals on which any node is a child must be disjoint. Other than this the tables need not satisfy remaining requirements to specify a valid tree sequence (but the resulting tables will). This is identical to :meth:`TreeSequence.simplify` but acts *in place* to alter the data in this :class:`TableCollection`. Please see the :meth:`TreeSequence.simplify` method for a description of the remaining parameters. :param list[int] samples: A list of node IDs to retain as samples. If not specified or None, use all nodes marked with the IS_SAMPLE flag. :param bool reduce_to_site_topology: Whether to reduce the topology down to the trees that are present at sites. (default: False). :param bool filter_populations: If True, remove any populations that are not referenced by nodes after simplification; new population IDs are allocated sequentially from zero. If False, the population table will not be altered in any way. (Default: True) :param bool filter_individuals: If True, remove any individuals that are not referenced by nodes after simplification; new individual IDs are allocated sequentially from zero. If False, the individual table will not be altered in any way. (Default: True) :param bool filter_sites: If True, remove any sites that are not referenced by mutations after simplification; new site IDs are allocated sequentially from zero. If False, the site table will not be altered in any way. (Default: True) :param bool keep_unary: If True, any unary nodes (i.e. nodes with exactly one child) that exist on the path from samples to root will be preserved in the output. (Default: False) :param bool keep_input_roots: If True, insert edges from the MRCAs of the samples to the roots in the input trees. If False, no topology older than the MRCAs of the samples will be included. (Default: False) :param bool record_provenance: If True, record details of this call to simplify in the returned tree sequence's provenance information (Default: True). :param bool filter_zero_mutation_sites: Deprecated alias for ``filter_sites``. :return: A numpy array mapping node IDs in the input tables to their corresponding node IDs in the output tables. :rtype: numpy.ndarray (dtype=np.int32) """ if filter_zero_mutation_sites is not None: # Deprecated in msprime 0.6.1. 
warnings.warn( "filter_zero_mutation_sites is deprecated; use filter_sites instead", FutureWarning, ) filter_sites = filter_zero_mutation_sites if samples is None: flags = self.nodes.flags samples = np.where(np.bitwise_and(flags, _tskit.NODE_IS_SAMPLE) != 0)[ 0 ].astype(np.int32) else: samples = util.safe_np_int_cast(samples, np.int32) node_map = self._ll_tables.simplify( samples, filter_sites=filter_sites, filter_individuals=filter_individuals, filter_populations=filter_populations, reduce_to_site_topology=reduce_to_site_topology, keep_unary=keep_unary, keep_input_roots=keep_input_roots, ) if record_provenance: # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243 # TODO also make sure we convert all the arguments so that they are # definitely JSON encodable. parameters = {"command": "simplify", "TODO": "add simplify parameters"} self.provenances.add_row( record=json.dumps(provenance.get_provenance_dict(parameters)) ) return node_map
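# A sketch of using the returned node map after an in-place simplify,
# reusing the hypothetical ``tables``, ``a`` and ``b`` from the
# ``tree_sequence`` sketch above:
#
#     node_map = tables.simplify(samples=[b])
#     assert node_map[b] == 0    # b is node 0 in the simplified tables
#     assert node_map[a] == -1   # a was not retained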
def map_ancestors(self, *args, **kwargs): # A deprecated alias for link_ancestors() return self.link_ancestors(*args, **kwargs)
[docs] def sort(self, edge_start=0): """ Sorts the tables in place. This ensures that all tree sequence ordering requirements listed in the :ref:`sec_valid_tree_sequence_requirements` section are met, as long as each site has at most one mutation (see below). If the ``edge_start`` parameter is provided, this specifies the index in the edge table where sorting should start. Only rows with index greater than or equal to ``edge_start`` are sorted; rows before this index are not affected. This parameter is provided to allow for efficient sorting when the user knows that the edges up to a given index are already sorted. The individual, node, population and provenance tables are not affected by this method. Edges are sorted as follows: - time of parent, then - parent node ID, then - child node ID, then - left endpoint. Note that this sorting order exceeds the :ref:`edge sorting requirements <sec_edge_requirements>` for a valid tree sequence. For a valid tree sequence, we require that all edges for a given parent ID are adjacent, but we do not require that they be listed in sorted order. Sites are sorted by position, and sites with the same position retain their relative ordering. Mutations are sorted by site ID, and within the same site are sorted by time. Those with equal or unknown time retain their relative ordering. This does not currently rearrange tables so that mutations occur after their mutation parents, which is a requirement for valid tree sequences. :param int edge_start: The index in the edge table where sorting starts (default=0; must be <= len(edges)). """ self._ll_tables.sort(edge_start)
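# A sketch of what sorting fixes: edges deliberately added oldest-parent
# first violate the ordering requirements until ``sort`` is called
# (values illustrative):
#
#     import tskit
#
#     tables = tskit.TableCollection(sequence_length=10)
#     child = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
#     young = tables.nodes.add_row(time=1)
#     old = tables.nodes.add_row(time=2)
#     tables.edges.add_row(left=0, right=10, parent=old, child=young)
#     tables.edges.add_row(left=0, right=10, parent=young, child=child)
#     tables.sort()
#     assert tables.edges.parent[0] == young  # youngest parents sort first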
# TODO add provenance
[docs] def compute_mutation_parents(self): """ Modifies the tables in place, computing the ``parent`` column of the mutation table. For this to work, the node and edge tables must be valid, and the site and mutation tables must be sorted (see :meth:`TableCollection.sort`). This will produce an error if mutations are not sorted (i.e., if a mutation appears before its mutation parent) *unless* the two mutations occur on the same branch, in which case there is no way to detect the error. The ``parent`` of a given mutation is the ID of the next mutation encountered traversing the tree upwards from that mutation, or ``NULL`` if there is no such mutation. .. note:: This method does not check that all mutations result in a change of state, as required; see :ref:`sec_mutation_requirements`. """ self._ll_tables.compute_mutation_parents()
# TODO add provenance
[docs] def compute_mutation_times(self): """ Modifies the tables in place, computing valid values for the ``time`` column of the mutation table. For this to work, the node and edge tables must be valid, and the site and mutation tables must be sorted and indexed (see :meth:`TableCollection.sort` and :meth:`TableCollection.build_index`). For a single mutation on an edge at a site, the ``time`` assigned to a mutation by this method is the mid-point between the times of the nodes above and below the mutation. In the case where there is more than one mutation on an edge for a site, the times are evenly spread along the edge. For mutations that are above a root node, the time of the root node is assigned. The mutation table will be sorted if the new times mean that the original order is no longer valid. """ self._ll_tables.compute_mutation_times()
# TODO add provenance
[docs] def deduplicate_sites(self): """ Modifies the tables in place, removing entries in the site table with duplicate ``position`` (and keeping only the *first* entry for each site), and renumbering the ``site`` column of the mutation table appropriately. This requires the site table to be sorted by position. """ self._ll_tables.deduplicate_sites()
# TODO add provenance
[docs] def delete_sites(self, site_ids, record_provenance=True): """ Remove the specified sites entirely from the sites and mutations tables in this collection. This is identical to :meth:`TreeSequence.delete_sites` but acts *in place* to alter the data in this :class:`TableCollection`. :param list[int] site_ids: A list of site IDs specifying the sites to remove. :param bool record_provenance: If ``True``, add details of this operation to the provenance table in this TableCollection. (Default: ``True``). """ keep_sites = np.ones(len(self.sites), dtype=bool) site_ids = util.safe_np_int_cast(site_ids, np.int32) if np.any(site_ids < 0) or np.any(site_ids >= len(self.sites)): raise ValueError("Site ID out of bounds") keep_sites[site_ids] = 0 new_as, new_as_offset = keep_with_offset( keep_sites, self.sites.ancestral_state, self.sites.ancestral_state_offset ) new_md, new_md_offset = keep_with_offset( keep_sites, self.sites.metadata, self.sites.metadata_offset ) self.sites.set_columns( position=self.sites.position[keep_sites], ancestral_state=new_as, ancestral_state_offset=new_as_offset, metadata=new_md, metadata_offset=new_md_offset, ) # We also need to adjust the mutations table, as it references into sites keep_mutations = keep_sites[self.mutations.site] new_ds, new_ds_offset = keep_with_offset( keep_mutations, self.mutations.derived_state, self.mutations.derived_state_offset, ) new_md, new_md_offset = keep_with_offset( keep_mutations, self.mutations.metadata, self.mutations.metadata_offset ) # Site numbers will have changed site_map = np.cumsum(keep_sites, dtype=self.mutations.site.dtype) - 1 # Mutation numbers will change, so the parent references need altering mutation_map = np.cumsum(keep_mutations, dtype=self.mutations.parent.dtype) - 1 # Map parent == -1 to -1, and check this has worked (assumes tskit.NULL == -1) mutation_map = np.append(mutation_map, -1).astype(self.mutations.parent.dtype) assert mutation_map[tskit.NULL] == tskit.NULL self.mutations.set_columns( site=site_map[self.mutations.site[keep_mutations]], node=self.mutations.node[keep_mutations], time=self.mutations.time[keep_mutations], derived_state=new_ds, derived_state_offset=new_ds_offset, parent=mutation_map[self.mutations.parent[keep_mutations]], metadata=new_md, metadata_offset=new_md_offset, ) if record_provenance: # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243 parameters = {"command": "delete_sites", "TODO": "add parameters"} self.provenances.add_row( record=json.dumps(provenance.get_provenance_dict(parameters)) )
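# A sketch of site deletion and the resulting renumbering (positions are
# illustrative; no mutations are present in this toy example):
#
#     import tskit
#
#     tables = tskit.TableCollection(sequence_length=10)
#     for pos in [1.0, 4.0, 7.0]:
#         tables.sites.add_row(position=pos, ancestral_state="A")
#     tables.delete_sites([1])
#     assert list(tables.sites.position) == [1.0, 7.0]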
[docs] def delete_intervals(self, intervals, simplify=True, record_provenance=True): """ Delete all information from this set of tables which lies *within* the specified list of genomic intervals. This is identical to :meth:`TreeSequence.delete_intervals` but acts *in place* to alter the data in this :class:`TableCollection`. :param array_like intervals: A list of (start, end) pairs describing the genomic intervals to delete. Intervals must be non-overlapping and in increasing order. The list of intervals must be interpretable as a 2D numpy array with shape (N, 2), where N is the number of intervals. :param bool simplify: If True, run simplify on the tables so that nodes no longer used are discarded. (Default: True). :param bool record_provenance: If ``True``, add details of this operation to the provenance table in this TableCollection. (Default: ``True``). """ self.keep_intervals( util.negate_intervals(intervals, 0, self.sequence_length), simplify=simplify, record_provenance=False, ) if record_provenance: parameters = {"command": "delete_intervals", "TODO": "add parameters"} self.provenances.add_row( record=json.dumps(provenance.get_provenance_dict(parameters)) )
[docs] def keep_intervals(self, intervals, simplify=True, record_provenance=True): """ Delete all information from this set of tables which lies *outside* the specified list of genomic intervals. This is identical to :meth:`TreeSequence.keep_intervals` but acts *in place* to alter the data in this :class:`TableCollection`. :param array_like intervals: A list of (start, end) pairs describing the genomic intervals to keep. Intervals must be non-overlapping and in increasing order. The list of intervals must be interpretable as a 2D numpy array with shape (N, 2), where N is the number of intervals. :param bool simplify: If True, run simplify on the tables so that nodes no longer used are discarded. (Default: True). :param bool record_provenance: If ``True``, add details of this operation to the provenance table in this TableCollection. (Default: ``True``). """ intervals = util.intervals_to_np_array(intervals, 0, self.sequence_length) if len(self.migrations) > 0: raise ValueError("Migrations not supported by keep_ and delete_ intervals") edges = self.edges.copy() self.edges.clear() keep_sites = np.repeat(False, self.sites.num_rows) for s, e in intervals: curr_keep_sites = np.logical_and( self.sites.position >= s, self.sites.position < e ) keep_sites = np.logical_or(keep_sites, curr_keep_sites) keep_edges = np.logical_not( np.logical_or(edges.right <= s, edges.left >= e) ) self.edges.append_columns( left=np.fmax(s, edges.left[keep_edges]), right=np.fmin(e, edges.right[keep_edges]), parent=edges.parent[keep_edges], child=edges.child[keep_edges], ) self.delete_sites( np.where(np.logical_not(keep_sites))[0], record_provenance=False ) self.sort() if simplify: self.simplify(record_provenance=False) if record_provenance: parameters = {"command": "keep_intervals", "TODO": "add parameters"} self.provenances.add_row( record=json.dumps(provenance.get_provenance_dict(parameters)) )
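# A sketch of keep_intervals: edges are truncated to the kept interval
# while the coordinate system and sequence length are unchanged (values
# illustrative):
#
#     import tskit
#
#     tables = tskit.TableCollection(sequence_length=10)
#     a = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
#     b = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
#     root = tables.nodes.add_row(time=1)
#     tables.edges.add_row(left=0, right=10, parent=root, child=a)
#     tables.edges.add_row(left=0, right=10, parent=root, child=b)
#     tables.keep_intervals([[0, 5]])
#     assert all(tables.edges.right <= 5)
#     assert tables.sequence_length == 10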
def _check_trim_conditions(self): if self.migrations.num_rows > 0: raise ValueError("You cannot trim a tree sequence containing migrations") if self.edges.num_rows == 0: raise ValueError( "Trimming a tree sequence with no edges would reduce the sequence length" " to zero, which is not allowed" )
[docs] def ltrim(self, record_provenance=True): """ Reset the coordinate system used in these tables, changing the left and right genomic positions in the edge table such that the leftmost edge now starts at position 0. This is identical to :meth:`TreeSequence.ltrim` but acts *in place* to alter the data in this :class:`TableCollection`. :param bool record_provenance: If ``True``, add details of this operation to the provenance table in this TableCollection. (Default: ``True``). """ self._check_trim_conditions() leftmost = np.min(self.edges.left) self.delete_sites( np.where(self.sites.position < leftmost), record_provenance=False ) self.edges.set_columns( left=self.edges.left - leftmost, right=self.edges.right - leftmost, parent=self.edges.parent, child=self.edges.child, ) self.sites.set_columns( position=self.sites.position - leftmost, ancestral_state=self.sites.ancestral_state, ancestral_state_offset=self.sites.ancestral_state_offset, metadata=self.sites.metadata, metadata_offset=self.sites.metadata_offset, ) self.sequence_length = self.sequence_length - leftmost if record_provenance: # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243 parameters = { "command": "ltrim", } self.provenances.add_row( record=json.dumps(provenance.get_provenance_dict(parameters)) )
[docs] def rtrim(self, record_provenance=True): """ Reset the ``sequence_length`` property so that the sequence ends at the end of the last edge. This is identical to :meth:`TreeSequence.rtrim` but acts *in place* to alter the data in this :class:`TableCollection`. :param bool record_provenance: If ``True``, add details of this operation to the provenance table in this TableCollection. (Default: ``True``). """ self._check_trim_conditions() rightmost = np.max(self.edges.right) self.delete_sites( np.where(self.sites.position >= rightmost), record_provenance=False ) self.sequence_length = rightmost if record_provenance: # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243 parameters = { "command": "rtrim", } self.provenances.add_row( record=json.dumps(provenance.get_provenance_dict(parameters)) )
[docs] def trim(self, record_provenance=True): """ Trim away any empty regions on the right and left of the tree sequence encoded by these tables. This is identical to :meth:`TreeSequence.trim` but acts *in place* to alter the data in this :class:`TableCollection`. :param bool record_provenance: If ``True``, add details of this operation to the provenance table in this TableCollection. (Default: ``True``). """ self.rtrim(record_provenance=False) self.ltrim(record_provenance=False) if record_provenance: # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243 parameters = { "command": "trim", } self.provenances.add_row( record=json.dumps(provenance.get_provenance_dict(parameters)) )
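# A sketch of trim removing the empty flanks and shifting coordinates;
# here the edges span [2, 8) of a length-10 sequence (values illustrative):
#
#     import tskit
#
#     tables = tskit.TableCollection(sequence_length=10)
#     a = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
#     b = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
#     root = tables.nodes.add_row(time=1)
#     for child in [a, b]:
#         tables.edges.add_row(left=2, right=8, parent=root, child=child)
#     tables.trim()
#     assert tables.sequence_length == 6  # [0, 2) and [8, 10) removed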
[docs] def clear( self, clear_provenance=False, clear_metadata_schemas=False, clear_ts_metadata_and_schema=False, ): """ Remove all rows of the data tables, optionally remove provenance, metadata schemas and ts-level metadata. :param bool clear_provenance: If ``True``, remove all rows of the provenance table. (Default: ``False``). :param bool clear_metadata_schemas: If ``True``, clear the table metadata schemas. (Default: ``False``). :param bool clear_ts_metadata_and_schema: If ``True``, clear the tree-sequence level metadata and schema (Default: ``False``). """ self._ll_tables.clear( clear_provenance=clear_provenance, clear_metadata_schemas=clear_metadata_schemas, clear_ts_metadata_and_schema=clear_ts_metadata_and_schema, )
[docs] def has_index(self): """ Returns True if this TableCollection is indexed. """ return bool(self._ll_tables.has_index())
[docs] def build_index(self): """ Builds an index on this TableCollection. Any existing indexes are automatically dropped. """ self._ll_tables.build_index()
[docs] def drop_index(self): """ Drops any indexes present on this table collection. If the tables are not currently indexed this method has no effect. """ self._ll_tables.drop_index()
[docs] def subset(self, nodes, record_provenance=True): """ Modifies the tables in place to contain only the entries referring to the provided list of nodes, with nodes reordered according to the order they appear in the list. See :meth:`TreeSequence.subset` for a more detailed description. :param list nodes: The list of nodes for which to retain information. This may be a numpy array (or array-like) object (dtype=np.int32). :param bool record_provenance: Whether to record a provenance entry in the provenance table for this operation. """ nodes = util.safe_np_int_cast(nodes, np.int32) self._ll_tables.subset(nodes) if record_provenance: parameters = {"command": "subset", "nodes": nodes.tolist()} self.provenances.add_row( record=json.dumps(provenance.get_provenance_dict(parameters)) )
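# A sketch of subset reordering and filtering nodes; node 2 becomes node 0
# and the unlisted node 1 is dropped (times illustrative):
#
#     import tskit
#
#     tables = tskit.TableCollection(sequence_length=10)
#     for time in [0.0, 0.5, 1.0]:
#         tables.nodes.add_row(time=time)
#     tables.subset([2, 0])
#     assert list(tables.nodes.time) == [1.0, 0.0]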
[docs] def union( self, other, node_mapping, check_shared_equality=True, add_populations=True, record_provenance=True, ): """ Modifies the table collection in place by adding the non-shared portions of ``other`` to itself. To perform the node-wise union, the method relies on a ``node_mapping`` array that maps each node in ``other`` to its equivalent node in ``self``, or ``tskit.NULL`` if the node is exclusive to ``other``. See :meth:`TreeSequence.union` for a more detailed description. :param TableCollection other: Another table collection. :param list node_mapping: An array of node IDs that relate nodes in ``other`` to nodes in ``self``: the k-th element of ``node_mapping`` should be the index of the equivalent node in ``self``, or ``tskit.NULL`` if the node is not present in ``self`` (in which case it will be added to self). :param bool check_shared_equality: If True, the shared portions of the table collections will be checked for equality. :param bool add_populations: If True, nodes new to ``self`` will be assigned new population IDs. :param bool record_provenance: Whether to record a provenance entry in the provenance table for this operation. """ node_mapping = util.safe_np_int_cast(node_mapping, np.int32) self._ll_tables.union( other._ll_tables, node_mapping, check_shared_equality=check_shared_equality, add_populations=add_populations, ) if record_provenance: other_records = [prov.record for prov in other.provenances] other_timestamps = [prov.timestamp for prov in other.provenances] parameters = { "command": "union", "other": {"timestamp": other_timestamps, "record": other_records}, "node_mapping": node_mapping.tolist(), } self.provenances.add_row( record=json.dumps(provenance.get_provenance_dict(parameters)) )
def find_ibd(self, samples, max_time=None, min_length=None): max_time = sys.float_info.max if max_time is None else max_time min_length = 0 if min_length is None else min_length return self._ll_tables.find_ibd( samples, max_time=max_time, min_length=min_length )