fastexcel

  1from __future__ import annotations
  2
  3import sys
  4import typing
  5from typing import TYPE_CHECKING, Callable, Literal
  6
  7if sys.version_info < (3, 10):
  8    from typing_extensions import TypeAlias
  9else:
 10    from typing import TypeAlias
 11
 12if TYPE_CHECKING:
 13    import pandas as pd
 14    import polars as pl
 15    import pyarrow as pa
 16
 17from os.path import expanduser
 18from pathlib import Path
 19
 20try:
 21    import importlib.util
 22
 23    importlib.util.find_spec("pyarrow")
 24    _PYARROW_AVAILABLE = True
 25except ImportError:
 26    _PYARROW_AVAILABLE = False
 27
 28from ._fastexcel import (
 29    ArrowError,
 30    CalamineCellError,
 31    CalamineError,
 32    CannotRetrieveCellDataError,
 33    CellError,
 34    CellErrors,
 35    ColumnInfo,
 36    ColumnInfoNoDtype,
 37    ColumnNotFoundError,
 38    DefinedName,
 39    FastExcelError,
 40    InvalidParametersError,
 41    SheetNotFoundError,
 42    UnsupportedColumnTypeCombinationError,
 43    __version__,
 44    _ExcelReader,
 45    _ExcelSheet,
 46    _ExcelTable,
 47)
 48from ._fastexcel import read_excel as _read_excel
 49
 50DType = Literal["null", "int", "float", "string", "boolean", "datetime", "date", "duration"]
 51DTypeMap: TypeAlias = "dict[str | int, DType]"
 52ColumnNameFrom: TypeAlias = Literal["provided", "looked_up", "generated"]
 53DTypeFrom: TypeAlias = Literal[
 54    "provided_for_all", "provided_by_index", "provided_by_name", "guessed"
 55]
 56SheetVisible: TypeAlias = Literal["visible", "hidden", "veryhidden"]
 57
 58
 59class ExcelSheet:
 60    """A class representing a single sheet in an Excel File"""
 61
 62    def __init__(self, sheet: _ExcelSheet) -> None:
 63        self._sheet = sheet
 64
 65    @property
 66    def name(self) -> str:
 67        """The name of the sheet"""
 68        return self._sheet.name
 69
 70    @property
 71    def width(self) -> int:
 72        """The sheet's width"""
 73        return self._sheet.width
 74
 75    @property
 76    def height(self) -> int:
 77        """The sheet's height, with `skip_rows` and `nrows` applied"""
 78        return self._sheet.height
 79
 80    @property
 81    def total_height(self) -> int:
 82        """The sheet's total height"""
 83        return self._sheet.total_height
 84
 85    @property
 86    def selected_columns(self) -> list[ColumnInfo]:
 87        """The sheet's selected columns"""
 88        return self._sheet.selected_columns
 89
 90    def available_columns(self) -> list[ColumnInfo]:
 91        """The columns available for the given sheet"""
 92        return self._sheet.available_columns()
 93
 94    @property
 95    def specified_dtypes(self) -> DTypeMap | None:
 96        """The dtypes specified for the sheet"""
 97        return self._sheet.specified_dtypes
 98
 99    @property
100    def visible(self) -> SheetVisible:
101        """The visibility of the sheet"""
102        return self._sheet.visible
103
104    def to_arrow(self) -> "pa.RecordBatch":
105        """Converts the sheet to a pyarrow `RecordBatch`
106
107        Requires the `pyarrow` extra to be installed.
108        """
109        if not _PYARROW_AVAILABLE:
110            raise ImportError(
111                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
112            )
113        return self._sheet.to_arrow()
114
115    def to_arrow_with_errors(self) -> "tuple[pa.RecordBatch, CellErrors | None]":
116        """Converts the sheet to a pyarrow `RecordBatch` with error information.
117
118        Stores the positions of any values that cannot be parsed as the specified type and were
119        therefore converted to None.
120
121        Requires the `pyarrow` extra to be installed.
122        """
123        if not _PYARROW_AVAILABLE:
124            raise ImportError(
125                "pyarrow is required for to_arrow_with_errors(). Install with: pip install 'fastexcel[pyarrow]'"  # noqa: E501
126            )
127        rb, cell_errors = self._sheet.to_arrow_with_errors()
128        if not cell_errors.errors:
129            return (rb, None)
130        return (rb, cell_errors)
131
132    def to_pandas(self) -> "pd.DataFrame":
133        """Converts the sheet to a Pandas `DataFrame`.
134
135        Requires the `pandas` extra to be installed.
136        """
137        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
138        # which we don't implement. Using pyarrow conversion for now.
139        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
140        return self.to_arrow().to_pandas()
141
142    def to_polars(self) -> "pl.DataFrame":
143        """Converts the sheet to a Polars `DataFrame`.
144
145        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
146        Requires the `polars` extra to be installed.
147        """
148        import polars as pl
149
150        return pl.DataFrame(self)
151
152    def __arrow_c_schema__(self) -> object:
153        """Export the schema as an `ArrowSchema` `PyCapsule`.
154
155        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export
156
157        The Arrow PyCapsule Interface enables zero-copy data exchange with
158        Arrow-compatible libraries without requiring PyArrow as a dependency.
159        """
160        return self._sheet.__arrow_c_schema__()
161
162    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
163        """Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.
164
165        The optional `requested_schema` parameter allows for potential schema conversion.
166
167        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export
168
169        The Arrow PyCapsule Interface enables zero-copy data exchange with
170        Arrow-compatible libraries without requiring PyArrow as a dependency.
171        """
172        return self._sheet.__arrow_c_array__(requested_schema)
173
174    def __repr__(self) -> str:
175        return self._sheet.__repr__()
176
177
178class ExcelTable:
179    """A class representing a single table in an Excel file"""
180
181    def __init__(self, table: _ExcelTable) -> None:
182        self._table = table
183
184    @property
185    def name(self) -> str:
186        """The name of the table"""
187        return self._table.name
188
189    @property
190    def sheet_name(self) -> str:
191        """The name of the sheet this table belongs to"""
192        return self._table.sheet_name
193
194    @property
195    def width(self) -> int:
196        """The table's width"""
197        return self._table.width
198
199    @property
200    def height(self) -> int:
201        """The table's height"""
202        return self._table.height
203
204    @property
205    def total_height(self) -> int:
206        """The table's total height"""
207        return self._table.total_height
208
209    @property
210    def offset(self) -> int:
211        """The table's offset before data starts"""
212        return self._table.offset
213
214    @property
215    def selected_columns(self) -> list[ColumnInfo]:
216        """The table's selected columns"""
217        return self._table.selected_columns
218
219    def available_columns(self) -> list[ColumnInfo]:
220        """The columns available for the given table"""
221        return self._table.available_columns()
222
223    @property
224    def specified_dtypes(self) -> DTypeMap | None:
225        """The dtypes specified for the table"""
226        return self._table.specified_dtypes
227
228    def to_arrow(self) -> "pa.RecordBatch":
229        """Converts the table to a pyarrow `RecordBatch`
230
231        Requires the `pyarrow` extra to be installed.
232        """
233        if not _PYARROW_AVAILABLE:
234            raise ImportError(
235                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
236            )
237        return self._table.to_arrow()
238
239    def to_pandas(self) -> "pd.DataFrame":
240        """Converts the table to a Pandas `DataFrame`.
241
242        Requires the `pandas` extra to be installed.
243        """
244        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
245        # which we don't implement. Using pyarrow conversion for now.
246        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
247        return self.to_arrow().to_pandas()
248
249    def to_polars(self) -> "pl.DataFrame":
250        """Converts the table to a Polars `DataFrame`.
251
252        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
253        Requires the `polars` extra to be installed.
254        """
255        import polars as pl
256
257        return pl.DataFrame(self)
258
259    def __arrow_c_schema__(self) -> object:
260        """Export the schema as an `ArrowSchema` `PyCapsule`.
261
262        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export
263
264        The Arrow PyCapsule Interface enables zero-copy data exchange with
265        Arrow-compatible libraries without requiring PyArrow as a dependency.
266        """
267        return self._table.__arrow_c_schema__()
268
269    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
270        """Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.
271
272        The optional `requested_schema` parameter allows for potential schema conversion.
273
274        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export
275
276        The Arrow PyCapsule Interface enables zero-copy data exchange with
277        Arrow-compatible libraries without requiring PyArrow as a dependency.
278        """
279        return self._table.__arrow_c_array__(requested_schema)
280
281
282class ExcelReader:
283    """A class representing an open Excel file and allowing to read its sheets"""
284
285    def __init__(self, reader: _ExcelReader) -> None:
286        self._reader = reader
287
288    @property
289    def sheet_names(self) -> list[str]:
290        """The list of sheet names"""
291        return self._reader.sheet_names
292
293    @typing.overload
294    def load_sheet(
295        self,
296        idx_or_name: int | str,
297        *,
298        header_row: int | None = 0,
299        column_names: list[str] | None = None,
300        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
301        n_rows: int | None = None,
302        schema_sample_rows: int | None = 1_000,
303        dtype_coercion: Literal["coerce", "strict"] = "coerce",
304        use_columns: list[str]
305        | list[int]
306        | str
307        | Callable[[ColumnInfoNoDtype], bool]
308        | None = None,
309        dtypes: DType | DTypeMap | None = None,
310        eager: Literal[False] = ...,
311    ) -> ExcelSheet: ...
312
313    @typing.overload
314    def load_sheet(
315        self,
316        idx_or_name: int | str,
317        *,
318        header_row: int | None = 0,
319        column_names: list[str] | None = None,
320        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
321        n_rows: int | None = None,
322        schema_sample_rows: int | None = 1_000,
323        dtype_coercion: Literal["coerce", "strict"] = "coerce",
324        use_columns: list[str]
325        | list[int]
326        | str
327        | Callable[[ColumnInfoNoDtype], bool]
328        | None = None,
329        dtypes: DType | DTypeMap | None = None,
330        eager: Literal[True] = ...,
331    ) -> "pa.RecordBatch": ...
332
333    def load_sheet(
334        self,
335        idx_or_name: int | str,
336        *,
337        header_row: int | None = 0,
338        column_names: list[str] | None = None,
339        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
340        n_rows: int | None = None,
341        schema_sample_rows: int | None = 1_000,
342        dtype_coercion: Literal["coerce", "strict"] = "coerce",
343        use_columns: list[str]
344        | list[int]
345        | str
346        | Callable[[ColumnInfoNoDtype], bool]
347        | None = None,
348        dtypes: DType | DTypeMap | None = None,
349        eager: bool = False,
350    ) -> "ExcelSheet | pa.RecordBatch":
351        """Loads a sheet by index or name.
352
353        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
354        :param header_row: The index of the row containing the column labels, default index is 0.
355                           If `None`, the sheet does not have any column labels.
356                           Any rows before the `header_row` will be automatically skipped.
357        :param column_names: Overrides headers found in the document.
358                             If `column_names` is used, `header_row` will be ignored.
359        :param n_rows: Specifies how many rows should be loaded.
360                       If `None`, all rows are loaded
361        :param skip_rows: Specifies which rows should be skipped after the `header_row`.
362                          Any rows before the `header_row` are automatically skipped.
363                          It means row indices are relative to data rows, not the sheet!
364                          Can be one of:
365                          - `int`: Skip this many rows after the header row
366                          - `list[int]`: Skip specific row indices (0-based relative to data rows)
367                          - `Callable[[int], bool]`: Function that receives row index (0-based
368                          relative to data rows) and returns True to skip the row
369                          - `None`: If `header_row` is None, skips empty rows at beginning
370        :param schema_sample_rows: Specifies how many rows should be used to determine
371                                   the dtype of a column. Cannot be 0. A specific dtype can be
372                                   enforced for some or all columns through the `dtypes` parameter.
373                                   If `None`, all rows will be used.
374        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
375                               will try to coerce different dtypes in a column to the same one,
376                               whereas `strict` will raise an error in case a column contains
377                               several dtypes. Note that this only applies to columns whose dtype
378                               is guessed, i.e. not specified via `dtypes`.
379        :param use_columns: Specifies the columns to use. Can either be:
380                            - `None` to select all columns
381                            - A list of strings and ints, the column names and/or indices
382                              (starting at 0)
383                            - A string, a comma separated list of Excel column letters and column
384                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
385                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
386                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
387                              ranges (e.g. `":C"` to select columns from A to C). These can be
388                              combined for "except" patterns (e.g. `":C,E:"` to select everything
389                              except column D)
390                            - A callable, a function that takes a column and returns a boolean
391                              indicating whether the column should be used
392        :param dtypes: An optional dtype (for all columns)
393                       or dict of dtypes with keys as column indices or names.
394        :param eager: Specifies whether the sheet should be loaded eagerly.
395                      `False` (default) will load the sheet lazily using the `PyCapsule` interface,
396                      whereas `True` will load it eagerly via `pyarrow`.
397
398                      Eager loading requires the `pyarrow` extra to be installed.
399        """
400        sheet_or_rb = self._reader.load_sheet(
401            idx_or_name=idx_or_name,
402            header_row=header_row,
403            column_names=column_names,
404            skip_rows=skip_rows,
405            n_rows=n_rows,
406            schema_sample_rows=schema_sample_rows,
407            dtype_coercion=dtype_coercion,
408            use_columns=use_columns,
409            dtypes=dtypes,
410            eager=eager,
411        )
412        return sheet_or_rb if eager else ExcelSheet(sheet_or_rb)
413
414    def table_names(self, sheet_name: str | None = None) -> list[str]:
415        """The list of table names.
416
417        Will return an empty list if no tables are found.
418
419        :param sheet_name: If given, will limit the list to the given sheet, will be faster
420        too.
421        """
422        return self._reader.table_names(sheet_name)
423
424    def defined_names(self) -> list[DefinedName]:
425        """The list of defined names (named ranges) in the workbook.
426
427        Returns a list of DefinedName objects with 'name' and 'formula' attributes.
428        The formula is a string representation of the range or expression.
429
430        Will return an empty list if no defined names are found.
431        """
432        return self._reader.defined_names()
433
434    @typing.overload
435    def load_table(
436        self,
437        name: str,
438        *,
439        header_row: int | None = None,
440        column_names: list[str] | None = None,
441        skip_rows: int | None = None,
442        n_rows: int | None = None,
443        schema_sample_rows: int | None = 1_000,
444        dtype_coercion: Literal["coerce", "strict"] = "coerce",
445        use_columns: list[str]
446        | list[int]
447        | str
448        | Callable[[ColumnInfoNoDtype], bool]
449        | None = None,
450        dtypes: DType | DTypeMap | None = None,
451        eager: Literal[False] = ...,
452    ) -> ExcelTable: ...
453
454    @typing.overload
455    def load_table(
456        self,
457        name: str,
458        *,
459        header_row: int | None = None,
460        column_names: list[str] | None = None,
461        skip_rows: int | None = None,
462        n_rows: int | None = None,
463        schema_sample_rows: int | None = 1_000,
464        dtype_coercion: Literal["coerce", "strict"] = "coerce",
465        use_columns: list[str]
466        | list[int]
467        | str
468        | Callable[[ColumnInfoNoDtype], bool]
469        | None = None,
470        dtypes: DType | DTypeMap | None = None,
471        eager: Literal[True] = ...,
472    ) -> "pa.RecordBatch": ...
473
474    def load_table(
475        self,
476        name: str,
477        *,
478        header_row: int | None = None,
479        column_names: list[str] | None = None,
480        skip_rows: int | None = None,
481        n_rows: int | None = None,
482        schema_sample_rows: int | None = 1_000,
483        dtype_coercion: Literal["coerce", "strict"] = "coerce",
484        use_columns: list[str]
485        | list[int]
486        | str
487        | Callable[[ColumnInfoNoDtype], bool]
488        | None = None,
489        dtypes: DType | DTypeMap | None = None,
490        eager: bool = False,
491    ) -> "ExcelTable | pa.RecordBatch":
492        """Loads a table by name.
493
494        :param name: The name of the table to load.
495        :param header_row: The index of the row containing the column labels.
496                           If `None`, the table's column names will be used.
497                           Any rows before the `header_row` will be automatically skipped.
498        :param column_names: Overrides headers found in the document.
499                             If `column_names` is used, `header_row` will be ignored.
500        :param n_rows: Specifies how many rows should be loaded.
501                       If `None`, all rows are loaded
502        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
503                          Any rows before the `header_row` are automatically skipped.
504                          If `header_row` is `None`, it skips the number of rows from the
505                          start of the sheet.
506        :param schema_sample_rows: Specifies how many rows should be used to determine
507                                   the dtype of a column. Cannot be 0. A specific dtype can be
508                                   enforced for some or all columns through the `dtypes` parameter.
509                                   If `None`, all rows will be used.
510        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
511                               will try to coerce different dtypes in a column to the same one,
512                               whereas `strict` will raise an error in case a column contains
513                               several dtypes. Note that this only applies to columns whose dtype
514                               is guessed, i.e. not specified via `dtypes`.
515        :param use_columns: Specifies the columns to use. Can either be:
516                            - `None` to select all columns
517                            - A list of strings and ints, the column names and/or indices
518                              (starting at 0)
519                            - A string, a comma separated list of Excel column letters and column
520                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
521                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
522                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
523                              ranges (e.g. `":C"` to select columns from A to C). These can be
524                              combined for "except" patterns (e.g. `":C,E:"` to select everything
525                              except column D)
526                            - A callable, a function that takes a column and returns a boolean
527                              indicating whether the column should be used
528        :param dtypes: An optional dtype (for all columns)
529                       or dict of dtypes with keys as column indices or names.
530        :param eager: Specifies whether the table should be loaded eagerly.
531                      `False` (default) will load the table lazily using the `PyCapsule` interface,
532                      whereas `True` will load it eagerly via `pyarrow`.
533
534                      Eager loading requires the `pyarrow` extra to be installed.
535        """
536        if eager:
537            return self._reader.load_table(
538                name=name,
539                header_row=header_row,
540                column_names=column_names,
541                skip_rows=skip_rows,
542                n_rows=n_rows,
543                schema_sample_rows=schema_sample_rows,
544                dtype_coercion=dtype_coercion,
545                use_columns=use_columns,
546                dtypes=dtypes,
547                eager=True,
548            )
549        else:
550            return ExcelTable(
551                self._reader.load_table(
552                    name=name,
553                    header_row=header_row,
554                    column_names=column_names,
555                    skip_rows=skip_rows,
556                    n_rows=n_rows,
557                    schema_sample_rows=schema_sample_rows,
558                    dtype_coercion=dtype_coercion,
559                    use_columns=use_columns,
560                    dtypes=dtypes,
561                    eager=False,
562                )
563            )
564
565    def load_sheet_eager(
566        self,
567        idx_or_name: int | str,
568        *,
569        header_row: int | None = 0,
570        column_names: list[str] | None = None,
571        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
572        n_rows: int | None = None,
573        schema_sample_rows: int | None = 1_000,
574        dtype_coercion: Literal["coerce", "strict"] = "coerce",
575        use_columns: list[str] | list[int] | str | None = None,
576        dtypes: DType | DTypeMap | None = None,
577    ) -> "pa.RecordBatch":
578        """Loads a sheet eagerly by index or name.
579
580        For xlsx files, this will be faster and more memory-efficient, as it will use
581        `worksheet_range_ref` under the hood, which returns borrowed types.
582
583        Refer to `load_sheet` for parameter documentation
584
585        Requires the `pyarrow` extra to be installed.
586        """
587        return self._reader.load_sheet(
588            idx_or_name=idx_or_name,
589            header_row=header_row,
590            column_names=column_names,
591            skip_rows=skip_rows,
592            n_rows=n_rows,
593            schema_sample_rows=schema_sample_rows,
594            dtype_coercion=dtype_coercion,
595            use_columns=use_columns,
596            dtypes=dtypes,
597            eager=True,
598        )
599
600    def load_sheet_by_name(
601        self,
602        name: str,
603        *,
604        header_row: int | None = 0,
605        column_names: list[str] | None = None,
606        skip_rows: int | None = None,
607        n_rows: int | None = None,
608        schema_sample_rows: int | None = 1_000,
609        dtype_coercion: Literal["coerce", "strict"] = "coerce",
610        use_columns: list[str]
611        | list[int]
612        | str
613        | Callable[[ColumnInfoNoDtype], bool]
614        | None = None,
615        dtypes: DType | DTypeMap | None = None,
616    ) -> ExcelSheet:
617        """Loads a sheet by name.
618
619        Refer to `load_sheet` for parameter documentation
620        """
621        return self.load_sheet(
622            name,
623            header_row=header_row,
624            column_names=column_names,
625            skip_rows=skip_rows,
626            n_rows=n_rows,
627            schema_sample_rows=schema_sample_rows,
628            dtype_coercion=dtype_coercion,
629            use_columns=use_columns,
630            dtypes=dtypes,
631        )
632
633    def load_sheet_by_idx(
634        self,
635        idx: int,
636        *,
637        header_row: int | None = 0,
638        column_names: list[str] | None = None,
639        skip_rows: int | None = None,
640        n_rows: int | None = None,
641        schema_sample_rows: int | None = 1_000,
642        dtype_coercion: Literal["coerce", "strict"] = "coerce",
643        use_columns: list[str]
644        | list[int]
645        | str
646        | Callable[[ColumnInfoNoDtype], bool]
647        | None = None,
648        dtypes: DType | DTypeMap | None = None,
649    ) -> ExcelSheet:
650        """Loads a sheet by index.
651
652        Refer to `load_sheet` for parameter documentation
653        """
654        return self.load_sheet(
655            idx,
656            header_row=header_row,
657            column_names=column_names,
658            skip_rows=skip_rows,
659            n_rows=n_rows,
660            schema_sample_rows=schema_sample_rows,
661            dtype_coercion=dtype_coercion,
662            use_columns=use_columns,
663            dtypes=dtypes,
664        )
665
666    def __repr__(self) -> str:
667        return self._reader.__repr__()
668
669
670def read_excel(source: Path | str | bytes) -> ExcelReader:
671    """Opens and loads an excel file.
672
673    :param source: The path to a file or its content as bytes
674    """
675    if isinstance(source, (str, Path)):
676        source = expanduser(source)
677    return ExcelReader(_read_excel(source))
678
679
680__all__ = (
681    # version
682    "__version__",
683    # main entrypoint
684    "read_excel",
685    # Python types
686    "DType",
687    "DTypeMap",
688    # Excel reader
689    "ExcelReader",
690    # Excel sheet
691    "ExcelSheet",
692    # Excel table
693    "ExcelTable",
694    # Column metadata
695    "DTypeFrom",
696    "ColumnNameFrom",
697    "ColumnInfo",
698    # Defined names
699    "DefinedName",
700    # Parse error information
701    "CellError",
702    "CellErrors",
703    # Exceptions
704    "FastExcelError",
705    "CannotRetrieveCellDataError",
706    "CalamineCellError",
707    "CalamineError",
708    "SheetNotFoundError",
709    "ColumnNotFoundError",
710    "ArrowError",
711    "InvalidParametersError",
712    "UnsupportedColumnTypeCombinationError",
713)
__version__ = '0.16.0'
def read_excel(source: pathlib.Path | str | bytes) -> ExcelReader:
671def read_excel(source: Path | str | bytes) -> ExcelReader:
672    """Opens and loads an excel file.
673
674    :param source: The path to a file or its content as bytes
675    """
676    if isinstance(source, (str, Path)):
677        source = expanduser(source)
678    return ExcelReader(_read_excel(source))

Opens and loads an excel file.

Parameters
  • source: The path to a file or its content as bytes
DType = typing.Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']
DTypeMap: TypeAlias = 'dict[str | int, DType]'
class ExcelReader:
283class ExcelReader:
284    """A class representing an open Excel file and allowing to read its sheets"""
285
286    def __init__(self, reader: _ExcelReader) -> None:
287        self._reader = reader
288
289    @property
290    def sheet_names(self) -> list[str]:
291        """The list of sheet names"""
292        return self._reader.sheet_names
293
294    @typing.overload
295    def load_sheet(
296        self,
297        idx_or_name: int | str,
298        *,
299        header_row: int | None = 0,
300        column_names: list[str] | None = None,
301        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
302        n_rows: int | None = None,
303        schema_sample_rows: int | None = 1_000,
304        dtype_coercion: Literal["coerce", "strict"] = "coerce",
305        use_columns: list[str]
306        | list[int]
307        | str
308        | Callable[[ColumnInfoNoDtype], bool]
309        | None = None,
310        dtypes: DType | DTypeMap | None = None,
311        eager: Literal[False] = ...,
312    ) -> ExcelSheet: ...
313
314    @typing.overload
315    def load_sheet(
316        self,
317        idx_or_name: int | str,
318        *,
319        header_row: int | None = 0,
320        column_names: list[str] | None = None,
321        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
322        n_rows: int | None = None,
323        schema_sample_rows: int | None = 1_000,
324        dtype_coercion: Literal["coerce", "strict"] = "coerce",
325        use_columns: list[str]
326        | list[int]
327        | str
328        | Callable[[ColumnInfoNoDtype], bool]
329        | None = None,
330        dtypes: DType | DTypeMap | None = None,
331        eager: Literal[True] = ...,
332    ) -> "pa.RecordBatch": ...
333
334    def load_sheet(
335        self,
336        idx_or_name: int | str,
337        *,
338        header_row: int | None = 0,
339        column_names: list[str] | None = None,
340        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
341        n_rows: int | None = None,
342        schema_sample_rows: int | None = 1_000,
343        dtype_coercion: Literal["coerce", "strict"] = "coerce",
344        use_columns: list[str]
345        | list[int]
346        | str
347        | Callable[[ColumnInfoNoDtype], bool]
348        | None = None,
349        dtypes: DType | DTypeMap | None = None,
350        eager: bool = False,
351    ) -> "ExcelSheet | pa.RecordBatch":
352        """Loads a sheet by index or name.
353
354        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
355        :param header_row: The index of the row containing the column labels, default index is 0.
356                           If `None`, the sheet does not have any column labels.
357                           Any rows before the `header_row` will be automatically skipped.
358        :param column_names: Overrides headers found in the document.
359                             If `column_names` is used, `header_row` will be ignored.
360        :param n_rows: Specifies how many rows should be loaded.
361                       If `None`, all rows are loaded
362        :param skip_rows: Specifies which rows should be skipped after the `header_row`.
363                          Any rows before the `header_row` are automatically skipped.
364                          It means row indices are relative to data rows, not the sheet!
365                          Can be one of:
366                          - `int`: Skip this many rows after the header row
367                          - `list[int]`: Skip specific row indices (0-based relative to data rows)
368                          - `Callable[[int], bool]`: Function that receives row index (0-based
369                          relative to data rows) and returns True to skip the row
370                          - `None`: If `header_row` is None, skips empty rows at beginning
371        :param schema_sample_rows: Specifies how many rows should be used to determine
372                                   the dtype of a column. Cannot be 0. A specific dtype can be
373                                   enforced for some or all columns through the `dtypes` parameter.
374                                   If `None`, all rows will be used.
375        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
376                               will try to coerce different dtypes in a column to the same one,
377                               whereas `strict` will raise an error in case a column contains
378                               several dtypes. Note that this only applies to columns whose dtype
379                               is guessed, i.e. not specified via `dtypes`.
380        :param use_columns: Specifies the columns to use. Can either be:
381                            - `None` to select all columns
382                            - A list of strings and ints, the column names and/or indices
383                              (starting at 0)
384                            - A string, a comma separated list of Excel column letters and column
385                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
386                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
387                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
388                              ranges (e.g. `":C"` to select columns from A to C). These can be
389                              combined for "except" patterns (e.g. `":C,E:"` to select everything
390                              except column D)
391                            - A callable, a function that takes a column and returns a boolean
392                              indicating whether the column should be used
393        :param dtypes: An optional dtype (for all columns)
394                       or dict of dtypes with keys as column indices or names.
395        :param eager: Specifies whether the sheet should be loaded eagerly.
396                      `False` (default) will load the sheet lazily using the `PyCapsule` interface,
397                      whereas `True` will load it eagerly via `pyarrow`.
398
399                      Eager loading requires the `pyarrow` extra to be installed.
400        """
401        sheet_or_rb = self._reader.load_sheet(
402            idx_or_name=idx_or_name,
403            header_row=header_row,
404            column_names=column_names,
405            skip_rows=skip_rows,
406            n_rows=n_rows,
407            schema_sample_rows=schema_sample_rows,
408            dtype_coercion=dtype_coercion,
409            use_columns=use_columns,
410            dtypes=dtypes,
411            eager=eager,
412        )
413        return sheet_or_rb if eager else ExcelSheet(sheet_or_rb)
414
415    def table_names(self, sheet_name: str | None = None) -> list[str]:
416        """The list of table names.
417
418        Will return an empty list if no tables are found.
419
420        :param sheet_name: If given, will limit the list to the given sheet, will be faster
421        too.
422        """
423        return self._reader.table_names(sheet_name)
424
425    def defined_names(self) -> list[DefinedName]:
426        """The list of defined names (named ranges) in the workbook.
427
428        Returns a list of DefinedName objects with 'name' and 'formula' attributes.
429        The formula is a string representation of the range or expression.
430
431        Will return an empty list if no defined names are found.
432        """
433        return self._reader.defined_names()
434
435    @typing.overload
436    def load_table(
437        self,
438        name: str,
439        *,
440        header_row: int | None = None,
441        column_names: list[str] | None = None,
442        skip_rows: int | None = None,
443        n_rows: int | None = None,
444        schema_sample_rows: int | None = 1_000,
445        dtype_coercion: Literal["coerce", "strict"] = "coerce",
446        use_columns: list[str]
447        | list[int]
448        | str
449        | Callable[[ColumnInfoNoDtype], bool]
450        | None = None,
451        dtypes: DType | DTypeMap | None = None,
452        eager: Literal[False] = ...,
453    ) -> ExcelTable: ...
454
455    @typing.overload
456    def load_table(
457        self,
458        name: str,
459        *,
460        header_row: int | None = None,
461        column_names: list[str] | None = None,
462        skip_rows: int | None = None,
463        n_rows: int | None = None,
464        schema_sample_rows: int | None = 1_000,
465        dtype_coercion: Literal["coerce", "strict"] = "coerce",
466        use_columns: list[str]
467        | list[int]
468        | str
469        | Callable[[ColumnInfoNoDtype], bool]
470        | None = None,
471        dtypes: DType | DTypeMap | None = None,
472        eager: Literal[True] = ...,
473    ) -> "pa.RecordBatch": ...
474
475    def load_table(
476        self,
477        name: str,
478        *,
479        header_row: int | None = None,
480        column_names: list[str] | None = None,
481        skip_rows: int | None = None,
482        n_rows: int | None = None,
483        schema_sample_rows: int | None = 1_000,
484        dtype_coercion: Literal["coerce", "strict"] = "coerce",
485        use_columns: list[str]
486        | list[int]
487        | str
488        | Callable[[ColumnInfoNoDtype], bool]
489        | None = None,
490        dtypes: DType | DTypeMap | None = None,
491        eager: bool = False,
492    ) -> "ExcelTable | pa.RecordBatch":
493        """Loads a table by name.
494
495        :param name: The name of the table to load.
496        :param header_row: The index of the row containing the column labels.
497                           If `None`, the table's column names will be used.
498                           Any rows before the `header_row` will be automatically skipped.
499        :param column_names: Overrides headers found in the document.
500                             If `column_names` is used, `header_row` will be ignored.
501        :param n_rows: Specifies how many rows should be loaded.
502                       If `None`, all rows are loaded
503        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
504                          Any rows before the `header_row` are automatically skipped.
505                          If `header_row` is `None`, it skips the number of rows from the
506                          start of the sheet.
507        :param schema_sample_rows: Specifies how many rows should be used to determine
508                                   the dtype of a column. Cannot be 0. A specific dtype can be
509                                   enforced for some or all columns through the `dtypes` parameter.
510                                   If `None`, all rows will be used.
511        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
512                               will try to coerce different dtypes in a column to the same one,
513                               whereas `strict` will raise an error in case a column contains
514                               several dtypes. Note that this only applies to columns whose dtype
515                               is guessed, i.e. not specified via `dtypes`.
516        :param use_columns: Specifies the columns to use. Can either be:
517                            - `None` to select all columns
518                            - A list of strings and ints, the column names and/or indices
519                              (starting at 0)
520                            - A string, a comma separated list of Excel column letters and column
521                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
522                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
523                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
524                              ranges (e.g. `":C"` to select columns from A to C). These can be
525                              combined for "except" patterns (e.g. `":C,E:"` to select everything
526                              except column D)
527                            - A callable, a function that takes a column and returns a boolean
528                              indicating whether the column should be used
529        :param dtypes: An optional dtype (for all columns)
530                       or dict of dtypes with keys as column indices or names.
531        :param eager: Specifies whether the table should be loaded eagerly.
532                      `False` (default) will load the table lazily using the `PyCapsule` interface,
533                      whereas `True` will load it eagerly via `pyarrow`.
534
535                      Eager loading requires the `pyarrow` extra to be installed.
536        """
537        if eager:
538            return self._reader.load_table(
539                name=name,
540                header_row=header_row,
541                column_names=column_names,
542                skip_rows=skip_rows,
543                n_rows=n_rows,
544                schema_sample_rows=schema_sample_rows,
545                dtype_coercion=dtype_coercion,
546                use_columns=use_columns,
547                dtypes=dtypes,
548                eager=True,
549            )
550        else:
551            return ExcelTable(
552                self._reader.load_table(
553                    name=name,
554                    header_row=header_row,
555                    column_names=column_names,
556                    skip_rows=skip_rows,
557                    n_rows=n_rows,
558                    schema_sample_rows=schema_sample_rows,
559                    dtype_coercion=dtype_coercion,
560                    use_columns=use_columns,
561                    dtypes=dtypes,
562                    eager=False,
563                )
564            )
565
566    def load_sheet_eager(
567        self,
568        idx_or_name: int | str,
569        *,
570        header_row: int | None = 0,
571        column_names: list[str] | None = None,
572        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
573        n_rows: int | None = None,
574        schema_sample_rows: int | None = 1_000,
575        dtype_coercion: Literal["coerce", "strict"] = "coerce",
576        use_columns: list[str] | list[int] | str | None = None,
577        dtypes: DType | DTypeMap | None = None,
578    ) -> "pa.RecordBatch":
579        """Loads a sheet eagerly by index or name.
580
581        For xlsx files, this will be faster and more memory-efficient, as it will use
582        `worksheet_range_ref` under the hood, which returns borrowed types.
583
584        Refer to `load_sheet` for parameter documentation
585
586        Requires the `pyarrow` extra to be installed.
587        """
588        return self._reader.load_sheet(
589            idx_or_name=idx_or_name,
590            header_row=header_row,
591            column_names=column_names,
592            skip_rows=skip_rows,
593            n_rows=n_rows,
594            schema_sample_rows=schema_sample_rows,
595            dtype_coercion=dtype_coercion,
596            use_columns=use_columns,
597            dtypes=dtypes,
598            eager=True,
599        )
600
601    def load_sheet_by_name(
602        self,
603        name: str,
604        *,
605        header_row: int | None = 0,
606        column_names: list[str] | None = None,
607        skip_rows: int | None = None,
608        n_rows: int | None = None,
609        schema_sample_rows: int | None = 1_000,
610        dtype_coercion: Literal["coerce", "strict"] = "coerce",
611        use_columns: list[str]
612        | list[int]
613        | str
614        | Callable[[ColumnInfoNoDtype], bool]
615        | None = None,
616        dtypes: DType | DTypeMap | None = None,
617    ) -> ExcelSheet:
618        """Loads a sheet by name.
619
620        Refer to `load_sheet` for parameter documentation
621        """
622        return self.load_sheet(
623            name,
624            header_row=header_row,
625            column_names=column_names,
626            skip_rows=skip_rows,
627            n_rows=n_rows,
628            schema_sample_rows=schema_sample_rows,
629            dtype_coercion=dtype_coercion,
630            use_columns=use_columns,
631            dtypes=dtypes,
632        )
633
634    def load_sheet_by_idx(
635        self,
636        idx: int,
637        *,
638        header_row: int | None = 0,
639        column_names: list[str] | None = None,
640        skip_rows: int | None = None,
641        n_rows: int | None = None,
642        schema_sample_rows: int | None = 1_000,
643        dtype_coercion: Literal["coerce", "strict"] = "coerce",
644        use_columns: list[str]
645        | list[int]
646        | str
647        | Callable[[ColumnInfoNoDtype], bool]
648        | None = None,
649        dtypes: DType | DTypeMap | None = None,
650    ) -> ExcelSheet:
651        """Loads a sheet by index.
652
653        Refer to `load_sheet` for parameter documentation
654        """
655        return self.load_sheet(
656            idx,
657            header_row=header_row,
658            column_names=column_names,
659            skip_rows=skip_rows,
660            n_rows=n_rows,
661            schema_sample_rows=schema_sample_rows,
662            dtype_coercion=dtype_coercion,
663            use_columns=use_columns,
664            dtypes=dtypes,
665        )
666
667    def __repr__(self) -> str:
668        return self._reader.__repr__()

A class representing an open Excel file and allowing to read its sheets

ExcelReader(reader: _ExcelReader)
286    def __init__(self, reader: _ExcelReader) -> None:
287        self._reader = reader
sheet_names: list[str]
289    @property
290    def sheet_names(self) -> list[str]:
291        """The list of sheet names"""
292        return self._reader.sheet_names

The list of sheet names

def load_sheet( self, idx_or_name: int | str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: Union[int, list[int], Callable[[int], bool], NoneType] = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None, eager: bool = False) -> ExcelSheet | pyarrow.lib.RecordBatch:
334    def load_sheet(
335        self,
336        idx_or_name: int | str,
337        *,
338        header_row: int | None = 0,
339        column_names: list[str] | None = None,
340        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
341        n_rows: int | None = None,
342        schema_sample_rows: int | None = 1_000,
343        dtype_coercion: Literal["coerce", "strict"] = "coerce",
344        use_columns: list[str]
345        | list[int]
346        | str
347        | Callable[[ColumnInfoNoDtype], bool]
348        | None = None,
349        dtypes: DType | DTypeMap | None = None,
350        eager: bool = False,
351    ) -> "ExcelSheet | pa.RecordBatch":
352        """Loads a sheet by index or name.
353
354        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
355        :param header_row: The index of the row containing the column labels, default index is 0.
356                           If `None`, the sheet does not have any column labels.
357                           Any rows before the `header_row` will be automatically skipped.
358        :param column_names: Overrides headers found in the document.
359                             If `column_names` is used, `header_row` will be ignored.
360        :param n_rows: Specifies how many rows should be loaded.
361                       If `None`, all rows are loaded
362        :param skip_rows: Specifies which rows should be skipped after the `header_row`.
363                          Any rows before the `header_row` are automatically skipped.
364                          It means row indices are relative to data rows, not the sheet!
365                          Can be one of:
366                          - `int`: Skip this many rows after the header row
367                          - `list[int]`: Skip specific row indices (0-based relative to data rows)
368                          - `Callable[[int], bool]`: Function that receives row index (0-based
369                          relative to data rows) and returns True to skip the row
370                          - `None`: If `header_row` is None, skips empty rows at beginning
371        :param schema_sample_rows: Specifies how many rows should be used to determine
372                                   the dtype of a column. Cannot be 0. A specific dtype can be
373                                   enforced for some or all columns through the `dtypes` parameter.
374                                   If `None`, all rows will be used.
375        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
376                               will try to coerce different dtypes in a column to the same one,
377                               whereas `strict` will raise an error in case a column contains
378                               several dtypes. Note that this only applies to columns whose dtype
379                               is guessed, i.e. not specified via `dtypes`.
380        :param use_columns: Specifies the columns to use. Can either be:
381                            - `None` to select all columns
382                            - A list of strings and ints, the column names and/or indices
383                              (starting at 0)
384                            - A string, a comma separated list of Excel column letters and column
385                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
386                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
387                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
388                              ranges (e.g. `":C"` to select columns from A to C). These can be
389                              combined for "except" patterns (e.g. `":C,E:"` to select everything
390                              except column D)
391                            - A callable, a function that takes a column and returns a boolean
392                              indicating whether the column should be used
393        :param dtypes: An optional dtype (for all columns)
394                       or dict of dtypes with keys as column indices or names.
395        :param eager: Specifies whether the sheet should be loaded eagerly.
396                      `False` (default) will load the sheet lazily using the `PyCapsule` interface,
397                      whereas `True` will load it eagerly via `pyarrow`.
398
399                      Eager loading requires the `pyarrow` extra to be installed.
400        """
401        sheet_or_rb = self._reader.load_sheet(
402            idx_or_name=idx_or_name,
403            header_row=header_row,
404            column_names=column_names,
405            skip_rows=skip_rows,
406            n_rows=n_rows,
407            schema_sample_rows=schema_sample_rows,
408            dtype_coercion=dtype_coercion,
409            use_columns=use_columns,
410            dtypes=dtypes,
411            eager=eager,
412        )
413        return sheet_or_rb if eager else ExcelSheet(sheet_or_rb)

Loads a sheet by index or name.

Parameters
  • idx_or_name: The index (starting at 0) or the name of the sheet to load.
  • header_row: The index of the row containing the column labels, default index is 0. If None, the sheet does not have any column labels. Any rows before the header_row will be automatically skipped.
  • column_names: Overrides headers found in the document. If column_names is used, header_row will be ignored.
  • n_rows: Specifies how many rows should be loaded. If None, all rows are loaded
  • skip_rows: Specifies which rows should be skipped after the header_row. Any rows before the header_row are automatically skipped. It means row indices are relative to data rows, not the sheet! Can be one of:
    • int: Skip this many rows after the header row
    • list[int]: Skip specific row indices (0-based relative to data rows)
    • Callable[[int], bool]: Function that receives row index (0-based relative to data rows) and returns True to skip the row
    • None: If header_row is None, skips empty rows at beginning
  • schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. Cannot be 0. A specific dtype can be enforced for some or all columns through the dtypes parameter. If None, all rows will be used.
  • dtype_coercion: Specifies how type coercion should behave. coerce (the default) will try to coerce different dtypes in a column to the same one, whereas strict will raise an error in case a column contains several dtypes. Note that this only applies to columns whose dtype is guessed, i.e. not specified via dtypes.
  • use_columns: Specifies the columns to use. Can either be:
    • None to select all columns
    • A list of strings and ints, the column names and/or indices (starting at 0)
    • A string, a comma separated list of Excel column letters and column ranges (e.g. "A:E" or "A,C,E:F", which would result in A,B,C,D,E and A,C,E,F). Also supports open-ended ranges (e.g. "B:" to select all columns from B onwards) and from-beginning ranges (e.g. ":C" to select columns from A to C). These can be combined for "except" patterns (e.g. ":C,E:" to select everything except column D)
    • A callable, a function that takes a column and returns a boolean indicating whether the column should be used
  • dtypes: An optional dtype (for all columns) or dict of dtypes with keys as column indices or names.
  • eager: Specifies whether the sheet should be loaded eagerly. False (default) will load the sheet lazily using the PyCapsule interface, whereas True will load it eagerly via pyarrow.

          Eager loading requires the `pyarrow` extra to be installed.
    
def table_names(self, sheet_name: str | None = None) -> list[str]:
415    def table_names(self, sheet_name: str | None = None) -> list[str]:
416        """The list of table names.
417
418        Will return an empty list if no tables are found.
419
420        :param sheet_name: If given, will limit the list to the given sheet, will be faster
421        too.
422        """
423        return self._reader.table_names(sheet_name)

The list of table names.

Will return an empty list if no tables are found.

Parameters
  • sheet_name: If given, will limit the list to the given sheet, will be faster too.
def defined_names(self) -> list[DefinedName]:
425    def defined_names(self) -> list[DefinedName]:
426        """The list of defined names (named ranges) in the workbook.
427
428        Returns a list of DefinedName objects with 'name' and 'formula' attributes.
429        The formula is a string representation of the range or expression.
430
431        Will return an empty list if no defined names are found.
432        """
433        return self._reader.defined_names()

The list of defined names (named ranges) in the workbook.

Returns a list of DefinedName objects with 'name' and 'formula' attributes. The formula is a string representation of the range or expression.

Will return an empty list if no defined names are found.

def load_table( self, name: str, *, header_row: int | None = None, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None, eager: bool = False) -> ExcelTable | pyarrow.lib.RecordBatch:
475    def load_table(
476        self,
477        name: str,
478        *,
479        header_row: int | None = None,
480        column_names: list[str] | None = None,
481        skip_rows: int | None = None,
482        n_rows: int | None = None,
483        schema_sample_rows: int | None = 1_000,
484        dtype_coercion: Literal["coerce", "strict"] = "coerce",
485        use_columns: list[str]
486        | list[int]
487        | str
488        | Callable[[ColumnInfoNoDtype], bool]
489        | None = None,
490        dtypes: DType | DTypeMap | None = None,
491        eager: bool = False,
492    ) -> "ExcelTable | pa.RecordBatch":
493        """Loads a table by name.
494
495        :param name: The name of the table to load.
496        :param header_row: The index of the row containing the column labels.
497                           If `None`, the table's column names will be used.
498                           Any rows before the `header_row` will be automatically skipped.
499        :param column_names: Overrides headers found in the document.
500                             If `column_names` is used, `header_row` will be ignored.
501        :param n_rows: Specifies how many rows should be loaded.
502                       If `None`, all rows are loaded
503        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
504                          Any rows before the `header_row` are automatically skipped.
505                          If `header_row` is `None`, it skips the number of rows from the
506                          start of the sheet.
507        :param schema_sample_rows: Specifies how many rows should be used to determine
508                                   the dtype of a column. Cannot be 0. A specific dtype can be
509                                   enforced for some or all columns through the `dtypes` parameter.
510                                   If `None`, all rows will be used.
511        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
512                               will try to coerce different dtypes in a column to the same one,
513                               whereas `strict` will raise an error in case a column contains
514                               several dtypes. Note that this only applies to columns whose dtype
515                               is guessed, i.e. not specified via `dtypes`.
516        :param use_columns: Specifies the columns to use. Can either be:
517                            - `None` to select all columns
518                            - A list of strings and ints, the column names and/or indices
519                              (starting at 0)
520                            - A string, a comma separated list of Excel column letters and column
521                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
522                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
523                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
524                              ranges (e.g. `":C"` to select columns from A to C). These can be
525                              combined for "except" patterns (e.g. `":C,E:"` to select everything
526                              except column D)
527                            - A callable, a function that takes a column and returns a boolean
528                              indicating whether the column should be used
529        :param dtypes: An optional dtype (for all columns)
530                       or dict of dtypes with keys as column indices or names.
531        :param eager: Specifies whether the table should be loaded eagerly.
532                      `False` (default) will load the table lazily using the `PyCapsule` interface,
533                      whereas `True` will load it eagerly via `pyarrow`.
534
535                      Eager loading requires the `pyarrow` extra to be installed.
536        """
537        if eager:
538            return self._reader.load_table(
539                name=name,
540                header_row=header_row,
541                column_names=column_names,
542                skip_rows=skip_rows,
543                n_rows=n_rows,
544                schema_sample_rows=schema_sample_rows,
545                dtype_coercion=dtype_coercion,
546                use_columns=use_columns,
547                dtypes=dtypes,
548                eager=True,
549            )
550        else:
551            return ExcelTable(
552                self._reader.load_table(
553                    name=name,
554                    header_row=header_row,
555                    column_names=column_names,
556                    skip_rows=skip_rows,
557                    n_rows=n_rows,
558                    schema_sample_rows=schema_sample_rows,
559                    dtype_coercion=dtype_coercion,
560                    use_columns=use_columns,
561                    dtypes=dtypes,
562                    eager=False,
563                )
564            )

Loads a table by name.

Parameters
  • name: The name of the table to load.
  • header_row: The index of the row containing the column labels. If None, the table's column names will be used. Any rows before the header_row will be automatically skipped.
  • column_names: Overrides headers found in the document. If column_names is used, header_row will be ignored.
  • n_rows: Specifies how many rows should be loaded. If None, all rows are loaded
  • skip_rows: Specifies how many rows should be skipped after the header_row. Any rows before the header_row are automatically skipped. If header_row is None, it skips the number of rows from the start of the sheet.
  • schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. Cannot be 0. A specific dtype can be enforced for some or all columns through the dtypes parameter. If None, all rows will be used.
  • dtype_coercion: Specifies how type coercion should behave. coerce (the default) will try to coerce different dtypes in a column to the same one, whereas strict will raise an error in case a column contains several dtypes. Note that this only applies to columns whose dtype is guessed, i.e. not specified via dtypes.
  • use_columns: Specifies the columns to use. Can either be:
    • None to select all columns
    • A list of strings and ints, the column names and/or indices (starting at 0)
    • A string, a comma separated list of Excel column letters and column ranges (e.g. "A:E" or "A,C,E:F", which would result in A,B,C,D,E and A,C,E,F). Also supports open-ended ranges (e.g. "B:" to select all columns from B onwards) and from-beginning ranges (e.g. ":C" to select columns from A to C). These can be combined for "except" patterns (e.g. ":C,E:" to select everything except column D)
    • A callable, a function that takes a column and returns a boolean indicating whether the column should be used
  • dtypes: An optional dtype (for all columns) or dict of dtypes with keys as column indices or names.
  • eager: Specifies whether the table should be loaded eagerly. False (default) will load the table lazily using the PyCapsule interface, whereas True will load it eagerly via pyarrow.

          Eager loading requires the `pyarrow` extra to be installed.
    
def load_sheet_eager( self, idx_or_name: int | str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: Union[int, list[int], Callable[[int], bool], NoneType] = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: list[str] | list[int] | str | None = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> pyarrow.lib.RecordBatch:
566    def load_sheet_eager(
567        self,
568        idx_or_name: int | str,
569        *,
570        header_row: int | None = 0,
571        column_names: list[str] | None = None,
572        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
573        n_rows: int | None = None,
574        schema_sample_rows: int | None = 1_000,
575        dtype_coercion: Literal["coerce", "strict"] = "coerce",
576        use_columns: list[str] | list[int] | str | None = None,
577        dtypes: DType | DTypeMap | None = None,
578    ) -> "pa.RecordBatch":
579        """Loads a sheet eagerly by index or name.
580
581        For xlsx files, this will be faster and more memory-efficient, as it will use
582        `worksheet_range_ref` under the hood, which returns borrowed types.
583
584        Refer to `load_sheet` for parameter documentation
585
586        Requires the `pyarrow` extra to be installed.
587        """
588        return self._reader.load_sheet(
589            idx_or_name=idx_or_name,
590            header_row=header_row,
591            column_names=column_names,
592            skip_rows=skip_rows,
593            n_rows=n_rows,
594            schema_sample_rows=schema_sample_rows,
595            dtype_coercion=dtype_coercion,
596            use_columns=use_columns,
597            dtypes=dtypes,
598            eager=True,
599        )

Loads a sheet eagerly by index or name.

For xlsx files, this will be faster and more memory-efficient, as it will use worksheet_range_ref under the hood, which returns borrowed types.

Refer to load_sheet for parameter documentation

Requires the pyarrow extra to be installed.

def load_sheet_by_name( self, name: str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
601    def load_sheet_by_name(
602        self,
603        name: str,
604        *,
605        header_row: int | None = 0,
606        column_names: list[str] | None = None,
607        skip_rows: int | None = None,
608        n_rows: int | None = None,
609        schema_sample_rows: int | None = 1_000,
610        dtype_coercion: Literal["coerce", "strict"] = "coerce",
611        use_columns: list[str]
612        | list[int]
613        | str
614        | Callable[[ColumnInfoNoDtype], bool]
615        | None = None,
616        dtypes: DType | DTypeMap | None = None,
617    ) -> ExcelSheet:
618        """Loads a sheet by name.
619
620        Refer to `load_sheet` for parameter documentation
621        """
622        return self.load_sheet(
623            name,
624            header_row=header_row,
625            column_names=column_names,
626            skip_rows=skip_rows,
627            n_rows=n_rows,
628            schema_sample_rows=schema_sample_rows,
629            dtype_coercion=dtype_coercion,
630            use_columns=use_columns,
631            dtypes=dtypes,
632        )

Loads a sheet by name.

Refer to load_sheet for parameter documentation

def load_sheet_by_idx( self, idx: int, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
634    def load_sheet_by_idx(
635        self,
636        idx: int,
637        *,
638        header_row: int | None = 0,
639        column_names: list[str] | None = None,
640        skip_rows: int | None = None,
641        n_rows: int | None = None,
642        schema_sample_rows: int | None = 1_000,
643        dtype_coercion: Literal["coerce", "strict"] = "coerce",
644        use_columns: list[str]
645        | list[int]
646        | str
647        | Callable[[ColumnInfoNoDtype], bool]
648        | None = None,
649        dtypes: DType | DTypeMap | None = None,
650    ) -> ExcelSheet:
651        """Loads a sheet by index.
652
653        Refer to `load_sheet` for parameter documentation
654        """
655        return self.load_sheet(
656            idx,
657            header_row=header_row,
658            column_names=column_names,
659            skip_rows=skip_rows,
660            n_rows=n_rows,
661            schema_sample_rows=schema_sample_rows,
662            dtype_coercion=dtype_coercion,
663            use_columns=use_columns,
664            dtypes=dtypes,
665        )

Loads a sheet by index.

Refer to load_sheet for parameter documentation

class ExcelSheet:
 60class ExcelSheet:
 61    """A class representing a single sheet in an Excel File"""
 62
 63    def __init__(self, sheet: _ExcelSheet) -> None:
 64        self._sheet = sheet
 65
 66    @property
 67    def name(self) -> str:
 68        """The name of the sheet"""
 69        return self._sheet.name
 70
 71    @property
 72    def width(self) -> int:
 73        """The sheet's width"""
 74        return self._sheet.width
 75
 76    @property
 77    def height(self) -> int:
 78        """The sheet's height, with `skip_rows` and `nrows` applied"""
 79        return self._sheet.height
 80
 81    @property
 82    def total_height(self) -> int:
 83        """The sheet's total height"""
 84        return self._sheet.total_height
 85
 86    @property
 87    def selected_columns(self) -> list[ColumnInfo]:
 88        """The sheet's selected columns"""
 89        return self._sheet.selected_columns
 90
 91    def available_columns(self) -> list[ColumnInfo]:
 92        """The columns available for the given sheet"""
 93        return self._sheet.available_columns()
 94
 95    @property
 96    def specified_dtypes(self) -> DTypeMap | None:
 97        """The dtypes specified for the sheet"""
 98        return self._sheet.specified_dtypes
 99
100    @property
101    def visible(self) -> SheetVisible:
102        """The visibility of the sheet"""
103        return self._sheet.visible
104
105    def to_arrow(self) -> "pa.RecordBatch":
106        """Converts the sheet to a pyarrow `RecordBatch`
107
108        Requires the `pyarrow` extra to be installed.
109        """
110        if not _PYARROW_AVAILABLE:
111            raise ImportError(
112                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
113            )
114        return self._sheet.to_arrow()
115
116    def to_arrow_with_errors(self) -> "tuple[pa.RecordBatch, CellErrors | None]":
117        """Converts the sheet to a pyarrow `RecordBatch` with error information.
118
119        Stores the positions of any values that cannot be parsed as the specified type and were
120        therefore converted to None.
121
122        Requires the `pyarrow` extra to be installed.
123        """
124        if not _PYARROW_AVAILABLE:
125            raise ImportError(
126                "pyarrow is required for to_arrow_with_errors(). Install with: pip install 'fastexcel[pyarrow]'"  # noqa: E501
127            )
128        rb, cell_errors = self._sheet.to_arrow_with_errors()
129        if not cell_errors.errors:
130            return (rb, None)
131        return (rb, cell_errors)
132
133    def to_pandas(self) -> "pd.DataFrame":
134        """Converts the sheet to a Pandas `DataFrame`.
135
136        Requires the `pandas` extra to be installed.
137        """
138        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
139        # which we don't implement. Using pyarrow conversion for now.
140        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
141        return self.to_arrow().to_pandas()
142
143    def to_polars(self) -> "pl.DataFrame":
144        """Converts the sheet to a Polars `DataFrame`.
145
146        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
147        Requires the `polars` extra to be installed.
148        """
149        import polars as pl
150
151        return pl.DataFrame(self)
152
153    def __arrow_c_schema__(self) -> object:
154        """Export the schema as an `ArrowSchema` `PyCapsule`.
155
156        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export
157
158        The Arrow PyCapsule Interface enables zero-copy data exchange with
159        Arrow-compatible libraries without requiring PyArrow as a dependency.
160        """
161        return self._sheet.__arrow_c_schema__()
162
163    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
164        """Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.
165
166        The optional `requested_schema` parameter allows for potential schema conversion.
167
168        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export
169
170        The Arrow PyCapsule Interface enables zero-copy data exchange with
171        Arrow-compatible libraries without requiring PyArrow as a dependency.
172        """
173        return self._sheet.__arrow_c_array__(requested_schema)
174
175    def __repr__(self) -> str:
176        return self._sheet.__repr__()

A class representing a single sheet in an Excel File

ExcelSheet(sheet: _ExcelSheet)
63    def __init__(self, sheet: _ExcelSheet) -> None:
64        self._sheet = sheet
name: str
66    @property
67    def name(self) -> str:
68        """The name of the sheet"""
69        return self._sheet.name

The name of the sheet

width: int
71    @property
72    def width(self) -> int:
73        """The sheet's width"""
74        return self._sheet.width

The sheet's width

height: int
76    @property
77    def height(self) -> int:
78        """The sheet's height, with `skip_rows` and `nrows` applied"""
79        return self._sheet.height

The sheet's height, with skip_rows and nrows applied

total_height: int
81    @property
82    def total_height(self) -> int:
83        """The sheet's total height"""
84        return self._sheet.total_height

The sheet's total height

selected_columns: list[ColumnInfo]
86    @property
87    def selected_columns(self) -> list[ColumnInfo]:
88        """The sheet's selected columns"""
89        return self._sheet.selected_columns

The sheet's selected columns

def available_columns(self) -> list[ColumnInfo]:
91    def available_columns(self) -> list[ColumnInfo]:
92        """The columns available for the given sheet"""
93        return self._sheet.available_columns()

The columns available for the given sheet

specified_dtypes: 'DTypeMap | None'
95    @property
96    def specified_dtypes(self) -> DTypeMap | None:
97        """The dtypes specified for the sheet"""
98        return self._sheet.specified_dtypes

The dtypes specified for the sheet

visible: Literal['visible', 'hidden', 'veryhidden']
100    @property
101    def visible(self) -> SheetVisible:
102        """The visibility of the sheet"""
103        return self._sheet.visible

The visibility of the sheet

def to_arrow(self) -> pyarrow.lib.RecordBatch:
105    def to_arrow(self) -> "pa.RecordBatch":
106        """Converts the sheet to a pyarrow `RecordBatch`
107
108        Requires the `pyarrow` extra to be installed.
109        """
110        if not _PYARROW_AVAILABLE:
111            raise ImportError(
112                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
113            )
114        return self._sheet.to_arrow()

Converts the sheet to a pyarrow RecordBatch

Requires the pyarrow extra to be installed.

def to_arrow_with_errors(self) -> tuple[pyarrow.lib.RecordBatch, CellErrors | None]:
116    def to_arrow_with_errors(self) -> "tuple[pa.RecordBatch, CellErrors | None]":
117        """Converts the sheet to a pyarrow `RecordBatch` with error information.
118
119        Stores the positions of any values that cannot be parsed as the specified type and were
120        therefore converted to None.
121
122        Requires the `pyarrow` extra to be installed.
123        """
124        if not _PYARROW_AVAILABLE:
125            raise ImportError(
126                "pyarrow is required for to_arrow_with_errors(). Install with: pip install 'fastexcel[pyarrow]'"  # noqa: E501
127            )
128        rb, cell_errors = self._sheet.to_arrow_with_errors()
129        if not cell_errors.errors:
130            return (rb, None)
131        return (rb, cell_errors)

Converts the sheet to a pyarrow RecordBatch with error information.

Stores the positions of any values that cannot be parsed as the specified type and were therefore converted to None.

Requires the pyarrow extra to be installed.

def to_pandas(self) -> pandas.core.frame.DataFrame:
133    def to_pandas(self) -> "pd.DataFrame":
134        """Converts the sheet to a Pandas `DataFrame`.
135
136        Requires the `pandas` extra to be installed.
137        """
138        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
139        # which we don't implement. Using pyarrow conversion for now.
140        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
141        return self.to_arrow().to_pandas()

Converts the sheet to a Pandas DataFrame.

Requires the pandas extra to be installed.

def to_polars(self) -> polars.dataframe.frame.DataFrame:
143    def to_polars(self) -> "pl.DataFrame":
144        """Converts the sheet to a Polars `DataFrame`.
145
146        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
147        Requires the `polars` extra to be installed.
148        """
149        import polars as pl
150
151        return pl.DataFrame(self)

Converts the sheet to a Polars DataFrame.

Uses the Arrow PyCapsule Interface for zero-copy data exchange. Requires the polars extra to be installed.

class ExcelTable:
179class ExcelTable:
180    """A class representing a single table in an Excel file"""
181
182    def __init__(self, table: _ExcelTable) -> None:
183        self._table = table
184
185    @property
186    def name(self) -> str:
187        """The name of the table"""
188        return self._table.name
189
190    @property
191    def sheet_name(self) -> str:
192        """The name of the sheet this table belongs to"""
193        return self._table.sheet_name
194
195    @property
196    def width(self) -> int:
197        """The table's width"""
198        return self._table.width
199
200    @property
201    def height(self) -> int:
202        """The table's height"""
203        return self._table.height
204
205    @property
206    def total_height(self) -> int:
207        """The table's total height"""
208        return self._table.total_height
209
210    @property
211    def offset(self) -> int:
212        """The table's offset before data starts"""
213        return self._table.offset
214
215    @property
216    def selected_columns(self) -> list[ColumnInfo]:
217        """The table's selected columns"""
218        return self._table.selected_columns
219
220    def available_columns(self) -> list[ColumnInfo]:
221        """The columns available for the given table"""
222        return self._table.available_columns()
223
224    @property
225    def specified_dtypes(self) -> DTypeMap | None:
226        """The dtypes specified for the table"""
227        return self._table.specified_dtypes
228
229    def to_arrow(self) -> "pa.RecordBatch":
230        """Converts the table to a pyarrow `RecordBatch`
231
232        Requires the `pyarrow` extra to be installed.
233        """
234        if not _PYARROW_AVAILABLE:
235            raise ImportError(
236                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
237            )
238        return self._table.to_arrow()
239
240    def to_pandas(self) -> "pd.DataFrame":
241        """Converts the table to a Pandas `DataFrame`.
242
243        Requires the `pandas` extra to be installed.
244        """
245        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
246        # which we don't implement. Using pyarrow conversion for now.
247        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
248        return self.to_arrow().to_pandas()
249
250    def to_polars(self) -> "pl.DataFrame":
251        """Converts the table to a Polars `DataFrame`.
252
253        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
254        Requires the `polars` extra to be installed.
255        """
256        import polars as pl
257
258        return pl.DataFrame(self)
259
260    def __arrow_c_schema__(self) -> object:
261        """Export the schema as an `ArrowSchema` `PyCapsule`.
262
263        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export
264
265        The Arrow PyCapsule Interface enables zero-copy data exchange with
266        Arrow-compatible libraries without requiring PyArrow as a dependency.
267        """
268        return self._table.__arrow_c_schema__()
269
270    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
271        """Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.
272
273        The optional `requested_schema` parameter allows for potential schema conversion.
274
275        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export
276
277        The Arrow PyCapsule Interface enables zero-copy data exchange with
278        Arrow-compatible libraries without requiring PyArrow as a dependency.
279        """
280        return self._table.__arrow_c_array__(requested_schema)

A class representing a single table in an Excel file

ExcelTable(table: _ExcelTable)
182    def __init__(self, table: _ExcelTable) -> None:
183        self._table = table
name: str
185    @property
186    def name(self) -> str:
187        """The name of the table"""
188        return self._table.name

The name of the table

sheet_name: str
190    @property
191    def sheet_name(self) -> str:
192        """The name of the sheet this table belongs to"""
193        return self._table.sheet_name

The name of the sheet this table belongs to

width: int
195    @property
196    def width(self) -> int:
197        """The table's width"""
198        return self._table.width

The table's width

height: int
200    @property
201    def height(self) -> int:
202        """The table's height"""
203        return self._table.height

The table's height

total_height: int
205    @property
206    def total_height(self) -> int:
207        """The table's total height"""
208        return self._table.total_height

The table's total height

offset: int
210    @property
211    def offset(self) -> int:
212        """The table's offset before data starts"""
213        return self._table.offset

The table's offset before data starts

selected_columns: list[ColumnInfo]
215    @property
216    def selected_columns(self) -> list[ColumnInfo]:
217        """The table's selected columns"""
218        return self._table.selected_columns

The table's selected columns

def available_columns(self) -> list[ColumnInfo]:
220    def available_columns(self) -> list[ColumnInfo]:
221        """The columns available for the given table"""
222        return self._table.available_columns()

The columns available for the given table

specified_dtypes: 'DTypeMap | None'
224    @property
225    def specified_dtypes(self) -> DTypeMap | None:
226        """The dtypes specified for the table"""
227        return self._table.specified_dtypes

The dtypes specified for the table

def to_arrow(self) -> pyarrow.lib.RecordBatch:
229    def to_arrow(self) -> "pa.RecordBatch":
230        """Converts the table to a pyarrow `RecordBatch`
231
232        Requires the `pyarrow` extra to be installed.
233        """
234        if not _PYARROW_AVAILABLE:
235            raise ImportError(
236                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
237            )
238        return self._table.to_arrow()

Converts the table to a pyarrow RecordBatch

Requires the pyarrow extra to be installed.

def to_pandas(self) -> pandas.core.frame.DataFrame:
240    def to_pandas(self) -> "pd.DataFrame":
241        """Converts the table to a Pandas `DataFrame`.
242
243        Requires the `pandas` extra to be installed.
244        """
245        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
246        # which we don't implement. Using pyarrow conversion for now.
247        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
248        return self.to_arrow().to_pandas()

Converts the table to a Pandas DataFrame.

Requires the pandas extra to be installed.

def to_polars(self) -> polars.dataframe.frame.DataFrame:
250    def to_polars(self) -> "pl.DataFrame":
251        """Converts the table to a Polars `DataFrame`.
252
253        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
254        Requires the `polars` extra to be installed.
255        """
256        import polars as pl
257
258        return pl.DataFrame(self)

Converts the table to a Polars DataFrame.

Uses the Arrow PyCapsule Interface for zero-copy data exchange. Requires the polars extra to be installed.

DTypeFrom: TypeAlias = Literal['provided_for_all', 'provided_by_index', 'provided_by_name', 'guessed']
ColumnNameFrom: TypeAlias = Literal['provided', 'looked_up', 'generated']
class ColumnInfo:

Metadata about a single column in a sheet.

index

int. The index of the column

name

str. The name of the column

dtype_from

fastexcel.DTypeFrom. How the dtype of the column was determined.

One of three possible values:

  • "provided_by_index": The dtype was specified via the column index
  • "provided_by_name": The dtype was specified via the column name
  • "guessed": The dtype was determined from the content of the column
dtype

fastexcel.DType. The dtype of the column

column_name_from

fastexcel.ColumnNameFrom. How the name of the column was determined.

One of three possible values:

  • "provided": The column name was provided via the use_columns parameter
  • "looked_up": The column name was looked up from the data found in the sheet
  • "generated": The column name was generated from the column index, either because header_row was None, or because it could not be looked up
class DefinedName:
name
formula
class CellError:
row_offset

int. The row offset

position

(int, int). The original row and column of the error

offset_position
detail

str. The error message

class CellErrors:
errors
class FastExcelError(builtins.Exception):

The base class for all fastexcel errors

class CannotRetrieveCellDataError(fastexcel.FastExcelError):

Data for a given cell cannot be retrieved

class CalamineCellError(fastexcel.FastExcelError):

calamine returned an error regarding the content of the cell

class CalamineError(fastexcel.FastExcelError):

Generic calamine error

class SheetNotFoundError(fastexcel.FastExcelError):

Sheet was not found

class ColumnNotFoundError(fastexcel.FastExcelError):

Column was not found

class ArrowError(fastexcel.FastExcelError):

Generic arrow error

class InvalidParametersError(fastexcel.FastExcelError):

Provided parameters are invalid

class UnsupportedColumnTypeCombinationError(fastexcel.FastExcelError):

Column contains an unsupported type combination