fastexcel

  1from __future__ import annotations
  2
  3import sys
  4import typing
  5from typing import TYPE_CHECKING, Callable, Literal
  6
  7if sys.version_info < (3, 10):
  8    from typing_extensions import TypeAlias
  9else:
 10    from typing import TypeAlias
 11
 12if TYPE_CHECKING:
 13    import pandas as pd
 14    import polars as pl
 15    import pyarrow as pa
 16
 17from os.path import expanduser
 18from pathlib import Path
 19
 20try:
 21    import importlib.util
 22
 23    importlib.util.find_spec("pyarrow")
 24    _PYARROW_AVAILABLE = True
 25except ImportError:
 26    _PYARROW_AVAILABLE = False
 27
 28from ._fastexcel import (
 29    ArrowError,
 30    CalamineCellError,
 31    CalamineError,
 32    CannotRetrieveCellDataError,
 33    CellError,
 34    CellErrors,
 35    ColumnInfo,
 36    ColumnInfoNoDtype,
 37    ColumnNotFoundError,
 38    FastExcelError,
 39    InvalidParametersError,
 40    SheetNotFoundError,
 41    UnsupportedColumnTypeCombinationError,
 42    __version__,
 43    _ExcelReader,
 44    _ExcelSheet,
 45    _ExcelTable,
 46)
 47from ._fastexcel import read_excel as _read_excel
 48
 49DType = Literal["null", "int", "float", "string", "boolean", "datetime", "date", "duration"]
 50DTypeMap: TypeAlias = "dict[str | int, DType]"
 51ColumnNameFrom: TypeAlias = Literal["provided", "looked_up", "generated"]
 52DTypeFrom: TypeAlias = Literal[
 53    "provided_for_all", "provided_by_index", "provided_by_name", "guessed"
 54]
 55SheetVisible: TypeAlias = Literal["visible", "hidden", "veryhidden"]
 56
 57
 58class ExcelSheet:
 59    """A class representing a single sheet in an Excel File"""
 60
 61    def __init__(self, sheet: _ExcelSheet) -> None:
 62        self._sheet = sheet
 63
 64    @property
 65    def name(self) -> str:
 66        """The name of the sheet"""
 67        return self._sheet.name
 68
 69    @property
 70    def width(self) -> int:
 71        """The sheet's width"""
 72        return self._sheet.width
 73
 74    @property
 75    def height(self) -> int:
 76        """The sheet's height, with `skip_rows` and `nrows` applied"""
 77        return self._sheet.height
 78
 79    @property
 80    def total_height(self) -> int:
 81        """The sheet's total height"""
 82        return self._sheet.total_height
 83
 84    @property
 85    def selected_columns(self) -> list[ColumnInfo]:
 86        """The sheet's selected columns"""
 87        return self._sheet.selected_columns
 88
 89    def available_columns(self) -> list[ColumnInfo]:
 90        """The columns available for the given sheet"""
 91        return self._sheet.available_columns()
 92
 93    @property
 94    def specified_dtypes(self) -> DTypeMap | None:
 95        """The dtypes specified for the sheet"""
 96        return self._sheet.specified_dtypes
 97
 98    @property
 99    def visible(self) -> SheetVisible:
100        """The visibility of the sheet"""
101        return self._sheet.visible
102
103    def to_arrow(self) -> "pa.RecordBatch":
104        """Converts the sheet to a pyarrow `RecordBatch`"""
105        if not _PYARROW_AVAILABLE:
106            raise ImportError(
107                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
108            )
109        return self._sheet.to_arrow()
110
111    def to_arrow_with_errors(self) -> "tuple[pa.RecordBatch, CellErrors | None]":
112        """Converts the sheet to a pyarrow `RecordBatch` with error information.
113
114        Stores the positions of any values that cannot be parsed as the specified type and were
115        therefore converted to None.
116        """
117        if not _PYARROW_AVAILABLE:
118            raise ImportError(
119                "pyarrow is required for to_arrow_with_errors(). Install with: pip install 'fastexcel[pyarrow]'"  # noqa: E501
120            )
121        rb, cell_errors = self._sheet.to_arrow_with_errors()
122        if not cell_errors.errors:
123            return (rb, None)
124        return (rb, cell_errors)
125
126    def to_pandas(self) -> "pd.DataFrame":
127        """Converts the sheet to a Pandas `DataFrame`.
128
129        Requires the `pandas` extra to be installed.
130        """
131        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
132        # which we don't implement. Using pyarrow conversion for now.
133        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
134        return self.to_arrow().to_pandas()
135
136    def to_polars(self) -> "pl.DataFrame":
137        """Converts the sheet to a Polars `DataFrame`.
138
139        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
140        Requires the `polars` extra to be installed.
141        """
142        import polars as pl
143
144        return pl.DataFrame(self)
145
146    def __arrow_c_schema__(self) -> object:
147        """Export the schema as an `ArrowSchema` `PyCapsule`.
148
149        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export
150
151        The Arrow PyCapsule Interface enables zero-copy data exchange with
152        Arrow-compatible libraries without requiring PyArrow as a dependency.
153        """
154        return self._sheet.__arrow_c_schema__()
155
156    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
157        """Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.
158
159        The optional `requested_schema` parameter allows for potential schema conversion.
160
161        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export
162
163        The Arrow PyCapsule Interface enables zero-copy data exchange with
164        Arrow-compatible libraries without requiring PyArrow as a dependency.
165        """
166        return self._sheet.__arrow_c_array__(requested_schema)
167
168    def __repr__(self) -> str:
169        return self._sheet.__repr__()
170
171
172class ExcelTable:
173    """A class representing a single table in an Excel file"""
174
175    def __init__(self, table: _ExcelTable) -> None:
176        self._table = table
177
178    @property
179    def name(self) -> str:
180        """The name of the table"""
181        return self._table.name
182
183    @property
184    def sheet_name(self) -> str:
185        """The name of the sheet this table belongs to"""
186        return self._table.sheet_name
187
188    @property
189    def width(self) -> int:
190        """The table's width"""
191        return self._table.width
192
193    @property
194    def height(self) -> int:
195        """The table's height"""
196        return self._table.height
197
198    @property
199    def total_height(self) -> int:
200        """The table's total height"""
201        return self._table.total_height
202
203    @property
204    def offset(self) -> int:
205        """The table's offset before data starts"""
206        return self._table.offset
207
208    @property
209    def selected_columns(self) -> list[ColumnInfo]:
210        """The table's selected columns"""
211        return self._table.selected_columns
212
213    def available_columns(self) -> list[ColumnInfo]:
214        """The columns available for the given table"""
215        return self._table.available_columns()
216
217    @property
218    def specified_dtypes(self) -> DTypeMap | None:
219        """The dtypes specified for the table"""
220        return self._table.specified_dtypes
221
222    def to_arrow(self) -> "pa.RecordBatch":
223        """Converts the table to a pyarrow `RecordBatch`"""
224        if not _PYARROW_AVAILABLE:
225            raise ImportError(
226                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
227            )
228        return self._table.to_arrow()
229
230    def to_pandas(self) -> "pd.DataFrame":
231        """Converts the table to a Pandas `DataFrame`.
232
233        Requires the `pandas` extra to be installed.
234        """
235        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
236        # which we don't implement. Using pyarrow conversion for now.
237        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
238        return self.to_arrow().to_pandas()
239
240    def to_polars(self) -> "pl.DataFrame":
241        """Converts the table to a Polars `DataFrame`.
242
243        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
244        Requires the `polars` extra to be installed.
245        """
246        import polars as pl
247
248        return pl.DataFrame(self)
249
250    def __arrow_c_schema__(self) -> object:
251        """Export the schema as an `ArrowSchema` `PyCapsule`.
252
253        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export
254
255        The Arrow PyCapsule Interface enables zero-copy data exchange with
256        Arrow-compatible libraries without requiring PyArrow as a dependency.
257        """
258        return self._table.__arrow_c_schema__()
259
260    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
261        """Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.
262
263        The optional `requested_schema` parameter allows for potential schema conversion.
264
265        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export
266
267        The Arrow PyCapsule Interface enables zero-copy data exchange with
268        Arrow-compatible libraries without requiring PyArrow as a dependency.
269        """
270        return self._table.__arrow_c_array__(requested_schema)
271
272
273class ExcelReader:
274    """A class representing an open Excel file and allowing to read its sheets"""
275
276    def __init__(self, reader: _ExcelReader) -> None:
277        self._reader = reader
278
279    @property
280    def sheet_names(self) -> list[str]:
281        """The list of sheet names"""
282        return self._reader.sheet_names
283
284    def load_sheet(
285        self,
286        idx_or_name: int | str,
287        *,
288        header_row: int | None = 0,
289        column_names: list[str] | None = None,
290        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
291        n_rows: int | None = None,
292        schema_sample_rows: int | None = 1_000,
293        dtype_coercion: Literal["coerce", "strict"] = "coerce",
294        use_columns: list[str]
295        | list[int]
296        | str
297        | Callable[[ColumnInfoNoDtype], bool]
298        | None = None,
299        dtypes: DType | DTypeMap | None = None,
300    ) -> ExcelSheet:
301        """Loads a sheet lazily by index or name.
302
303        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
304        :param header_row: The index of the row containing the column labels, default index is 0.
305                           If `None`, the sheet does not have any column labels.
306                           Any rows before the `header_row` will be automatically skipped.
307        :param column_names: Overrides headers found in the document.
308                             If `column_names` is used, `header_row` will be ignored.
309        :param n_rows: Specifies how many rows should be loaded.
310                       If `None`, all rows are loaded
311        :param skip_rows: Specifies which rows should be skipped after the `header_row`.
312                          Any rows before the `header_row` are automatically skipped.
313                          It means row indices are relative to data rows, not the sheet!
314                          Can be one of:
315                          - `int`: Skip this many rows after the header row
316                          - `list[int]`: Skip specific row indices (0-based relative to data rows)
317                          - `Callable[[int], bool]`: Function that receives row index (0-based
318                          relative to data rows) and returns True to skip the row
319                          - `None`: If `header_row` is None, skips empty rows at beginning
320        :param schema_sample_rows: Specifies how many rows should be used to determine
321                                   the dtype of a column. Cannot be 0. A specific dtype can be
322                                   enforced for some or all columns through the `dtypes` parameter.
323                                   If `None`, all rows will be used.
324        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
325                               will try to coerce different dtypes in a column to the same one,
326                               whereas `strict` will raise an error in case a column contains
327                               several dtypes. Note that this only applies to columns whose dtype
328                               is guessed, i.e. not specified via `dtypes`.
329        :param use_columns: Specifies the columns to use. Can either be:
330                            - `None` to select all columns
331                            - A list of strings and ints, the column names and/or indices
332                              (starting at 0)
333                            - A string, a comma separated list of Excel column letters and column
334                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
335                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
336                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
337                              ranges (e.g. `":C"` to select columns from A to C). These can be
338                              combined for "except" patterns (e.g. `":C,E:"` to select everything
339                              except column D)
340                            - A callable, a function that takes a column and returns a boolean
341                              indicating whether the column should be used
342        :param dtypes: An optional dtype (for all columns)
343                       or dict of dtypes with keys as column indices or names.
344        """
345        return ExcelSheet(
346            self._reader.load_sheet(
347                idx_or_name=idx_or_name,
348                header_row=header_row,
349                column_names=column_names,
350                skip_rows=skip_rows,
351                n_rows=n_rows,
352                schema_sample_rows=schema_sample_rows,
353                dtype_coercion=dtype_coercion,
354                use_columns=use_columns,
355                dtypes=dtypes,
356                eager=False,
357            )
358        )
359
360    def table_names(self, sheet_name: str | None = None) -> list[str]:
361        """The list of table names.
362
363        Will return an empty list if no tables are found.
364
365        :param sheet_name: If given, will limit the list to the given sheet, will be faster
366        too.
367        """
368        return self._reader.table_names(sheet_name)
369
370    @typing.overload
371    def load_table(
372        self,
373        name: str,
374        *,
375        header_row: int | None = None,
376        column_names: list[str] | None = None,
377        skip_rows: int | None = None,
378        n_rows: int | None = None,
379        schema_sample_rows: int | None = 1_000,
380        dtype_coercion: Literal["coerce", "strict"] = "coerce",
381        use_columns: list[str]
382        | list[int]
383        | str
384        | Callable[[ColumnInfoNoDtype], bool]
385        | None = None,
386        dtypes: DType | DTypeMap | None = None,
387        eager: Literal[False] = ...,
388    ) -> ExcelTable: ...
389
390    @typing.overload
391    def load_table(
392        self,
393        name: str,
394        *,
395        header_row: int | None = None,
396        column_names: list[str] | None = None,
397        skip_rows: int | None = None,
398        n_rows: int | None = None,
399        schema_sample_rows: int | None = 1_000,
400        dtype_coercion: Literal["coerce", "strict"] = "coerce",
401        use_columns: list[str]
402        | list[int]
403        | str
404        | Callable[[ColumnInfoNoDtype], bool]
405        | None = None,
406        dtypes: DType | DTypeMap | None = None,
407        eager: Literal[True] = ...,
408    ) -> "pa.RecordBatch": ...
409
410    def load_table(
411        self,
412        name: str,
413        *,
414        header_row: int | None = None,
415        column_names: list[str] | None = None,
416        skip_rows: int | None = None,
417        n_rows: int | None = None,
418        schema_sample_rows: int | None = 1_000,
419        dtype_coercion: Literal["coerce", "strict"] = "coerce",
420        use_columns: list[str]
421        | list[int]
422        | str
423        | Callable[[ColumnInfoNoDtype], bool]
424        | None = None,
425        dtypes: DType | DTypeMap | None = None,
426        eager: bool = False,
427    ) -> "ExcelTable | pa.RecordBatch":
428        """Loads a table by name.
429
430        :param name: The name of the table to load.
431        :param header_row: The index of the row containing the column labels.
432                           If `None`, the table's column names will be used.
433                           Any rows before the `header_row` will be automatically skipped.
434        :param column_names: Overrides headers found in the document.
435                             If `column_names` is used, `header_row` will be ignored.
436        :param n_rows: Specifies how many rows should be loaded.
437                       If `None`, all rows are loaded
438        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
439                          Any rows before the `header_row` are automatically skipped.
440                          If `header_row` is `None`, it skips the number of rows from the
441                          start of the sheet.
442        :param schema_sample_rows: Specifies how many rows should be used to determine
443                                   the dtype of a column. Cannot be 0. A specific dtype can be
444                                   enforced for some or all columns through the `dtypes` parameter.
445                                   If `None`, all rows will be used.
446        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
447                               will try to coerce different dtypes in a column to the same one,
448                               whereas `strict` will raise an error in case a column contains
449                               several dtypes. Note that this only applies to columns whose dtype
450                               is guessed, i.e. not specified via `dtypes`.
451        :param use_columns: Specifies the columns to use. Can either be:
452                            - `None` to select all columns
453                            - A list of strings and ints, the column names and/or indices
454                              (starting at 0)
455                            - A string, a comma separated list of Excel column letters and column
456                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
457                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
458                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
459                              ranges (e.g. `":C"` to select columns from A to C). These can be
460                              combined for "except" patterns (e.g. `":C,E:"` to select everything
461                              except column D)
462                            - A callable, a function that takes a column and returns a boolean
463                              indicating whether the column should be used
464        :param dtypes: An optional dtype (for all columns)
465                       or dict of dtypes with keys as column indices or names.
466        """
467        if eager:
468            return self._reader.load_table(
469                name=name,
470                header_row=header_row,
471                column_names=column_names,
472                skip_rows=skip_rows,
473                n_rows=n_rows,
474                schema_sample_rows=schema_sample_rows,
475                dtype_coercion=dtype_coercion,
476                use_columns=use_columns,
477                dtypes=dtypes,
478                eager=True,
479            )
480        else:
481            return ExcelTable(
482                self._reader.load_table(
483                    name=name,
484                    header_row=header_row,
485                    column_names=column_names,
486                    skip_rows=skip_rows,
487                    n_rows=n_rows,
488                    schema_sample_rows=schema_sample_rows,
489                    dtype_coercion=dtype_coercion,
490                    use_columns=use_columns,
491                    dtypes=dtypes,
492                    eager=False,
493                )
494            )
495
496    def load_sheet_eager(
497        self,
498        idx_or_name: int | str,
499        *,
500        header_row: int | None = 0,
501        column_names: list[str] | None = None,
502        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
503        n_rows: int | None = None,
504        schema_sample_rows: int | None = 1_000,
505        dtype_coercion: Literal["coerce", "strict"] = "coerce",
506        use_columns: list[str] | list[int] | str | None = None,
507        dtypes: DType | DTypeMap | None = None,
508    ) -> "pa.RecordBatch":
509        """Loads a sheet eagerly by index or name.
510
511        For xlsx files, this will be faster and more memory-efficient, as it will use
512        `worksheet_range_ref` under the hood, which returns borrowed types.
513
514        Refer to `load_sheet` for parameter documentation
515        """
516        return self._reader.load_sheet(
517            idx_or_name=idx_or_name,
518            header_row=header_row,
519            column_names=column_names,
520            skip_rows=skip_rows,
521            n_rows=n_rows,
522            schema_sample_rows=schema_sample_rows,
523            dtype_coercion=dtype_coercion,
524            use_columns=use_columns,
525            dtypes=dtypes,
526            eager=True,
527        )
528
529    def load_sheet_by_name(
530        self,
531        name: str,
532        *,
533        header_row: int | None = 0,
534        column_names: list[str] | None = None,
535        skip_rows: int | None = None,
536        n_rows: int | None = None,
537        schema_sample_rows: int | None = 1_000,
538        dtype_coercion: Literal["coerce", "strict"] = "coerce",
539        use_columns: list[str]
540        | list[int]
541        | str
542        | Callable[[ColumnInfoNoDtype], bool]
543        | None = None,
544        dtypes: DType | DTypeMap | None = None,
545    ) -> ExcelSheet:
546        """Loads a sheet by name.
547
548        Refer to `load_sheet` for parameter documentation
549        """
550        return self.load_sheet(
551            name,
552            header_row=header_row,
553            column_names=column_names,
554            skip_rows=skip_rows,
555            n_rows=n_rows,
556            schema_sample_rows=schema_sample_rows,
557            dtype_coercion=dtype_coercion,
558            use_columns=use_columns,
559            dtypes=dtypes,
560        )
561
562    def load_sheet_by_idx(
563        self,
564        idx: int,
565        *,
566        header_row: int | None = 0,
567        column_names: list[str] | None = None,
568        skip_rows: int | None = None,
569        n_rows: int | None = None,
570        schema_sample_rows: int | None = 1_000,
571        dtype_coercion: Literal["coerce", "strict"] = "coerce",
572        use_columns: list[str]
573        | list[int]
574        | str
575        | Callable[[ColumnInfoNoDtype], bool]
576        | None = None,
577        dtypes: DType | DTypeMap | None = None,
578    ) -> ExcelSheet:
579        """Loads a sheet by index.
580
581        Refer to `load_sheet` for parameter documentation
582        """
583        return self.load_sheet(
584            idx,
585            header_row=header_row,
586            column_names=column_names,
587            skip_rows=skip_rows,
588            n_rows=n_rows,
589            schema_sample_rows=schema_sample_rows,
590            dtype_coercion=dtype_coercion,
591            use_columns=use_columns,
592            dtypes=dtypes,
593        )
594
595    def __repr__(self) -> str:
596        return self._reader.__repr__()
597
598
599def read_excel(source: Path | str | bytes) -> ExcelReader:
600    """Opens and loads an excel file.
601
602    :param source: The path to a file or its content as bytes
603    """
604    if isinstance(source, (str, Path)):
605        source = expanduser(source)
606    return ExcelReader(_read_excel(source))
607
608
609__all__ = (
610    # version
611    "__version__",
612    # main entrypoint
613    "read_excel",
614    # Python types
615    "DType",
616    "DTypeMap",
617    # Excel reader
618    "ExcelReader",
619    # Excel sheet
620    "ExcelSheet",
621    # Column metadata
622    "DTypeFrom",
623    "ColumnNameFrom",
624    "ColumnInfo",
625    # Parse error information
626    "CellError",
627    "CellErrors",
628    # Exceptions
629    "FastExcelError",
630    "CannotRetrieveCellDataError",
631    "CalamineCellError",
632    "CalamineError",
633    "SheetNotFoundError",
634    "ColumnNotFoundError",
635    "ArrowError",
636    "InvalidParametersError",
637    "UnsupportedColumnTypeCombinationError",
638)
__version__ = '0.15.1'
def read_excel(source: pathlib.Path | str | bytes) -> ExcelReader:
600def read_excel(source: Path | str | bytes) -> ExcelReader:
601    """Opens and loads an excel file.
602
603    :param source: The path to a file or its content as bytes
604    """
605    if isinstance(source, (str, Path)):
606        source = expanduser(source)
607    return ExcelReader(_read_excel(source))

Opens and loads an excel file.

Parameters
  • source: The path to a file or its content as bytes
DType = typing.Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']
DTypeMap: TypeAlias = 'dict[str | int, DType]'
class ExcelReader:
274class ExcelReader:
275    """A class representing an open Excel file and allowing to read its sheets"""
276
277    def __init__(self, reader: _ExcelReader) -> None:
278        self._reader = reader
279
280    @property
281    def sheet_names(self) -> list[str]:
282        """The list of sheet names"""
283        return self._reader.sheet_names
284
285    def load_sheet(
286        self,
287        idx_or_name: int | str,
288        *,
289        header_row: int | None = 0,
290        column_names: list[str] | None = None,
291        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
292        n_rows: int | None = None,
293        schema_sample_rows: int | None = 1_000,
294        dtype_coercion: Literal["coerce", "strict"] = "coerce",
295        use_columns: list[str]
296        | list[int]
297        | str
298        | Callable[[ColumnInfoNoDtype], bool]
299        | None = None,
300        dtypes: DType | DTypeMap | None = None,
301    ) -> ExcelSheet:
302        """Loads a sheet lazily by index or name.
303
304        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
305        :param header_row: The index of the row containing the column labels, default index is 0.
306                           If `None`, the sheet does not have any column labels.
307                           Any rows before the `header_row` will be automatically skipped.
308        :param column_names: Overrides headers found in the document.
309                             If `column_names` is used, `header_row` will be ignored.
310        :param n_rows: Specifies how many rows should be loaded.
311                       If `None`, all rows are loaded
312        :param skip_rows: Specifies which rows should be skipped after the `header_row`.
313                          Any rows before the `header_row` are automatically skipped.
314                          It means row indices are relative to data rows, not the sheet!
315                          Can be one of:
316                          - `int`: Skip this many rows after the header row
317                          - `list[int]`: Skip specific row indices (0-based relative to data rows)
318                          - `Callable[[int], bool]`: Function that receives row index (0-based
319                          relative to data rows) and returns True to skip the row
320                          - `None`: If `header_row` is None, skips empty rows at beginning
321        :param schema_sample_rows: Specifies how many rows should be used to determine
322                                   the dtype of a column. Cannot be 0. A specific dtype can be
323                                   enforced for some or all columns through the `dtypes` parameter.
324                                   If `None`, all rows will be used.
325        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
326                               will try to coerce different dtypes in a column to the same one,
327                               whereas `strict` will raise an error in case a column contains
328                               several dtypes. Note that this only applies to columns whose dtype
329                               is guessed, i.e. not specified via `dtypes`.
330        :param use_columns: Specifies the columns to use. Can either be:
331                            - `None` to select all columns
332                            - A list of strings and ints, the column names and/or indices
333                              (starting at 0)
334                            - A string, a comma separated list of Excel column letters and column
335                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
336                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
337                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
338                              ranges (e.g. `":C"` to select columns from A to C). These can be
339                              combined for "except" patterns (e.g. `":C,E:"` to select everything
340                              except column D)
341                            - A callable, a function that takes a column and returns a boolean
342                              indicating whether the column should be used
343        :param dtypes: An optional dtype (for all columns)
344                       or dict of dtypes with keys as column indices or names.
345        """
346        return ExcelSheet(
347            self._reader.load_sheet(
348                idx_or_name=idx_or_name,
349                header_row=header_row,
350                column_names=column_names,
351                skip_rows=skip_rows,
352                n_rows=n_rows,
353                schema_sample_rows=schema_sample_rows,
354                dtype_coercion=dtype_coercion,
355                use_columns=use_columns,
356                dtypes=dtypes,
357                eager=False,
358            )
359        )
360
361    def table_names(self, sheet_name: str | None = None) -> list[str]:
362        """The list of table names.
363
364        Will return an empty list if no tables are found.
365
366        :param sheet_name: If given, will limit the list to the given sheet, will be faster
367        too.
368        """
369        return self._reader.table_names(sheet_name)
370
371    @typing.overload
372    def load_table(
373        self,
374        name: str,
375        *,
376        header_row: int | None = None,
377        column_names: list[str] | None = None,
378        skip_rows: int | None = None,
379        n_rows: int | None = None,
380        schema_sample_rows: int | None = 1_000,
381        dtype_coercion: Literal["coerce", "strict"] = "coerce",
382        use_columns: list[str]
383        | list[int]
384        | str
385        | Callable[[ColumnInfoNoDtype], bool]
386        | None = None,
387        dtypes: DType | DTypeMap | None = None,
388        eager: Literal[False] = ...,
389    ) -> ExcelTable: ...
390
391    @typing.overload
392    def load_table(
393        self,
394        name: str,
395        *,
396        header_row: int | None = None,
397        column_names: list[str] | None = None,
398        skip_rows: int | None = None,
399        n_rows: int | None = None,
400        schema_sample_rows: int | None = 1_000,
401        dtype_coercion: Literal["coerce", "strict"] = "coerce",
402        use_columns: list[str]
403        | list[int]
404        | str
405        | Callable[[ColumnInfoNoDtype], bool]
406        | None = None,
407        dtypes: DType | DTypeMap | None = None,
408        eager: Literal[True] = ...,
409    ) -> "pa.RecordBatch": ...
410
411    def load_table(
412        self,
413        name: str,
414        *,
415        header_row: int | None = None,
416        column_names: list[str] | None = None,
417        skip_rows: int | None = None,
418        n_rows: int | None = None,
419        schema_sample_rows: int | None = 1_000,
420        dtype_coercion: Literal["coerce", "strict"] = "coerce",
421        use_columns: list[str]
422        | list[int]
423        | str
424        | Callable[[ColumnInfoNoDtype], bool]
425        | None = None,
426        dtypes: DType | DTypeMap | None = None,
427        eager: bool = False,
428    ) -> "ExcelTable | pa.RecordBatch":
429        """Loads a table by name.
430
431        :param name: The name of the table to load.
432        :param header_row: The index of the row containing the column labels.
433                           If `None`, the table's column names will be used.
434                           Any rows before the `header_row` will be automatically skipped.
435        :param column_names: Overrides headers found in the document.
436                             If `column_names` is used, `header_row` will be ignored.
437        :param n_rows: Specifies how many rows should be loaded.
438                       If `None`, all rows are loaded
439        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
440                          Any rows before the `header_row` are automatically skipped.
441                          If `header_row` is `None`, it skips the number of rows from the
442                          start of the sheet.
443        :param schema_sample_rows: Specifies how many rows should be used to determine
444                                   the dtype of a column. Cannot be 0. A specific dtype can be
445                                   enforced for some or all columns through the `dtypes` parameter.
446                                   If `None`, all rows will be used.
447        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
448                               will try to coerce different dtypes in a column to the same one,
449                               whereas `strict` will raise an error in case a column contains
450                               several dtypes. Note that this only applies to columns whose dtype
451                               is guessed, i.e. not specified via `dtypes`.
452        :param use_columns: Specifies the columns to use. Can either be:
453                            - `None` to select all columns
454                            - A list of strings and ints, the column names and/or indices
455                              (starting at 0)
456                            - A string, a comma separated list of Excel column letters and column
457                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
458                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
459                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
460                              ranges (e.g. `":C"` to select columns from A to C). These can be
461                              combined for "except" patterns (e.g. `":C,E:"` to select everything
462                              except column D)
463                            - A callable, a function that takes a column and returns a boolean
464                              indicating whether the column should be used
465        :param dtypes: An optional dtype (for all columns)
466                       or dict of dtypes with keys as column indices or names.
467        """
468        if eager:
469            return self._reader.load_table(
470                name=name,
471                header_row=header_row,
472                column_names=column_names,
473                skip_rows=skip_rows,
474                n_rows=n_rows,
475                schema_sample_rows=schema_sample_rows,
476                dtype_coercion=dtype_coercion,
477                use_columns=use_columns,
478                dtypes=dtypes,
479                eager=True,
480            )
481        else:
482            return ExcelTable(
483                self._reader.load_table(
484                    name=name,
485                    header_row=header_row,
486                    column_names=column_names,
487                    skip_rows=skip_rows,
488                    n_rows=n_rows,
489                    schema_sample_rows=schema_sample_rows,
490                    dtype_coercion=dtype_coercion,
491                    use_columns=use_columns,
492                    dtypes=dtypes,
493                    eager=False,
494                )
495            )
496
497    def load_sheet_eager(
498        self,
499        idx_or_name: int | str,
500        *,
501        header_row: int | None = 0,
502        column_names: list[str] | None = None,
503        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
504        n_rows: int | None = None,
505        schema_sample_rows: int | None = 1_000,
506        dtype_coercion: Literal["coerce", "strict"] = "coerce",
507        use_columns: list[str] | list[int] | str | None = None,
508        dtypes: DType | DTypeMap | None = None,
509    ) -> "pa.RecordBatch":
510        """Loads a sheet eagerly by index or name.
511
512        For xlsx files, this will be faster and more memory-efficient, as it will use
513        `worksheet_range_ref` under the hood, which returns borrowed types.
514
515        Refer to `load_sheet` for parameter documentation
516        """
517        return self._reader.load_sheet(
518            idx_or_name=idx_or_name,
519            header_row=header_row,
520            column_names=column_names,
521            skip_rows=skip_rows,
522            n_rows=n_rows,
523            schema_sample_rows=schema_sample_rows,
524            dtype_coercion=dtype_coercion,
525            use_columns=use_columns,
526            dtypes=dtypes,
527            eager=True,
528        )
529
530    def load_sheet_by_name(
531        self,
532        name: str,
533        *,
534        header_row: int | None = 0,
535        column_names: list[str] | None = None,
536        skip_rows: int | None = None,
537        n_rows: int | None = None,
538        schema_sample_rows: int | None = 1_000,
539        dtype_coercion: Literal["coerce", "strict"] = "coerce",
540        use_columns: list[str]
541        | list[int]
542        | str
543        | Callable[[ColumnInfoNoDtype], bool]
544        | None = None,
545        dtypes: DType | DTypeMap | None = None,
546    ) -> ExcelSheet:
547        """Loads a sheet by name.
548
549        Refer to `load_sheet` for parameter documentation
550        """
551        return self.load_sheet(
552            name,
553            header_row=header_row,
554            column_names=column_names,
555            skip_rows=skip_rows,
556            n_rows=n_rows,
557            schema_sample_rows=schema_sample_rows,
558            dtype_coercion=dtype_coercion,
559            use_columns=use_columns,
560            dtypes=dtypes,
561        )
562
563    def load_sheet_by_idx(
564        self,
565        idx: int,
566        *,
567        header_row: int | None = 0,
568        column_names: list[str] | None = None,
569        skip_rows: int | None = None,
570        n_rows: int | None = None,
571        schema_sample_rows: int | None = 1_000,
572        dtype_coercion: Literal["coerce", "strict"] = "coerce",
573        use_columns: list[str]
574        | list[int]
575        | str
576        | Callable[[ColumnInfoNoDtype], bool]
577        | None = None,
578        dtypes: DType | DTypeMap | None = None,
579    ) -> ExcelSheet:
580        """Loads a sheet by index.
581
582        Refer to `load_sheet` for parameter documentation
583        """
584        return self.load_sheet(
585            idx,
586            header_row=header_row,
587            column_names=column_names,
588            skip_rows=skip_rows,
589            n_rows=n_rows,
590            schema_sample_rows=schema_sample_rows,
591            dtype_coercion=dtype_coercion,
592            use_columns=use_columns,
593            dtypes=dtypes,
594        )
595
596    def __repr__(self) -> str:
597        return self._reader.__repr__()

A class representing an open Excel file and allowing to read its sheets

ExcelReader(reader: _ExcelReader)
277    def __init__(self, reader: _ExcelReader) -> None:
278        self._reader = reader
sheet_names: list[str]
280    @property
281    def sheet_names(self) -> list[str]:
282        """The list of sheet names"""
283        return self._reader.sheet_names

The list of sheet names

def load_sheet( self, idx_or_name: int | str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: Union[int, list[int], Callable[[int], bool], NoneType] = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
285    def load_sheet(
286        self,
287        idx_or_name: int | str,
288        *,
289        header_row: int | None = 0,
290        column_names: list[str] | None = None,
291        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
292        n_rows: int | None = None,
293        schema_sample_rows: int | None = 1_000,
294        dtype_coercion: Literal["coerce", "strict"] = "coerce",
295        use_columns: list[str]
296        | list[int]
297        | str
298        | Callable[[ColumnInfoNoDtype], bool]
299        | None = None,
300        dtypes: DType | DTypeMap | None = None,
301    ) -> ExcelSheet:
302        """Loads a sheet lazily by index or name.
303
304        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
305        :param header_row: The index of the row containing the column labels, default index is 0.
306                           If `None`, the sheet does not have any column labels.
307                           Any rows before the `header_row` will be automatically skipped.
308        :param column_names: Overrides headers found in the document.
309                             If `column_names` is used, `header_row` will be ignored.
310        :param n_rows: Specifies how many rows should be loaded.
311                       If `None`, all rows are loaded
312        :param skip_rows: Specifies which rows should be skipped after the `header_row`.
313                          Any rows before the `header_row` are automatically skipped.
314                          It means row indices are relative to data rows, not the sheet!
315                          Can be one of:
316                          - `int`: Skip this many rows after the header row
317                          - `list[int]`: Skip specific row indices (0-based relative to data rows)
318                          - `Callable[[int], bool]`: Function that receives row index (0-based
319                          relative to data rows) and returns True to skip the row
320                          - `None`: If `header_row` is None, skips empty rows at beginning
321        :param schema_sample_rows: Specifies how many rows should be used to determine
322                                   the dtype of a column. Cannot be 0. A specific dtype can be
323                                   enforced for some or all columns through the `dtypes` parameter.
324                                   If `None`, all rows will be used.
325        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
326                               will try to coerce different dtypes in a column to the same one,
327                               whereas `strict` will raise an error in case a column contains
328                               several dtypes. Note that this only applies to columns whose dtype
329                               is guessed, i.e. not specified via `dtypes`.
330        :param use_columns: Specifies the columns to use. Can either be:
331                            - `None` to select all columns
332                            - A list of strings and ints, the column names and/or indices
333                              (starting at 0)
334                            - A string, a comma separated list of Excel column letters and column
335                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
336                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
337                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
338                              ranges (e.g. `":C"` to select columns from A to C). These can be
339                              combined for "except" patterns (e.g. `":C,E:"` to select everything
340                              except column D)
341                            - A callable, a function that takes a column and returns a boolean
342                              indicating whether the column should be used
343        :param dtypes: An optional dtype (for all columns)
344                       or dict of dtypes with keys as column indices or names.
345        """
346        return ExcelSheet(
347            self._reader.load_sheet(
348                idx_or_name=idx_or_name,
349                header_row=header_row,
350                column_names=column_names,
351                skip_rows=skip_rows,
352                n_rows=n_rows,
353                schema_sample_rows=schema_sample_rows,
354                dtype_coercion=dtype_coercion,
355                use_columns=use_columns,
356                dtypes=dtypes,
357                eager=False,
358            )
359        )

Loads a sheet lazily by index or name.

Parameters
  • idx_or_name: The index (starting at 0) or the name of the sheet to load.
  • header_row: The index of the row containing the column labels, default index is 0. If None, the sheet does not have any column labels. Any rows before the header_row will be automatically skipped.
  • column_names: Overrides headers found in the document. If column_names is used, header_row will be ignored.
  • n_rows: Specifies how many rows should be loaded. If None, all rows are loaded
  • skip_rows: Specifies which rows should be skipped after the header_row. Any rows before the header_row are automatically skipped. It means row indices are relative to data rows, not the sheet! Can be one of:
    • int: Skip this many rows after the header row
    • list[int]: Skip specific row indices (0-based relative to data rows)
    • Callable[[int], bool]: Function that receives row index (0-based relative to data rows) and returns True to skip the row
    • None: If header_row is None, skips empty rows at beginning
  • schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. Cannot be 0. A specific dtype can be enforced for some or all columns through the dtypes parameter. If None, all rows will be used.
  • dtype_coercion: Specifies how type coercion should behave. coerce (the default) will try to coerce different dtypes in a column to the same one, whereas strict will raise an error in case a column contains several dtypes. Note that this only applies to columns whose dtype is guessed, i.e. not specified via dtypes.
  • use_columns: Specifies the columns to use. Can either be:
    • None to select all columns
    • A list of strings and ints, the column names and/or indices (starting at 0)
    • A string, a comma separated list of Excel column letters and column ranges (e.g. "A:E" or "A,C,E:F", which would result in A,B,C,D,E and A,C,E,F). Also supports open-ended ranges (e.g. "B:" to select all columns from B onwards) and from-beginning ranges (e.g. ":C" to select columns from A to C). These can be combined for "except" patterns (e.g. ":C,E:" to select everything except column D)
    • A callable, a function that takes a column and returns a boolean indicating whether the column should be used
  • dtypes: An optional dtype (for all columns) or dict of dtypes with keys as column indices or names.
def table_names(self, sheet_name: str | None = None) -> list[str]:
361    def table_names(self, sheet_name: str | None = None) -> list[str]:
362        """The list of table names.
363
364        Will return an empty list if no tables are found.
365
366        :param sheet_name: If given, will limit the list to the given sheet, will be faster
367        too.
368        """
369        return self._reader.table_names(sheet_name)

The list of table names.

Will return an empty list if no tables are found.

Parameters
  • sheet_name: If given, will limit the list to the given sheet, will be faster too.
def load_table( self, name: str, *, header_row: int | None = None, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None, eager: bool = False) -> fastexcel.ExcelTable | pyarrow.lib.RecordBatch:
411    def load_table(
412        self,
413        name: str,
414        *,
415        header_row: int | None = None,
416        column_names: list[str] | None = None,
417        skip_rows: int | None = None,
418        n_rows: int | None = None,
419        schema_sample_rows: int | None = 1_000,
420        dtype_coercion: Literal["coerce", "strict"] = "coerce",
421        use_columns: list[str]
422        | list[int]
423        | str
424        | Callable[[ColumnInfoNoDtype], bool]
425        | None = None,
426        dtypes: DType | DTypeMap | None = None,
427        eager: bool = False,
428    ) -> "ExcelTable | pa.RecordBatch":
429        """Loads a table by name.
430
431        :param name: The name of the table to load.
432        :param header_row: The index of the row containing the column labels.
433                           If `None`, the table's column names will be used.
434                           Any rows before the `header_row` will be automatically skipped.
435        :param column_names: Overrides headers found in the document.
436                             If `column_names` is used, `header_row` will be ignored.
437        :param n_rows: Specifies how many rows should be loaded.
438                       If `None`, all rows are loaded
439        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
440                          Any rows before the `header_row` are automatically skipped.
441                          If `header_row` is `None`, it skips the number of rows from the
442                          start of the sheet.
443        :param schema_sample_rows: Specifies how many rows should be used to determine
444                                   the dtype of a column. Cannot be 0. A specific dtype can be
445                                   enforced for some or all columns through the `dtypes` parameter.
446                                   If `None`, all rows will be used.
447        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
448                               will try to coerce different dtypes in a column to the same one,
449                               whereas `strict` will raise an error in case a column contains
450                               several dtypes. Note that this only applies to columns whose dtype
451                               is guessed, i.e. not specified via `dtypes`.
452        :param use_columns: Specifies the columns to use. Can either be:
453                            - `None` to select all columns
454                            - A list of strings and ints, the column names and/or indices
455                              (starting at 0)
456                            - A string, a comma separated list of Excel column letters and column
457                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
458                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
459                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
460                              ranges (e.g. `":C"` to select columns from A to C). These can be
461                              combined for "except" patterns (e.g. `":C,E:"` to select everything
462                              except column D)
463                            - A callable, a function that takes a column and returns a boolean
464                              indicating whether the column should be used
465        :param dtypes: An optional dtype (for all columns)
466                       or dict of dtypes with keys as column indices or names.
467        """
468        if eager:
469            return self._reader.load_table(
470                name=name,
471                header_row=header_row,
472                column_names=column_names,
473                skip_rows=skip_rows,
474                n_rows=n_rows,
475                schema_sample_rows=schema_sample_rows,
476                dtype_coercion=dtype_coercion,
477                use_columns=use_columns,
478                dtypes=dtypes,
479                eager=True,
480            )
481        else:
482            return ExcelTable(
483                self._reader.load_table(
484                    name=name,
485                    header_row=header_row,
486                    column_names=column_names,
487                    skip_rows=skip_rows,
488                    n_rows=n_rows,
489                    schema_sample_rows=schema_sample_rows,
490                    dtype_coercion=dtype_coercion,
491                    use_columns=use_columns,
492                    dtypes=dtypes,
493                    eager=False,
494                )
495            )

Loads a table by name.

Parameters
  • name: The name of the table to load.
  • header_row: The index of the row containing the column labels. If None, the table's column names will be used. Any rows before the header_row will be automatically skipped.
  • column_names: Overrides headers found in the document. If column_names is used, header_row will be ignored.
  • n_rows: Specifies how many rows should be loaded. If None, all rows are loaded
  • skip_rows: Specifies how many rows should be skipped after the header_row. Any rows before the header_row are automatically skipped. If header_row is None, it skips the number of rows from the start of the sheet.
  • schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. Cannot be 0. A specific dtype can be enforced for some or all columns through the dtypes parameter. If None, all rows will be used.
  • dtype_coercion: Specifies how type coercion should behave. coerce (the default) will try to coerce different dtypes in a column to the same one, whereas strict will raise an error in case a column contains several dtypes. Note that this only applies to columns whose dtype is guessed, i.e. not specified via dtypes.
  • use_columns: Specifies the columns to use. Can either be:
    • None to select all columns
    • A list of strings and ints, the column names and/or indices (starting at 0)
    • A string, a comma separated list of Excel column letters and column ranges (e.g. "A:E" or "A,C,E:F", which would result in A,B,C,D,E and A,C,E,F). Also supports open-ended ranges (e.g. "B:" to select all columns from B onwards) and from-beginning ranges (e.g. ":C" to select columns from A to C). These can be combined for "except" patterns (e.g. ":C,E:" to select everything except column D)
    • A callable, a function that takes a column and returns a boolean indicating whether the column should be used
  • dtypes: An optional dtype (for all columns) or dict of dtypes with keys as column indices or names.
def load_sheet_eager( self, idx_or_name: int | str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: Union[int, list[int], Callable[[int], bool], NoneType] = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: list[str] | list[int] | str | None = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> pyarrow.lib.RecordBatch:
497    def load_sheet_eager(
498        self,
499        idx_or_name: int | str,
500        *,
501        header_row: int | None = 0,
502        column_names: list[str] | None = None,
503        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
504        n_rows: int | None = None,
505        schema_sample_rows: int | None = 1_000,
506        dtype_coercion: Literal["coerce", "strict"] = "coerce",
507        use_columns: list[str] | list[int] | str | None = None,
508        dtypes: DType | DTypeMap | None = None,
509    ) -> "pa.RecordBatch":
510        """Loads a sheet eagerly by index or name.
511
512        For xlsx files, this will be faster and more memory-efficient, as it will use
513        `worksheet_range_ref` under the hood, which returns borrowed types.
514
515        Refer to `load_sheet` for parameter documentation
516        """
517        return self._reader.load_sheet(
518            idx_or_name=idx_or_name,
519            header_row=header_row,
520            column_names=column_names,
521            skip_rows=skip_rows,
522            n_rows=n_rows,
523            schema_sample_rows=schema_sample_rows,
524            dtype_coercion=dtype_coercion,
525            use_columns=use_columns,
526            dtypes=dtypes,
527            eager=True,
528        )

Loads a sheet eagerly by index or name.

For xlsx files, this will be faster and more memory-efficient, as it will use worksheet_range_ref under the hood, which returns borrowed types.

Refer to load_sheet for parameter documentation

def load_sheet_by_name( self, name: str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
530    def load_sheet_by_name(
531        self,
532        name: str,
533        *,
534        header_row: int | None = 0,
535        column_names: list[str] | None = None,
536        skip_rows: int | None = None,
537        n_rows: int | None = None,
538        schema_sample_rows: int | None = 1_000,
539        dtype_coercion: Literal["coerce", "strict"] = "coerce",
540        use_columns: list[str]
541        | list[int]
542        | str
543        | Callable[[ColumnInfoNoDtype], bool]
544        | None = None,
545        dtypes: DType | DTypeMap | None = None,
546    ) -> ExcelSheet:
547        """Loads a sheet by name.
548
549        Refer to `load_sheet` for parameter documentation
550        """
551        return self.load_sheet(
552            name,
553            header_row=header_row,
554            column_names=column_names,
555            skip_rows=skip_rows,
556            n_rows=n_rows,
557            schema_sample_rows=schema_sample_rows,
558            dtype_coercion=dtype_coercion,
559            use_columns=use_columns,
560            dtypes=dtypes,
561        )

Loads a sheet by name.

Refer to load_sheet for parameter documentation

def load_sheet_by_idx( self, idx: int, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
563    def load_sheet_by_idx(
564        self,
565        idx: int,
566        *,
567        header_row: int | None = 0,
568        column_names: list[str] | None = None,
569        skip_rows: int | None = None,
570        n_rows: int | None = None,
571        schema_sample_rows: int | None = 1_000,
572        dtype_coercion: Literal["coerce", "strict"] = "coerce",
573        use_columns: list[str]
574        | list[int]
575        | str
576        | Callable[[ColumnInfoNoDtype], bool]
577        | None = None,
578        dtypes: DType | DTypeMap | None = None,
579    ) -> ExcelSheet:
580        """Loads a sheet by index.
581
582        Refer to `load_sheet` for parameter documentation
583        """
584        return self.load_sheet(
585            idx,
586            header_row=header_row,
587            column_names=column_names,
588            skip_rows=skip_rows,
589            n_rows=n_rows,
590            schema_sample_rows=schema_sample_rows,
591            dtype_coercion=dtype_coercion,
592            use_columns=use_columns,
593            dtypes=dtypes,
594        )

Loads a sheet by index.

Refer to load_sheet for parameter documentation

class ExcelSheet:
 59class ExcelSheet:
 60    """A class representing a single sheet in an Excel File"""
 61
 62    def __init__(self, sheet: _ExcelSheet) -> None:
 63        self._sheet = sheet
 64
 65    @property
 66    def name(self) -> str:
 67        """The name of the sheet"""
 68        return self._sheet.name
 69
 70    @property
 71    def width(self) -> int:
 72        """The sheet's width"""
 73        return self._sheet.width
 74
 75    @property
 76    def height(self) -> int:
 77        """The sheet's height, with `skip_rows` and `nrows` applied"""
 78        return self._sheet.height
 79
 80    @property
 81    def total_height(self) -> int:
 82        """The sheet's total height"""
 83        return self._sheet.total_height
 84
 85    @property
 86    def selected_columns(self) -> list[ColumnInfo]:
 87        """The sheet's selected columns"""
 88        return self._sheet.selected_columns
 89
 90    def available_columns(self) -> list[ColumnInfo]:
 91        """The columns available for the given sheet"""
 92        return self._sheet.available_columns()
 93
 94    @property
 95    def specified_dtypes(self) -> DTypeMap | None:
 96        """The dtypes specified for the sheet"""
 97        return self._sheet.specified_dtypes
 98
 99    @property
100    def visible(self) -> SheetVisible:
101        """The visibility of the sheet"""
102        return self._sheet.visible
103
104    def to_arrow(self) -> "pa.RecordBatch":
105        """Converts the sheet to a pyarrow `RecordBatch`"""
106        if not _PYARROW_AVAILABLE:
107            raise ImportError(
108                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
109            )
110        return self._sheet.to_arrow()
111
112    def to_arrow_with_errors(self) -> "tuple[pa.RecordBatch, CellErrors | None]":
113        """Converts the sheet to a pyarrow `RecordBatch` with error information.
114
115        Stores the positions of any values that cannot be parsed as the specified type and were
116        therefore converted to None.
117        """
118        if not _PYARROW_AVAILABLE:
119            raise ImportError(
120                "pyarrow is required for to_arrow_with_errors(). Install with: pip install 'fastexcel[pyarrow]'"  # noqa: E501
121            )
122        rb, cell_errors = self._sheet.to_arrow_with_errors()
123        if not cell_errors.errors:
124            return (rb, None)
125        return (rb, cell_errors)
126
127    def to_pandas(self) -> "pd.DataFrame":
128        """Converts the sheet to a Pandas `DataFrame`.
129
130        Requires the `pandas` extra to be installed.
131        """
132        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
133        # which we don't implement. Using pyarrow conversion for now.
134        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
135        return self.to_arrow().to_pandas()
136
137    def to_polars(self) -> "pl.DataFrame":
138        """Converts the sheet to a Polars `DataFrame`.
139
140        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
141        Requires the `polars` extra to be installed.
142        """
143        import polars as pl
144
145        return pl.DataFrame(self)
146
147    def __arrow_c_schema__(self) -> object:
148        """Export the schema as an `ArrowSchema` `PyCapsule`.
149
150        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export
151
152        The Arrow PyCapsule Interface enables zero-copy data exchange with
153        Arrow-compatible libraries without requiring PyArrow as a dependency.
154        """
155        return self._sheet.__arrow_c_schema__()
156
157    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
158        """Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.
159
160        The optional `requested_schema` parameter allows for potential schema conversion.
161
162        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export
163
164        The Arrow PyCapsule Interface enables zero-copy data exchange with
165        Arrow-compatible libraries without requiring PyArrow as a dependency.
166        """
167        return self._sheet.__arrow_c_array__(requested_schema)
168
169    def __repr__(self) -> str:
170        return self._sheet.__repr__()

A class representing a single sheet in an Excel File

ExcelSheet(sheet: _ExcelSheet)
62    def __init__(self, sheet: _ExcelSheet) -> None:
63        self._sheet = sheet
name: str
65    @property
66    def name(self) -> str:
67        """The name of the sheet"""
68        return self._sheet.name

The name of the sheet

width: int
70    @property
71    def width(self) -> int:
72        """The sheet's width"""
73        return self._sheet.width

The sheet's width

height: int
75    @property
76    def height(self) -> int:
77        """The sheet's height, with `skip_rows` and `nrows` applied"""
78        return self._sheet.height

The sheet's height, with skip_rows and nrows applied

total_height: int
80    @property
81    def total_height(self) -> int:
82        """The sheet's total height"""
83        return self._sheet.total_height

The sheet's total height

selected_columns: list[ColumnInfo]
85    @property
86    def selected_columns(self) -> list[ColumnInfo]:
87        """The sheet's selected columns"""
88        return self._sheet.selected_columns

The sheet's selected columns

def available_columns(self) -> list[ColumnInfo]:
90    def available_columns(self) -> list[ColumnInfo]:
91        """The columns available for the given sheet"""
92        return self._sheet.available_columns()

The columns available for the given sheet

specified_dtypes: 'DTypeMap | None'
94    @property
95    def specified_dtypes(self) -> DTypeMap | None:
96        """The dtypes specified for the sheet"""
97        return self._sheet.specified_dtypes

The dtypes specified for the sheet

visible: Literal['visible', 'hidden', 'veryhidden']
 99    @property
100    def visible(self) -> SheetVisible:
101        """The visibility of the sheet"""
102        return self._sheet.visible

The visibility of the sheet

def to_arrow(self) -> pyarrow.lib.RecordBatch:
104    def to_arrow(self) -> "pa.RecordBatch":
105        """Converts the sheet to a pyarrow `RecordBatch`"""
106        if not _PYARROW_AVAILABLE:
107            raise ImportError(
108                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
109            )
110        return self._sheet.to_arrow()

Converts the sheet to a pyarrow RecordBatch

def to_arrow_with_errors(self) -> tuple[pyarrow.lib.RecordBatch, CellErrors | None]:
112    def to_arrow_with_errors(self) -> "tuple[pa.RecordBatch, CellErrors | None]":
113        """Converts the sheet to a pyarrow `RecordBatch` with error information.
114
115        Stores the positions of any values that cannot be parsed as the specified type and were
116        therefore converted to None.
117        """
118        if not _PYARROW_AVAILABLE:
119            raise ImportError(
120                "pyarrow is required for to_arrow_with_errors(). Install with: pip install 'fastexcel[pyarrow]'"  # noqa: E501
121            )
122        rb, cell_errors = self._sheet.to_arrow_with_errors()
123        if not cell_errors.errors:
124            return (rb, None)
125        return (rb, cell_errors)

Converts the sheet to a pyarrow RecordBatch with error information.

Stores the positions of any values that cannot be parsed as the specified type and were therefore converted to None.

def to_pandas(self) -> pandas.core.frame.DataFrame:
127    def to_pandas(self) -> "pd.DataFrame":
128        """Converts the sheet to a Pandas `DataFrame`.
129
130        Requires the `pandas` extra to be installed.
131        """
132        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
133        # which we don't implement. Using pyarrow conversion for now.
134        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
135        return self.to_arrow().to_pandas()

Converts the sheet to a Pandas DataFrame.

Requires the pandas extra to be installed.

def to_polars(self) -> polars.dataframe.frame.DataFrame:
137    def to_polars(self) -> "pl.DataFrame":
138        """Converts the sheet to a Polars `DataFrame`.
139
140        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
141        Requires the `polars` extra to be installed.
142        """
143        import polars as pl
144
145        return pl.DataFrame(self)

Converts the sheet to a Polars DataFrame.

Uses the Arrow PyCapsule Interface for zero-copy data exchange. Requires the polars extra to be installed.

DTypeFrom: TypeAlias = Literal['provided_for_all', 'provided_by_index', 'provided_by_name', 'guessed']
ColumnNameFrom: TypeAlias = Literal['provided', 'looked_up', 'generated']
class ColumnInfo:

Metadata about a single column in a sheet.

index

int. The index of the column

column_name_from

fastexcel.ColumnNameFrom. How the name of the column was determined.

One of three possible values:

  • "provided": The column name was provided via the use_columns parameter
  • "looked_up": The column name was looked up from the data found in the sheet
  • "generated": The column name was generated from the column index, either because header_row was None, or because it could not be looked up
dtype_from

fastexcel.DTypeFrom. How the dtype of the column was determined.

One of three possible values:

  • "provided_by_index": The dtype was specified via the column index
  • "provided_by_name": The dtype was specified via the column name
  • "guessed": The dtype was determined from the content of the column
dtype

fastexcel.DType. The dtype of the column

name

str. The name of the column

class CellError:
row_offset

int. The row offset

position

(int, int). The original row and column of the error

offset_position
detail

str. The error message

class CellErrors:
errors
class FastExcelError(builtins.Exception):

The base class for all fastexcel errors

class CannotRetrieveCellDataError(fastexcel.FastExcelError):

Data for a given cell cannot be retrieved

class CalamineCellError(fastexcel.FastExcelError):

calamine returned an error regarding the content of the cell

class CalamineError(fastexcel.FastExcelError):

Generic calamine error

class SheetNotFoundError(fastexcel.FastExcelError):

Sheet was not found

class ColumnNotFoundError(fastexcel.FastExcelError):

Column was not found

class ArrowError(fastexcel.FastExcelError):

Generic arrow error

class InvalidParametersError(fastexcel.FastExcelError):

Provided parameters are invalid

class UnsupportedColumnTypeCombinationError(fastexcel.FastExcelError):

Column contains an unsupported type combination