fastexcel

  1from __future__ import annotations
  2
  3import sys
  4import typing
  5from typing import TYPE_CHECKING, Callable, Literal
  6
  7if sys.version_info < (3, 10):
  8    from typing_extensions import TypeAlias
  9else:
 10    from typing import TypeAlias
 11
 12if TYPE_CHECKING:
 13    import pandas as pd
 14    import polars as pl
 15    import pyarrow as pa
 16
 17from os.path import expanduser
 18from pathlib import Path
 19
 20try:
 21    import importlib.util
 22
 23    importlib.util.find_spec("pyarrow")
 24    _PYARROW_AVAILABLE = True
 25except ImportError:
 26    _PYARROW_AVAILABLE = False
 27
 28from ._fastexcel import (
 29    ArrowError,
 30    CalamineCellError,
 31    CalamineError,
 32    CannotRetrieveCellDataError,
 33    CellError,
 34    CellErrors,
 35    ColumnInfo,
 36    ColumnInfoNoDtype,
 37    ColumnNotFoundError,
 38    FastExcelError,
 39    InvalidParametersError,
 40    SheetNotFoundError,
 41    UnsupportedColumnTypeCombinationError,
 42    __version__,
 43    _ExcelReader,
 44    _ExcelSheet,
 45    _ExcelTable,
 46)
 47from ._fastexcel import read_excel as _read_excel
 48
 49DType = Literal["null", "int", "float", "string", "boolean", "datetime", "date", "duration"]
 50DTypeMap: TypeAlias = "dict[str | int, DType]"
 51ColumnNameFrom: TypeAlias = Literal["provided", "looked_up", "generated"]
 52DTypeFrom: TypeAlias = Literal[
 53    "provided_for_all", "provided_by_index", "provided_by_name", "guessed"
 54]
 55SheetVisible: TypeAlias = Literal["visible", "hidden", "veryhidden"]
 56
 57
 58class ExcelSheet:
 59    """A class representing a single sheet in an Excel File"""
 60
 61    def __init__(self, sheet: _ExcelSheet) -> None:
 62        self._sheet = sheet
 63
 64    @property
 65    def name(self) -> str:
 66        """The name of the sheet"""
 67        return self._sheet.name
 68
 69    @property
 70    def width(self) -> int:
 71        """The sheet's width"""
 72        return self._sheet.width
 73
 74    @property
 75    def height(self) -> int:
 76        """The sheet's height, with `skip_rows` and `nrows` applied"""
 77        return self._sheet.height
 78
 79    @property
 80    def total_height(self) -> int:
 81        """The sheet's total height"""
 82        return self._sheet.total_height
 83
 84    @property
 85    def selected_columns(self) -> list[ColumnInfo]:
 86        """The sheet's selected columns"""
 87        return self._sheet.selected_columns
 88
 89    def available_columns(self) -> list[ColumnInfo]:
 90        """The columns available for the given sheet"""
 91        return self._sheet.available_columns()
 92
 93    @property
 94    def specified_dtypes(self) -> DTypeMap | None:
 95        """The dtypes specified for the sheet"""
 96        return self._sheet.specified_dtypes
 97
 98    @property
 99    def visible(self) -> SheetVisible:
100        """The visibility of the sheet"""
101        return self._sheet.visible
102
103    def to_arrow(self) -> "pa.RecordBatch":
104        """Converts the sheet to a pyarrow `RecordBatch`
105
106        Requires the `pyarrow` extra to be installed.
107        """
108        if not _PYARROW_AVAILABLE:
109            raise ImportError(
110                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
111            )
112        return self._sheet.to_arrow()
113
114    def to_arrow_with_errors(self) -> "tuple[pa.RecordBatch, CellErrors | None]":
115        """Converts the sheet to a pyarrow `RecordBatch` with error information.
116
117        Stores the positions of any values that cannot be parsed as the specified type and were
118        therefore converted to None.
119
120        Requires the `pyarrow` extra to be installed.
121        """
122        if not _PYARROW_AVAILABLE:
123            raise ImportError(
124                "pyarrow is required for to_arrow_with_errors(). Install with: pip install 'fastexcel[pyarrow]'"  # noqa: E501
125            )
126        rb, cell_errors = self._sheet.to_arrow_with_errors()
127        if not cell_errors.errors:
128            return (rb, None)
129        return (rb, cell_errors)
130
131    def to_pandas(self) -> "pd.DataFrame":
132        """Converts the sheet to a Pandas `DataFrame`.
133
134        Requires the `pandas` extra to be installed.
135        """
136        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
137        # which we don't implement. Using pyarrow conversion for now.
138        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
139        return self.to_arrow().to_pandas()
140
141    def to_polars(self) -> "pl.DataFrame":
142        """Converts the sheet to a Polars `DataFrame`.
143
144        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
145        Requires the `polars` extra to be installed.
146        """
147        import polars as pl
148
149        return pl.DataFrame(self)
150
151    def __arrow_c_schema__(self) -> object:
152        """Export the schema as an `ArrowSchema` `PyCapsule`.
153
154        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export
155
156        The Arrow PyCapsule Interface enables zero-copy data exchange with
157        Arrow-compatible libraries without requiring PyArrow as a dependency.
158        """
159        return self._sheet.__arrow_c_schema__()
160
161    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
162        """Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.
163
164        The optional `requested_schema` parameter allows for potential schema conversion.
165
166        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export
167
168        The Arrow PyCapsule Interface enables zero-copy data exchange with
169        Arrow-compatible libraries without requiring PyArrow as a dependency.
170        """
171        return self._sheet.__arrow_c_array__(requested_schema)
172
173    def __repr__(self) -> str:
174        return self._sheet.__repr__()
175
176
177class ExcelTable:
178    """A class representing a single table in an Excel file"""
179
180    def __init__(self, table: _ExcelTable) -> None:
181        self._table = table
182
183    @property
184    def name(self) -> str:
185        """The name of the table"""
186        return self._table.name
187
188    @property
189    def sheet_name(self) -> str:
190        """The name of the sheet this table belongs to"""
191        return self._table.sheet_name
192
193    @property
194    def width(self) -> int:
195        """The table's width"""
196        return self._table.width
197
198    @property
199    def height(self) -> int:
200        """The table's height"""
201        return self._table.height
202
203    @property
204    def total_height(self) -> int:
205        """The table's total height"""
206        return self._table.total_height
207
208    @property
209    def offset(self) -> int:
210        """The table's offset before data starts"""
211        return self._table.offset
212
213    @property
214    def selected_columns(self) -> list[ColumnInfo]:
215        """The table's selected columns"""
216        return self._table.selected_columns
217
218    def available_columns(self) -> list[ColumnInfo]:
219        """The columns available for the given table"""
220        return self._table.available_columns()
221
222    @property
223    def specified_dtypes(self) -> DTypeMap | None:
224        """The dtypes specified for the table"""
225        return self._table.specified_dtypes
226
227    def to_arrow(self) -> "pa.RecordBatch":
228        """Converts the table to a pyarrow `RecordBatch`
229
230        Requires the `pyarrow` extra to be installed.
231        """
232        if not _PYARROW_AVAILABLE:
233            raise ImportError(
234                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
235            )
236        return self._table.to_arrow()
237
238    def to_pandas(self) -> "pd.DataFrame":
239        """Converts the table to a Pandas `DataFrame`.
240
241        Requires the `pandas` extra to be installed.
242        """
243        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
244        # which we don't implement. Using pyarrow conversion for now.
245        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
246        return self.to_arrow().to_pandas()
247
248    def to_polars(self) -> "pl.DataFrame":
249        """Converts the table to a Polars `DataFrame`.
250
251        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
252        Requires the `polars` extra to be installed.
253        """
254        import polars as pl
255
256        return pl.DataFrame(self)
257
258    def __arrow_c_schema__(self) -> object:
259        """Export the schema as an `ArrowSchema` `PyCapsule`.
260
261        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export
262
263        The Arrow PyCapsule Interface enables zero-copy data exchange with
264        Arrow-compatible libraries without requiring PyArrow as a dependency.
265        """
266        return self._table.__arrow_c_schema__()
267
268    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
269        """Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.
270
271        The optional `requested_schema` parameter allows for potential schema conversion.
272
273        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export
274
275        The Arrow PyCapsule Interface enables zero-copy data exchange with
276        Arrow-compatible libraries without requiring PyArrow as a dependency.
277        """
278        return self._table.__arrow_c_array__(requested_schema)
279
280
281class ExcelReader:
282    """A class representing an open Excel file and allowing to read its sheets"""
283
284    def __init__(self, reader: _ExcelReader) -> None:
285        self._reader = reader
286
287    @property
288    def sheet_names(self) -> list[str]:
289        """The list of sheet names"""
290        return self._reader.sheet_names
291
292    @typing.overload
293    def load_sheet(
294        self,
295        idx_or_name: int | str,
296        *,
297        header_row: int | None = 0,
298        column_names: list[str] | None = None,
299        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
300        n_rows: int | None = None,
301        schema_sample_rows: int | None = 1_000,
302        dtype_coercion: Literal["coerce", "strict"] = "coerce",
303        use_columns: list[str]
304        | list[int]
305        | str
306        | Callable[[ColumnInfoNoDtype], bool]
307        | None = None,
308        dtypes: DType | DTypeMap | None = None,
309        eager: Literal[False] = ...,
310    ) -> ExcelSheet: ...
311
312    @typing.overload
313    def load_sheet(
314        self,
315        idx_or_name: int | str,
316        *,
317        header_row: int | None = 0,
318        column_names: list[str] | None = None,
319        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
320        n_rows: int | None = None,
321        schema_sample_rows: int | None = 1_000,
322        dtype_coercion: Literal["coerce", "strict"] = "coerce",
323        use_columns: list[str]
324        | list[int]
325        | str
326        | Callable[[ColumnInfoNoDtype], bool]
327        | None = None,
328        dtypes: DType | DTypeMap | None = None,
329        eager: Literal[True] = ...,
330    ) -> "pa.RecordBatch": ...
331
332    def load_sheet(
333        self,
334        idx_or_name: int | str,
335        *,
336        header_row: int | None = 0,
337        column_names: list[str] | None = None,
338        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
339        n_rows: int | None = None,
340        schema_sample_rows: int | None = 1_000,
341        dtype_coercion: Literal["coerce", "strict"] = "coerce",
342        use_columns: list[str]
343        | list[int]
344        | str
345        | Callable[[ColumnInfoNoDtype], bool]
346        | None = None,
347        dtypes: DType | DTypeMap | None = None,
348        eager: bool = False,
349    ) -> "ExcelSheet | pa.RecordBatch":
350        """Loads a sheet by index or name.
351
352        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
353        :param header_row: The index of the row containing the column labels, default index is 0.
354                           If `None`, the sheet does not have any column labels.
355                           Any rows before the `header_row` will be automatically skipped.
356        :param column_names: Overrides headers found in the document.
357                             If `column_names` is used, `header_row` will be ignored.
358        :param n_rows: Specifies how many rows should be loaded.
359                       If `None`, all rows are loaded
360        :param skip_rows: Specifies which rows should be skipped after the `header_row`.
361                          Any rows before the `header_row` are automatically skipped.
362                          It means row indices are relative to data rows, not the sheet!
363                          Can be one of:
364                          - `int`: Skip this many rows after the header row
365                          - `list[int]`: Skip specific row indices (0-based relative to data rows)
366                          - `Callable[[int], bool]`: Function that receives row index (0-based
367                          relative to data rows) and returns True to skip the row
368                          - `None`: If `header_row` is None, skips empty rows at beginning
369        :param schema_sample_rows: Specifies how many rows should be used to determine
370                                   the dtype of a column. Cannot be 0. A specific dtype can be
371                                   enforced for some or all columns through the `dtypes` parameter.
372                                   If `None`, all rows will be used.
373        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
374                               will try to coerce different dtypes in a column to the same one,
375                               whereas `strict` will raise an error in case a column contains
376                               several dtypes. Note that this only applies to columns whose dtype
377                               is guessed, i.e. not specified via `dtypes`.
378        :param use_columns: Specifies the columns to use. Can either be:
379                            - `None` to select all columns
380                            - A list of strings and ints, the column names and/or indices
381                              (starting at 0)
382                            - A string, a comma separated list of Excel column letters and column
383                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
384                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
385                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
386                              ranges (e.g. `":C"` to select columns from A to C). These can be
387                              combined for "except" patterns (e.g. `":C,E:"` to select everything
388                              except column D)
389                            - A callable, a function that takes a column and returns a boolean
390                              indicating whether the column should be used
391        :param dtypes: An optional dtype (for all columns)
392                       or dict of dtypes with keys as column indices or names.
393        :param eager: Specifies whether the sheet should be loaded eagerly.
394                      `False` (default) will load the sheet lazily using the `PyCapsule` interface,
395                      whereas `True` will load it eagerly via `pyarrow`.
396
397                      Eager loading requires the `pyarrow` extra to be installed.
398        """
399        sheet_or_rb = self._reader.load_sheet(
400            idx_or_name=idx_or_name,
401            header_row=header_row,
402            column_names=column_names,
403            skip_rows=skip_rows,
404            n_rows=n_rows,
405            schema_sample_rows=schema_sample_rows,
406            dtype_coercion=dtype_coercion,
407            use_columns=use_columns,
408            dtypes=dtypes,
409            eager=eager,
410        )
411        return sheet_or_rb if eager else ExcelSheet(sheet_or_rb)
412
413    def table_names(self, sheet_name: str | None = None) -> list[str]:
414        """The list of table names.
415
416        Will return an empty list if no tables are found.
417
418        :param sheet_name: If given, will limit the list to the given sheet, will be faster
419        too.
420        """
421        return self._reader.table_names(sheet_name)
422
423    @typing.overload
424    def load_table(
425        self,
426        name: str,
427        *,
428        header_row: int | None = None,
429        column_names: list[str] | None = None,
430        skip_rows: int | None = None,
431        n_rows: int | None = None,
432        schema_sample_rows: int | None = 1_000,
433        dtype_coercion: Literal["coerce", "strict"] = "coerce",
434        use_columns: list[str]
435        | list[int]
436        | str
437        | Callable[[ColumnInfoNoDtype], bool]
438        | None = None,
439        dtypes: DType | DTypeMap | None = None,
440        eager: Literal[False] = ...,
441    ) -> ExcelTable: ...
442
443    @typing.overload
444    def load_table(
445        self,
446        name: str,
447        *,
448        header_row: int | None = None,
449        column_names: list[str] | None = None,
450        skip_rows: int | None = None,
451        n_rows: int | None = None,
452        schema_sample_rows: int | None = 1_000,
453        dtype_coercion: Literal["coerce", "strict"] = "coerce",
454        use_columns: list[str]
455        | list[int]
456        | str
457        | Callable[[ColumnInfoNoDtype], bool]
458        | None = None,
459        dtypes: DType | DTypeMap | None = None,
460        eager: Literal[True] = ...,
461    ) -> "pa.RecordBatch": ...
462
463    def load_table(
464        self,
465        name: str,
466        *,
467        header_row: int | None = None,
468        column_names: list[str] | None = None,
469        skip_rows: int | None = None,
470        n_rows: int | None = None,
471        schema_sample_rows: int | None = 1_000,
472        dtype_coercion: Literal["coerce", "strict"] = "coerce",
473        use_columns: list[str]
474        | list[int]
475        | str
476        | Callable[[ColumnInfoNoDtype], bool]
477        | None = None,
478        dtypes: DType | DTypeMap | None = None,
479        eager: bool = False,
480    ) -> "ExcelTable | pa.RecordBatch":
481        """Loads a table by name.
482
483        :param name: The name of the table to load.
484        :param header_row: The index of the row containing the column labels.
485                           If `None`, the table's column names will be used.
486                           Any rows before the `header_row` will be automatically skipped.
487        :param column_names: Overrides headers found in the document.
488                             If `column_names` is used, `header_row` will be ignored.
489        :param n_rows: Specifies how many rows should be loaded.
490                       If `None`, all rows are loaded
491        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
492                          Any rows before the `header_row` are automatically skipped.
493                          If `header_row` is `None`, it skips the number of rows from the
494                          start of the sheet.
495        :param schema_sample_rows: Specifies how many rows should be used to determine
496                                   the dtype of a column. Cannot be 0. A specific dtype can be
497                                   enforced for some or all columns through the `dtypes` parameter.
498                                   If `None`, all rows will be used.
499        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
500                               will try to coerce different dtypes in a column to the same one,
501                               whereas `strict` will raise an error in case a column contains
502                               several dtypes. Note that this only applies to columns whose dtype
503                               is guessed, i.e. not specified via `dtypes`.
504        :param use_columns: Specifies the columns to use. Can either be:
505                            - `None` to select all columns
506                            - A list of strings and ints, the column names and/or indices
507                              (starting at 0)
508                            - A string, a comma separated list of Excel column letters and column
509                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
510                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
511                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
512                              ranges (e.g. `":C"` to select columns from A to C). These can be
513                              combined for "except" patterns (e.g. `":C,E:"` to select everything
514                              except column D)
515                            - A callable, a function that takes a column and returns a boolean
516                              indicating whether the column should be used
517        :param dtypes: An optional dtype (for all columns)
518                       or dict of dtypes with keys as column indices or names.
519        :param eager: Specifies whether the table should be loaded eagerly.
520                      `False` (default) will load the table lazily using the `PyCapsule` interface,
521                      whereas `True` will load it eagerly via `pyarrow`.
522
523                      Eager loading requires the `pyarrow` extra to be installed.
524        """
525        if eager:
526            return self._reader.load_table(
527                name=name,
528                header_row=header_row,
529                column_names=column_names,
530                skip_rows=skip_rows,
531                n_rows=n_rows,
532                schema_sample_rows=schema_sample_rows,
533                dtype_coercion=dtype_coercion,
534                use_columns=use_columns,
535                dtypes=dtypes,
536                eager=True,
537            )
538        else:
539            return ExcelTable(
540                self._reader.load_table(
541                    name=name,
542                    header_row=header_row,
543                    column_names=column_names,
544                    skip_rows=skip_rows,
545                    n_rows=n_rows,
546                    schema_sample_rows=schema_sample_rows,
547                    dtype_coercion=dtype_coercion,
548                    use_columns=use_columns,
549                    dtypes=dtypes,
550                    eager=False,
551                )
552            )
553
554    def load_sheet_eager(
555        self,
556        idx_or_name: int | str,
557        *,
558        header_row: int | None = 0,
559        column_names: list[str] | None = None,
560        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
561        n_rows: int | None = None,
562        schema_sample_rows: int | None = 1_000,
563        dtype_coercion: Literal["coerce", "strict"] = "coerce",
564        use_columns: list[str] | list[int] | str | None = None,
565        dtypes: DType | DTypeMap | None = None,
566    ) -> "pa.RecordBatch":
567        """Loads a sheet eagerly by index or name.
568
569        For xlsx files, this will be faster and more memory-efficient, as it will use
570        `worksheet_range_ref` under the hood, which returns borrowed types.
571
572        Refer to `load_sheet` for parameter documentation
573
574        Requires the `pyarrow` extra to be installed.
575        """
576        return self._reader.load_sheet(
577            idx_or_name=idx_or_name,
578            header_row=header_row,
579            column_names=column_names,
580            skip_rows=skip_rows,
581            n_rows=n_rows,
582            schema_sample_rows=schema_sample_rows,
583            dtype_coercion=dtype_coercion,
584            use_columns=use_columns,
585            dtypes=dtypes,
586            eager=True,
587        )
588
589    def load_sheet_by_name(
590        self,
591        name: str,
592        *,
593        header_row: int | None = 0,
594        column_names: list[str] | None = None,
595        skip_rows: int | None = None,
596        n_rows: int | None = None,
597        schema_sample_rows: int | None = 1_000,
598        dtype_coercion: Literal["coerce", "strict"] = "coerce",
599        use_columns: list[str]
600        | list[int]
601        | str
602        | Callable[[ColumnInfoNoDtype], bool]
603        | None = None,
604        dtypes: DType | DTypeMap | None = None,
605    ) -> ExcelSheet:
606        """Loads a sheet by name.
607
608        Refer to `load_sheet` for parameter documentation
609        """
610        return self.load_sheet(
611            name,
612            header_row=header_row,
613            column_names=column_names,
614            skip_rows=skip_rows,
615            n_rows=n_rows,
616            schema_sample_rows=schema_sample_rows,
617            dtype_coercion=dtype_coercion,
618            use_columns=use_columns,
619            dtypes=dtypes,
620        )
621
622    def load_sheet_by_idx(
623        self,
624        idx: int,
625        *,
626        header_row: int | None = 0,
627        column_names: list[str] | None = None,
628        skip_rows: int | None = None,
629        n_rows: int | None = None,
630        schema_sample_rows: int | None = 1_000,
631        dtype_coercion: Literal["coerce", "strict"] = "coerce",
632        use_columns: list[str]
633        | list[int]
634        | str
635        | Callable[[ColumnInfoNoDtype], bool]
636        | None = None,
637        dtypes: DType | DTypeMap | None = None,
638    ) -> ExcelSheet:
639        """Loads a sheet by index.
640
641        Refer to `load_sheet` for parameter documentation
642        """
643        return self.load_sheet(
644            idx,
645            header_row=header_row,
646            column_names=column_names,
647            skip_rows=skip_rows,
648            n_rows=n_rows,
649            schema_sample_rows=schema_sample_rows,
650            dtype_coercion=dtype_coercion,
651            use_columns=use_columns,
652            dtypes=dtypes,
653        )
654
655    def __repr__(self) -> str:
656        return self._reader.__repr__()
657
658
659def read_excel(source: Path | str | bytes) -> ExcelReader:
660    """Opens and loads an excel file.
661
662    :param source: The path to a file or its content as bytes
663    """
664    if isinstance(source, (str, Path)):
665        source = expanduser(source)
666    return ExcelReader(_read_excel(source))
667
668
669__all__ = (
670    # version
671    "__version__",
672    # main entrypoint
673    "read_excel",
674    # Python types
675    "DType",
676    "DTypeMap",
677    # Excel reader
678    "ExcelReader",
679    # Excel sheet
680    "ExcelSheet",
681    # Excel table
682    "ExcelTable",
683    # Column metadata
684    "DTypeFrom",
685    "ColumnNameFrom",
686    "ColumnInfo",
687    # Parse error information
688    "CellError",
689    "CellErrors",
690    # Exceptions
691    "FastExcelError",
692    "CannotRetrieveCellDataError",
693    "CalamineCellError",
694    "CalamineError",
695    "SheetNotFoundError",
696    "ColumnNotFoundError",
697    "ArrowError",
698    "InvalidParametersError",
699    "UnsupportedColumnTypeCombinationError",
700)
__version__ = '0.15.1'
def read_excel(source: pathlib.Path | str | bytes) -> ExcelReader:
660def read_excel(source: Path | str | bytes) -> ExcelReader:
661    """Opens and loads an excel file.
662
663    :param source: The path to a file or its content as bytes
664    """
665    if isinstance(source, (str, Path)):
666        source = expanduser(source)
667    return ExcelReader(_read_excel(source))

Opens and loads an excel file.

Parameters
  • source: The path to a file or its content as bytes
DType = typing.Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']
DTypeMap: TypeAlias = 'dict[str | int, DType]'
class ExcelReader:
282class ExcelReader:
283    """A class representing an open Excel file and allowing to read its sheets"""
284
285    def __init__(self, reader: _ExcelReader) -> None:
286        self._reader = reader
287
288    @property
289    def sheet_names(self) -> list[str]:
290        """The list of sheet names"""
291        return self._reader.sheet_names
292
293    @typing.overload
294    def load_sheet(
295        self,
296        idx_or_name: int | str,
297        *,
298        header_row: int | None = 0,
299        column_names: list[str] | None = None,
300        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
301        n_rows: int | None = None,
302        schema_sample_rows: int | None = 1_000,
303        dtype_coercion: Literal["coerce", "strict"] = "coerce",
304        use_columns: list[str]
305        | list[int]
306        | str
307        | Callable[[ColumnInfoNoDtype], bool]
308        | None = None,
309        dtypes: DType | DTypeMap | None = None,
310        eager: Literal[False] = ...,
311    ) -> ExcelSheet: ...
312
313    @typing.overload
314    def load_sheet(
315        self,
316        idx_or_name: int | str,
317        *,
318        header_row: int | None = 0,
319        column_names: list[str] | None = None,
320        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
321        n_rows: int | None = None,
322        schema_sample_rows: int | None = 1_000,
323        dtype_coercion: Literal["coerce", "strict"] = "coerce",
324        use_columns: list[str]
325        | list[int]
326        | str
327        | Callable[[ColumnInfoNoDtype], bool]
328        | None = None,
329        dtypes: DType | DTypeMap | None = None,
330        eager: Literal[True] = ...,
331    ) -> "pa.RecordBatch": ...
332
333    def load_sheet(
334        self,
335        idx_or_name: int | str,
336        *,
337        header_row: int | None = 0,
338        column_names: list[str] | None = None,
339        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
340        n_rows: int | None = None,
341        schema_sample_rows: int | None = 1_000,
342        dtype_coercion: Literal["coerce", "strict"] = "coerce",
343        use_columns: list[str]
344        | list[int]
345        | str
346        | Callable[[ColumnInfoNoDtype], bool]
347        | None = None,
348        dtypes: DType | DTypeMap | None = None,
349        eager: bool = False,
350    ) -> "ExcelSheet | pa.RecordBatch":
351        """Loads a sheet by index or name.
352
353        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
354        :param header_row: The index of the row containing the column labels, default index is 0.
355                           If `None`, the sheet does not have any column labels.
356                           Any rows before the `header_row` will be automatically skipped.
357        :param column_names: Overrides headers found in the document.
358                             If `column_names` is used, `header_row` will be ignored.
359        :param n_rows: Specifies how many rows should be loaded.
360                       If `None`, all rows are loaded
361        :param skip_rows: Specifies which rows should be skipped after the `header_row`.
362                          Any rows before the `header_row` are automatically skipped.
363                          It means row indices are relative to data rows, not the sheet!
364                          Can be one of:
365                          - `int`: Skip this many rows after the header row
366                          - `list[int]`: Skip specific row indices (0-based relative to data rows)
367                          - `Callable[[int], bool]`: Function that receives row index (0-based
368                          relative to data rows) and returns True to skip the row
369                          - `None`: If `header_row` is None, skips empty rows at beginning
370        :param schema_sample_rows: Specifies how many rows should be used to determine
371                                   the dtype of a column. Cannot be 0. A specific dtype can be
372                                   enforced for some or all columns through the `dtypes` parameter.
373                                   If `None`, all rows will be used.
374        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
375                               will try to coerce different dtypes in a column to the same one,
376                               whereas `strict` will raise an error in case a column contains
377                               several dtypes. Note that this only applies to columns whose dtype
378                               is guessed, i.e. not specified via `dtypes`.
379        :param use_columns: Specifies the columns to use. Can either be:
380                            - `None` to select all columns
381                            - A list of strings and ints, the column names and/or indices
382                              (starting at 0)
383                            - A string, a comma separated list of Excel column letters and column
384                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
385                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
386                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
387                              ranges (e.g. `":C"` to select columns from A to C). These can be
388                              combined for "except" patterns (e.g. `":C,E:"` to select everything
389                              except column D)
390                            - A callable, a function that takes a column and returns a boolean
391                              indicating whether the column should be used
392        :param dtypes: An optional dtype (for all columns)
393                       or dict of dtypes with keys as column indices or names.
394        :param eager: Specifies whether the sheet should be loaded eagerly.
395                      `False` (default) will load the sheet lazily using the `PyCapsule` interface,
396                      whereas `True` will load it eagerly via `pyarrow`.
397
398                      Eager loading requires the `pyarrow` extra to be installed.
399        """
400        sheet_or_rb = self._reader.load_sheet(
401            idx_or_name=idx_or_name,
402            header_row=header_row,
403            column_names=column_names,
404            skip_rows=skip_rows,
405            n_rows=n_rows,
406            schema_sample_rows=schema_sample_rows,
407            dtype_coercion=dtype_coercion,
408            use_columns=use_columns,
409            dtypes=dtypes,
410            eager=eager,
411        )
412        return sheet_or_rb if eager else ExcelSheet(sheet_or_rb)
413
414    def table_names(self, sheet_name: str | None = None) -> list[str]:
415        """The list of table names.
416
417        Will return an empty list if no tables are found.
418
419        :param sheet_name: If given, will limit the list to the given sheet, will be faster
420        too.
421        """
422        return self._reader.table_names(sheet_name)
423
424    @typing.overload
425    def load_table(
426        self,
427        name: str,
428        *,
429        header_row: int | None = None,
430        column_names: list[str] | None = None,
431        skip_rows: int | None = None,
432        n_rows: int | None = None,
433        schema_sample_rows: int | None = 1_000,
434        dtype_coercion: Literal["coerce", "strict"] = "coerce",
435        use_columns: list[str]
436        | list[int]
437        | str
438        | Callable[[ColumnInfoNoDtype], bool]
439        | None = None,
440        dtypes: DType | DTypeMap | None = None,
441        eager: Literal[False] = ...,
442    ) -> ExcelTable: ...
443
444    @typing.overload
445    def load_table(
446        self,
447        name: str,
448        *,
449        header_row: int | None = None,
450        column_names: list[str] | None = None,
451        skip_rows: int | None = None,
452        n_rows: int | None = None,
453        schema_sample_rows: int | None = 1_000,
454        dtype_coercion: Literal["coerce", "strict"] = "coerce",
455        use_columns: list[str]
456        | list[int]
457        | str
458        | Callable[[ColumnInfoNoDtype], bool]
459        | None = None,
460        dtypes: DType | DTypeMap | None = None,
461        eager: Literal[True] = ...,
462    ) -> "pa.RecordBatch": ...
463
464    def load_table(
465        self,
466        name: str,
467        *,
468        header_row: int | None = None,
469        column_names: list[str] | None = None,
470        skip_rows: int | None = None,
471        n_rows: int | None = None,
472        schema_sample_rows: int | None = 1_000,
473        dtype_coercion: Literal["coerce", "strict"] = "coerce",
474        use_columns: list[str]
475        | list[int]
476        | str
477        | Callable[[ColumnInfoNoDtype], bool]
478        | None = None,
479        dtypes: DType | DTypeMap | None = None,
480        eager: bool = False,
481    ) -> "ExcelTable | pa.RecordBatch":
482        """Loads a table by name.
483
484        :param name: The name of the table to load.
485        :param header_row: The index of the row containing the column labels.
486                           If `None`, the table's column names will be used.
487                           Any rows before the `header_row` will be automatically skipped.
488        :param column_names: Overrides headers found in the document.
489                             If `column_names` is used, `header_row` will be ignored.
490        :param n_rows: Specifies how many rows should be loaded.
491                       If `None`, all rows are loaded
492        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
493                          Any rows before the `header_row` are automatically skipped.
494                          If `header_row` is `None`, it skips the number of rows from the
495                          start of the sheet.
496        :param schema_sample_rows: Specifies how many rows should be used to determine
497                                   the dtype of a column. Cannot be 0. A specific dtype can be
498                                   enforced for some or all columns through the `dtypes` parameter.
499                                   If `None`, all rows will be used.
500        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
501                               will try to coerce different dtypes in a column to the same one,
502                               whereas `strict` will raise an error in case a column contains
503                               several dtypes. Note that this only applies to columns whose dtype
504                               is guessed, i.e. not specified via `dtypes`.
505        :param use_columns: Specifies the columns to use. Can either be:
506                            - `None` to select all columns
507                            - A list of strings and ints, the column names and/or indices
508                              (starting at 0)
509                            - A string, a comma separated list of Excel column letters and column
510                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
511                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
512                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
513                              ranges (e.g. `":C"` to select columns from A to C). These can be
514                              combined for "except" patterns (e.g. `":C,E:"` to select everything
515                              except column D)
516                            - A callable, a function that takes a column and returns a boolean
517                              indicating whether the column should be used
518        :param dtypes: An optional dtype (for all columns)
519                       or dict of dtypes with keys as column indices or names.
520        :param eager: Specifies whether the table should be loaded eagerly.
521                      `False` (default) will load the table lazily using the `PyCapsule` interface,
522                      whereas `True` will load it eagerly via `pyarrow`.
523
524                      Eager loading requires the `pyarrow` extra to be installed.
525        """
526        if eager:
527            return self._reader.load_table(
528                name=name,
529                header_row=header_row,
530                column_names=column_names,
531                skip_rows=skip_rows,
532                n_rows=n_rows,
533                schema_sample_rows=schema_sample_rows,
534                dtype_coercion=dtype_coercion,
535                use_columns=use_columns,
536                dtypes=dtypes,
537                eager=True,
538            )
539        else:
540            return ExcelTable(
541                self._reader.load_table(
542                    name=name,
543                    header_row=header_row,
544                    column_names=column_names,
545                    skip_rows=skip_rows,
546                    n_rows=n_rows,
547                    schema_sample_rows=schema_sample_rows,
548                    dtype_coercion=dtype_coercion,
549                    use_columns=use_columns,
550                    dtypes=dtypes,
551                    eager=False,
552                )
553            )
554
555    def load_sheet_eager(
556        self,
557        idx_or_name: int | str,
558        *,
559        header_row: int | None = 0,
560        column_names: list[str] | None = None,
561        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
562        n_rows: int | None = None,
563        schema_sample_rows: int | None = 1_000,
564        dtype_coercion: Literal["coerce", "strict"] = "coerce",
565        use_columns: list[str] | list[int] | str | None = None,
566        dtypes: DType | DTypeMap | None = None,
567    ) -> "pa.RecordBatch":
568        """Loads a sheet eagerly by index or name.
569
570        For xlsx files, this will be faster and more memory-efficient, as it will use
571        `worksheet_range_ref` under the hood, which returns borrowed types.
572
573        Refer to `load_sheet` for parameter documentation
574
575        Requires the `pyarrow` extra to be installed.
576        """
577        return self._reader.load_sheet(
578            idx_or_name=idx_or_name,
579            header_row=header_row,
580            column_names=column_names,
581            skip_rows=skip_rows,
582            n_rows=n_rows,
583            schema_sample_rows=schema_sample_rows,
584            dtype_coercion=dtype_coercion,
585            use_columns=use_columns,
586            dtypes=dtypes,
587            eager=True,
588        )
589
590    def load_sheet_by_name(
591        self,
592        name: str,
593        *,
594        header_row: int | None = 0,
595        column_names: list[str] | None = None,
596        skip_rows: int | None = None,
597        n_rows: int | None = None,
598        schema_sample_rows: int | None = 1_000,
599        dtype_coercion: Literal["coerce", "strict"] = "coerce",
600        use_columns: list[str]
601        | list[int]
602        | str
603        | Callable[[ColumnInfoNoDtype], bool]
604        | None = None,
605        dtypes: DType | DTypeMap | None = None,
606    ) -> ExcelSheet:
607        """Loads a sheet by name.
608
609        Refer to `load_sheet` for parameter documentation
610        """
611        return self.load_sheet(
612            name,
613            header_row=header_row,
614            column_names=column_names,
615            skip_rows=skip_rows,
616            n_rows=n_rows,
617            schema_sample_rows=schema_sample_rows,
618            dtype_coercion=dtype_coercion,
619            use_columns=use_columns,
620            dtypes=dtypes,
621        )
622
623    def load_sheet_by_idx(
624        self,
625        idx: int,
626        *,
627        header_row: int | None = 0,
628        column_names: list[str] | None = None,
629        skip_rows: int | None = None,
630        n_rows: int | None = None,
631        schema_sample_rows: int | None = 1_000,
632        dtype_coercion: Literal["coerce", "strict"] = "coerce",
633        use_columns: list[str]
634        | list[int]
635        | str
636        | Callable[[ColumnInfoNoDtype], bool]
637        | None = None,
638        dtypes: DType | DTypeMap | None = None,
639    ) -> ExcelSheet:
640        """Loads a sheet by index.
641
642        Refer to `load_sheet` for parameter documentation
643        """
644        return self.load_sheet(
645            idx,
646            header_row=header_row,
647            column_names=column_names,
648            skip_rows=skip_rows,
649            n_rows=n_rows,
650            schema_sample_rows=schema_sample_rows,
651            dtype_coercion=dtype_coercion,
652            use_columns=use_columns,
653            dtypes=dtypes,
654        )
655
656    def __repr__(self) -> str:
657        return self._reader.__repr__()

A class representing an open Excel file and allowing to read its sheets

ExcelReader(reader: _ExcelReader)
285    def __init__(self, reader: _ExcelReader) -> None:
286        self._reader = reader
sheet_names: list[str]
288    @property
289    def sheet_names(self) -> list[str]:
290        """The list of sheet names"""
291        return self._reader.sheet_names

The list of sheet names

def load_sheet( self, idx_or_name: int | str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: Union[int, list[int], Callable[[int], bool], NoneType] = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None, eager: bool = False) -> ExcelSheet | pyarrow.lib.RecordBatch:
333    def load_sheet(
334        self,
335        idx_or_name: int | str,
336        *,
337        header_row: int | None = 0,
338        column_names: list[str] | None = None,
339        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
340        n_rows: int | None = None,
341        schema_sample_rows: int | None = 1_000,
342        dtype_coercion: Literal["coerce", "strict"] = "coerce",
343        use_columns: list[str]
344        | list[int]
345        | str
346        | Callable[[ColumnInfoNoDtype], bool]
347        | None = None,
348        dtypes: DType | DTypeMap | None = None,
349        eager: bool = False,
350    ) -> "ExcelSheet | pa.RecordBatch":
351        """Loads a sheet by index or name.
352
353        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
354        :param header_row: The index of the row containing the column labels, default index is 0.
355                           If `None`, the sheet does not have any column labels.
356                           Any rows before the `header_row` will be automatically skipped.
357        :param column_names: Overrides headers found in the document.
358                             If `column_names` is used, `header_row` will be ignored.
359        :param n_rows: Specifies how many rows should be loaded.
360                       If `None`, all rows are loaded
361        :param skip_rows: Specifies which rows should be skipped after the `header_row`.
362                          Any rows before the `header_row` are automatically skipped.
363                          It means row indices are relative to data rows, not the sheet!
364                          Can be one of:
365                          - `int`: Skip this many rows after the header row
366                          - `list[int]`: Skip specific row indices (0-based relative to data rows)
367                          - `Callable[[int], bool]`: Function that receives row index (0-based
368                          relative to data rows) and returns True to skip the row
369                          - `None`: If `header_row` is None, skips empty rows at beginning
370        :param schema_sample_rows: Specifies how many rows should be used to determine
371                                   the dtype of a column. Cannot be 0. A specific dtype can be
372                                   enforced for some or all columns through the `dtypes` parameter.
373                                   If `None`, all rows will be used.
374        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
375                               will try to coerce different dtypes in a column to the same one,
376                               whereas `strict` will raise an error in case a column contains
377                               several dtypes. Note that this only applies to columns whose dtype
378                               is guessed, i.e. not specified via `dtypes`.
379        :param use_columns: Specifies the columns to use. Can either be:
380                            - `None` to select all columns
381                            - A list of strings and ints, the column names and/or indices
382                              (starting at 0)
383                            - A string, a comma separated list of Excel column letters and column
384                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
385                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
386                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
387                              ranges (e.g. `":C"` to select columns from A to C). These can be
388                              combined for "except" patterns (e.g. `":C,E:"` to select everything
389                              except column D)
390                            - A callable, a function that takes a column and returns a boolean
391                              indicating whether the column should be used
392        :param dtypes: An optional dtype (for all columns)
393                       or dict of dtypes with keys as column indices or names.
394        :param eager: Specifies whether the sheet should be loaded eagerly.
395                      `False` (default) will load the sheet lazily using the `PyCapsule` interface,
396                      whereas `True` will load it eagerly via `pyarrow`.
397
398                      Eager loading requires the `pyarrow` extra to be installed.
399        """
400        sheet_or_rb = self._reader.load_sheet(
401            idx_or_name=idx_or_name,
402            header_row=header_row,
403            column_names=column_names,
404            skip_rows=skip_rows,
405            n_rows=n_rows,
406            schema_sample_rows=schema_sample_rows,
407            dtype_coercion=dtype_coercion,
408            use_columns=use_columns,
409            dtypes=dtypes,
410            eager=eager,
411        )
412        return sheet_or_rb if eager else ExcelSheet(sheet_or_rb)

Loads a sheet by index or name.

Parameters
  • idx_or_name: The index (starting at 0) or the name of the sheet to load.
  • header_row: The index of the row containing the column labels, default index is 0. If None, the sheet does not have any column labels. Any rows before the header_row will be automatically skipped.
  • column_names: Overrides headers found in the document. If column_names is used, header_row will be ignored.
  • n_rows: Specifies how many rows should be loaded. If None, all rows are loaded
  • skip_rows: Specifies which rows should be skipped after the header_row. Any rows before the header_row are automatically skipped. It means row indices are relative to data rows, not the sheet! Can be one of:
    • int: Skip this many rows after the header row
    • list[int]: Skip specific row indices (0-based relative to data rows)
    • Callable[[int], bool]: Function that receives row index (0-based relative to data rows) and returns True to skip the row
    • None: If header_row is None, skips empty rows at beginning
  • schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. Cannot be 0. A specific dtype can be enforced for some or all columns through the dtypes parameter. If None, all rows will be used.
  • dtype_coercion: Specifies how type coercion should behave. coerce (the default) will try to coerce different dtypes in a column to the same one, whereas strict will raise an error in case a column contains several dtypes. Note that this only applies to columns whose dtype is guessed, i.e. not specified via dtypes.
  • use_columns: Specifies the columns to use. Can either be:
    • None to select all columns
    • A list of strings and ints, the column names and/or indices (starting at 0)
    • A string, a comma separated list of Excel column letters and column ranges (e.g. "A:E" or "A,C,E:F", which would result in A,B,C,D,E and A,C,E,F). Also supports open-ended ranges (e.g. "B:" to select all columns from B onwards) and from-beginning ranges (e.g. ":C" to select columns from A to C). These can be combined for "except" patterns (e.g. ":C,E:" to select everything except column D)
    • A callable, a function that takes a column and returns a boolean indicating whether the column should be used
  • dtypes: An optional dtype (for all columns) or dict of dtypes with keys as column indices or names.
  • eager: Specifies whether the sheet should be loaded eagerly. False (default) will load the sheet lazily using the PyCapsule interface, whereas True will load it eagerly via pyarrow.

          Eager loading requires the `pyarrow` extra to be installed.
    
def table_names(self, sheet_name: str | None = None) -> list[str]:
414    def table_names(self, sheet_name: str | None = None) -> list[str]:
415        """The list of table names.
416
417        Will return an empty list if no tables are found.
418
419        :param sheet_name: If given, will limit the list to the given sheet, will be faster
420        too.
421        """
422        return self._reader.table_names(sheet_name)

The list of table names.

Will return an empty list if no tables are found.

Parameters
  • sheet_name: If given, will limit the list to the given sheet, will be faster too.
def load_table( self, name: str, *, header_row: int | None = None, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None, eager: bool = False) -> ExcelTable | pyarrow.lib.RecordBatch:
464    def load_table(
465        self,
466        name: str,
467        *,
468        header_row: int | None = None,
469        column_names: list[str] | None = None,
470        skip_rows: int | None = None,
471        n_rows: int | None = None,
472        schema_sample_rows: int | None = 1_000,
473        dtype_coercion: Literal["coerce", "strict"] = "coerce",
474        use_columns: list[str]
475        | list[int]
476        | str
477        | Callable[[ColumnInfoNoDtype], bool]
478        | None = None,
479        dtypes: DType | DTypeMap | None = None,
480        eager: bool = False,
481    ) -> "ExcelTable | pa.RecordBatch":
482        """Loads a table by name.
483
484        :param name: The name of the table to load.
485        :param header_row: The index of the row containing the column labels.
486                           If `None`, the table's column names will be used.
487                           Any rows before the `header_row` will be automatically skipped.
488        :param column_names: Overrides headers found in the document.
489                             If `column_names` is used, `header_row` will be ignored.
490        :param n_rows: Specifies how many rows should be loaded.
491                       If `None`, all rows are loaded
492        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
493                          Any rows before the `header_row` are automatically skipped.
494                          If `header_row` is `None`, it skips the number of rows from the
495                          start of the sheet.
496        :param schema_sample_rows: Specifies how many rows should be used to determine
497                                   the dtype of a column. Cannot be 0. A specific dtype can be
498                                   enforced for some or all columns through the `dtypes` parameter.
499                                   If `None`, all rows will be used.
500        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
501                               will try to coerce different dtypes in a column to the same one,
502                               whereas `strict` will raise an error in case a column contains
503                               several dtypes. Note that this only applies to columns whose dtype
504                               is guessed, i.e. not specified via `dtypes`.
505        :param use_columns: Specifies the columns to use. Can either be:
506                            - `None` to select all columns
507                            - A list of strings and ints, the column names and/or indices
508                              (starting at 0)
509                            - A string, a comma separated list of Excel column letters and column
510                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
511                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
512                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
513                              ranges (e.g. `":C"` to select columns from A to C). These can be
514                              combined for "except" patterns (e.g. `":C,E:"` to select everything
515                              except column D)
516                            - A callable, a function that takes a column and returns a boolean
517                              indicating whether the column should be used
518        :param dtypes: An optional dtype (for all columns)
519                       or dict of dtypes with keys as column indices or names.
520        :param eager: Specifies whether the table should be loaded eagerly.
521                      `False` (default) will load the table lazily using the `PyCapsule` interface,
522                      whereas `True` will load it eagerly via `pyarrow`.
523
524                      Eager loading requires the `pyarrow` extra to be installed.
525        """
526        if eager:
527            return self._reader.load_table(
528                name=name,
529                header_row=header_row,
530                column_names=column_names,
531                skip_rows=skip_rows,
532                n_rows=n_rows,
533                schema_sample_rows=schema_sample_rows,
534                dtype_coercion=dtype_coercion,
535                use_columns=use_columns,
536                dtypes=dtypes,
537                eager=True,
538            )
539        else:
540            return ExcelTable(
541                self._reader.load_table(
542                    name=name,
543                    header_row=header_row,
544                    column_names=column_names,
545                    skip_rows=skip_rows,
546                    n_rows=n_rows,
547                    schema_sample_rows=schema_sample_rows,
548                    dtype_coercion=dtype_coercion,
549                    use_columns=use_columns,
550                    dtypes=dtypes,
551                    eager=False,
552                )
553            )

Loads a table by name.

Parameters
  • name: The name of the table to load.
  • header_row: The index of the row containing the column labels. If None, the table's column names will be used. Any rows before the header_row will be automatically skipped.
  • column_names: Overrides headers found in the document. If column_names is used, header_row will be ignored.
  • n_rows: Specifies how many rows should be loaded. If None, all rows are loaded
  • skip_rows: Specifies how many rows should be skipped after the header_row. Any rows before the header_row are automatically skipped. If header_row is None, it skips the number of rows from the start of the sheet.
  • schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. Cannot be 0. A specific dtype can be enforced for some or all columns through the dtypes parameter. If None, all rows will be used.
  • dtype_coercion: Specifies how type coercion should behave. coerce (the default) will try to coerce different dtypes in a column to the same one, whereas strict will raise an error in case a column contains several dtypes. Note that this only applies to columns whose dtype is guessed, i.e. not specified via dtypes.
  • use_columns: Specifies the columns to use. Can either be:
    • None to select all columns
    • A list of strings and ints, the column names and/or indices (starting at 0)
    • A string, a comma separated list of Excel column letters and column ranges (e.g. "A:E" or "A,C,E:F", which would result in A,B,C,D,E and A,C,E,F). Also supports open-ended ranges (e.g. "B:" to select all columns from B onwards) and from-beginning ranges (e.g. ":C" to select columns from A to C). These can be combined for "except" patterns (e.g. ":C,E:" to select everything except column D)
    • A callable, a function that takes a column and returns a boolean indicating whether the column should be used
  • dtypes: An optional dtype (for all columns) or dict of dtypes with keys as column indices or names.
  • eager: Specifies whether the table should be loaded eagerly. False (default) will load the table lazily using the PyCapsule interface, whereas True will load it eagerly via pyarrow.

          Eager loading requires the `pyarrow` extra to be installed.
    
def load_sheet_eager( self, idx_or_name: int | str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: Union[int, list[int], Callable[[int], bool], NoneType] = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: list[str] | list[int] | str | None = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> pyarrow.lib.RecordBatch:
555    def load_sheet_eager(
556        self,
557        idx_or_name: int | str,
558        *,
559        header_row: int | None = 0,
560        column_names: list[str] | None = None,
561        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
562        n_rows: int | None = None,
563        schema_sample_rows: int | None = 1_000,
564        dtype_coercion: Literal["coerce", "strict"] = "coerce",
565        use_columns: list[str] | list[int] | str | None = None,
566        dtypes: DType | DTypeMap | None = None,
567    ) -> "pa.RecordBatch":
568        """Loads a sheet eagerly by index or name.
569
570        For xlsx files, this will be faster and more memory-efficient, as it will use
571        `worksheet_range_ref` under the hood, which returns borrowed types.
572
573        Refer to `load_sheet` for parameter documentation
574
575        Requires the `pyarrow` extra to be installed.
576        """
577        return self._reader.load_sheet(
578            idx_or_name=idx_or_name,
579            header_row=header_row,
580            column_names=column_names,
581            skip_rows=skip_rows,
582            n_rows=n_rows,
583            schema_sample_rows=schema_sample_rows,
584            dtype_coercion=dtype_coercion,
585            use_columns=use_columns,
586            dtypes=dtypes,
587            eager=True,
588        )

Loads a sheet eagerly by index or name.

For xlsx files, this will be faster and more memory-efficient, as it will use worksheet_range_ref under the hood, which returns borrowed types.

Refer to load_sheet for parameter documentation

Requires the pyarrow extra to be installed.

def load_sheet_by_name( self, name: str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
590    def load_sheet_by_name(
591        self,
592        name: str,
593        *,
594        header_row: int | None = 0,
595        column_names: list[str] | None = None,
596        skip_rows: int | None = None,
597        n_rows: int | None = None,
598        schema_sample_rows: int | None = 1_000,
599        dtype_coercion: Literal["coerce", "strict"] = "coerce",
600        use_columns: list[str]
601        | list[int]
602        | str
603        | Callable[[ColumnInfoNoDtype], bool]
604        | None = None,
605        dtypes: DType | DTypeMap | None = None,
606    ) -> ExcelSheet:
607        """Loads a sheet by name.
608
609        Refer to `load_sheet` for parameter documentation
610        """
611        return self.load_sheet(
612            name,
613            header_row=header_row,
614            column_names=column_names,
615            skip_rows=skip_rows,
616            n_rows=n_rows,
617            schema_sample_rows=schema_sample_rows,
618            dtype_coercion=dtype_coercion,
619            use_columns=use_columns,
620            dtypes=dtypes,
621        )

Loads a sheet by name.

Refer to load_sheet for parameter documentation

def load_sheet_by_idx( self, idx: int, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
623    def load_sheet_by_idx(
624        self,
625        idx: int,
626        *,
627        header_row: int | None = 0,
628        column_names: list[str] | None = None,
629        skip_rows: int | None = None,
630        n_rows: int | None = None,
631        schema_sample_rows: int | None = 1_000,
632        dtype_coercion: Literal["coerce", "strict"] = "coerce",
633        use_columns: list[str]
634        | list[int]
635        | str
636        | Callable[[ColumnInfoNoDtype], bool]
637        | None = None,
638        dtypes: DType | DTypeMap | None = None,
639    ) -> ExcelSheet:
640        """Loads a sheet by index.
641
642        Refer to `load_sheet` for parameter documentation
643        """
644        return self.load_sheet(
645            idx,
646            header_row=header_row,
647            column_names=column_names,
648            skip_rows=skip_rows,
649            n_rows=n_rows,
650            schema_sample_rows=schema_sample_rows,
651            dtype_coercion=dtype_coercion,
652            use_columns=use_columns,
653            dtypes=dtypes,
654        )

Loads a sheet by index.

Refer to load_sheet for parameter documentation

class ExcelSheet:
 59class ExcelSheet:
 60    """A class representing a single sheet in an Excel File"""
 61
 62    def __init__(self, sheet: _ExcelSheet) -> None:
 63        self._sheet = sheet
 64
 65    @property
 66    def name(self) -> str:
 67        """The name of the sheet"""
 68        return self._sheet.name
 69
 70    @property
 71    def width(self) -> int:
 72        """The sheet's width"""
 73        return self._sheet.width
 74
 75    @property
 76    def height(self) -> int:
 77        """The sheet's height, with `skip_rows` and `nrows` applied"""
 78        return self._sheet.height
 79
 80    @property
 81    def total_height(self) -> int:
 82        """The sheet's total height"""
 83        return self._sheet.total_height
 84
 85    @property
 86    def selected_columns(self) -> list[ColumnInfo]:
 87        """The sheet's selected columns"""
 88        return self._sheet.selected_columns
 89
 90    def available_columns(self) -> list[ColumnInfo]:
 91        """The columns available for the given sheet"""
 92        return self._sheet.available_columns()
 93
 94    @property
 95    def specified_dtypes(self) -> DTypeMap | None:
 96        """The dtypes specified for the sheet"""
 97        return self._sheet.specified_dtypes
 98
 99    @property
100    def visible(self) -> SheetVisible:
101        """The visibility of the sheet"""
102        return self._sheet.visible
103
104    def to_arrow(self) -> "pa.RecordBatch":
105        """Converts the sheet to a pyarrow `RecordBatch`
106
107        Requires the `pyarrow` extra to be installed.
108        """
109        if not _PYARROW_AVAILABLE:
110            raise ImportError(
111                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
112            )
113        return self._sheet.to_arrow()
114
115    def to_arrow_with_errors(self) -> "tuple[pa.RecordBatch, CellErrors | None]":
116        """Converts the sheet to a pyarrow `RecordBatch` with error information.
117
118        Stores the positions of any values that cannot be parsed as the specified type and were
119        therefore converted to None.
120
121        Requires the `pyarrow` extra to be installed.
122        """
123        if not _PYARROW_AVAILABLE:
124            raise ImportError(
125                "pyarrow is required for to_arrow_with_errors(). Install with: pip install 'fastexcel[pyarrow]'"  # noqa: E501
126            )
127        rb, cell_errors = self._sheet.to_arrow_with_errors()
128        if not cell_errors.errors:
129            return (rb, None)
130        return (rb, cell_errors)
131
132    def to_pandas(self) -> "pd.DataFrame":
133        """Converts the sheet to a Pandas `DataFrame`.
134
135        Requires the `pandas` extra to be installed.
136        """
137        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
138        # which we don't implement. Using pyarrow conversion for now.
139        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
140        return self.to_arrow().to_pandas()
141
142    def to_polars(self) -> "pl.DataFrame":
143        """Converts the sheet to a Polars `DataFrame`.
144
145        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
146        Requires the `polars` extra to be installed.
147        """
148        import polars as pl
149
150        return pl.DataFrame(self)
151
152    def __arrow_c_schema__(self) -> object:
153        """Export the schema as an `ArrowSchema` `PyCapsule`.
154
155        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export
156
157        The Arrow PyCapsule Interface enables zero-copy data exchange with
158        Arrow-compatible libraries without requiring PyArrow as a dependency.
159        """
160        return self._sheet.__arrow_c_schema__()
161
162    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
163        """Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.
164
165        The optional `requested_schema` parameter allows for potential schema conversion.
166
167        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export
168
169        The Arrow PyCapsule Interface enables zero-copy data exchange with
170        Arrow-compatible libraries without requiring PyArrow as a dependency.
171        """
172        return self._sheet.__arrow_c_array__(requested_schema)
173
174    def __repr__(self) -> str:
175        return self._sheet.__repr__()

A class representing a single sheet in an Excel File

ExcelSheet(sheet: _ExcelSheet)
62    def __init__(self, sheet: _ExcelSheet) -> None:
63        self._sheet = sheet
name: str
65    @property
66    def name(self) -> str:
67        """The name of the sheet"""
68        return self._sheet.name

The name of the sheet

width: int
70    @property
71    def width(self) -> int:
72        """The sheet's width"""
73        return self._sheet.width

The sheet's width

height: int
75    @property
76    def height(self) -> int:
77        """The sheet's height, with `skip_rows` and `nrows` applied"""
78        return self._sheet.height

The sheet's height, with skip_rows and nrows applied

total_height: int
80    @property
81    def total_height(self) -> int:
82        """The sheet's total height"""
83        return self._sheet.total_height

The sheet's total height

selected_columns: list[ColumnInfo]
85    @property
86    def selected_columns(self) -> list[ColumnInfo]:
87        """The sheet's selected columns"""
88        return self._sheet.selected_columns

The sheet's selected columns

def available_columns(self) -> list[ColumnInfo]:
90    def available_columns(self) -> list[ColumnInfo]:
91        """The columns available for the given sheet"""
92        return self._sheet.available_columns()

The columns available for the given sheet

specified_dtypes: 'DTypeMap | None'
94    @property
95    def specified_dtypes(self) -> DTypeMap | None:
96        """The dtypes specified for the sheet"""
97        return self._sheet.specified_dtypes

The dtypes specified for the sheet

visible: Literal['visible', 'hidden', 'veryhidden']
 99    @property
100    def visible(self) -> SheetVisible:
101        """The visibility of the sheet"""
102        return self._sheet.visible

The visibility of the sheet

def to_arrow(self) -> pyarrow.lib.RecordBatch:
104    def to_arrow(self) -> "pa.RecordBatch":
105        """Converts the sheet to a pyarrow `RecordBatch`
106
107        Requires the `pyarrow` extra to be installed.
108        """
109        if not _PYARROW_AVAILABLE:
110            raise ImportError(
111                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
112            )
113        return self._sheet.to_arrow()

Converts the sheet to a pyarrow RecordBatch

Requires the pyarrow extra to be installed.

def to_arrow_with_errors(self) -> tuple[pyarrow.lib.RecordBatch, CellErrors | None]:
115    def to_arrow_with_errors(self) -> "tuple[pa.RecordBatch, CellErrors | None]":
116        """Converts the sheet to a pyarrow `RecordBatch` with error information.
117
118        Stores the positions of any values that cannot be parsed as the specified type and were
119        therefore converted to None.
120
121        Requires the `pyarrow` extra to be installed.
122        """
123        if not _PYARROW_AVAILABLE:
124            raise ImportError(
125                "pyarrow is required for to_arrow_with_errors(). Install with: pip install 'fastexcel[pyarrow]'"  # noqa: E501
126            )
127        rb, cell_errors = self._sheet.to_arrow_with_errors()
128        if not cell_errors.errors:
129            return (rb, None)
130        return (rb, cell_errors)

Converts the sheet to a pyarrow RecordBatch with error information.

Stores the positions of any values that cannot be parsed as the specified type and were therefore converted to None.

Requires the pyarrow extra to be installed.

def to_pandas(self) -> pandas.core.frame.DataFrame:
132    def to_pandas(self) -> "pd.DataFrame":
133        """Converts the sheet to a Pandas `DataFrame`.
134
135        Requires the `pandas` extra to be installed.
136        """
137        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
138        # which we don't implement. Using pyarrow conversion for now.
139        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
140        return self.to_arrow().to_pandas()

Converts the sheet to a Pandas DataFrame.

Requires the pandas extra to be installed.

def to_polars(self) -> polars.dataframe.frame.DataFrame:
142    def to_polars(self) -> "pl.DataFrame":
143        """Converts the sheet to a Polars `DataFrame`.
144
145        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
146        Requires the `polars` extra to be installed.
147        """
148        import polars as pl
149
150        return pl.DataFrame(self)

Converts the sheet to a Polars DataFrame.

Uses the Arrow PyCapsule Interface for zero-copy data exchange. Requires the polars extra to be installed.

class ExcelTable:
178class ExcelTable:
179    """A class representing a single table in an Excel file"""
180
181    def __init__(self, table: _ExcelTable) -> None:
182        self._table = table
183
184    @property
185    def name(self) -> str:
186        """The name of the table"""
187        return self._table.name
188
189    @property
190    def sheet_name(self) -> str:
191        """The name of the sheet this table belongs to"""
192        return self._table.sheet_name
193
194    @property
195    def width(self) -> int:
196        """The table's width"""
197        return self._table.width
198
199    @property
200    def height(self) -> int:
201        """The table's height"""
202        return self._table.height
203
204    @property
205    def total_height(self) -> int:
206        """The table's total height"""
207        return self._table.total_height
208
209    @property
210    def offset(self) -> int:
211        """The table's offset before data starts"""
212        return self._table.offset
213
214    @property
215    def selected_columns(self) -> list[ColumnInfo]:
216        """The table's selected columns"""
217        return self._table.selected_columns
218
219    def available_columns(self) -> list[ColumnInfo]:
220        """The columns available for the given table"""
221        return self._table.available_columns()
222
223    @property
224    def specified_dtypes(self) -> DTypeMap | None:
225        """The dtypes specified for the table"""
226        return self._table.specified_dtypes
227
228    def to_arrow(self) -> "pa.RecordBatch":
229        """Converts the table to a pyarrow `RecordBatch`
230
231        Requires the `pyarrow` extra to be installed.
232        """
233        if not _PYARROW_AVAILABLE:
234            raise ImportError(
235                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
236            )
237        return self._table.to_arrow()
238
239    def to_pandas(self) -> "pd.DataFrame":
240        """Converts the table to a Pandas `DataFrame`.
241
242        Requires the `pandas` extra to be installed.
243        """
244        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
245        # which we don't implement. Using pyarrow conversion for now.
246        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
247        return self.to_arrow().to_pandas()
248
249    def to_polars(self) -> "pl.DataFrame":
250        """Converts the table to a Polars `DataFrame`.
251
252        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
253        Requires the `polars` extra to be installed.
254        """
255        import polars as pl
256
257        return pl.DataFrame(self)
258
259    def __arrow_c_schema__(self) -> object:
260        """Export the schema as an `ArrowSchema` `PyCapsule`.
261
262        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export
263
264        The Arrow PyCapsule Interface enables zero-copy data exchange with
265        Arrow-compatible libraries without requiring PyArrow as a dependency.
266        """
267        return self._table.__arrow_c_schema__()
268
269    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
270        """Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.
271
272        The optional `requested_schema` parameter allows for potential schema conversion.
273
274        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export
275
276        The Arrow PyCapsule Interface enables zero-copy data exchange with
277        Arrow-compatible libraries without requiring PyArrow as a dependency.
278        """
279        return self._table.__arrow_c_array__(requested_schema)

A class representing a single table in an Excel file

ExcelTable(table: _ExcelTable)
181    def __init__(self, table: _ExcelTable) -> None:
182        self._table = table
name: str
184    @property
185    def name(self) -> str:
186        """The name of the table"""
187        return self._table.name

The name of the table

sheet_name: str
189    @property
190    def sheet_name(self) -> str:
191        """The name of the sheet this table belongs to"""
192        return self._table.sheet_name

The name of the sheet this table belongs to

width: int
194    @property
195    def width(self) -> int:
196        """The table's width"""
197        return self._table.width

The table's width

height: int
199    @property
200    def height(self) -> int:
201        """The table's height"""
202        return self._table.height

The table's height

total_height: int
204    @property
205    def total_height(self) -> int:
206        """The table's total height"""
207        return self._table.total_height

The table's total height

offset: int
209    @property
210    def offset(self) -> int:
211        """The table's offset before data starts"""
212        return self._table.offset

The table's offset before data starts

selected_columns: list[ColumnInfo]
214    @property
215    def selected_columns(self) -> list[ColumnInfo]:
216        """The table's selected columns"""
217        return self._table.selected_columns

The table's selected columns

def available_columns(self) -> list[ColumnInfo]:
219    def available_columns(self) -> list[ColumnInfo]:
220        """The columns available for the given table"""
221        return self._table.available_columns()

The columns available for the given table

specified_dtypes: 'DTypeMap | None'
223    @property
224    def specified_dtypes(self) -> DTypeMap | None:
225        """The dtypes specified for the table"""
226        return self._table.specified_dtypes

The dtypes specified for the table

def to_arrow(self) -> pyarrow.lib.RecordBatch:
228    def to_arrow(self) -> "pa.RecordBatch":
229        """Converts the table to a pyarrow `RecordBatch`
230
231        Requires the `pyarrow` extra to be installed.
232        """
233        if not _PYARROW_AVAILABLE:
234            raise ImportError(
235                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
236            )
237        return self._table.to_arrow()

Converts the table to a pyarrow RecordBatch

Requires the pyarrow extra to be installed.

def to_pandas(self) -> pandas.core.frame.DataFrame:
239    def to_pandas(self) -> "pd.DataFrame":
240        """Converts the table to a Pandas `DataFrame`.
241
242        Requires the `pandas` extra to be installed.
243        """
244        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
245        # which we don't implement. Using pyarrow conversion for now.
246        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
247        return self.to_arrow().to_pandas()

Converts the table to a Pandas DataFrame.

Requires the pandas extra to be installed.

def to_polars(self) -> polars.dataframe.frame.DataFrame:
249    def to_polars(self) -> "pl.DataFrame":
250        """Converts the table to a Polars `DataFrame`.
251
252        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
253        Requires the `polars` extra to be installed.
254        """
255        import polars as pl
256
257        return pl.DataFrame(self)

Converts the table to a Polars DataFrame.

Uses the Arrow PyCapsule Interface for zero-copy data exchange. Requires the polars extra to be installed.

DTypeFrom: TypeAlias = Literal['provided_for_all', 'provided_by_index', 'provided_by_name', 'guessed']
ColumnNameFrom: TypeAlias = Literal['provided', 'looked_up', 'generated']
class ColumnInfo:

Metadata about a single column in a sheet.

name

str. The name of the column

dtype

fastexcel.DType. The dtype of the column

index

int. The index of the column

column_name_from

fastexcel.ColumnNameFrom. How the name of the column was determined.

One of three possible values:

  • "provided": The column name was provided via the use_columns parameter
  • "looked_up": The column name was looked up from the data found in the sheet
  • "generated": The column name was generated from the column index, either because header_row was None, or because it could not be looked up
dtype_from

fastexcel.DTypeFrom. How the dtype of the column was determined.

One of three possible values:

  • "provided_by_index": The dtype was specified via the column index
  • "provided_by_name": The dtype was specified via the column name
  • "guessed": The dtype was determined from the content of the column
class CellError:
detail

str. The error message

row_offset

int. The row offset

offset_position
position

(int, int). The original row and column of the error

class CellErrors:
errors
class FastExcelError(builtins.Exception):

The base class for all fastexcel errors

class CannotRetrieveCellDataError(fastexcel.FastExcelError):

Data for a given cell cannot be retrieved

class CalamineCellError(fastexcel.FastExcelError):

calamine returned an error regarding the content of the cell

class CalamineError(fastexcel.FastExcelError):

Generic calamine error

class SheetNotFoundError(fastexcel.FastExcelError):

Sheet was not found

class ColumnNotFoundError(fastexcel.FastExcelError):

Column was not found

class ArrowError(fastexcel.FastExcelError):

Generic arrow error

class InvalidParametersError(fastexcel.FastExcelError):

Provided parameters are invalid

class UnsupportedColumnTypeCombinationError(fastexcel.FastExcelError):

Column contains an unsupported type combination