fastexcel

  1from __future__ import annotations
  2
  3import sys
  4import typing
  5from typing import TYPE_CHECKING, Callable, Literal
  6
  7if sys.version_info < (3, 10):
  8    from typing_extensions import TypeAlias
  9else:
 10    from typing import TypeAlias
 11
 12if TYPE_CHECKING:
 13    import pandas as pd
 14    import polars as pl
 15
 16from os.path import expanduser
 17from pathlib import Path
 18
 19import pyarrow as pa
 20
 21from ._fastexcel import (
 22    ArrowError,
 23    CalamineCellError,
 24    CalamineError,
 25    CannotRetrieveCellDataError,
 26    ColumnInfo,
 27    ColumnInfoNoDtype,
 28    ColumnNotFoundError,
 29    FastExcelError,
 30    InvalidParametersError,
 31    SheetNotFoundError,
 32    UnsupportedColumnTypeCombinationError,
 33    __version__,
 34    _ExcelReader,
 35    _ExcelSheet,
 36    _ExcelTable,
 37)
 38from ._fastexcel import read_excel as _read_excel
 39
 40DType = Literal["null", "int", "float", "string", "boolean", "datetime", "date", "duration"]
 41DTypeMap: TypeAlias = "dict[str | int, DType]"
 42ColumnNameFrom: TypeAlias = Literal["provided", "looked_up", "generated"]
 43DTypeFrom: TypeAlias = Literal["provided_by_index", "provided_by_name", "guessed"]
 44SheetVisible: TypeAlias = Literal["visible", "hidden", "veryhidden"]
 45
 46
 47def _recordbatch_to_polars(rb: pa.RecordBatch) -> pl.DataFrame:
 48    import polars as pl
 49
 50    df = pl.from_arrow(data=rb)
 51    assert isinstance(df, pl.DataFrame)
 52    return df
 53
 54
 55class ExcelSheet:
 56    """A class representing a single sheet in an Excel File"""
 57
 58    def __init__(self, sheet: _ExcelSheet) -> None:
 59        self._sheet = sheet
 60
 61    @property
 62    def name(self) -> str:
 63        """The name of the sheet"""
 64        return self._sheet.name
 65
 66    @property
 67    def width(self) -> int:
 68        """The sheet's width"""
 69        return self._sheet.width
 70
 71    @property
 72    def height(self) -> int:
 73        """The sheet's height, with `skip_rows` and `nrows` applied"""
 74        return self._sheet.height
 75
 76    @property
 77    def total_height(self) -> int:
 78        """The sheet's total height"""
 79        return self._sheet.total_height
 80
 81    @property
 82    def selected_columns(self) -> list[ColumnInfo]:
 83        """The sheet's selected columns"""
 84        return self._sheet.selected_columns
 85
 86    def available_columns(self) -> list[ColumnInfo]:
 87        """The columns available for the given sheet"""
 88        return self._sheet.available_columns()
 89
 90    @property
 91    def specified_dtypes(self) -> DTypeMap | None:
 92        """The dtypes specified for the sheet"""
 93        return self._sheet.specified_dtypes
 94
 95    @property
 96    def visible(self) -> SheetVisible:
 97        """The visibility of the sheet"""
 98        return self._sheet.visible
 99
100    def to_arrow(self) -> pa.RecordBatch:
101        """Converts the sheet to a pyarrow `RecordBatch`"""
102        return self._sheet.to_arrow()
103
104    def to_pandas(self) -> "pd.DataFrame":
105        """Converts the sheet to a Pandas `DataFrame`.
106
107        Requires the `pandas` extra to be installed.
108        """
109        # We know for sure that the sheet will yield exactly one RecordBatch
110        return self.to_arrow().to_pandas()
111
112    def to_polars(self) -> "pl.DataFrame":
113        """Converts the sheet to a Polars `DataFrame`.
114
115        Requires the `polars` extra to be installed.
116        """
117        return _recordbatch_to_polars(self.to_arrow())
118
119    def __repr__(self) -> str:
120        return self._sheet.__repr__()
121
122
123class ExcelTable:
124    """A class representing a single table in an Excel file"""
125
126    def __init__(self, table: _ExcelTable) -> None:
127        self._table = table
128
129    @property
130    def name(self) -> str:
131        """The name of the table"""
132        return self._table.name
133
134    @property
135    def sheet_name(self) -> str:
136        """The name of the sheet this table belongs to"""
137        return self._table.sheet_name
138
139    @property
140    def width(self) -> int:
141        """The table's width"""
142        return self._table.width
143
144    @property
145    def height(self) -> int:
146        """The table's height"""
147        return self._table.height
148
149    @property
150    def total_height(self) -> int:
151        """The table's total height"""
152        return self._table.total_height
153
154    @property
155    def offset(self) -> int:
156        """The table's offset before data starts"""
157        return self._table.offset
158
159    @property
160    def selected_columns(self) -> list[ColumnInfo]:
161        """The table's selected columns"""
162        return self._table.selected_columns
163
164    def available_columns(self) -> list[ColumnInfo]:
165        """The columns available for the given table"""
166        return self._table.available_columns()
167
168    @property
169    def specified_dtypes(self) -> DTypeMap | None:
170        """The dtypes specified for the table"""
171        return self._table.specified_dtypes
172
173    def to_arrow(self) -> pa.RecordBatch:
174        """Converts the table to a pyarrow `RecordBatch`"""
175        return self._table.to_arrow()
176
177    def to_pandas(self) -> "pd.DataFrame":
178        """Converts the table to a Pandas `DataFrame`.
179
180        Requires the `pandas` extra to be installed.
181        """
182        # We know for sure that the table will yield exactly one RecordBatch
183        return self.to_arrow().to_pandas()
184
185    def to_polars(self) -> "pl.DataFrame":
186        """Converts the table to a Polars `DataFrame`.
187
188        Requires the `polars` extra to be installed.
189        """
190        return _recordbatch_to_polars(self.to_arrow())
191
192
193class ExcelReader:
194    """A class representing an open Excel file and allowing to read its sheets"""
195
196    def __init__(self, reader: _ExcelReader) -> None:
197        self._reader = reader
198
199    @property
200    def sheet_names(self) -> list[str]:
201        """The list of sheet names"""
202        return self._reader.sheet_names
203
204    def load_sheet(
205        self,
206        idx_or_name: int | str,
207        *,
208        header_row: int | None = 0,
209        column_names: list[str] | None = None,
210        skip_rows: int | None = None,
211        n_rows: int | None = None,
212        schema_sample_rows: int | None = 1_000,
213        dtype_coercion: Literal["coerce", "strict"] = "coerce",
214        use_columns: list[str]
215        | list[int]
216        | str
217        | Callable[[ColumnInfoNoDtype], bool]
218        | None = None,
219        dtypes: DType | DTypeMap | None = None,
220    ) -> ExcelSheet:
221        """Loads a sheet lazily by index or name.
222
223        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
224        :param header_row: The index of the row containing the column labels, default index is 0.
225                           If `None`, the sheet does not have any column labels.
226                           Any rows before the `header_row` will be automatically skipped.
227        :param column_names: Overrides headers found in the document.
228                             If `column_names` is used, `header_row` will be ignored.
229        :param n_rows: Specifies how many rows should be loaded.
230                       If `None`, all rows are loaded
231        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
232                          Any rows before the `header_row` are automatically skipped.
233                          If `header_row` is `None`:
234                            - if `skip_rows` is `None` (default): it skips all empty rows
235                            at the beginning of the sheet.
236                            - if `skip_rows` is a number, it skips the specified number
237                            of rows from the start of the sheet.
238        :param schema_sample_rows: Specifies how many rows should be used to determine
239                                   the dtype of a column. Cannot be 0. A specific dtype can be
240                                   enforced for some or all columns through the `dtypes` parameter.
241                                   If `None`, all rows will be used.
242        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
243                               will try to coerce different dtypes in a column to the same one,
244                               whereas `strict` will raise an error in case a column contains
245                               several dtypes. Note that this only applies to columns whose dtype
246                               is guessed, i.e. not specified via `dtypes`.
247        :param use_columns: Specifies the columns to use. Can either be:
248                            - `None` to select all columns
249                            - A list of strings and ints, the column names and/or indices
250                              (starting at 0)
251                            - A string, a comma separated list of Excel column letters and column
252                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
253                              `A,B,C,D,E` and `A,C,E,F`)
254                            - A callable, a function that takes a column and returns a boolean
255                              indicating whether the column should be used
256        :param dtypes: An optional dtype (for all columns)
257                       or dict of dtypes with keys as column indices or names.
258        """
259        return ExcelSheet(
260            self._reader.load_sheet(
261                idx_or_name=idx_or_name,
262                header_row=header_row,
263                column_names=column_names,
264                skip_rows=skip_rows,
265                n_rows=n_rows,
266                schema_sample_rows=schema_sample_rows,
267                dtype_coercion=dtype_coercion,
268                use_columns=use_columns,
269                dtypes=dtypes,
270                eager=False,
271            )
272        )
273
274    def table_names(self, sheet_name: str | None = None) -> list[str]:
275        """The list of table names.
276
277        Will return an empty list if no tables are found.
278
279        :param sheet_name: If given, will limit the list to the given sheet, will be faster
280        too.
281        """
282        return self._reader.table_names(sheet_name)
283
284    @typing.overload
285    def load_table(
286        self,
287        name: str,
288        *,
289        header_row: int | None = None,
290        column_names: list[str] | None = None,
291        skip_rows: int = 0,
292        n_rows: int | None = None,
293        schema_sample_rows: int | None = 1_000,
294        dtype_coercion: Literal["coerce", "strict"] = "coerce",
295        use_columns: list[str]
296        | list[int]
297        | str
298        | Callable[[ColumnInfoNoDtype], bool]
299        | None = None,
300        dtypes: DType | DTypeMap | None = None,
301        eager: Literal[False] = ...,
302    ) -> ExcelTable: ...
303    @typing.overload
304    def load_table(
305        self,
306        name: str,
307        *,
308        header_row: int | None = None,
309        column_names: list[str] | None = None,
310        skip_rows: int = 0,
311        n_rows: int | None = None,
312        schema_sample_rows: int | None = 1_000,
313        dtype_coercion: Literal["coerce", "strict"] = "coerce",
314        use_columns: list[str]
315        | list[int]
316        | str
317        | Callable[[ColumnInfoNoDtype], bool]
318        | None = None,
319        dtypes: DType | DTypeMap | None = None,
320        eager: Literal[True] = ...,
321    ) -> pa.RecordBatch: ...
322    def load_table(
323        self,
324        name: str,
325        *,
326        header_row: int | None = None,
327        column_names: list[str] | None = None,
328        skip_rows: int = 0,
329        n_rows: int | None = None,
330        schema_sample_rows: int | None = 1_000,
331        dtype_coercion: Literal["coerce", "strict"] = "coerce",
332        use_columns: list[str]
333        | list[int]
334        | str
335        | Callable[[ColumnInfoNoDtype], bool]
336        | None = None,
337        dtypes: DType | DTypeMap | None = None,
338        eager: bool = False,
339    ) -> ExcelTable | pa.RecordBatch:
340        """Loads a table by name.
341
342        :param name: The name of the table to load.
343        :param header_row: The index of the row containing the column labels.
344                           If `None`, the table's column names will be used.
345                           Any rows before the `header_row` will be automatically skipped.
346        :param column_names: Overrides headers found in the document.
347                             If `column_names` is used, `header_row` will be ignored.
348        :param n_rows: Specifies how many rows should be loaded.
349                       If `None`, all rows are loaded
350        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
351                          Any rows before the `header_row` are automatically skipped.
352                          If `header_row` is `None`, it skips the number of rows from the
353                          start of the sheet.
354        :param schema_sample_rows: Specifies how many rows should be used to determine
355                                   the dtype of a column. Cannot be 0. A specific dtype can be
356                                   enforced for some or all columns through the `dtypes` parameter.
357                                   If `None`, all rows will be used.
358        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
359                               will try to coerce different dtypes in a column to the same one,
360                               whereas `strict` will raise an error in case a column contains
361                               several dtypes. Note that this only applies to columns whose dtype
362                               is guessed, i.e. not specified via `dtypes`.
363        :param use_columns: Specifies the columns to use. Can either be:
364                            - `None` to select all columns
365                            - A list of strings and ints, the column names and/or indices
366                              (starting at 0)
367                            - A string, a comma separated list of Excel column letters and column
368                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
369                              `A,B,C,D,E` and `A,C,E,F`)
370                            - A callable, a function that takes a column and returns a boolean
371                              indicating whether the column should be used
372        :param dtypes: An optional dtype (for all columns)
373                       or dict of dtypes with keys as column indices or names.
374        """
375        output = self._reader.load_table(  # type:ignore[call-overload,misc]
376            name=name,
377            header_row=header_row,
378            column_names=column_names,
379            skip_rows=skip_rows,
380            n_rows=n_rows,
381            schema_sample_rows=schema_sample_rows,
382            dtype_coercion=dtype_coercion,
383            use_columns=use_columns,
384            dtypes=dtypes,
385            eager=eager,
386        )
387        if eager:
388            return output
389        return ExcelTable(output)
390
391    def load_sheet_eager(
392        self,
393        idx_or_name: int | str,
394        *,
395        header_row: int | None = 0,
396        column_names: list[str] | None = None,
397        skip_rows: int | None = None,
398        n_rows: int | None = None,
399        schema_sample_rows: int | None = 1_000,
400        dtype_coercion: Literal["coerce", "strict"] = "coerce",
401        use_columns: list[str] | list[int] | str | None = None,
402        dtypes: DType | DTypeMap | None = None,
403    ) -> pa.RecordBatch:
404        """Loads a sheet eagerly by index or name.
405
406        For xlsx files, this will be faster and more memory-efficient, as it will use
407        `worksheet_range_ref` under the hood, which returns borrowed types.
408
409        Refer to `load_sheet` for parameter documentation
410        """
411        return self._reader.load_sheet(
412            idx_or_name=idx_or_name,
413            header_row=header_row,
414            column_names=column_names,
415            skip_rows=skip_rows,
416            n_rows=n_rows,
417            schema_sample_rows=schema_sample_rows,
418            dtype_coercion=dtype_coercion,
419            use_columns=use_columns,
420            dtypes=dtypes,
421            eager=True,
422        )
423
424    def load_sheet_by_name(
425        self,
426        name: str,
427        *,
428        header_row: int | None = 0,
429        column_names: list[str] | None = None,
430        skip_rows: int | None = None,
431        n_rows: int | None = None,
432        schema_sample_rows: int | None = 1_000,
433        dtype_coercion: Literal["coerce", "strict"] = "coerce",
434        use_columns: list[str]
435        | list[int]
436        | str
437        | Callable[[ColumnInfoNoDtype], bool]
438        | None = None,
439        dtypes: DType | DTypeMap | None = None,
440    ) -> ExcelSheet:
441        """Loads a sheet by name.
442
443        Refer to `load_sheet` for parameter documentation
444        """
445        return self.load_sheet(
446            name,
447            header_row=header_row,
448            column_names=column_names,
449            skip_rows=skip_rows,
450            n_rows=n_rows,
451            schema_sample_rows=schema_sample_rows,
452            dtype_coercion=dtype_coercion,
453            use_columns=use_columns,
454            dtypes=dtypes,
455        )
456
457    def load_sheet_by_idx(
458        self,
459        idx: int,
460        *,
461        header_row: int | None = 0,
462        column_names: list[str] | None = None,
463        skip_rows: int | None = None,
464        n_rows: int | None = None,
465        schema_sample_rows: int | None = 1_000,
466        dtype_coercion: Literal["coerce", "strict"] = "coerce",
467        use_columns: list[str]
468        | list[int]
469        | str
470        | Callable[[ColumnInfoNoDtype], bool]
471        | None = None,
472        dtypes: DType | DTypeMap | None = None,
473    ) -> ExcelSheet:
474        """Loads a sheet by index.
475
476        Refer to `load_sheet` for parameter documentation
477        """
478        return self.load_sheet(
479            idx,
480            header_row=header_row,
481            column_names=column_names,
482            skip_rows=skip_rows,
483            n_rows=n_rows,
484            schema_sample_rows=schema_sample_rows,
485            dtype_coercion=dtype_coercion,
486            use_columns=use_columns,
487            dtypes=dtypes,
488        )
489
490    def __repr__(self) -> str:
491        return self._reader.__repr__()
492
493
494def read_excel(source: Path | str | bytes) -> ExcelReader:
495    """Opens and loads an excel file.
496
497    :param source: The path to a file or its content as bytes
498    """
499    if isinstance(source, (str, Path)):
500        source = expanduser(source)
501    return ExcelReader(_read_excel(source))
502
503
504__all__ = (
505    ## version
506    "__version__",
507    ## main entrypoint
508    "read_excel",
509    ## Python types
510    "DType",
511    "DTypeMap",
512    # Excel reader
513    "ExcelReader",
514    # Excel sheet
515    "ExcelSheet",
516    # Column metadata
517    "DTypeFrom",
518    "ColumnNameFrom",
519    "ColumnInfo",
520    # Exceptions
521    "FastExcelError",
522    "CannotRetrieveCellDataError",
523    "CalamineCellError",
524    "CalamineError",
525    "SheetNotFoundError",
526    "ColumnNotFoundError",
527    "ArrowError",
528    "InvalidParametersError",
529    "UnsupportedColumnTypeCombinationError",
530)
__version__ = '0.13.0'
def read_excel(source: pathlib.Path | str | bytes) -> ExcelReader:
495def read_excel(source: Path | str | bytes) -> ExcelReader:
496    """Opens and loads an excel file.
497
498    :param source: The path to a file or its content as bytes
499    """
500    if isinstance(source, (str, Path)):
501        source = expanduser(source)
502    return ExcelReader(_read_excel(source))

Opens and loads an excel file.

Parameters
  • source: The path to a file or its content as bytes
DType = typing.Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']
DTypeMap: TypeAlias = 'dict[str | int, DType]'
class ExcelReader:
194class ExcelReader:
195    """A class representing an open Excel file and allowing to read its sheets"""
196
197    def __init__(self, reader: _ExcelReader) -> None:
198        self._reader = reader
199
200    @property
201    def sheet_names(self) -> list[str]:
202        """The list of sheet names"""
203        return self._reader.sheet_names
204
205    def load_sheet(
206        self,
207        idx_or_name: int | str,
208        *,
209        header_row: int | None = 0,
210        column_names: list[str] | None = None,
211        skip_rows: int | None = None,
212        n_rows: int | None = None,
213        schema_sample_rows: int | None = 1_000,
214        dtype_coercion: Literal["coerce", "strict"] = "coerce",
215        use_columns: list[str]
216        | list[int]
217        | str
218        | Callable[[ColumnInfoNoDtype], bool]
219        | None = None,
220        dtypes: DType | DTypeMap | None = None,
221    ) -> ExcelSheet:
222        """Loads a sheet lazily by index or name.
223
224        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
225        :param header_row: The index of the row containing the column labels, default index is 0.
226                           If `None`, the sheet does not have any column labels.
227                           Any rows before the `header_row` will be automatically skipped.
228        :param column_names: Overrides headers found in the document.
229                             If `column_names` is used, `header_row` will be ignored.
230        :param n_rows: Specifies how many rows should be loaded.
231                       If `None`, all rows are loaded
232        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
233                          Any rows before the `header_row` are automatically skipped.
234                          If `header_row` is `None`:
235                            - if `skip_rows` is `None` (default): it skips all empty rows
236                            at the beginning of the sheet.
237                            - if `skip_rows` is a number, it skips the specified number
238                            of rows from the start of the sheet.
239        :param schema_sample_rows: Specifies how many rows should be used to determine
240                                   the dtype of a column. Cannot be 0. A specific dtype can be
241                                   enforced for some or all columns through the `dtypes` parameter.
242                                   If `None`, all rows will be used.
243        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
244                               will try to coerce different dtypes in a column to the same one,
245                               whereas `strict` will raise an error in case a column contains
246                               several dtypes. Note that this only applies to columns whose dtype
247                               is guessed, i.e. not specified via `dtypes`.
248        :param use_columns: Specifies the columns to use. Can either be:
249                            - `None` to select all columns
250                            - A list of strings and ints, the column names and/or indices
251                              (starting at 0)
252                            - A string, a comma separated list of Excel column letters and column
253                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
254                              `A,B,C,D,E` and `A,C,E,F`)
255                            - A callable, a function that takes a column and returns a boolean
256                              indicating whether the column should be used
257        :param dtypes: An optional dtype (for all columns)
258                       or dict of dtypes with keys as column indices or names.
259        """
260        return ExcelSheet(
261            self._reader.load_sheet(
262                idx_or_name=idx_or_name,
263                header_row=header_row,
264                column_names=column_names,
265                skip_rows=skip_rows,
266                n_rows=n_rows,
267                schema_sample_rows=schema_sample_rows,
268                dtype_coercion=dtype_coercion,
269                use_columns=use_columns,
270                dtypes=dtypes,
271                eager=False,
272            )
273        )
274
275    def table_names(self, sheet_name: str | None = None) -> list[str]:
276        """The list of table names.
277
278        Will return an empty list if no tables are found.
279
280        :param sheet_name: If given, will limit the list to the given sheet, will be faster
281        too.
282        """
283        return self._reader.table_names(sheet_name)
284
285    @typing.overload
286    def load_table(
287        self,
288        name: str,
289        *,
290        header_row: int | None = None,
291        column_names: list[str] | None = None,
292        skip_rows: int = 0,
293        n_rows: int | None = None,
294        schema_sample_rows: int | None = 1_000,
295        dtype_coercion: Literal["coerce", "strict"] = "coerce",
296        use_columns: list[str]
297        | list[int]
298        | str
299        | Callable[[ColumnInfoNoDtype], bool]
300        | None = None,
301        dtypes: DType | DTypeMap | None = None,
302        eager: Literal[False] = ...,
303    ) -> ExcelTable: ...
304    @typing.overload
305    def load_table(
306        self,
307        name: str,
308        *,
309        header_row: int | None = None,
310        column_names: list[str] | None = None,
311        skip_rows: int = 0,
312        n_rows: int | None = None,
313        schema_sample_rows: int | None = 1_000,
314        dtype_coercion: Literal["coerce", "strict"] = "coerce",
315        use_columns: list[str]
316        | list[int]
317        | str
318        | Callable[[ColumnInfoNoDtype], bool]
319        | None = None,
320        dtypes: DType | DTypeMap | None = None,
321        eager: Literal[True] = ...,
322    ) -> pa.RecordBatch: ...
323    def load_table(
324        self,
325        name: str,
326        *,
327        header_row: int | None = None,
328        column_names: list[str] | None = None,
329        skip_rows: int = 0,
330        n_rows: int | None = None,
331        schema_sample_rows: int | None = 1_000,
332        dtype_coercion: Literal["coerce", "strict"] = "coerce",
333        use_columns: list[str]
334        | list[int]
335        | str
336        | Callable[[ColumnInfoNoDtype], bool]
337        | None = None,
338        dtypes: DType | DTypeMap | None = None,
339        eager: bool = False,
340    ) -> ExcelTable | pa.RecordBatch:
341        """Loads a table by name.
342
343        :param name: The name of the table to load.
344        :param header_row: The index of the row containing the column labels.
345                           If `None`, the table's column names will be used.
346                           Any rows before the `header_row` will be automatically skipped.
347        :param column_names: Overrides headers found in the document.
348                             If `column_names` is used, `header_row` will be ignored.
349        :param n_rows: Specifies how many rows should be loaded.
350                       If `None`, all rows are loaded
351        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
352                          Any rows before the `header_row` are automatically skipped.
353                          If `header_row` is `None`, it skips the number of rows from the
354                          start of the sheet.
355        :param schema_sample_rows: Specifies how many rows should be used to determine
356                                   the dtype of a column. Cannot be 0. A specific dtype can be
357                                   enforced for some or all columns through the `dtypes` parameter.
358                                   If `None`, all rows will be used.
359        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
360                               will try to coerce different dtypes in a column to the same one,
361                               whereas `strict` will raise an error in case a column contains
362                               several dtypes. Note that this only applies to columns whose dtype
363                               is guessed, i.e. not specified via `dtypes`.
364        :param use_columns: Specifies the columns to use. Can either be:
365                            - `None` to select all columns
366                            - A list of strings and ints, the column names and/or indices
367                              (starting at 0)
368                            - A string, a comma separated list of Excel column letters and column
369                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
370                              `A,B,C,D,E` and `A,C,E,F`)
371                            - A callable, a function that takes a column and returns a boolean
372                              indicating whether the column should be used
373        :param dtypes: An optional dtype (for all columns)
374                       or dict of dtypes with keys as column indices or names.
375        """
376        output = self._reader.load_table(  # type:ignore[call-overload,misc]
377            name=name,
378            header_row=header_row,
379            column_names=column_names,
380            skip_rows=skip_rows,
381            n_rows=n_rows,
382            schema_sample_rows=schema_sample_rows,
383            dtype_coercion=dtype_coercion,
384            use_columns=use_columns,
385            dtypes=dtypes,
386            eager=eager,
387        )
388        if eager:
389            return output
390        return ExcelTable(output)
391
392    def load_sheet_eager(
393        self,
394        idx_or_name: int | str,
395        *,
396        header_row: int | None = 0,
397        column_names: list[str] | None = None,
398        skip_rows: int | None = None,
399        n_rows: int | None = None,
400        schema_sample_rows: int | None = 1_000,
401        dtype_coercion: Literal["coerce", "strict"] = "coerce",
402        use_columns: list[str] | list[int] | str | None = None,
403        dtypes: DType | DTypeMap | None = None,
404    ) -> pa.RecordBatch:
405        """Loads a sheet eagerly by index or name.
406
407        For xlsx files, this will be faster and more memory-efficient, as it will use
408        `worksheet_range_ref` under the hood, which returns borrowed types.
409
410        Refer to `load_sheet` for parameter documentation
411        """
412        return self._reader.load_sheet(
413            idx_or_name=idx_or_name,
414            header_row=header_row,
415            column_names=column_names,
416            skip_rows=skip_rows,
417            n_rows=n_rows,
418            schema_sample_rows=schema_sample_rows,
419            dtype_coercion=dtype_coercion,
420            use_columns=use_columns,
421            dtypes=dtypes,
422            eager=True,
423        )
424
425    def load_sheet_by_name(
426        self,
427        name: str,
428        *,
429        header_row: int | None = 0,
430        column_names: list[str] | None = None,
431        skip_rows: int | None = None,
432        n_rows: int | None = None,
433        schema_sample_rows: int | None = 1_000,
434        dtype_coercion: Literal["coerce", "strict"] = "coerce",
435        use_columns: list[str]
436        | list[int]
437        | str
438        | Callable[[ColumnInfoNoDtype], bool]
439        | None = None,
440        dtypes: DType | DTypeMap | None = None,
441    ) -> ExcelSheet:
442        """Loads a sheet by name.
443
444        Refer to `load_sheet` for parameter documentation
445        """
446        return self.load_sheet(
447            name,
448            header_row=header_row,
449            column_names=column_names,
450            skip_rows=skip_rows,
451            n_rows=n_rows,
452            schema_sample_rows=schema_sample_rows,
453            dtype_coercion=dtype_coercion,
454            use_columns=use_columns,
455            dtypes=dtypes,
456        )
457
458    def load_sheet_by_idx(
459        self,
460        idx: int,
461        *,
462        header_row: int | None = 0,
463        column_names: list[str] | None = None,
464        skip_rows: int | None = None,
465        n_rows: int | None = None,
466        schema_sample_rows: int | None = 1_000,
467        dtype_coercion: Literal["coerce", "strict"] = "coerce",
468        use_columns: list[str]
469        | list[int]
470        | str
471        | Callable[[ColumnInfoNoDtype], bool]
472        | None = None,
473        dtypes: DType | DTypeMap | None = None,
474    ) -> ExcelSheet:
475        """Loads a sheet by index.
476
477        Refer to `load_sheet` for parameter documentation
478        """
479        return self.load_sheet(
480            idx,
481            header_row=header_row,
482            column_names=column_names,
483            skip_rows=skip_rows,
484            n_rows=n_rows,
485            schema_sample_rows=schema_sample_rows,
486            dtype_coercion=dtype_coercion,
487            use_columns=use_columns,
488            dtypes=dtypes,
489        )
490
491    def __repr__(self) -> str:
492        return self._reader.__repr__()

A class representing an open Excel file and allowing to read its sheets

ExcelReader(reader: _ExcelReader)
197    def __init__(self, reader: _ExcelReader) -> None:
198        self._reader = reader
sheet_names: list[str]
200    @property
201    def sheet_names(self) -> list[str]:
202        """The list of sheet names"""
203        return self._reader.sheet_names

The list of sheet names

def load_sheet( self, idx_or_name: int | str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
205    def load_sheet(
206        self,
207        idx_or_name: int | str,
208        *,
209        header_row: int | None = 0,
210        column_names: list[str] | None = None,
211        skip_rows: int | None = None,
212        n_rows: int | None = None,
213        schema_sample_rows: int | None = 1_000,
214        dtype_coercion: Literal["coerce", "strict"] = "coerce",
215        use_columns: list[str]
216        | list[int]
217        | str
218        | Callable[[ColumnInfoNoDtype], bool]
219        | None = None,
220        dtypes: DType | DTypeMap | None = None,
221    ) -> ExcelSheet:
222        """Loads a sheet lazily by index or name.
223
224        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
225        :param header_row: The index of the row containing the column labels, default index is 0.
226                           If `None`, the sheet does not have any column labels.
227                           Any rows before the `header_row` will be automatically skipped.
228        :param column_names: Overrides headers found in the document.
229                             If `column_names` is used, `header_row` will be ignored.
230        :param n_rows: Specifies how many rows should be loaded.
231                       If `None`, all rows are loaded
232        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
233                          Any rows before the `header_row` are automatically skipped.
234                          If `header_row` is `None`:
235                            - if `skip_rows` is `None` (default): it skips all empty rows
236                            at the beginning of the sheet.
237                            - if `skip_rows` is a number, it skips the specified number
238                            of rows from the start of the sheet.
239        :param schema_sample_rows: Specifies how many rows should be used to determine
240                                   the dtype of a column. Cannot be 0. A specific dtype can be
241                                   enforced for some or all columns through the `dtypes` parameter.
242                                   If `None`, all rows will be used.
243        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
244                               will try to coerce different dtypes in a column to the same one,
245                               whereas `strict` will raise an error in case a column contains
246                               several dtypes. Note that this only applies to columns whose dtype
247                               is guessed, i.e. not specified via `dtypes`.
248        :param use_columns: Specifies the columns to use. Can either be:
249                            - `None` to select all columns
250                            - A list of strings and ints, the column names and/or indices
251                              (starting at 0)
252                            - A string, a comma separated list of Excel column letters and column
253                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
254                              `A,B,C,D,E` and `A,C,E,F`)
255                            - A callable, a function that takes a column and returns a boolean
256                              indicating whether the column should be used
257        :param dtypes: An optional dtype (for all columns)
258                       or dict of dtypes with keys as column indices or names.
259        """
260        return ExcelSheet(
261            self._reader.load_sheet(
262                idx_or_name=idx_or_name,
263                header_row=header_row,
264                column_names=column_names,
265                skip_rows=skip_rows,
266                n_rows=n_rows,
267                schema_sample_rows=schema_sample_rows,
268                dtype_coercion=dtype_coercion,
269                use_columns=use_columns,
270                dtypes=dtypes,
271                eager=False,
272            )
273        )

Loads a sheet lazily by index or name.

Parameters
  • idx_or_name: The index (starting at 0) or the name of the sheet to load.
  • header_row: The index of the row containing the column labels, default index is 0. If None, the sheet does not have any column labels. Any rows before the header_row will be automatically skipped.
  • column_names: Overrides headers found in the document. If column_names is used, header_row will be ignored.
  • n_rows: Specifies how many rows should be loaded. If None, all rows are loaded
  • skip_rows: Specifies how many rows should be skipped after the header_row. Any rows before the header_row are automatically skipped. If header_row is None:
    • if skip_rows is None (default): it skips all empty rows at the beginning of the sheet.
    • if skip_rows is a number, it skips the specified number of rows from the start of the sheet.
  • schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. Cannot be 0. A specific dtype can be enforced for some or all columns through the dtypes parameter. If None, all rows will be used.
  • dtype_coercion: Specifies how type coercion should behave. coerce (the default) will try to coerce different dtypes in a column to the same one, whereas strict will raise an error in case a column contains several dtypes. Note that this only applies to columns whose dtype is guessed, i.e. not specified via dtypes.
  • use_columns: Specifies the columns to use. Can either be:
    • None to select all columns
    • A list of strings and ints, the column names and/or indices (starting at 0)
    • A string, a comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”, which would result in A,B,C,D,E and A,C,E,F)
    • A callable, a function that takes a column and returns a boolean indicating whether the column should be used
  • dtypes: An optional dtype (for all columns) or dict of dtypes with keys as column indices or names.
def table_names(self, sheet_name: str | None = None) -> list[str]:
275    def table_names(self, sheet_name: str | None = None) -> list[str]:
276        """The list of table names.
277
278        Will return an empty list if no tables are found.
279
280        :param sheet_name: If given, will limit the list to the given sheet, will be faster
281        too.
282        """
283        return self._reader.table_names(sheet_name)

The list of table names.

Will return an empty list if no tables are found.

Parameters
  • sheet_name: If given, will limit the list to the given sheet, will be faster too.
def load_table( self, name: str, *, header_row: int | None = None, column_names: list[str] | None = None, skip_rows: int = 0, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None, eager: bool = False) -> fastexcel.ExcelTable | pyarrow.lib.RecordBatch:
323    def load_table(
324        self,
325        name: str,
326        *,
327        header_row: int | None = None,
328        column_names: list[str] | None = None,
329        skip_rows: int = 0,
330        n_rows: int | None = None,
331        schema_sample_rows: int | None = 1_000,
332        dtype_coercion: Literal["coerce", "strict"] = "coerce",
333        use_columns: list[str]
334        | list[int]
335        | str
336        | Callable[[ColumnInfoNoDtype], bool]
337        | None = None,
338        dtypes: DType | DTypeMap | None = None,
339        eager: bool = False,
340    ) -> ExcelTable | pa.RecordBatch:
341        """Loads a table by name.
342
343        :param name: The name of the table to load.
344        :param header_row: The index of the row containing the column labels.
345                           If `None`, the table's column names will be used.
346                           Any rows before the `header_row` will be automatically skipped.
347        :param column_names: Overrides headers found in the document.
348                             If `column_names` is used, `header_row` will be ignored.
349        :param n_rows: Specifies how many rows should be loaded.
350                       If `None`, all rows are loaded
351        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
352                          Any rows before the `header_row` are automatically skipped.
353                          If `header_row` is `None`, it skips the number of rows from the
354                          start of the sheet.
355        :param schema_sample_rows: Specifies how many rows should be used to determine
356                                   the dtype of a column. Cannot be 0. A specific dtype can be
357                                   enforced for some or all columns through the `dtypes` parameter.
358                                   If `None`, all rows will be used.
359        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
360                               will try to coerce different dtypes in a column to the same one,
361                               whereas `strict` will raise an error in case a column contains
362                               several dtypes. Note that this only applies to columns whose dtype
363                               is guessed, i.e. not specified via `dtypes`.
364        :param use_columns: Specifies the columns to use. Can either be:
365                            - `None` to select all columns
366                            - A list of strings and ints, the column names and/or indices
367                              (starting at 0)
368                            - A string, a comma separated list of Excel column letters and column
369                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
370                              `A,B,C,D,E` and `A,C,E,F`)
371                            - A callable, a function that takes a column and returns a boolean
372                              indicating whether the column should be used
373        :param dtypes: An optional dtype (for all columns)
374                       or dict of dtypes with keys as column indices or names.
375        """
376        output = self._reader.load_table(  # type:ignore[call-overload,misc]
377            name=name,
378            header_row=header_row,
379            column_names=column_names,
380            skip_rows=skip_rows,
381            n_rows=n_rows,
382            schema_sample_rows=schema_sample_rows,
383            dtype_coercion=dtype_coercion,
384            use_columns=use_columns,
385            dtypes=dtypes,
386            eager=eager,
387        )
388        if eager:
389            return output
390        return ExcelTable(output)

Loads a table by name.

Parameters
  • name: The name of the table to load.
  • header_row: The index of the row containing the column labels. If None, the table's column names will be used. Any rows before the header_row will be automatically skipped.
  • column_names: Overrides headers found in the document. If column_names is used, header_row will be ignored.
  • n_rows: Specifies how many rows should be loaded. If None, all rows are loaded
  • skip_rows: Specifies how many rows should be skipped after the header_row. Any rows before the header_row are automatically skipped. If header_row is None, it skips the number of rows from the start of the sheet.
  • schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. Cannot be 0. A specific dtype can be enforced for some or all columns through the dtypes parameter. If None, all rows will be used.
  • dtype_coercion: Specifies how type coercion should behave. coerce (the default) will try to coerce different dtypes in a column to the same one, whereas strict will raise an error in case a column contains several dtypes. Note that this only applies to columns whose dtype is guessed, i.e. not specified via dtypes.
  • use_columns: Specifies the columns to use. Can either be:
    • None to select all columns
    • A list of strings and ints, the column names and/or indices (starting at 0)
    • A string, a comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”, which would result in A,B,C,D,E and A,C,E,F)
    • A callable, a function that takes a column and returns a boolean indicating whether the column should be used
  • dtypes: An optional dtype (for all columns) or dict of dtypes with keys as column indices or names.
def load_sheet_eager( self, idx_or_name: int | str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: list[str] | list[int] | str | None = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> pyarrow.lib.RecordBatch:
392    def load_sheet_eager(
393        self,
394        idx_or_name: int | str,
395        *,
396        header_row: int | None = 0,
397        column_names: list[str] | None = None,
398        skip_rows: int | None = None,
399        n_rows: int | None = None,
400        schema_sample_rows: int | None = 1_000,
401        dtype_coercion: Literal["coerce", "strict"] = "coerce",
402        use_columns: list[str] | list[int] | str | None = None,
403        dtypes: DType | DTypeMap | None = None,
404    ) -> pa.RecordBatch:
405        """Loads a sheet eagerly by index or name.
406
407        For xlsx files, this will be faster and more memory-efficient, as it will use
408        `worksheet_range_ref` under the hood, which returns borrowed types.
409
410        Refer to `load_sheet` for parameter documentation
411        """
412        return self._reader.load_sheet(
413            idx_or_name=idx_or_name,
414            header_row=header_row,
415            column_names=column_names,
416            skip_rows=skip_rows,
417            n_rows=n_rows,
418            schema_sample_rows=schema_sample_rows,
419            dtype_coercion=dtype_coercion,
420            use_columns=use_columns,
421            dtypes=dtypes,
422            eager=True,
423        )

Loads a sheet eagerly by index or name.

For xlsx files, this will be faster and more memory-efficient, as it will use worksheet_range_ref under the hood, which returns borrowed types.

Refer to load_sheet for parameter documentation

def load_sheet_by_name( self, name: str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
425    def load_sheet_by_name(
426        self,
427        name: str,
428        *,
429        header_row: int | None = 0,
430        column_names: list[str] | None = None,
431        skip_rows: int | None = None,
432        n_rows: int | None = None,
433        schema_sample_rows: int | None = 1_000,
434        dtype_coercion: Literal["coerce", "strict"] = "coerce",
435        use_columns: list[str]
436        | list[int]
437        | str
438        | Callable[[ColumnInfoNoDtype], bool]
439        | None = None,
440        dtypes: DType | DTypeMap | None = None,
441    ) -> ExcelSheet:
442        """Loads a sheet by name.
443
444        Refer to `load_sheet` for parameter documentation
445        """
446        return self.load_sheet(
447            name,
448            header_row=header_row,
449            column_names=column_names,
450            skip_rows=skip_rows,
451            n_rows=n_rows,
452            schema_sample_rows=schema_sample_rows,
453            dtype_coercion=dtype_coercion,
454            use_columns=use_columns,
455            dtypes=dtypes,
456        )

Loads a sheet by name.

Refer to load_sheet for parameter documentation

def load_sheet_by_idx( self, idx: int, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
458    def load_sheet_by_idx(
459        self,
460        idx: int,
461        *,
462        header_row: int | None = 0,
463        column_names: list[str] | None = None,
464        skip_rows: int | None = None,
465        n_rows: int | None = None,
466        schema_sample_rows: int | None = 1_000,
467        dtype_coercion: Literal["coerce", "strict"] = "coerce",
468        use_columns: list[str]
469        | list[int]
470        | str
471        | Callable[[ColumnInfoNoDtype], bool]
472        | None = None,
473        dtypes: DType | DTypeMap | None = None,
474    ) -> ExcelSheet:
475        """Loads a sheet by index.
476
477        Refer to `load_sheet` for parameter documentation
478        """
479        return self.load_sheet(
480            idx,
481            header_row=header_row,
482            column_names=column_names,
483            skip_rows=skip_rows,
484            n_rows=n_rows,
485            schema_sample_rows=schema_sample_rows,
486            dtype_coercion=dtype_coercion,
487            use_columns=use_columns,
488            dtypes=dtypes,
489        )

Loads a sheet by index.

Refer to load_sheet for parameter documentation

class ExcelSheet:
 56class ExcelSheet:
 57    """A class representing a single sheet in an Excel File"""
 58
 59    def __init__(self, sheet: _ExcelSheet) -> None:
 60        self._sheet = sheet
 61
 62    @property
 63    def name(self) -> str:
 64        """The name of the sheet"""
 65        return self._sheet.name
 66
 67    @property
 68    def width(self) -> int:
 69        """The sheet's width"""
 70        return self._sheet.width
 71
 72    @property
 73    def height(self) -> int:
 74        """The sheet's height, with `skip_rows` and `nrows` applied"""
 75        return self._sheet.height
 76
 77    @property
 78    def total_height(self) -> int:
 79        """The sheet's total height"""
 80        return self._sheet.total_height
 81
 82    @property
 83    def selected_columns(self) -> list[ColumnInfo]:
 84        """The sheet's selected columns"""
 85        return self._sheet.selected_columns
 86
 87    def available_columns(self) -> list[ColumnInfo]:
 88        """The columns available for the given sheet"""
 89        return self._sheet.available_columns()
 90
 91    @property
 92    def specified_dtypes(self) -> DTypeMap | None:
 93        """The dtypes specified for the sheet"""
 94        return self._sheet.specified_dtypes
 95
 96    @property
 97    def visible(self) -> SheetVisible:
 98        """The visibility of the sheet"""
 99        return self._sheet.visible
100
101    def to_arrow(self) -> pa.RecordBatch:
102        """Converts the sheet to a pyarrow `RecordBatch`"""
103        return self._sheet.to_arrow()
104
105    def to_pandas(self) -> "pd.DataFrame":
106        """Converts the sheet to a Pandas `DataFrame`.
107
108        Requires the `pandas` extra to be installed.
109        """
110        # We know for sure that the sheet will yield exactly one RecordBatch
111        return self.to_arrow().to_pandas()
112
113    def to_polars(self) -> "pl.DataFrame":
114        """Converts the sheet to a Polars `DataFrame`.
115
116        Requires the `polars` extra to be installed.
117        """
118        return _recordbatch_to_polars(self.to_arrow())
119
120    def __repr__(self) -> str:
121        return self._sheet.__repr__()

A class representing a single sheet in an Excel File

ExcelSheet(sheet: _ExcelSheet)
59    def __init__(self, sheet: _ExcelSheet) -> None:
60        self._sheet = sheet
name: str
62    @property
63    def name(self) -> str:
64        """The name of the sheet"""
65        return self._sheet.name

The name of the sheet

width: int
67    @property
68    def width(self) -> int:
69        """The sheet's width"""
70        return self._sheet.width

The sheet's width

height: int
72    @property
73    def height(self) -> int:
74        """The sheet's height, with `skip_rows` and `nrows` applied"""
75        return self._sheet.height

The sheet's height, with skip_rows and nrows applied

total_height: int
77    @property
78    def total_height(self) -> int:
79        """The sheet's total height"""
80        return self._sheet.total_height

The sheet's total height

selected_columns: list[ColumnInfo]
82    @property
83    def selected_columns(self) -> list[ColumnInfo]:
84        """The sheet's selected columns"""
85        return self._sheet.selected_columns

The sheet's selected columns

def available_columns(self) -> list[ColumnInfo]:
87    def available_columns(self) -> list[ColumnInfo]:
88        """The columns available for the given sheet"""
89        return self._sheet.available_columns()

The columns available for the given sheet

specified_dtypes: 'DTypeMap | None'
91    @property
92    def specified_dtypes(self) -> DTypeMap | None:
93        """The dtypes specified for the sheet"""
94        return self._sheet.specified_dtypes

The dtypes specified for the sheet

visible: Literal['visible', 'hidden', 'veryhidden']
96    @property
97    def visible(self) -> SheetVisible:
98        """The visibility of the sheet"""
99        return self._sheet.visible

The visibility of the sheet

def to_arrow(self) -> pyarrow.lib.RecordBatch:
101    def to_arrow(self) -> pa.RecordBatch:
102        """Converts the sheet to a pyarrow `RecordBatch`"""
103        return self._sheet.to_arrow()

Converts the sheet to a pyarrow RecordBatch

def to_pandas(self) -> pandas.core.frame.DataFrame:
105    def to_pandas(self) -> "pd.DataFrame":
106        """Converts the sheet to a Pandas `DataFrame`.
107
108        Requires the `pandas` extra to be installed.
109        """
110        # We know for sure that the sheet will yield exactly one RecordBatch
111        return self.to_arrow().to_pandas()

Converts the sheet to a Pandas DataFrame.

Requires the pandas extra to be installed.

def to_polars(self) -> polars.dataframe.frame.DataFrame:
113    def to_polars(self) -> "pl.DataFrame":
114        """Converts the sheet to a Polars `DataFrame`.
115
116        Requires the `polars` extra to be installed.
117        """
118        return _recordbatch_to_polars(self.to_arrow())

Converts the sheet to a Polars DataFrame.

Requires the polars extra to be installed.

DTypeFrom: TypeAlias = Literal['provided_by_index', 'provided_by_name', 'guessed']
ColumnNameFrom: TypeAlias = Literal['provided', 'looked_up', 'generated']
class ColumnInfo:

This class provides information about a single column in a sheet

column_name_from

fastexcel.ColumnNameFrom. How the name of the column was determined.

One of three possible values:

  • "provided": The column name was provided via the use_columns parameter
  • "looked_up": The column name was looked up from the data found in the sheet
  • "generated": The column name was generated from the column index, either because header_row was None, or because it could not be looked up
dtype_from

fastexcel.DTypeFrom. How the dtype of the column was determined.

One of three possible values:

  • "provided_by_index": The dtype was specified via the column index
  • "provided_by_name": The dtype was specified via the column name
  • "guessed": The dtype was determined from the content of the column
dtype

fastexcel.DType. The dtype of the column

index

int. The index of the column

name

str. The name of the column

class FastExcelError(builtins.Exception):

The base class for all fastexcel errors

class CannotRetrieveCellDataError(fastexcel.FastExcelError):

Data for a given cell cannot be retrieved

class CalamineCellError(fastexcel.FastExcelError):

calamine returned an error regarding the content of the cell

class CalamineError(fastexcel.FastExcelError):

Generic calamine error

class SheetNotFoundError(fastexcel.FastExcelError):

Sheet was not found

class ColumnNotFoundError(fastexcel.FastExcelError):

Column was not found

class ArrowError(fastexcel.FastExcelError):

Generic arrow error

class InvalidParametersError(fastexcel.FastExcelError):

Provided parameters are invalid

class UnsupportedColumnTypeCombinationError(fastexcel.FastExcelError):

Column contains an unsupported type combination