fastexcel

  1from __future__ import annotations
  2
  3import sys
  4import typing
  5from typing import TYPE_CHECKING, Callable, Literal
  6
  7if sys.version_info < (3, 10):
  8    from typing_extensions import TypeAlias
  9else:
 10    from typing import TypeAlias
 11
 12if TYPE_CHECKING:
 13    import pandas as pd
 14    import polars as pl
 15
 16from os.path import expanduser
 17from pathlib import Path
 18
 19import pyarrow as pa
 20
 21from ._fastexcel import (
 22    ArrowError,
 23    CalamineCellError,
 24    CalamineError,
 25    CannotRetrieveCellDataError,
 26    ColumnInfo,
 27    ColumnInfoNoDtype,
 28    ColumnNotFoundError,
 29    FastExcelError,
 30    InvalidParametersError,
 31    SheetNotFoundError,
 32    UnsupportedColumnTypeCombinationError,
 33    __version__,
 34    _ExcelReader,
 35    _ExcelSheet,
 36    _ExcelTable,
 37)
 38from ._fastexcel import read_excel as _read_excel
 39
 40DType = Literal["null", "int", "float", "string", "boolean", "datetime", "date", "duration"]
 41DTypeMap: TypeAlias = "dict[str | int, DType]"
 42ColumnNameFrom: TypeAlias = Literal["provided", "looked_up", "generated"]
 43DTypeFrom: TypeAlias = Literal["provided_by_index", "provided_by_name", "guessed"]
 44SheetVisible: TypeAlias = Literal["visible", "hidden", "veryhidden"]
 45
 46
 47def _recordbatch_to_polars(rb: pa.RecordBatch) -> pl.DataFrame:
 48    import polars as pl
 49
 50    df = pl.from_arrow(data=rb)
 51    assert isinstance(df, pl.DataFrame)
 52    return df
 53
 54
 55class ExcelSheet:
 56    """A class representing a single sheet in an Excel File"""
 57
 58    def __init__(self, sheet: _ExcelSheet) -> None:
 59        self._sheet = sheet
 60
 61    @property
 62    def name(self) -> str:
 63        """The name of the sheet"""
 64        return self._sheet.name
 65
 66    @property
 67    def width(self) -> int:
 68        """The sheet's width"""
 69        return self._sheet.width
 70
 71    @property
 72    def height(self) -> int:
 73        """The sheet's height, with `skip_rows` and `nrows` applied"""
 74        return self._sheet.height
 75
 76    @property
 77    def total_height(self) -> int:
 78        """The sheet's total height"""
 79        return self._sheet.total_height
 80
 81    @property
 82    def selected_columns(self) -> list[ColumnInfo]:
 83        """The sheet's selected columns"""
 84        return self._sheet.selected_columns
 85
 86    def available_columns(self) -> list[ColumnInfo]:
 87        """The columns available for the given sheet"""
 88        return self._sheet.available_columns()
 89
 90    @property
 91    def specified_dtypes(self) -> DTypeMap | None:
 92        """The dtypes specified for the sheet"""
 93        return self._sheet.specified_dtypes
 94
 95    @property
 96    def visible(self) -> SheetVisible:
 97        """The visibility of the sheet"""
 98        return self._sheet.visible
 99
100    def to_arrow(self) -> pa.RecordBatch:
101        """Converts the sheet to a pyarrow `RecordBatch`"""
102        return self._sheet.to_arrow()
103
104    def to_pandas(self) -> "pd.DataFrame":
105        """Converts the sheet to a Pandas `DataFrame`.
106
107        Requires the `pandas` extra to be installed.
108        """
109        # We know for sure that the sheet will yield exactly one RecordBatch
110        return self.to_arrow().to_pandas()
111
112    def to_polars(self) -> "pl.DataFrame":
113        """Converts the sheet to a Polars `DataFrame`.
114
115        Requires the `polars` extra to be installed.
116        """
117        return _recordbatch_to_polars(self.to_arrow())
118
119    def __repr__(self) -> str:
120        return self._sheet.__repr__()
121
122
123class ExcelTable:
124    """A class representing a single table in an Excel file"""
125
126    def __init__(self, table: _ExcelTable) -> None:
127        self._table = table
128
129    @property
130    def name(self) -> str:
131        """The name of the table"""
132        return self._table.name
133
134    @property
135    def sheet_name(self) -> str:
136        """The name of the sheet this table belongs to"""
137        return self._table.sheet_name
138
139    @property
140    def width(self) -> int:
141        """The table's width"""
142        return self._table.width
143
144    @property
145    def height(self) -> int:
146        """The table's height"""
147        return self._table.height
148
149    @property
150    def total_height(self) -> int:
151        """The table's total height"""
152        return self._table.total_height
153
154    @property
155    def offset(self) -> int:
156        """The table's offset before data starts"""
157        return self._table.offset
158
159    @property
160    def selected_columns(self) -> list[ColumnInfo]:
161        """The table's selected columns"""
162        return self._table.selected_columns
163
164    def available_columns(self) -> list[ColumnInfo]:
165        """The columns available for the given table"""
166        return self._table.available_columns()
167
168    @property
169    def specified_dtypes(self) -> DTypeMap | None:
170        """The dtypes specified for the table"""
171        return self._table.specified_dtypes
172
173    def to_arrow(self) -> pa.RecordBatch:
174        """Converts the table to a pyarrow `RecordBatch`"""
175        return self._table.to_arrow()
176
177    def to_pandas(self) -> "pd.DataFrame":
178        """Converts the table to a Pandas `DataFrame`.
179
180        Requires the `pandas` extra to be installed.
181        """
182        # We know for sure that the table will yield exactly one RecordBatch
183        return self.to_arrow().to_pandas()
184
185    def to_polars(self) -> "pl.DataFrame":
186        """Converts the table to a Polars `DataFrame`.
187
188        Requires the `polars` extra to be installed.
189        """
190        return _recordbatch_to_polars(self.to_arrow())
191
192
193class ExcelReader:
194    """A class representing an open Excel file and allowing to read its sheets"""
195
196    def __init__(self, reader: _ExcelReader) -> None:
197        self._reader = reader
198
199    @property
200    def sheet_names(self) -> list[str]:
201        """The list of sheet names"""
202        return self._reader.sheet_names
203
204    def load_sheet(
205        self,
206        idx_or_name: int | str,
207        *,
208        header_row: int | None = 0,
209        column_names: list[str] | None = None,
210        skip_rows: int | None = None,
211        n_rows: int | None = None,
212        schema_sample_rows: int | None = 1_000,
213        dtype_coercion: Literal["coerce", "strict"] = "coerce",
214        use_columns: list[str]
215        | list[int]
216        | str
217        | Callable[[ColumnInfoNoDtype], bool]
218        | None = None,
219        dtypes: DType | DTypeMap | None = None,
220    ) -> ExcelSheet:
221        """Loads a sheet lazily by index or name.
222
223        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
224        :param header_row: The index of the row containing the column labels, default index is 0.
225                           If `None`, the sheet does not have any column labels.
226                           Any rows before the `header_row` will be automatically skipped.
227        :param column_names: Overrides headers found in the document.
228                             If `column_names` is used, `header_row` will be ignored.
229        :param n_rows: Specifies how many rows should be loaded.
230                       If `None`, all rows are loaded
231        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
232                          Any rows before the `header_row` are automatically skipped.
233                          If `header_row` is `None`:
234                            - if `skip_rows` is `None` (default): it skips all empty rows
235                            at the beginning of the sheet.
236                            - if `skip_rows` is a number, it skips the specified number
237                            of rows from the start of the sheet.
238        :param schema_sample_rows: Specifies how many rows should be used to determine
239                                   the dtype of a column. Cannot be 0. A specific dtype can be
240                                   enforced for some or all columns through the `dtypes` parameter.
241                                   If `None`, all rows will be used.
242        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
243                               will try to coerce different dtypes in a column to the same one,
244                               whereas `strict` will raise an error in case a column contains
245                               several dtypes. Note that this only applies to columns whose dtype
246                               is guessed, i.e. not specified via `dtypes`.
247        :param use_columns: Specifies the columns to use. Can either be:
248                            - `None` to select all columns
249                            - A list of strings and ints, the column names and/or indices
250                              (starting at 0)
251                            - A string, a comma separated list of Excel column letters and column
252                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
253                              `A,B,C,D,E` and `A,C,E,F`)
254                            - A callable, a function that takes a column and returns a boolean
255                              indicating whether the column should be used
256        :param dtypes: An optional dtype (for all columns)
257                       or dict of dtypes with keys as column indices or names.
258        """
259        return ExcelSheet(
260            self._reader.load_sheet(
261                idx_or_name=idx_or_name,
262                header_row=header_row,
263                column_names=column_names,
264                skip_rows=skip_rows,
265                n_rows=n_rows,
266                schema_sample_rows=schema_sample_rows,
267                dtype_coercion=dtype_coercion,
268                use_columns=use_columns,
269                dtypes=dtypes,
270                eager=False,
271            )
272        )
273
274    def table_names(self, sheet_name: str | None = None) -> list[str]:
275        """The list of table names.
276
277        Will return an empty list if no tables are found.
278
279        :param sheet_name: If given, will limit the list to the given sheet, will be faster
280        too.
281        """
282        return self._reader.table_names(sheet_name)
283
284    @typing.overload
285    def load_table(
286        self,
287        name: str,
288        *,
289        header_row: int | None = None,
290        column_names: list[str] | None = None,
291        skip_rows: int = 0,
292        n_rows: int | None = None,
293        schema_sample_rows: int | None = 1_000,
294        dtype_coercion: Literal["coerce", "strict"] = "coerce",
295        use_columns: list[str]
296        | list[int]
297        | str
298        | Callable[[ColumnInfoNoDtype], bool]
299        | None = None,
300        dtypes: DType | DTypeMap | None = None,
301        eager: Literal[False] = ...,
302    ) -> ExcelTable: ...
303    @typing.overload
304    def load_table(
305        self,
306        name: str,
307        *,
308        header_row: int | None = None,
309        column_names: list[str] | None = None,
310        skip_rows: int = 0,
311        n_rows: int | None = None,
312        schema_sample_rows: int | None = 1_000,
313        dtype_coercion: Literal["coerce", "strict"] = "coerce",
314        use_columns: list[str]
315        | list[int]
316        | str
317        | Callable[[ColumnInfoNoDtype], bool]
318        | None = None,
319        dtypes: DType | DTypeMap | None = None,
320        eager: Literal[True] = ...,
321    ) -> pa.RecordBatch: ...
322    def load_table(
323        self,
324        name: str,
325        *,
326        header_row: int | None = None,
327        column_names: list[str] | None = None,
328        skip_rows: int = 0,
329        n_rows: int | None = None,
330        schema_sample_rows: int | None = 1_000,
331        dtype_coercion: Literal["coerce", "strict"] = "coerce",
332        use_columns: list[str]
333        | list[int]
334        | str
335        | Callable[[ColumnInfoNoDtype], bool]
336        | None = None,
337        dtypes: DType | DTypeMap | None = None,
338        eager: bool = False,
339    ) -> ExcelTable | pa.RecordBatch:
340        """Loads a table by name.
341
342        :param name: The name of the table to load.
343        :param header_row: The index of the row containing the column labels.
344                           If `None`, the table's column names will be used.
345                           Any rows before the `header_row` will be automatically skipped.
346        :param column_names: Overrides headers found in the document.
347                             If `column_names` is used, `header_row` will be ignored.
348        :param n_rows: Specifies how many rows should be loaded.
349                       If `None`, all rows are loaded
350        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
351                          Any rows before the `header_row` are automatically skipped.
352                          If `header_row` is `None`, it skips the number of rows from the
353                          start of the sheet.
354        :param schema_sample_rows: Specifies how many rows should be used to determine
355                                   the dtype of a column. Cannot be 0. A specific dtype can be
356                                   enforced for some or all columns through the `dtypes` parameter.
357                                   If `None`, all rows will be used.
358        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
359                               will try to coerce different dtypes in a column to the same one,
360                               whereas `strict` will raise an error in case a column contains
361                               several dtypes. Note that this only applies to columns whose dtype
362                               is guessed, i.e. not specified via `dtypes`.
363        :param use_columns: Specifies the columns to use. Can either be:
364                            - `None` to select all columns
365                            - A list of strings and ints, the column names and/or indices
366                              (starting at 0)
367                            - A string, a comma separated list of Excel column letters and column
368                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
369                              `A,B,C,D,E` and `A,C,E,F`)
370                            - A callable, a function that takes a column and returns a boolean
371                              indicating whether the column should be used
372        :param dtypes: An optional dtype (for all columns)
373                       or dict of dtypes with keys as column indices or names.
374        """
375        output = self._reader.load_table(  # type:ignore[call-overload,misc]
376            name=name,
377            header_row=header_row,
378            column_names=column_names,
379            skip_rows=skip_rows,
380            n_rows=n_rows,
381            schema_sample_rows=schema_sample_rows,
382            dtype_coercion=dtype_coercion,
383            use_columns=use_columns,
384            dtypes=dtypes,
385            eager=eager,
386        )
387        if eager:
388            return output
389        return ExcelTable(output)
390
391    def load_sheet_eager(
392        self,
393        idx_or_name: int | str,
394        *,
395        header_row: int | None = 0,
396        column_names: list[str] | None = None,
397        skip_rows: int | None = None,
398        n_rows: int | None = None,
399        schema_sample_rows: int | None = 1_000,
400        dtype_coercion: Literal["coerce", "strict"] = "coerce",
401        use_columns: list[str] | list[int] | str | None = None,
402        dtypes: DType | DTypeMap | None = None,
403    ) -> pa.RecordBatch:
404        """Loads a sheet eagerly by index or name.
405
406        For xlsx files, this will be faster and more memory-efficient, as it will use
407        `worksheet_range_ref` under the hood, which returns borrowed types.
408
409        Refer to `load_sheet` for parameter documentation
410        """
411        return self._reader.load_sheet(
412            idx_or_name=idx_or_name,
413            header_row=header_row,
414            column_names=column_names,
415            skip_rows=skip_rows,
416            n_rows=n_rows,
417            schema_sample_rows=schema_sample_rows,
418            dtype_coercion=dtype_coercion,
419            use_columns=use_columns,
420            dtypes=dtypes,
421            eager=True,
422        )
423
424    def load_sheet_by_name(
425        self,
426        name: str,
427        *,
428        header_row: int | None = 0,
429        column_names: list[str] | None = None,
430        skip_rows: int | None = None,
431        n_rows: int | None = None,
432        schema_sample_rows: int | None = 1_000,
433        dtype_coercion: Literal["coerce", "strict"] = "coerce",
434        use_columns: list[str]
435        | list[int]
436        | str
437        | Callable[[ColumnInfoNoDtype], bool]
438        | None = None,
439        dtypes: DType | DTypeMap | None = None,
440    ) -> ExcelSheet:
441        """Loads a sheet by name.
442
443        Refer to `load_sheet` for parameter documentation
444        """
445        return self.load_sheet(
446            name,
447            header_row=header_row,
448            column_names=column_names,
449            skip_rows=skip_rows,
450            n_rows=n_rows,
451            schema_sample_rows=schema_sample_rows,
452            dtype_coercion=dtype_coercion,
453            use_columns=use_columns,
454            dtypes=dtypes,
455        )
456
457    def load_sheet_by_idx(
458        self,
459        idx: int,
460        *,
461        header_row: int | None = 0,
462        column_names: list[str] | None = None,
463        skip_rows: int | None = None,
464        n_rows: int | None = None,
465        schema_sample_rows: int | None = 1_000,
466        dtype_coercion: Literal["coerce", "strict"] = "coerce",
467        use_columns: list[str]
468        | list[int]
469        | str
470        | Callable[[ColumnInfoNoDtype], bool]
471        | None = None,
472        dtypes: DType | DTypeMap | None = None,
473    ) -> ExcelSheet:
474        """Loads a sheet by index.
475
476        Refer to `load_sheet` for parameter documentation
477        """
478        return self.load_sheet(
479            idx,
480            header_row=header_row,
481            column_names=column_names,
482            skip_rows=skip_rows,
483            n_rows=n_rows,
484            schema_sample_rows=schema_sample_rows,
485            dtype_coercion=dtype_coercion,
486            use_columns=use_columns,
487            dtypes=dtypes,
488        )
489
490    def __repr__(self) -> str:
491        return self._reader.__repr__()
492
493
494def read_excel(source: Path | str | bytes) -> ExcelReader:
495    """Opens and loads an excel file.
496
497    :param source: The path to a file or its content as bytes
498    """
499    if isinstance(source, (str, Path)):
500        source = expanduser(source)
501    return ExcelReader(_read_excel(source))
502
503
504__all__ = (
505    ## version
506    "__version__",
507    ## main entrypoint
508    "read_excel",
509    ## Python types
510    "DType",
511    "DTypeMap",
512    # Excel reader
513    "ExcelReader",
514    # Excel sheet
515    "ExcelSheet",
516    # Column metadata
517    "DTypeFrom",
518    "ColumnNameFrom",
519    "ColumnInfo",
520    # Exceptions
521    "FastExcelError",
522    "CannotRetrieveCellDataError",
523    "CalamineCellError",
524    "CalamineError",
525    "SheetNotFoundError",
526    "ColumnNotFoundError",
527    "ArrowError",
528    "InvalidParametersError",
529    "UnsupportedColumnTypeCombinationError",
530)
__version__ = '0.13.0'
def read_excel(source: pathlib.Path | str | bytes) -> ExcelReader:
495def read_excel(source: Path | str | bytes) -> ExcelReader:
496    """Opens and loads an excel file.
497
498    :param source: The path to a file or its content as bytes
499    """
500    if isinstance(source, (str, Path)):
501        source = expanduser(source)
502    return ExcelReader(_read_excel(source))

Opens and loads an excel file.

Parameters
  • source: The path to a file or its content as bytes
DType = typing.Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']
DTypeMap: TypeAlias = 'dict[str | int, DType]'
class ExcelReader:
194class ExcelReader:
195    """A class representing an open Excel file and allowing to read its sheets"""
196
197    def __init__(self, reader: _ExcelReader) -> None:
198        self._reader = reader
199
200    @property
201    def sheet_names(self) -> list[str]:
202        """The list of sheet names"""
203        return self._reader.sheet_names
204
205    def load_sheet(
206        self,
207        idx_or_name: int | str,
208        *,
209        header_row: int | None = 0,
210        column_names: list[str] | None = None,
211        skip_rows: int | None = None,
212        n_rows: int | None = None,
213        schema_sample_rows: int | None = 1_000,
214        dtype_coercion: Literal["coerce", "strict"] = "coerce",
215        use_columns: list[str]
216        | list[int]
217        | str
218        | Callable[[ColumnInfoNoDtype], bool]
219        | None = None,
220        dtypes: DType | DTypeMap | None = None,
221    ) -> ExcelSheet:
222        """Loads a sheet lazily by index or name.
223
224        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
225        :param header_row: The index of the row containing the column labels, default index is 0.
226                           If `None`, the sheet does not have any column labels.
227                           Any rows before the `header_row` will be automatically skipped.
228        :param column_names: Overrides headers found in the document.
229                             If `column_names` is used, `header_row` will be ignored.
230        :param n_rows: Specifies how many rows should be loaded.
231                       If `None`, all rows are loaded
232        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
233                          Any rows before the `header_row` are automatically skipped.
234                          If `header_row` is `None`:
235                            - if `skip_rows` is `None` (default): it skips all empty rows
236                            at the beginning of the sheet.
237                            - if `skip_rows` is a number, it skips the specified number
238                            of rows from the start of the sheet.
239        :param schema_sample_rows: Specifies how many rows should be used to determine
240                                   the dtype of a column. Cannot be 0. A specific dtype can be
241                                   enforced for some or all columns through the `dtypes` parameter.
242                                   If `None`, all rows will be used.
243        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
244                               will try to coerce different dtypes in a column to the same one,
245                               whereas `strict` will raise an error in case a column contains
246                               several dtypes. Note that this only applies to columns whose dtype
247                               is guessed, i.e. not specified via `dtypes`.
248        :param use_columns: Specifies the columns to use. Can either be:
249                            - `None` to select all columns
250                            - A list of strings and ints, the column names and/or indices
251                              (starting at 0)
252                            - A string, a comma separated list of Excel column letters and column
253                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
254                              `A,B,C,D,E` and `A,C,E,F`)
255                            - A callable, a function that takes a column and returns a boolean
256                              indicating whether the column should be used
257        :param dtypes: An optional dtype (for all columns)
258                       or dict of dtypes with keys as column indices or names.
259        """
260        return ExcelSheet(
261            self._reader.load_sheet(
262                idx_or_name=idx_or_name,
263                header_row=header_row,
264                column_names=column_names,
265                skip_rows=skip_rows,
266                n_rows=n_rows,
267                schema_sample_rows=schema_sample_rows,
268                dtype_coercion=dtype_coercion,
269                use_columns=use_columns,
270                dtypes=dtypes,
271                eager=False,
272            )
273        )
274
275    def table_names(self, sheet_name: str | None = None) -> list[str]:
276        """The list of table names.
277
278        Will return an empty list if no tables are found.
279
280        :param sheet_name: If given, will limit the list to the given sheet, will be faster
281        too.
282        """
283        return self._reader.table_names(sheet_name)
284
285    @typing.overload
286    def load_table(
287        self,
288        name: str,
289        *,
290        header_row: int | None = None,
291        column_names: list[str] | None = None,
292        skip_rows: int = 0,
293        n_rows: int | None = None,
294        schema_sample_rows: int | None = 1_000,
295        dtype_coercion: Literal["coerce", "strict"] = "coerce",
296        use_columns: list[str]
297        | list[int]
298        | str
299        | Callable[[ColumnInfoNoDtype], bool]
300        | None = None,
301        dtypes: DType | DTypeMap | None = None,
302        eager: Literal[False] = ...,
303    ) -> ExcelTable: ...
304    @typing.overload
305    def load_table(
306        self,
307        name: str,
308        *,
309        header_row: int | None = None,
310        column_names: list[str] | None = None,
311        skip_rows: int = 0,
312        n_rows: int | None = None,
313        schema_sample_rows: int | None = 1_000,
314        dtype_coercion: Literal["coerce", "strict"] = "coerce",
315        use_columns: list[str]
316        | list[int]
317        | str
318        | Callable[[ColumnInfoNoDtype], bool]
319        | None = None,
320        dtypes: DType | DTypeMap | None = None,
321        eager: Literal[True] = ...,
322    ) -> pa.RecordBatch: ...
323    def load_table(
324        self,
325        name: str,
326        *,
327        header_row: int | None = None,
328        column_names: list[str] | None = None,
329        skip_rows: int = 0,
330        n_rows: int | None = None,
331        schema_sample_rows: int | None = 1_000,
332        dtype_coercion: Literal["coerce", "strict"] = "coerce",
333        use_columns: list[str]
334        | list[int]
335        | str
336        | Callable[[ColumnInfoNoDtype], bool]
337        | None = None,
338        dtypes: DType | DTypeMap | None = None,
339        eager: bool = False,
340    ) -> ExcelTable | pa.RecordBatch:
341        """Loads a table by name.
342
343        :param name: The name of the table to load.
344        :param header_row: The index of the row containing the column labels.
345                           If `None`, the table's column names will be used.
346                           Any rows before the `header_row` will be automatically skipped.
347        :param column_names: Overrides headers found in the document.
348                             If `column_names` is used, `header_row` will be ignored.
349        :param n_rows: Specifies how many rows should be loaded.
350                       If `None`, all rows are loaded
351        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
352                          Any rows before the `header_row` are automatically skipped.
353                          If `header_row` is `None`, it skips the number of rows from the
354                          start of the sheet.
355        :param schema_sample_rows: Specifies how many rows should be used to determine
356                                   the dtype of a column. Cannot be 0. A specific dtype can be
357                                   enforced for some or all columns through the `dtypes` parameter.
358                                   If `None`, all rows will be used.
359        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
360                               will try to coerce different dtypes in a column to the same one,
361                               whereas `strict` will raise an error in case a column contains
362                               several dtypes. Note that this only applies to columns whose dtype
363                               is guessed, i.e. not specified via `dtypes`.
364        :param use_columns: Specifies the columns to use. Can either be:
365                            - `None` to select all columns
366                            - A list of strings and ints, the column names and/or indices
367                              (starting at 0)
368                            - A string, a comma separated list of Excel column letters and column
369                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
370                              `A,B,C,D,E` and `A,C,E,F`)
371                            - A callable, a function that takes a column and returns a boolean
372                              indicating whether the column should be used
373        :param dtypes: An optional dtype (for all columns)
374                       or dict of dtypes with keys as column indices or names.
375        """
376        output = self._reader.load_table(  # type:ignore[call-overload,misc]
377            name=name,
378            header_row=header_row,
379            column_names=column_names,
380            skip_rows=skip_rows,
381            n_rows=n_rows,
382            schema_sample_rows=schema_sample_rows,
383            dtype_coercion=dtype_coercion,
384            use_columns=use_columns,
385            dtypes=dtypes,
386            eager=eager,
387        )
388        if eager:
389            return output
390        return ExcelTable(output)
391
392    def load_sheet_eager(
393        self,
394        idx_or_name: int | str,
395        *,
396        header_row: int | None = 0,
397        column_names: list[str] | None = None,
398        skip_rows: int | None = None,
399        n_rows: int | None = None,
400        schema_sample_rows: int | None = 1_000,
401        dtype_coercion: Literal["coerce", "strict"] = "coerce",
402        use_columns: list[str] | list[int] | str | None = None,
403        dtypes: DType | DTypeMap | None = None,
404    ) -> pa.RecordBatch:
405        """Loads a sheet eagerly by index or name.
406
407        For xlsx files, this will be faster and more memory-efficient, as it will use
408        `worksheet_range_ref` under the hood, which returns borrowed types.
409
410        Refer to `load_sheet` for parameter documentation
411        """
412        return self._reader.load_sheet(
413            idx_or_name=idx_or_name,
414            header_row=header_row,
415            column_names=column_names,
416            skip_rows=skip_rows,
417            n_rows=n_rows,
418            schema_sample_rows=schema_sample_rows,
419            dtype_coercion=dtype_coercion,
420            use_columns=use_columns,
421            dtypes=dtypes,
422            eager=True,
423        )
424
425    def load_sheet_by_name(
426        self,
427        name: str,
428        *,
429        header_row: int | None = 0,
430        column_names: list[str] | None = None,
431        skip_rows: int | None = None,
432        n_rows: int | None = None,
433        schema_sample_rows: int | None = 1_000,
434        dtype_coercion: Literal["coerce", "strict"] = "coerce",
435        use_columns: list[str]
436        | list[int]
437        | str
438        | Callable[[ColumnInfoNoDtype], bool]
439        | None = None,
440        dtypes: DType | DTypeMap | None = None,
441    ) -> ExcelSheet:
442        """Loads a sheet by name.
443
444        Refer to `load_sheet` for parameter documentation
445        """
446        return self.load_sheet(
447            name,
448            header_row=header_row,
449            column_names=column_names,
450            skip_rows=skip_rows,
451            n_rows=n_rows,
452            schema_sample_rows=schema_sample_rows,
453            dtype_coercion=dtype_coercion,
454            use_columns=use_columns,
455            dtypes=dtypes,
456        )
457
458    def load_sheet_by_idx(
459        self,
460        idx: int,
461        *,
462        header_row: int | None = 0,
463        column_names: list[str] | None = None,
464        skip_rows: int | None = None,
465        n_rows: int | None = None,
466        schema_sample_rows: int | None = 1_000,
467        dtype_coercion: Literal["coerce", "strict"] = "coerce",
468        use_columns: list[str]
469        | list[int]
470        | str
471        | Callable[[ColumnInfoNoDtype], bool]
472        | None = None,
473        dtypes: DType | DTypeMap | None = None,
474    ) -> ExcelSheet:
475        """Loads a sheet by index.
476
477        Refer to `load_sheet` for parameter documentation
478        """
479        return self.load_sheet(
480            idx,
481            header_row=header_row,
482            column_names=column_names,
483            skip_rows=skip_rows,
484            n_rows=n_rows,
485            schema_sample_rows=schema_sample_rows,
486            dtype_coercion=dtype_coercion,
487            use_columns=use_columns,
488            dtypes=dtypes,
489        )
490
491    def __repr__(self) -> str:
492        return self._reader.__repr__()

A class representing an open Excel file and allowing to read its sheets

ExcelReader(reader: _ExcelReader)
197    def __init__(self, reader: _ExcelReader) -> None:
198        self._reader = reader
sheet_names: list[str]
200    @property
201    def sheet_names(self) -> list[str]:
202        """The list of sheet names"""
203        return self._reader.sheet_names

The list of sheet names

def load_sheet( self, idx_or_name: int | str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
205    def load_sheet(
206        self,
207        idx_or_name: int | str,
208        *,
209        header_row: int | None = 0,
210        column_names: list[str] | None = None,
211        skip_rows: int | None = None,
212        n_rows: int | None = None,
213        schema_sample_rows: int | None = 1_000,
214        dtype_coercion: Literal["coerce", "strict"] = "coerce",
215        use_columns: list[str]
216        | list[int]
217        | str
218        | Callable[[ColumnInfoNoDtype], bool]
219        | None = None,
220        dtypes: DType | DTypeMap | None = None,
221    ) -> ExcelSheet:
222        """Loads a sheet lazily by index or name.
223
224        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
225        :param header_row: The index of the row containing the column labels, default index is 0.
226                           If `None`, the sheet does not have any column labels.
227                           Any rows before the `header_row` will be automatically skipped.
228        :param column_names: Overrides headers found in the document.
229                             If `column_names` is used, `header_row` will be ignored.
230        :param n_rows: Specifies how many rows should be loaded.
231                       If `None`, all rows are loaded
232        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
233                          Any rows before the `header_row` are automatically skipped.
234                          If `header_row` is `None`:
235                            - if `skip_rows` is `None` (default): it skips all empty rows
236                            at the beginning of the sheet.
237                            - if `skip_rows` is a number, it skips the specified number
238                            of rows from the start of the sheet.
239        :param schema_sample_rows: Specifies how many rows should be used to determine
240                                   the dtype of a column. Cannot be 0. A specific dtype can be
241                                   enforced for some or all columns through the `dtypes` parameter.
242                                   If `None`, all rows will be used.
243        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
244                               will try to coerce different dtypes in a column to the same one,
245                               whereas `strict` will raise an error in case a column contains
246                               several dtypes. Note that this only applies to columns whose dtype
247                               is guessed, i.e. not specified via `dtypes`.
248        :param use_columns: Specifies the columns to use. Can either be:
249                            - `None` to select all columns
250                            - A list of strings and ints, the column names and/or indices
251                              (starting at 0)
252                            - A string, a comma separated list of Excel column letters and column
253                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
254                              `A,B,C,D,E` and `A,C,E,F`)
255                            - A callable, a function that takes a column and returns a boolean
256                              indicating whether the column should be used
257        :param dtypes: An optional dtype (for all columns)
258                       or dict of dtypes with keys as column indices or names.
259        """
260        return ExcelSheet(
261            self._reader.load_sheet(
262                idx_or_name=idx_or_name,
263                header_row=header_row,
264                column_names=column_names,
265                skip_rows=skip_rows,
266                n_rows=n_rows,
267                schema_sample_rows=schema_sample_rows,
268                dtype_coercion=dtype_coercion,
269                use_columns=use_columns,
270                dtypes=dtypes,
271                eager=False,
272            )
273        )

Loads a sheet lazily by index or name.

Parameters
  • idx_or_name: The index (starting at 0) or the name of the sheet to load.
  • header_row: The index of the row containing the column labels, default index is 0. If None, the sheet does not have any column labels. Any rows before the header_row will be automatically skipped.
  • column_names: Overrides headers found in the document. If column_names is used, header_row will be ignored.
  • n_rows: Specifies how many rows should be loaded. If None, all rows are loaded
  • skip_rows: Specifies how many rows should be skipped after the header_row. Any rows before the header_row are automatically skipped. If header_row is None:
    • if skip_rows is None (default): it skips all empty rows at the beginning of the sheet.
    • if skip_rows is a number, it skips the specified number of rows from the start of the sheet.
  • schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. Cannot be 0. A specific dtype can be enforced for some or all columns through the dtypes parameter. If None, all rows will be used.
  • dtype_coercion: Specifies how type coercion should behave. coerce (the default) will try to coerce different dtypes in a column to the same one, whereas strict will raise an error in case a column contains several dtypes. Note that this only applies to columns whose dtype is guessed, i.e. not specified via dtypes.
  • use_columns: Specifies the columns to use. Can either be:
    • None to select all columns
    • A list of strings and ints, the column names and/or indices (starting at 0)
    • A string, a comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”, which would result in A,B,C,D,E and A,C,E,F)
    • A callable, a function that takes a column and returns a boolean indicating whether the column should be used
  • dtypes: An optional dtype (for all columns) or dict of dtypes with keys as column indices or names.
def table_names(self, sheet_name: str | None = None) -> list[str]:
275    def table_names(self, sheet_name: str | None = None) -> list[str]:
276        """The list of table names.
277
278        Will return an empty list if no tables are found.
279
280        :param sheet_name: If given, will limit the list to the given sheet, will be faster
281        too.
282        """
283        return self._reader.table_names(sheet_name)

The list of table names.

Will return an empty list if no tables are found.

Parameters
  • sheet_name: If given, will limit the list to the given sheet, will be faster too.
def load_table( self, name: str, *, header_row: int | None = None, column_names: list[str] | None = None, skip_rows: int = 0, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None, eager: bool = False) -> fastexcel.ExcelTable | pyarrow.lib.RecordBatch:
323    def load_table(
324        self,
325        name: str,
326        *,
327        header_row: int | None = None,
328        column_names: list[str] | None = None,
329        skip_rows: int = 0,
330        n_rows: int | None = None,
331        schema_sample_rows: int | None = 1_000,
332        dtype_coercion: Literal["coerce", "strict"] = "coerce",
333        use_columns: list[str]
334        | list[int]
335        | str
336        | Callable[[ColumnInfoNoDtype], bool]
337        | None = None,
338        dtypes: DType | DTypeMap | None = None,
339        eager: bool = False,
340    ) -> ExcelTable | pa.RecordBatch:
341        """Loads a table by name.
342
343        :param name: The name of the table to load.
344        :param header_row: The index of the row containing the column labels.
345                           If `None`, the table's column names will be used.
346                           Any rows before the `header_row` will be automatically skipped.
347        :param column_names: Overrides headers found in the document.
348                             If `column_names` is used, `header_row` will be ignored.
349        :param n_rows: Specifies how many rows should be loaded.
350                       If `None`, all rows are loaded
351        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
352                          Any rows before the `header_row` are automatically skipped.
353                          If `header_row` is `None`, it skips the number of rows from the
354                          start of the sheet.
355        :param schema_sample_rows: Specifies how many rows should be used to determine
356                                   the dtype of a column. Cannot be 0. A specific dtype can be
357                                   enforced for some or all columns through the `dtypes` parameter.
358                                   If `None`, all rows will be used.
359        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
360                               will try to coerce different dtypes in a column to the same one,
361                               whereas `strict` will raise an error in case a column contains
362                               several dtypes. Note that this only applies to columns whose dtype
363                               is guessed, i.e. not specified via `dtypes`.
364        :param use_columns: Specifies the columns to use. Can either be:
365                            - `None` to select all columns
366                            - A list of strings and ints, the column names and/or indices
367                              (starting at 0)
368                            - A string, a comma separated list of Excel column letters and column
369                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
370                              `A,B,C,D,E` and `A,C,E,F`)
371                            - A callable, a function that takes a column and returns a boolean
372                              indicating whether the column should be used
373        :param dtypes: An optional dtype (for all columns)
374                       or dict of dtypes with keys as column indices or names.
375        """
376        output = self._reader.load_table(  # type:ignore[call-overload,misc]
377            name=name,
378            header_row=header_row,
379            column_names=column_names,
380            skip_rows=skip_rows,
381            n_rows=n_rows,
382            schema_sample_rows=schema_sample_rows,
383            dtype_coercion=dtype_coercion,
384            use_columns=use_columns,
385            dtypes=dtypes,
386            eager=eager,
387        )
388        if eager:
389            return output
390        return ExcelTable(output)

Loads a table by name.

Parameters
  • name: The name of the table to load.
  • header_row: The index of the row containing the column labels. If None, the table's column names will be used. Any rows before the header_row will be automatically skipped.
  • column_names: Overrides headers found in the document. If column_names is used, header_row will be ignored.
  • n_rows: Specifies how many rows should be loaded. If None, all rows are loaded
  • skip_rows: Specifies how many rows should be skipped after the header_row. Any rows before the header_row are automatically skipped. If header_row is None, it skips the number of rows from the start of the sheet.
  • schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. Cannot be 0. A specific dtype can be enforced for some or all columns through the dtypes parameter. If None, all rows will be used.
  • dtype_coercion: Specifies how type coercion should behave. coerce (the default) will try to coerce different dtypes in a column to the same one, whereas strict will raise an error in case a column contains several dtypes. Note that this only applies to columns whose dtype is guessed, i.e. not specified via dtypes.
  • use_columns: Specifies the columns to use. Can either be:
    • None to select all columns
    • A list of strings and ints, the column names and/or indices (starting at 0)
    • A string, a comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”, which would result in A,B,C,D,E and A,C,E,F)
    • A callable, a function that takes a column and returns a boolean indicating whether the column should be used
  • dtypes: An optional dtype (for all columns) or dict of dtypes with keys as column indices or names.
def load_sheet_eager( self, idx_or_name: int | str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: list[str] | list[int] | str | None = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> pyarrow.lib.RecordBatch:
392    def load_sheet_eager(
393        self,
394        idx_or_name: int | str,
395        *,
396        header_row: int | None = 0,
397        column_names: list[str] | None = None,
398        skip_rows: int | None = None,
399        n_rows: int | None = None,
400        schema_sample_rows: int | None = 1_000,
401        dtype_coercion: Literal["coerce", "strict"] = "coerce",
402        use_columns: list[str] | list[int] | str | None = None,
403        dtypes: DType | DTypeMap | None = None,
404    ) -> pa.RecordBatch:
405        """Loads a sheet eagerly by index or name.
406
407        For xlsx files, this will be faster and more memory-efficient, as it will use
408        `worksheet_range_ref` under the hood, which returns borrowed types.
409
410        Refer to `load_sheet` for parameter documentation
411        """
412        return self._reader.load_sheet(
413            idx_or_name=idx_or_name,
414            header_row=header_row,
415            column_names=column_names,
416            skip_rows=skip_rows,
417            n_rows=n_rows,
418            schema_sample_rows=schema_sample_rows,
419            dtype_coercion=dtype_coercion,
420            use_columns=use_columns,
421            dtypes=dtypes,
422            eager=True,
423        )

Loads a sheet eagerly by index or name.

For xlsx files, this will be faster and more memory-efficient, as it will use worksheet_range_ref under the hood, which returns borrowed types.

Refer to load_sheet for parameter documentation

def load_sheet_by_name( self, name: str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
425    def load_sheet_by_name(
426        self,
427        name: str,
428        *,
429        header_row: int | None = 0,
430        column_names: list[str] | None = None,
431        skip_rows: int | None = None,
432        n_rows: int | None = None,
433        schema_sample_rows: int | None = 1_000,
434        dtype_coercion: Literal["coerce", "strict"] = "coerce",
435        use_columns: list[str]
436        | list[int]
437        | str
438        | Callable[[ColumnInfoNoDtype], bool]
439        | None = None,
440        dtypes: DType | DTypeMap | None = None,
441    ) -> ExcelSheet:
442        """Loads a sheet by name.
443
444        Refer to `load_sheet` for parameter documentation
445        """
446        return self.load_sheet(
447            name,
448            header_row=header_row,
449            column_names=column_names,
450            skip_rows=skip_rows,
451            n_rows=n_rows,
452            schema_sample_rows=schema_sample_rows,
453            dtype_coercion=dtype_coercion,
454            use_columns=use_columns,
455            dtypes=dtypes,
456        )

Loads a sheet by name.

Refer to load_sheet for parameter documentation

def load_sheet_by_idx( self, idx: int, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfoNoDtype], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
458    def load_sheet_by_idx(
459        self,
460        idx: int,
461        *,
462        header_row: int | None = 0,
463        column_names: list[str] | None = None,
464        skip_rows: int | None = None,
465        n_rows: int | None = None,
466        schema_sample_rows: int | None = 1_000,
467        dtype_coercion: Literal["coerce", "strict"] = "coerce",
468        use_columns: list[str]
469        | list[int]
470        | str
471        | Callable[[ColumnInfoNoDtype], bool]
472        | None = None,
473        dtypes: DType | DTypeMap | None = None,
474    ) -> ExcelSheet:
475        """Loads a sheet by index.
476
477        Refer to `load_sheet` for parameter documentation
478        """
479        return self.load_sheet(
480            idx,
481            header_row=header_row,
482            column_names=column_names,
483            skip_rows=skip_rows,
484            n_rows=n_rows,
485            schema_sample_rows=schema_sample_rows,
486            dtype_coercion=dtype_coercion,
487            use_columns=use_columns,
488            dtypes=dtypes,
489        )

Loads a sheet by index.

Refer to load_sheet for parameter documentation

class ExcelSheet:
 56class ExcelSheet:
 57    """A class representing a single sheet in an Excel File"""
 58
 59    def __init__(self, sheet: _ExcelSheet) -> None:
 60        self._sheet = sheet
 61
 62    @property
 63    def name(self) -> str:
 64        """The name of the sheet"""
 65        return self._sheet.name
 66
 67    @property
 68    def width(self) -> int:
 69        """The sheet's width"""
 70        return self._sheet.width
 71
 72    @property
 73    def height(self) -> int:
 74        """The sheet's height, with `skip_rows` and `nrows` applied"""
 75        return self._sheet.height
 76
 77    @property
 78    def total_height(self) -> int:
 79        """The sheet's total height"""
 80        return self._sheet.total_height
 81
 82    @property
 83    def selected_columns(self) -> list[ColumnInfo]:
 84        """The sheet's selected columns"""
 85        return self._sheet.selected_columns
 86
 87    def available_columns(self) -> list[ColumnInfo]:
 88        """The columns available for the given sheet"""
 89        return self._sheet.available_columns()
 90
 91    @property
 92    def specified_dtypes(self) -> DTypeMap | None:
 93        """The dtypes specified for the sheet"""
 94        return self._sheet.specified_dtypes
 95
 96    @property
 97    def visible(self) -> SheetVisible:
 98        """The visibility of the sheet"""
 99        return self._sheet.visible
100
101    def to_arrow(self) -> pa.RecordBatch:
102        """Converts the sheet to a pyarrow `RecordBatch`"""
103        return self._sheet.to_arrow()
104
105    def to_pandas(self) -> "pd.DataFrame":
106        """Converts the sheet to a Pandas `DataFrame`.
107
108        Requires the `pandas` extra to be installed.
109        """
110        # We know for sure that the sheet will yield exactly one RecordBatch
111        return self.to_arrow().to_pandas()
112
113    def to_polars(self) -> "pl.DataFrame":
114        """Converts the sheet to a Polars `DataFrame`.
115
116        Requires the `polars` extra to be installed.
117        """
118        return _recordbatch_to_polars(self.to_arrow())
119
120    def __repr__(self) -> str:
121        return self._sheet.__repr__()

A class representing a single sheet in an Excel File

ExcelSheet(sheet: _ExcelSheet)
59    def __init__(self, sheet: _ExcelSheet) -> None:
60        self._sheet = sheet
name: str
62    @property
63    def name(self) -> str:
64        """The name of the sheet"""
65        return self._sheet.name

The name of the sheet

width: int
67    @property
68    def width(self) -> int:
69        """The sheet's width"""
70        return self._sheet.width

The sheet's width

height: int
72    @property
73    def height(self) -> int:
74        """The sheet's height, with `skip_rows` and `nrows` applied"""
75        return self._sheet.height

The sheet's height, with skip_rows and nrows applied

total_height: int
77    @property
78    def total_height(self) -> int:
79        """The sheet's total height"""
80        return self._sheet.total_height

The sheet's total height

selected_columns: list[ColumnInfo]
82    @property
83    def selected_columns(self) -> list[ColumnInfo]:
84        """The sheet's selected columns"""
85        return self._sheet.selected_columns

The sheet's selected columns

def available_columns(self) -> list[ColumnInfo]:
87    def available_columns(self) -> list[ColumnInfo]:
88        """The columns available for the given sheet"""
89        return self._sheet.available_columns()

The columns available for the given sheet

specified_dtypes: 'DTypeMap | None'
91    @property
92    def specified_dtypes(self) -> DTypeMap | None:
93        """The dtypes specified for the sheet"""
94        return self._sheet.specified_dtypes

The dtypes specified for the sheet

visible: Literal['visible', 'hidden', 'veryhidden']
96    @property
97    def visible(self) -> SheetVisible:
98        """The visibility of the sheet"""
99        return self._sheet.visible

The visibility of the sheet

def to_arrow(self) -> pyarrow.lib.RecordBatch:
101    def to_arrow(self) -> pa.RecordBatch:
102        """Converts the sheet to a pyarrow `RecordBatch`"""
103        return self._sheet.to_arrow()

Converts the sheet to a pyarrow RecordBatch

def to_pandas(self) -> pandas.core.frame.DataFrame:
105    def to_pandas(self) -> "pd.DataFrame":
106        """Converts the sheet to a Pandas `DataFrame`.
107
108        Requires the `pandas` extra to be installed.
109        """
110        # We know for sure that the sheet will yield exactly one RecordBatch
111        return self.to_arrow().to_pandas()

Converts the sheet to a Pandas DataFrame.

Requires the pandas extra to be installed.

def to_polars(self) -> polars.dataframe.frame.DataFrame:
113    def to_polars(self) -> "pl.DataFrame":
114        """Converts the sheet to a Polars `DataFrame`.
115
116        Requires the `polars` extra to be installed.
117        """
118        return _recordbatch_to_polars(self.to_arrow())

Converts the sheet to a Polars DataFrame.

Requires the polars extra to be installed.

DTypeFrom: TypeAlias = Literal['provided_by_index', 'provided_by_name', 'guessed']
ColumnNameFrom: TypeAlias = Literal['provided', 'looked_up', 'generated']
class ColumnInfo:

This class provides information about a single column in a sheet

dtype_from

fastexcel.DTypeFrom. How the dtype of the column was determined.

One of three possible values:

  • "provided_by_index": The dtype was specified via the column index
  • "provided_by_name": The dtype was specified via the column name
  • "guessed": The dtype was determined from the content of the column
dtype

fastexcel.DType. The dtype of the column

column_name_from

fastexcel.ColumnNameFrom. How the name of the column was determined.

One of three possible values:

  • "provided": The column name was provided via the use_columns parameter
  • "looked_up": The column name was looked up from the data found in the sheet
  • "generated": The column name was generated from the column index, either because header_row was None, or because it could not be looked up
index

int. The index of the column

name

str. The name of the column

class FastExcelError(builtins.Exception):

The base class for all fastexcel errors

class CannotRetrieveCellDataError(fastexcel.FastExcelError):

Data for a given cell cannot be retrieved

class CalamineCellError(fastexcel.FastExcelError):

calamine returned an error regarding the content of the cell

class CalamineError(fastexcel.FastExcelError):

Generic calamine error

class SheetNotFoundError(fastexcel.FastExcelError):

Sheet was not found

class ColumnNotFoundError(fastexcel.FastExcelError):

Column was not found

class ArrowError(fastexcel.FastExcelError):

Generic arrow error

class InvalidParametersError(fastexcel.FastExcelError):

Provided parameters are invalid

class UnsupportedColumnTypeCombinationError(fastexcel.FastExcelError):

Column contains an unsupported type combination