fastexcel

  1from __future__ import annotations
  2
  3import sys
  4import typing
  5from typing import TYPE_CHECKING, Callable, Literal
  6
  7if sys.version_info < (3, 10):
  8    from typing_extensions import TypeAlias
  9else:
 10    from typing import TypeAlias
 11
 12if TYPE_CHECKING:
 13    import pandas as pd
 14    import polars as pl
 15
 16from os.path import expanduser
 17from pathlib import Path
 18
 19import pyarrow as pa
 20
 21from ._fastexcel import (
 22    ArrowError,
 23    CalamineCellError,
 24    CalamineError,
 25    CannotRetrieveCellDataError,
 26    ColumnInfo,
 27    ColumnNotFoundError,
 28    FastExcelError,
 29    InvalidParametersError,
 30    SheetNotFoundError,
 31    UnsupportedColumnTypeCombinationError,
 32    __version__,
 33    _ExcelReader,
 34    _ExcelSheet,
 35    _ExcelTable,
 36)
 37from ._fastexcel import read_excel as _read_excel
 38
 39DType = Literal["null", "int", "float", "string", "boolean", "datetime", "date", "duration"]
 40DTypeMap: TypeAlias = "dict[str | int, DType]"
 41ColumnNameFrom: TypeAlias = Literal["provided", "looked_up", "generated"]
 42DTypeFrom: TypeAlias = Literal["provided_by_index", "provided_by_name", "guessed"]
 43SheetVisible: TypeAlias = Literal["visible", "hidden", "veryhidden"]
 44
 45
 46def _recordbatch_to_polars(rb: pa.RecordBatch) -> pl.DataFrame:
 47    import polars as pl
 48
 49    df = pl.from_arrow(data=rb)
 50    assert isinstance(df, pl.DataFrame)
 51    return df
 52
 53
 54class ExcelSheet:
 55    """A class representing a single sheet in an Excel File"""
 56
 57    def __init__(self, sheet: _ExcelSheet) -> None:
 58        self._sheet = sheet
 59
 60    @property
 61    def name(self) -> str:
 62        """The name of the sheet"""
 63        return self._sheet.name
 64
 65    @property
 66    def width(self) -> int:
 67        """The sheet's width"""
 68        return self._sheet.width
 69
 70    @property
 71    def height(self) -> int:
 72        """The sheet's height, with `skip_rows` and `nrows` applied"""
 73        return self._sheet.height
 74
 75    @property
 76    def total_height(self) -> int:
 77        """The sheet's total height"""
 78        return self._sheet.total_height
 79
 80    @property
 81    def selected_columns(self) -> list[ColumnInfo]:
 82        """The sheet's selected columns"""
 83        return self._sheet.selected_columns
 84
 85    @property
 86    def available_columns(self) -> list[ColumnInfo]:
 87        """The columns available for the given sheet"""
 88        return self._sheet.available_columns
 89
 90    @property
 91    def specified_dtypes(self) -> DTypeMap | None:
 92        """The dtypes specified for the sheet"""
 93        return self._sheet.specified_dtypes
 94
 95    @property
 96    def visible(self) -> SheetVisible:
 97        """The visibility of the sheet"""
 98        return self._sheet.visible
 99
100    def to_arrow(self) -> pa.RecordBatch:
101        """Converts the sheet to a pyarrow `RecordBatch`"""
102        return self._sheet.to_arrow()
103
104    def to_pandas(self) -> "pd.DataFrame":
105        """Converts the sheet to a Pandas `DataFrame`.
106
107        Requires the `pandas` extra to be installed.
108        """
109        # We know for sure that the sheet will yield exactly one RecordBatch
110        return self.to_arrow().to_pandas()
111
112    def to_polars(self) -> "pl.DataFrame":
113        """Converts the sheet to a Polars `DataFrame`.
114
115        Requires the `polars` extra to be installed.
116        """
117        return _recordbatch_to_polars(self.to_arrow())
118
119    def __repr__(self) -> str:
120        return self._sheet.__repr__()
121
122
123class ExcelTable:
124    """A class representing a single table in an Excel file"""
125
126    def __init__(self, table: _ExcelTable) -> None:
127        self._table = table
128
129    @property
130    def name(self) -> str:
131        """The name of the table"""
132        return self._table.name
133
134    @property
135    def sheet_name(self) -> str:
136        """The name of the sheet this table belongs to"""
137        return self._table.sheet_name
138
139    @property
140    def width(self) -> int:
141        """The table's width"""
142        return self._table.width
143
144    @property
145    def height(self) -> int:
146        """The table's height"""
147        return self._table.height
148
149    @property
150    def total_height(self) -> int:
151        """The table's total height"""
152        return self._table.total_height
153
154    @property
155    def offset(self) -> int:
156        """The table's offset before data starts"""
157        return self._table.offset
158
159    @property
160    def selected_columns(self) -> list[ColumnInfo]:
161        """The table's selected columns"""
162        return self._table.selected_columns
163
164    @property
165    def available_columns(self) -> list[ColumnInfo]:
166        """The columns available for the given table"""
167        return self._table.available_columns
168
169    @property
170    def specified_dtypes(self) -> DTypeMap | None:
171        """The dtypes specified for the table"""
172        return self._table.specified_dtypes
173
174    def to_arrow(self) -> pa.RecordBatch:
175        """Converts the table to a pyarrow `RecordBatch`"""
176        return self._table.to_arrow()
177
178    def to_pandas(self) -> "pd.DataFrame":
179        """Converts the table to a Pandas `DataFrame`.
180
181        Requires the `pandas` extra to be installed.
182        """
183        # We know for sure that the table will yield exactly one RecordBatch
184        return self.to_arrow().to_pandas()
185
186    def to_polars(self) -> "pl.DataFrame":
187        """Converts the table to a Polars `DataFrame`.
188
189        Requires the `polars` extra to be installed.
190        """
191        return _recordbatch_to_polars(self.to_arrow())
192
193
194class ExcelReader:
195    """A class representing an open Excel file and allowing to read its sheets"""
196
197    def __init__(self, reader: _ExcelReader) -> None:
198        self._reader = reader
199
200    @property
201    def sheet_names(self) -> list[str]:
202        """The list of sheet names"""
203        return self._reader.sheet_names
204
205    def load_sheet(
206        self,
207        idx_or_name: int | str,
208        *,
209        header_row: int | None = 0,
210        column_names: list[str] | None = None,
211        skip_rows: int | None = None,
212        n_rows: int | None = None,
213        schema_sample_rows: int | None = 1_000,
214        dtype_coercion: Literal["coerce", "strict"] = "coerce",
215        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
216        dtypes: DType | DTypeMap | None = None,
217    ) -> ExcelSheet:
218        """Loads a sheet lazily by index or name.
219
220        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
221        :param header_row: The index of the row containing the column labels, default index is 0.
222                           If `None`, the sheet does not have any column labels.
223                           Any rows before the `header_row` will be automatically skipped.
224        :param column_names: Overrides headers found in the document.
225                             If `column_names` is used, `header_row` will be ignored.
226        :param n_rows: Specifies how many rows should be loaded.
227                       If `None`, all rows are loaded
228        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
229                          Any rows before the `header_row` are automatically skipped.
230                          If `header_row` is `None`:
231                            - if `skip_rows` is `None` (default): it skips all empty rows
232                            at the beginning of the sheet.
233                            - if `skip_rows` is a number, it skips the specified number
234                            of rows from the start of the sheet.
235        :param schema_sample_rows: Specifies how many rows should be used to determine
236                                   the dtype of a column. Cannot be 0. A specific dtype can be
237                                   enforced for some or all columns through the `dtypes` parameter.
238                                   If `None`, all rows will be used.
239        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
240                               will try to coerce different dtypes in a column to the same one,
241                               whereas `strict` will raise an error in case a column contains
242                               several dtypes. Note that this only applies to columns whose dtype
243                               is guessed, i.e. not specified via `dtypes`.
244        :param use_columns: Specifies the columns to use. Can either be:
245                            - `None` to select all columns
246                            - A list of strings and ints, the column names and/or indices
247                              (starting at 0)
248                            - A string, a comma separated list of Excel column letters and column
249                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
250                              `A,B,C,D,E` and `A,C,E,F`)
251                            - A callable, a function that takes a column and returns a boolean
252                              indicating whether the column should be used
253        :param dtypes: An optional dtype (for all columns)
254                       or dict of dtypes with keys as column indices or names.
255        """
256        return ExcelSheet(
257            self._reader.load_sheet(
258                idx_or_name=idx_or_name,
259                header_row=header_row,
260                column_names=column_names,
261                skip_rows=skip_rows,
262                n_rows=n_rows,
263                schema_sample_rows=schema_sample_rows,
264                dtype_coercion=dtype_coercion,
265                use_columns=use_columns,
266                dtypes=dtypes,
267                eager=False,
268            )
269        )
270
271    def table_names(self, sheet_name: str | None = None) -> list[str]:
272        """The list of table names.
273
274        Will return an empty list if no tables are found.
275
276        :param sheet_name: If given, will limit the list to the given sheet, will be faster
277        too.
278        """
279        return self._reader.table_names(sheet_name)
280
281    @typing.overload
282    def load_table(
283        self,
284        name: str,
285        *,
286        header_row: int | None = None,
287        column_names: list[str] | None = None,
288        skip_rows: int = 0,
289        n_rows: int | None = None,
290        schema_sample_rows: int | None = 1_000,
291        dtype_coercion: Literal["coerce", "strict"] = "coerce",
292        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
293        dtypes: DType | DTypeMap | None = None,
294        eager: Literal[False] = ...,
295    ) -> ExcelTable: ...
296    @typing.overload
297    def load_table(
298        self,
299        name: str,
300        *,
301        header_row: int | None = None,
302        column_names: list[str] | None = None,
303        skip_rows: int = 0,
304        n_rows: int | None = None,
305        schema_sample_rows: int | None = 1_000,
306        dtype_coercion: Literal["coerce", "strict"] = "coerce",
307        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
308        dtypes: DType | DTypeMap | None = None,
309        eager: Literal[True] = ...,
310    ) -> pa.RecordBatch: ...
311    def load_table(
312        self,
313        name: str,
314        *,
315        header_row: int | None = None,
316        column_names: list[str] | None = None,
317        skip_rows: int = 0,
318        n_rows: int | None = None,
319        schema_sample_rows: int | None = 1_000,
320        dtype_coercion: Literal["coerce", "strict"] = "coerce",
321        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
322        dtypes: DType | DTypeMap | None = None,
323        eager: bool = False,
324    ) -> ExcelTable | pa.RecordBatch:
325        """Loads a table by name.
326
327        :param name: The name of the table to load.
328        :param header_row: The index of the row containing the column labels.
329                           If `None`, the table's column names will be used.
330                           Any rows before the `header_row` will be automatically skipped.
331        :param column_names: Overrides headers found in the document.
332                             If `column_names` is used, `header_row` will be ignored.
333        :param n_rows: Specifies how many rows should be loaded.
334                       If `None`, all rows are loaded
335        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
336                          Any rows before the `header_row` are automatically skipped.
337                          If `header_row` is `None`, it skips the number of rows from the
338                          start of the sheet.
339        :param schema_sample_rows: Specifies how many rows should be used to determine
340                                   the dtype of a column. Cannot be 0. A specific dtype can be
341                                   enforced for some or all columns through the `dtypes` parameter.
342                                   If `None`, all rows will be used.
343        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
344                               will try to coerce different dtypes in a column to the same one,
345                               whereas `strict` will raise an error in case a column contains
346                               several dtypes. Note that this only applies to columns whose dtype
347                               is guessed, i.e. not specified via `dtypes`.
348        :param use_columns: Specifies the columns to use. Can either be:
349                            - `None` to select all columns
350                            - A list of strings and ints, the column names and/or indices
351                              (starting at 0)
352                            - A string, a comma separated list of Excel column letters and column
353                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
354                              `A,B,C,D,E` and `A,C,E,F`)
355                            - A callable, a function that takes a column and returns a boolean
356                              indicating whether the column should be used
357        :param dtypes: An optional dtype (for all columns)
358                       or dict of dtypes with keys as column indices or names.
359        """
360        output = self._reader.load_table(  # type:ignore[call-overload,misc]
361            name=name,
362            header_row=header_row,
363            column_names=column_names,
364            skip_rows=skip_rows,
365            n_rows=n_rows,
366            schema_sample_rows=schema_sample_rows,
367            dtype_coercion=dtype_coercion,
368            use_columns=use_columns,
369            dtypes=dtypes,
370            eager=eager,
371        )
372        if eager:
373            return output
374        return ExcelTable(output)
375
376    def load_sheet_eager(
377        self,
378        idx_or_name: int | str,
379        *,
380        header_row: int | None = 0,
381        column_names: list[str] | None = None,
382        skip_rows: int | None = None,
383        n_rows: int | None = None,
384        schema_sample_rows: int | None = 1_000,
385        dtype_coercion: Literal["coerce", "strict"] = "coerce",
386        use_columns: list[str] | list[int] | str | None = None,
387        dtypes: DType | DTypeMap | None = None,
388    ) -> pa.RecordBatch:
389        """Loads a sheet eagerly by index or name.
390
391        For xlsx files, this will be faster and more memory-efficient, as it will use
392        `worksheet_range_ref` under the hood, which returns borrowed types.
393
394        Refer to `load_sheet` for parameter documentation
395        """
396        return self._reader.load_sheet(
397            idx_or_name=idx_or_name,
398            header_row=header_row,
399            column_names=column_names,
400            skip_rows=skip_rows,
401            n_rows=n_rows,
402            schema_sample_rows=schema_sample_rows,
403            dtype_coercion=dtype_coercion,
404            use_columns=use_columns,
405            dtypes=dtypes,
406            eager=True,
407        )
408
409    def load_sheet_by_name(
410        self,
411        name: str,
412        *,
413        header_row: int | None = 0,
414        column_names: list[str] | None = None,
415        skip_rows: int | None = None,
416        n_rows: int | None = None,
417        schema_sample_rows: int | None = 1_000,
418        dtype_coercion: Literal["coerce", "strict"] = "coerce",
419        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
420        dtypes: DType | DTypeMap | None = None,
421    ) -> ExcelSheet:
422        """Loads a sheet by name.
423
424        Refer to `load_sheet` for parameter documentation
425        """
426        return self.load_sheet(
427            name,
428            header_row=header_row,
429            column_names=column_names,
430            skip_rows=skip_rows,
431            n_rows=n_rows,
432            schema_sample_rows=schema_sample_rows,
433            dtype_coercion=dtype_coercion,
434            use_columns=use_columns,
435            dtypes=dtypes,
436        )
437
438    def load_sheet_by_idx(
439        self,
440        idx: int,
441        *,
442        header_row: int | None = 0,
443        column_names: list[str] | None = None,
444        skip_rows: int | None = None,
445        n_rows: int | None = None,
446        schema_sample_rows: int | None = 1_000,
447        dtype_coercion: Literal["coerce", "strict"] = "coerce",
448        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
449        dtypes: DType | DTypeMap | None = None,
450    ) -> ExcelSheet:
451        """Loads a sheet by index.
452
453        Refer to `load_sheet` for parameter documentation
454        """
455        return self.load_sheet(
456            idx,
457            header_row=header_row,
458            column_names=column_names,
459            skip_rows=skip_rows,
460            n_rows=n_rows,
461            schema_sample_rows=schema_sample_rows,
462            dtype_coercion=dtype_coercion,
463            use_columns=use_columns,
464            dtypes=dtypes,
465        )
466
467    def __repr__(self) -> str:
468        return self._reader.__repr__()
469
470
471def read_excel(source: Path | str | bytes) -> ExcelReader:
472    """Opens and loads an excel file.
473
474    :param source: The path to a file or its content as bytes
475    """
476    if isinstance(source, (str, Path)):
477        source = expanduser(source)
478    return ExcelReader(_read_excel(source))
479
480
481__all__ = (
482    ## version
483    "__version__",
484    ## main entrypoint
485    "read_excel",
486    ## Python types
487    "DType",
488    "DTypeMap",
489    # Excel reader
490    "ExcelReader",
491    # Excel sheet
492    "ExcelSheet",
493    # Column metadata
494    "DTypeFrom",
495    "ColumnNameFrom",
496    "ColumnInfo",
497    # Exceptions
498    "FastExcelError",
499    "CannotRetrieveCellDataError",
500    "CalamineCellError",
501    "CalamineError",
502    "SheetNotFoundError",
503    "ColumnNotFoundError",
504    "ArrowError",
505    "InvalidParametersError",
506    "UnsupportedColumnTypeCombinationError",
507)
__version__ = '0.12.1'
def read_excel(source: pathlib.Path | str | bytes) -> ExcelReader:
472def read_excel(source: Path | str | bytes) -> ExcelReader:
473    """Opens and loads an excel file.
474
475    :param source: The path to a file or its content as bytes
476    """
477    if isinstance(source, (str, Path)):
478        source = expanduser(source)
479    return ExcelReader(_read_excel(source))

Opens and loads an excel file.

Parameters
  • source: The path to a file or its content as bytes
DType = typing.Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']
DTypeMap: TypeAlias = 'dict[str | int, DType]'
class ExcelReader:
195class ExcelReader:
196    """A class representing an open Excel file and allowing to read its sheets"""
197
198    def __init__(self, reader: _ExcelReader) -> None:
199        self._reader = reader
200
201    @property
202    def sheet_names(self) -> list[str]:
203        """The list of sheet names"""
204        return self._reader.sheet_names
205
206    def load_sheet(
207        self,
208        idx_or_name: int | str,
209        *,
210        header_row: int | None = 0,
211        column_names: list[str] | None = None,
212        skip_rows: int | None = None,
213        n_rows: int | None = None,
214        schema_sample_rows: int | None = 1_000,
215        dtype_coercion: Literal["coerce", "strict"] = "coerce",
216        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
217        dtypes: DType | DTypeMap | None = None,
218    ) -> ExcelSheet:
219        """Loads a sheet lazily by index or name.
220
221        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
222        :param header_row: The index of the row containing the column labels, default index is 0.
223                           If `None`, the sheet does not have any column labels.
224                           Any rows before the `header_row` will be automatically skipped.
225        :param column_names: Overrides headers found in the document.
226                             If `column_names` is used, `header_row` will be ignored.
227        :param n_rows: Specifies how many rows should be loaded.
228                       If `None`, all rows are loaded
229        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
230                          Any rows before the `header_row` are automatically skipped.
231                          If `header_row` is `None`:
232                            - if `skip_rows` is `None` (default): it skips all empty rows
233                            at the beginning of the sheet.
234                            - if `skip_rows` is a number, it skips the specified number
235                            of rows from the start of the sheet.
236        :param schema_sample_rows: Specifies how many rows should be used to determine
237                                   the dtype of a column. Cannot be 0. A specific dtype can be
238                                   enforced for some or all columns through the `dtypes` parameter.
239                                   If `None`, all rows will be used.
240        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
241                               will try to coerce different dtypes in a column to the same one,
242                               whereas `strict` will raise an error in case a column contains
243                               several dtypes. Note that this only applies to columns whose dtype
244                               is guessed, i.e. not specified via `dtypes`.
245        :param use_columns: Specifies the columns to use. Can either be:
246                            - `None` to select all columns
247                            - A list of strings and ints, the column names and/or indices
248                              (starting at 0)
249                            - A string, a comma separated list of Excel column letters and column
250                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
251                              `A,B,C,D,E` and `A,C,E,F`)
252                            - A callable, a function that takes a column and returns a boolean
253                              indicating whether the column should be used
254        :param dtypes: An optional dtype (for all columns)
255                       or dict of dtypes with keys as column indices or names.
256        """
257        return ExcelSheet(
258            self._reader.load_sheet(
259                idx_or_name=idx_or_name,
260                header_row=header_row,
261                column_names=column_names,
262                skip_rows=skip_rows,
263                n_rows=n_rows,
264                schema_sample_rows=schema_sample_rows,
265                dtype_coercion=dtype_coercion,
266                use_columns=use_columns,
267                dtypes=dtypes,
268                eager=False,
269            )
270        )
271
272    def table_names(self, sheet_name: str | None = None) -> list[str]:
273        """The list of table names.
274
275        Will return an empty list if no tables are found.
276
277        :param sheet_name: If given, will limit the list to the given sheet, will be faster
278        too.
279        """
280        return self._reader.table_names(sheet_name)
281
282    @typing.overload
283    def load_table(
284        self,
285        name: str,
286        *,
287        header_row: int | None = None,
288        column_names: list[str] | None = None,
289        skip_rows: int = 0,
290        n_rows: int | None = None,
291        schema_sample_rows: int | None = 1_000,
292        dtype_coercion: Literal["coerce", "strict"] = "coerce",
293        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
294        dtypes: DType | DTypeMap | None = None,
295        eager: Literal[False] = ...,
296    ) -> ExcelTable: ...
297    @typing.overload
298    def load_table(
299        self,
300        name: str,
301        *,
302        header_row: int | None = None,
303        column_names: list[str] | None = None,
304        skip_rows: int = 0,
305        n_rows: int | None = None,
306        schema_sample_rows: int | None = 1_000,
307        dtype_coercion: Literal["coerce", "strict"] = "coerce",
308        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
309        dtypes: DType | DTypeMap | None = None,
310        eager: Literal[True] = ...,
311    ) -> pa.RecordBatch: ...
312    def load_table(
313        self,
314        name: str,
315        *,
316        header_row: int | None = None,
317        column_names: list[str] | None = None,
318        skip_rows: int = 0,
319        n_rows: int | None = None,
320        schema_sample_rows: int | None = 1_000,
321        dtype_coercion: Literal["coerce", "strict"] = "coerce",
322        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
323        dtypes: DType | DTypeMap | None = None,
324        eager: bool = False,
325    ) -> ExcelTable | pa.RecordBatch:
326        """Loads a table by name.
327
328        :param name: The name of the table to load.
329        :param header_row: The index of the row containing the column labels.
330                           If `None`, the table's column names will be used.
331                           Any rows before the `header_row` will be automatically skipped.
332        :param column_names: Overrides headers found in the document.
333                             If `column_names` is used, `header_row` will be ignored.
334        :param n_rows: Specifies how many rows should be loaded.
335                       If `None`, all rows are loaded
336        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
337                          Any rows before the `header_row` are automatically skipped.
338                          If `header_row` is `None`, it skips the number of rows from the
339                          start of the sheet.
340        :param schema_sample_rows: Specifies how many rows should be used to determine
341                                   the dtype of a column. Cannot be 0. A specific dtype can be
342                                   enforced for some or all columns through the `dtypes` parameter.
343                                   If `None`, all rows will be used.
344        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
345                               will try to coerce different dtypes in a column to the same one,
346                               whereas `strict` will raise an error in case a column contains
347                               several dtypes. Note that this only applies to columns whose dtype
348                               is guessed, i.e. not specified via `dtypes`.
349        :param use_columns: Specifies the columns to use. Can either be:
350                            - `None` to select all columns
351                            - A list of strings and ints, the column names and/or indices
352                              (starting at 0)
353                            - A string, a comma separated list of Excel column letters and column
354                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
355                              `A,B,C,D,E` and `A,C,E,F`)
356                            - A callable, a function that takes a column and returns a boolean
357                              indicating whether the column should be used
358        :param dtypes: An optional dtype (for all columns)
359                       or dict of dtypes with keys as column indices or names.
360        """
361        output = self._reader.load_table(  # type:ignore[call-overload,misc]
362            name=name,
363            header_row=header_row,
364            column_names=column_names,
365            skip_rows=skip_rows,
366            n_rows=n_rows,
367            schema_sample_rows=schema_sample_rows,
368            dtype_coercion=dtype_coercion,
369            use_columns=use_columns,
370            dtypes=dtypes,
371            eager=eager,
372        )
373        if eager:
374            return output
375        return ExcelTable(output)
376
377    def load_sheet_eager(
378        self,
379        idx_or_name: int | str,
380        *,
381        header_row: int | None = 0,
382        column_names: list[str] | None = None,
383        skip_rows: int | None = None,
384        n_rows: int | None = None,
385        schema_sample_rows: int | None = 1_000,
386        dtype_coercion: Literal["coerce", "strict"] = "coerce",
387        use_columns: list[str] | list[int] | str | None = None,
388        dtypes: DType | DTypeMap | None = None,
389    ) -> pa.RecordBatch:
390        """Loads a sheet eagerly by index or name.
391
392        For xlsx files, this will be faster and more memory-efficient, as it will use
393        `worksheet_range_ref` under the hood, which returns borrowed types.
394
395        Refer to `load_sheet` for parameter documentation
396        """
397        return self._reader.load_sheet(
398            idx_or_name=idx_or_name,
399            header_row=header_row,
400            column_names=column_names,
401            skip_rows=skip_rows,
402            n_rows=n_rows,
403            schema_sample_rows=schema_sample_rows,
404            dtype_coercion=dtype_coercion,
405            use_columns=use_columns,
406            dtypes=dtypes,
407            eager=True,
408        )
409
410    def load_sheet_by_name(
411        self,
412        name: str,
413        *,
414        header_row: int | None = 0,
415        column_names: list[str] | None = None,
416        skip_rows: int | None = None,
417        n_rows: int | None = None,
418        schema_sample_rows: int | None = 1_000,
419        dtype_coercion: Literal["coerce", "strict"] = "coerce",
420        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
421        dtypes: DType | DTypeMap | None = None,
422    ) -> ExcelSheet:
423        """Loads a sheet by name.
424
425        Refer to `load_sheet` for parameter documentation
426        """
427        return self.load_sheet(
428            name,
429            header_row=header_row,
430            column_names=column_names,
431            skip_rows=skip_rows,
432            n_rows=n_rows,
433            schema_sample_rows=schema_sample_rows,
434            dtype_coercion=dtype_coercion,
435            use_columns=use_columns,
436            dtypes=dtypes,
437        )
438
439    def load_sheet_by_idx(
440        self,
441        idx: int,
442        *,
443        header_row: int | None = 0,
444        column_names: list[str] | None = None,
445        skip_rows: int | None = None,
446        n_rows: int | None = None,
447        schema_sample_rows: int | None = 1_000,
448        dtype_coercion: Literal["coerce", "strict"] = "coerce",
449        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
450        dtypes: DType | DTypeMap | None = None,
451    ) -> ExcelSheet:
452        """Loads a sheet by index.
453
454        Refer to `load_sheet` for parameter documentation
455        """
456        return self.load_sheet(
457            idx,
458            header_row=header_row,
459            column_names=column_names,
460            skip_rows=skip_rows,
461            n_rows=n_rows,
462            schema_sample_rows=schema_sample_rows,
463            dtype_coercion=dtype_coercion,
464            use_columns=use_columns,
465            dtypes=dtypes,
466        )
467
468    def __repr__(self) -> str:
469        return self._reader.__repr__()

A class representing an open Excel file and allowing to read its sheets

ExcelReader(reader: _ExcelReader)
198    def __init__(self, reader: _ExcelReader) -> None:
199        self._reader = reader
sheet_names: list[str]
201    @property
202    def sheet_names(self) -> list[str]:
203        """The list of sheet names"""
204        return self._reader.sheet_names

The list of sheet names

def load_sheet( self, idx_or_name: int | str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfo], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
206    def load_sheet(
207        self,
208        idx_or_name: int | str,
209        *,
210        header_row: int | None = 0,
211        column_names: list[str] | None = None,
212        skip_rows: int | None = None,
213        n_rows: int | None = None,
214        schema_sample_rows: int | None = 1_000,
215        dtype_coercion: Literal["coerce", "strict"] = "coerce",
216        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
217        dtypes: DType | DTypeMap | None = None,
218    ) -> ExcelSheet:
219        """Loads a sheet lazily by index or name.
220
221        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
222        :param header_row: The index of the row containing the column labels, default index is 0.
223                           If `None`, the sheet does not have any column labels.
224                           Any rows before the `header_row` will be automatically skipped.
225        :param column_names: Overrides headers found in the document.
226                             If `column_names` is used, `header_row` will be ignored.
227        :param n_rows: Specifies how many rows should be loaded.
228                       If `None`, all rows are loaded
229        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
230                          Any rows before the `header_row` are automatically skipped.
231                          If `header_row` is `None`:
232                            - if `skip_rows` is `None` (default): it skips all empty rows
233                            at the beginning of the sheet.
234                            - if `skip_rows` is a number, it skips the specified number
235                            of rows from the start of the sheet.
236        :param schema_sample_rows: Specifies how many rows should be used to determine
237                                   the dtype of a column. Cannot be 0. A specific dtype can be
238                                   enforced for some or all columns through the `dtypes` parameter.
239                                   If `None`, all rows will be used.
240        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
241                               will try to coerce different dtypes in a column to the same one,
242                               whereas `strict` will raise an error in case a column contains
243                               several dtypes. Note that this only applies to columns whose dtype
244                               is guessed, i.e. not specified via `dtypes`.
245        :param use_columns: Specifies the columns to use. Can either be:
246                            - `None` to select all columns
247                            - A list of strings and ints, the column names and/or indices
248                              (starting at 0)
249                            - A string, a comma separated list of Excel column letters and column
250                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
251                              `A,B,C,D,E` and `A,C,E,F`)
252                            - A callable, a function that takes a column and returns a boolean
253                              indicating whether the column should be used
254        :param dtypes: An optional dtype (for all columns)
255                       or dict of dtypes with keys as column indices or names.
256        """
257        return ExcelSheet(
258            self._reader.load_sheet(
259                idx_or_name=idx_or_name,
260                header_row=header_row,
261                column_names=column_names,
262                skip_rows=skip_rows,
263                n_rows=n_rows,
264                schema_sample_rows=schema_sample_rows,
265                dtype_coercion=dtype_coercion,
266                use_columns=use_columns,
267                dtypes=dtypes,
268                eager=False,
269            )
270        )

Loads a sheet lazily by index or name.

Parameters
  • idx_or_name: The index (starting at 0) or the name of the sheet to load.
  • header_row: The index of the row containing the column labels, default index is 0. If None, the sheet does not have any column labels. Any rows before the header_row will be automatically skipped.
  • column_names: Overrides headers found in the document. If column_names is used, header_row will be ignored.
  • n_rows: Specifies how many rows should be loaded. If None, all rows are loaded
  • skip_rows: Specifies how many rows should be skipped after the header_row. Any rows before the header_row are automatically skipped. If header_row is None:
    • if skip_rows is None (default): it skips all empty rows at the beginning of the sheet.
    • if skip_rows is a number, it skips the specified number of rows from the start of the sheet.
  • schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. Cannot be 0. A specific dtype can be enforced for some or all columns through the dtypes parameter. If None, all rows will be used.
  • dtype_coercion: Specifies how type coercion should behave. coerce (the default) will try to coerce different dtypes in a column to the same one, whereas strict will raise an error in case a column contains several dtypes. Note that this only applies to columns whose dtype is guessed, i.e. not specified via dtypes.
  • use_columns: Specifies the columns to use. Can either be:
    • None to select all columns
    • A list of strings and ints, the column names and/or indices (starting at 0)
    • A string, a comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”, which would result in A,B,C,D,E and A,C,E,F)
    • A callable, a function that takes a column and returns a boolean indicating whether the column should be used
  • dtypes: An optional dtype (for all columns) or dict of dtypes with keys as column indices or names.
def table_names(self, sheet_name: str | None = None) -> list[str]:
272    def table_names(self, sheet_name: str | None = None) -> list[str]:
273        """The list of table names.
274
275        Will return an empty list if no tables are found.
276
277        :param sheet_name: If given, will limit the list to the given sheet, will be faster
278        too.
279        """
280        return self._reader.table_names(sheet_name)

The list of table names.

Will return an empty list if no tables are found.

Parameters
  • sheet_name: If given, will limit the list to the given sheet, will be faster too.
def load_table( self, name: str, *, header_row: int | None = None, column_names: list[str] | None = None, skip_rows: int = 0, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfo], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None, eager: bool = False) -> fastexcel.ExcelTable | pyarrow.lib.RecordBatch:
312    def load_table(
313        self,
314        name: str,
315        *,
316        header_row: int | None = None,
317        column_names: list[str] | None = None,
318        skip_rows: int = 0,
319        n_rows: int | None = None,
320        schema_sample_rows: int | None = 1_000,
321        dtype_coercion: Literal["coerce", "strict"] = "coerce",
322        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
323        dtypes: DType | DTypeMap | None = None,
324        eager: bool = False,
325    ) -> ExcelTable | pa.RecordBatch:
326        """Loads a table by name.
327
328        :param name: The name of the table to load.
329        :param header_row: The index of the row containing the column labels.
330                           If `None`, the table's column names will be used.
331                           Any rows before the `header_row` will be automatically skipped.
332        :param column_names: Overrides headers found in the document.
333                             If `column_names` is used, `header_row` will be ignored.
334        :param n_rows: Specifies how many rows should be loaded.
335                       If `None`, all rows are loaded
336        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
337                          Any rows before the `header_row` are automatically skipped.
338                          If `header_row` is `None`, it skips the number of rows from the
339                          start of the sheet.
340        :param schema_sample_rows: Specifies how many rows should be used to determine
341                                   the dtype of a column. Cannot be 0. A specific dtype can be
342                                   enforced for some or all columns through the `dtypes` parameter.
343                                   If `None`, all rows will be used.
344        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
345                               will try to coerce different dtypes in a column to the same one,
346                               whereas `strict` will raise an error in case a column contains
347                               several dtypes. Note that this only applies to columns whose dtype
348                               is guessed, i.e. not specified via `dtypes`.
349        :param use_columns: Specifies the columns to use. Can either be:
350                            - `None` to select all columns
351                            - A list of strings and ints, the column names and/or indices
352                              (starting at 0)
353                            - A string, a comma separated list of Excel column letters and column
354                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
355                              `A,B,C,D,E` and `A,C,E,F`)
356                            - A callable, a function that takes a column and returns a boolean
357                              indicating whether the column should be used
358        :param dtypes: An optional dtype (for all columns)
359                       or dict of dtypes with keys as column indices or names.
360        """
361        output = self._reader.load_table(  # type:ignore[call-overload,misc]
362            name=name,
363            header_row=header_row,
364            column_names=column_names,
365            skip_rows=skip_rows,
366            n_rows=n_rows,
367            schema_sample_rows=schema_sample_rows,
368            dtype_coercion=dtype_coercion,
369            use_columns=use_columns,
370            dtypes=dtypes,
371            eager=eager,
372        )
373        if eager:
374            return output
375        return ExcelTable(output)

Loads a table by name.

Parameters
  • name: The name of the table to load.
  • header_row: The index of the row containing the column labels. If None, the table's column names will be used. Any rows before the header_row will be automatically skipped.
  • column_names: Overrides headers found in the document. If column_names is used, header_row will be ignored.
  • n_rows: Specifies how many rows should be loaded. If None, all rows are loaded
  • skip_rows: Specifies how many rows should be skipped after the header_row. Any rows before the header_row are automatically skipped. If header_row is None, it skips the number of rows from the start of the sheet.
  • schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. Cannot be 0. A specific dtype can be enforced for some or all columns through the dtypes parameter. If None, all rows will be used.
  • dtype_coercion: Specifies how type coercion should behave. coerce (the default) will try to coerce different dtypes in a column to the same one, whereas strict will raise an error in case a column contains several dtypes. Note that this only applies to columns whose dtype is guessed, i.e. not specified via dtypes.
  • use_columns: Specifies the columns to use. Can either be:
    • None to select all columns
    • A list of strings and ints, the column names and/or indices (starting at 0)
    • A string, a comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”, which would result in A,B,C,D,E and A,C,E,F)
    • A callable, a function that takes a column and returns a boolean indicating whether the column should be used
  • dtypes: An optional dtype (for all columns) or dict of dtypes with keys as column indices or names.
def load_sheet_eager( self, idx_or_name: int | str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: list[str] | list[int] | str | None = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> pyarrow.lib.RecordBatch:
377    def load_sheet_eager(
378        self,
379        idx_or_name: int | str,
380        *,
381        header_row: int | None = 0,
382        column_names: list[str] | None = None,
383        skip_rows: int | None = None,
384        n_rows: int | None = None,
385        schema_sample_rows: int | None = 1_000,
386        dtype_coercion: Literal["coerce", "strict"] = "coerce",
387        use_columns: list[str] | list[int] | str | None = None,
388        dtypes: DType | DTypeMap | None = None,
389    ) -> pa.RecordBatch:
390        """Loads a sheet eagerly by index or name.
391
392        For xlsx files, this will be faster and more memory-efficient, as it will use
393        `worksheet_range_ref` under the hood, which returns borrowed types.
394
395        Refer to `load_sheet` for parameter documentation
396        """
397        return self._reader.load_sheet(
398            idx_or_name=idx_or_name,
399            header_row=header_row,
400            column_names=column_names,
401            skip_rows=skip_rows,
402            n_rows=n_rows,
403            schema_sample_rows=schema_sample_rows,
404            dtype_coercion=dtype_coercion,
405            use_columns=use_columns,
406            dtypes=dtypes,
407            eager=True,
408        )

Loads a sheet eagerly by index or name.

For xlsx files, this will be faster and more memory-efficient, as it will use worksheet_range_ref under the hood, which returns borrowed types.

Refer to load_sheet for parameter documentation

def load_sheet_by_name( self, name: str, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfo], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
410    def load_sheet_by_name(
411        self,
412        name: str,
413        *,
414        header_row: int | None = 0,
415        column_names: list[str] | None = None,
416        skip_rows: int | None = None,
417        n_rows: int | None = None,
418        schema_sample_rows: int | None = 1_000,
419        dtype_coercion: Literal["coerce", "strict"] = "coerce",
420        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
421        dtypes: DType | DTypeMap | None = None,
422    ) -> ExcelSheet:
423        """Loads a sheet by name.
424
425        Refer to `load_sheet` for parameter documentation
426        """
427        return self.load_sheet(
428            name,
429            header_row=header_row,
430            column_names=column_names,
431            skip_rows=skip_rows,
432            n_rows=n_rows,
433            schema_sample_rows=schema_sample_rows,
434            dtype_coercion=dtype_coercion,
435            use_columns=use_columns,
436            dtypes=dtypes,
437        )

Loads a sheet by name.

Refer to load_sheet for parameter documentation

def load_sheet_by_idx( self, idx: int, *, header_row: int | None = 0, column_names: list[str] | None = None, skip_rows: int | None = None, n_rows: int | None = None, schema_sample_rows: int | None = 1000, dtype_coercion: Literal['coerce', 'strict'] = 'coerce', use_columns: Union[list[str], list[int], str, Callable[[ColumnInfo], bool], NoneType] = None, dtypes: Union[Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration'], dict[str | int, Literal['null', 'int', 'float', 'string', 'boolean', 'datetime', 'date', 'duration']], NoneType] = None) -> ExcelSheet:
439    def load_sheet_by_idx(
440        self,
441        idx: int,
442        *,
443        header_row: int | None = 0,
444        column_names: list[str] | None = None,
445        skip_rows: int | None = None,
446        n_rows: int | None = None,
447        schema_sample_rows: int | None = 1_000,
448        dtype_coercion: Literal["coerce", "strict"] = "coerce",
449        use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
450        dtypes: DType | DTypeMap | None = None,
451    ) -> ExcelSheet:
452        """Loads a sheet by index.
453
454        Refer to `load_sheet` for parameter documentation
455        """
456        return self.load_sheet(
457            idx,
458            header_row=header_row,
459            column_names=column_names,
460            skip_rows=skip_rows,
461            n_rows=n_rows,
462            schema_sample_rows=schema_sample_rows,
463            dtype_coercion=dtype_coercion,
464            use_columns=use_columns,
465            dtypes=dtypes,
466        )

Loads a sheet by index.

Refer to load_sheet for parameter documentation

class ExcelSheet:
 55class ExcelSheet:
 56    """A class representing a single sheet in an Excel File"""
 57
 58    def __init__(self, sheet: _ExcelSheet) -> None:
 59        self._sheet = sheet
 60
 61    @property
 62    def name(self) -> str:
 63        """The name of the sheet"""
 64        return self._sheet.name
 65
 66    @property
 67    def width(self) -> int:
 68        """The sheet's width"""
 69        return self._sheet.width
 70
 71    @property
 72    def height(self) -> int:
 73        """The sheet's height, with `skip_rows` and `nrows` applied"""
 74        return self._sheet.height
 75
 76    @property
 77    def total_height(self) -> int:
 78        """The sheet's total height"""
 79        return self._sheet.total_height
 80
 81    @property
 82    def selected_columns(self) -> list[ColumnInfo]:
 83        """The sheet's selected columns"""
 84        return self._sheet.selected_columns
 85
 86    @property
 87    def available_columns(self) -> list[ColumnInfo]:
 88        """The columns available for the given sheet"""
 89        return self._sheet.available_columns
 90
 91    @property
 92    def specified_dtypes(self) -> DTypeMap | None:
 93        """The dtypes specified for the sheet"""
 94        return self._sheet.specified_dtypes
 95
 96    @property
 97    def visible(self) -> SheetVisible:
 98        """The visibility of the sheet"""
 99        return self._sheet.visible
100
101    def to_arrow(self) -> pa.RecordBatch:
102        """Converts the sheet to a pyarrow `RecordBatch`"""
103        return self._sheet.to_arrow()
104
105    def to_pandas(self) -> "pd.DataFrame":
106        """Converts the sheet to a Pandas `DataFrame`.
107
108        Requires the `pandas` extra to be installed.
109        """
110        # We know for sure that the sheet will yield exactly one RecordBatch
111        return self.to_arrow().to_pandas()
112
113    def to_polars(self) -> "pl.DataFrame":
114        """Converts the sheet to a Polars `DataFrame`.
115
116        Requires the `polars` extra to be installed.
117        """
118        return _recordbatch_to_polars(self.to_arrow())
119
120    def __repr__(self) -> str:
121        return self._sheet.__repr__()

A class representing a single sheet in an Excel File

ExcelSheet(sheet: _ExcelSheet)
58    def __init__(self, sheet: _ExcelSheet) -> None:
59        self._sheet = sheet
name: str
61    @property
62    def name(self) -> str:
63        """The name of the sheet"""
64        return self._sheet.name

The name of the sheet

width: int
66    @property
67    def width(self) -> int:
68        """The sheet's width"""
69        return self._sheet.width

The sheet's width

height: int
71    @property
72    def height(self) -> int:
73        """The sheet's height, with `skip_rows` and `nrows` applied"""
74        return self._sheet.height

The sheet's height, with skip_rows and nrows applied

total_height: int
76    @property
77    def total_height(self) -> int:
78        """The sheet's total height"""
79        return self._sheet.total_height

The sheet's total height

selected_columns: list[ColumnInfo]
81    @property
82    def selected_columns(self) -> list[ColumnInfo]:
83        """The sheet's selected columns"""
84        return self._sheet.selected_columns

The sheet's selected columns

available_columns: list[ColumnInfo]
86    @property
87    def available_columns(self) -> list[ColumnInfo]:
88        """The columns available for the given sheet"""
89        return self._sheet.available_columns

The columns available for the given sheet

specified_dtypes: 'DTypeMap | None'
91    @property
92    def specified_dtypes(self) -> DTypeMap | None:
93        """The dtypes specified for the sheet"""
94        return self._sheet.specified_dtypes

The dtypes specified for the sheet

visible: Literal['visible', 'hidden', 'veryhidden']
96    @property
97    def visible(self) -> SheetVisible:
98        """The visibility of the sheet"""
99        return self._sheet.visible

The visibility of the sheet

def to_arrow(self) -> pyarrow.lib.RecordBatch:
101    def to_arrow(self) -> pa.RecordBatch:
102        """Converts the sheet to a pyarrow `RecordBatch`"""
103        return self._sheet.to_arrow()

Converts the sheet to a pyarrow RecordBatch

def to_pandas(self) -> pandas.core.frame.DataFrame:
105    def to_pandas(self) -> "pd.DataFrame":
106        """Converts the sheet to a Pandas `DataFrame`.
107
108        Requires the `pandas` extra to be installed.
109        """
110        # We know for sure that the sheet will yield exactly one RecordBatch
111        return self.to_arrow().to_pandas()

Converts the sheet to a Pandas DataFrame.

Requires the pandas extra to be installed.

def to_polars(self) -> polars.dataframe.frame.DataFrame:
113    def to_polars(self) -> "pl.DataFrame":
114        """Converts the sheet to a Polars `DataFrame`.
115
116        Requires the `polars` extra to be installed.
117        """
118        return _recordbatch_to_polars(self.to_arrow())

Converts the sheet to a Polars DataFrame.

Requires the polars extra to be installed.

DTypeFrom: TypeAlias = Literal['provided_by_index', 'provided_by_name', 'guessed']
ColumnNameFrom: TypeAlias = Literal['provided', 'looked_up', 'generated']
class ColumnInfo:

This class provides information about a single column in a sheet

dtype

fastexcel.DType. The dtype of the column

index

int. The index of the column

name

str. The name of the column

column_name_from

fastexcel.ColumnNameFrom. How the name of the column was determined.

One of three possible values:

  • "provided": The column name was provided via the use_columns parameter
  • "looked_up": The column name was looked up from the data found in the sheet
  • "generated": The column name was generated from the column index, either because header_row was None, or because it could not be looked up
dtype_from

fastexcel.DTypeFrom. How the dtype of the column was determined.

One of three possible values:

  • "provided_by_index": The dtype was specified via the column index
  • "provided_by_name": The dtype was specified via the column name
  • "guessed": The dtype was determined from the content of the column
class FastExcelError(builtins.Exception):

The base class for all fastexcel errors

class CannotRetrieveCellDataError(fastexcel.FastExcelError):

Data for a given cell cannot be retrieved

class CalamineCellError(fastexcel.FastExcelError):

calamine returned an error regarding the content of the cell

class CalamineError(fastexcel.FastExcelError):

Generic calamine error

class SheetNotFoundError(fastexcel.FastExcelError):

Sheet was not found

class ColumnNotFoundError(fastexcel.FastExcelError):

Column was not found

class ArrowError(fastexcel.FastExcelError):

Generic arrow error

class InvalidParametersError(fastexcel.FastExcelError):

Provided parameters are invalid

class UnsupportedColumnTypeCombinationError(fastexcel.FastExcelError):

Column contains an unsupported type combination