simpleml.save_patterns.serializers.pandas

Pandas Save/Load Utils and Patterns

Module Contents

Classes

PandasCSVSerializer

PandasJSONSerializer

PandasParquetSerializer

PandasPersistenceMethods

Base class for internal Pandas serialization/deserialization options

Attributes

__author__

simpleml.save_patterns.serializers.pandas.__author__ = Elisha Yadgaran[source]
class simpleml.save_patterns.serializers.pandas.PandasCSVSerializer[source]

Bases: simpleml.save_patterns.base.BaseSerializer

static deserialize(filepath, source_directory='system_temp', **kwargs)[source]
Parameters
  • filepath (str) –

  • source_directory (str) –

Return type

Dict[str, pandas.DataFrame]

static serialize(obj, filepath, format_directory=CSV_DIRECTORY, format_extension='.csv', destination_directory='system_temp', **kwargs)[source]
Parameters
  • obj (pandas.DataFrame) –

  • filepath (str) –

  • format_directory (str) –

  • format_extension (str) –

  • destination_directory (str) –

Return type

Dict[str, str]

class simpleml.save_patterns.serializers.pandas.PandasJSONSerializer[source]

Bases: simpleml.save_patterns.base.BaseSerializer

static deserialize(filepath, source_directory='system_temp', **kwargs)[source]
Parameters
  • filepath (str) –

  • source_directory (str) –

Return type

Dict[str, pandas.DataFrame]

static serialize(obj, filepath, format_directory=JSON_DIRECTORY, format_extension='.jsonl', destination_directory='system_temp', **kwargs)[source]
Parameters
  • obj (pandas.DataFrame) –

  • filepath (str) –

  • format_directory (str) –

  • format_extension (str) –

  • destination_directory (str) –

Return type

Dict[str, str]

class simpleml.save_patterns.serializers.pandas.PandasParquetSerializer[source]

Bases: simpleml.save_patterns.base.BaseSerializer

static deserialize(filepath, source_directory='system_temp', **kwargs)[source]
Parameters
  • filepath (str) –

  • source_directory (str) –

Return type

Dict[str, pandas.DataFrame]

static serialize(obj, filepath, format_directory=PARQUET_DIRECTORY, format_extension='.parquet', destination_directory='system_temp', **kwargs)[source]
Parameters
  • obj (pandas.DataFrame) –

  • filepath (str) –

  • format_directory (str) –

  • format_extension (str) –

  • destination_directory (str) –

Return type

Dict[str, str]

class simpleml.save_patterns.serializers.pandas.PandasPersistenceMethods[source]

Bases: object

Base class for internal Pandas serialization/deserialization options

Wraps pd.DataFrame methods with sensible defaults

https://pandas.pydata.org/docs/reference/io.html
https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html

INDEX_COLUMN = simpleml_index[source]
static df_to_sql(engine, df, table, dtype=None, schema='public', if_exists='replace', sep='|', encoding='utf8', index=False)[source]

Utility to bulk insert a pandas DataFrame via COPY FROM

Parameters
  • df (pandas.DataFrame) – dataframe to insert

  • table (str) – destination table

  • dtype (Optional[Dict[str, str]]) – column schema of destination table

  • schema (str) – destination schema

  • if_exists (str) – what to do if destination table exists; valid inputs are: [replace, append, fail]

  • sep (str) – separator key between cells

  • encoding (str) – character encoding to use

  • index (bool) – whether to output index with data

Return type

None

static read_bigquery(**kwargs)[source]
Return type

pandas.DataFrame

static read_clipboard(**kwargs)[source]
Return type

pandas.DataFrame

classmethod read_csv(cls, filename, **kwargs)[source]

Helper method to read in a csv file

Parameters

filename (str) –

Return type

pandas.DataFrame

static read_excel(**kwargs)[source]
Return type

pandas.DataFrame

static read_feather(**kwargs)[source]
Return type

pandas.DataFrame

static read_fwf(**kwargs)[source]
Return type

pandas.DataFrame

static read_hdf(filepath, **kwargs)[source]
Parameters

filepath (str) –

Return type

pandas.DataFrame

static read_html(**kwargs)[source]
Return type

pandas.DataFrame

classmethod read_json(cls, filepath, orient='records', lines=True, **kwargs)[source]
Parameters
  • filepath (str) –

  • orient (str) –

  • lines (bool) –

Return type

pandas.DataFrame

static read_orc(filepath, **kwargs)[source]
Parameters

filepath (str) –

Return type

pandas.DataFrame

static read_parquet(filepath, **kwargs)[source]
Parameters

filepath (str) –

Return type

pandas.DataFrame

static read_pickle(**kwargs)[source]
Return type

pandas.DataFrame

static read_sas(**kwargs)[source]
Return type

pandas.DataFrame

static read_spss(**kwargs)[source]
Return type

pandas.DataFrame

static read_sql(**kwargs)[source]
Return type

pandas.DataFrame

static read_sql_query(query, connection, **kwargs)[source]

Helper method to read in sql data

Parameters

query (str) –

Return type

pandas.DataFrame

static read_sql_table(**kwargs)[source]
Return type

pandas.DataFrame

static read_stata(**kwargs)[source]
Return type

pandas.DataFrame

static read_table(**kwargs)[source]
Return type

pandas.DataFrame

static read_xml(**kwargs)[source]
Return type

pandas.DataFrame

static to_clipboard(df, overwrite=True, **kwargs)[source]
Parameters
  • df (pandas.DataFrame) –

  • overwrite (bool) –

Return type

None

classmethod to_csv(cls, df, filepath, overwrite=True, **kwargs)[source]
Parameters
  • df (pandas.DataFrame) –

  • filepath (str) –

  • overwrite (bool) –

Return type

None

static to_excel(df, filepath, overwrite=True, **kwargs)[source]
Parameters
  • df (pandas.DataFrame) –

  • filepath (str) –

  • overwrite (bool) –

Return type

None

static to_feather(df, filepath, overwrite=True, **kwargs)[source]
Parameters
  • df (pandas.DataFrame) –

  • filepath (str) –

  • overwrite (bool) –

Return type

None

static to_html(df, filepath, overwrite=True, **kwargs)[source]
Parameters
  • df (pandas.DataFrame) –

  • filepath (str) –

  • overwrite (bool) –

Return type

None

classmethod to_json(cls, df, filepath, overwrite=True, lines=True, orient='records', **kwargs)[source]
Parameters
  • df (pandas.DataFrame) –

  • filepath (str) –

  • overwrite (bool) –

  • lines (bool) –

  • orient (str) –

Return type

None

static to_latex(df, filepath, overwrite=True, **kwargs)[source]
Parameters
  • df (pandas.DataFrame) –

  • filepath (str) –

  • overwrite (bool) –

Return type

None

static to_parquet(df, filepath, overwrite=True, **kwargs)[source]
Parameters
  • df (pandas.DataFrame) –

  • filepath (str) –

  • overwrite (bool) –

Return type

None

static to_pickle(df, filepath, overwrite=True, **kwargs)[source]
Parameters
  • df (pandas.DataFrame) –

  • filepath (str) –

  • overwrite (bool) –

Return type

None

static to_stata(df, filepath, overwrite=True, **kwargs)[source]
Parameters
  • df (pandas.DataFrame) –

  • filepath (str) –

  • overwrite (bool) –

Return type

None

static to_xml(df, filepath, overwrite=True, **kwargs)[source]
Parameters
  • df (pandas.DataFrame) –

  • filepath (str) –

  • overwrite (bool) –

Return type

None