Module ezspreadsheet
A simple API to store/load python objects to/from spreadsheets
Limitations
Currently only classes with 51 or less attributes are supported
Notes
- xlsx writing is significantly faster than csv currently
- When readable flag is set:
- values are deserailized as strings not their original types (list, tuple, dict etc.)
- Since csv is auto-interpreted for newline termination characters in most applications tabs are used as delimiters instead on csv files
Examples
Store some animal instances in a spreadsheet called 'animals.xlsx'
from ezspreadsheet import Spreadsheet
class Animal():
def __init__(self, name:str, conservation_status:str):
self.name = name
self.conservation_status = conservation_status
leopard_gecko = Animal('Leopard Gecko', 'Least Concern')
philippine_eagle = Animal('Philippine Eagle', 'Threatened')
with Spreadsheet('animals.xlsx', Animal) as output_sheet:
output_sheet.store(leopard_gecko, philippine_eagle)
Store a list of instances into a spreadsheet called 'users.csv'
from ezspreadsheet import Spreadsheet
import random
import string
from dataclasses import dataclass
@dataclass
class User():
Name:str
Age:int
Weight:int
Family: list # Note that Iterables will be flattened to a string with newline seperators
instances = []
ranstring = lambda: ''.join(random.choices(string.ascii_uppercase, k=10)) # Generates a random 10 character string
for i in range(1000):
instances.append(User(ranstring(), random.randint(12,100), random.randint(75,400), [ranstring(), ranstring(), ranstring()]))
with Spreadsheet('users.csv', User) as output_sheet:
output_sheet.store(instances)
Read the values back from the spreadsheet in example 1
from ezspreadsheet import Spreadsheet
class Animal():
def __init__(self, name:str, conservation_status:str):
self.name = name
self.conservation_status = conservation_status
leopard_gecko = Animal('Leopard Gecko', 'Least Concern')
philippine_eagle = Animal('Philippine Eagle', 'Threatened')
with Spreadsheet('animals.xlsx') as output_sheet: # Returned values will be a namedtuple called Animal
returned_class, instances = output_sheet.load("Animal")
for instance in instances:
print(instance) """prints Animal(name='Leopard Gecko', conservation_status='Least Concern')
Animal(name='Philippine Eagle', conservation_status='Threatened')"""
with Spreadsheet('animals.xlsx', Animal) as output_sheet: # Returned values will be an Animal class
returned_class, instances = output_sheet.load("Animal")
for instance in instances:
print(vars(instance)) """prints: {'name': 'Leopard Gecko', 'conservation_status': 'Least Concern'}
{'name': 'Philippine Eagle', 'conservation_status': 'Threatened'}"""
Expand source code
"""A simple API to store/load python objects to/from spreadsheets
Limitations
-----------
Currently only classes with 51 or less attributes are supported
Notes
-----
- xlsx writing is significantly faster than csv currently
- When readable flag is set:
- values are deserailized as strings not their original types (list, tuple, dict etc.)
- Since csv is auto-interpreted for newline termination characters in most applications tabs are used as delimiters instead on csv files
Examples
--------
### Store some animal instances in a spreadsheet called 'animals.xlsx'
```
from ezspreadsheet import Spreadsheet
class Animal():
def __init__(self, name:str, conservation_status:str):
self.name = name
self.conservation_status = conservation_status
leopard_gecko = Animal('Leopard Gecko', 'Least Concern')
philippine_eagle = Animal('Philippine Eagle', 'Threatened')
with Spreadsheet('animals.xlsx', Animal) as output_sheet:
output_sheet.store(leopard_gecko, philippine_eagle)
```
### Store a list of instances into a spreadsheet called 'users.csv'
```
from ezspreadsheet import Spreadsheet
import random
import string
from dataclasses import dataclass
@dataclass
class User():
Name:str
Age:int
Weight:int
Family: list # Note that Iterables will be flattened to a string with newline seperators
instances = []
ranstring = lambda: ''.join(random.choices(string.ascii_uppercase, k=10)) # Generates a random 10 character string
for i in range(1000):
instances.append(User(ranstring(), random.randint(12,100), random.randint(75,400), [ranstring(), ranstring(), ranstring()]))
with Spreadsheet('users.csv', User) as output_sheet:
output_sheet.store(instances)
```
### Read the values back from the spreadsheet in example 1
```
from ezspreadsheet import Spreadsheet
class Animal():
def __init__(self, name:str, conservation_status:str):
self.name = name
self.conservation_status = conservation_status
leopard_gecko = Animal('Leopard Gecko', 'Least Concern')
philippine_eagle = Animal('Philippine Eagle', 'Threatened')
with Spreadsheet('animals.xlsx') as output_sheet: # Returned values will be a namedtuple called Animal
returned_class, instances = output_sheet.load("Animal")
for instance in instances:
print(instance) \"\"\"prints Animal(name='Leopard Gecko', conservation_status='Least Concern')\nAnimal(name='Philippine Eagle', conservation_status='Threatened')\"\"\"
with Spreadsheet('animals.xlsx', Animal) as output_sheet: # Returned values will be an Animal class
returned_class, instances = output_sheet.load("Animal")
for instance in instances:
print(vars(instance)) \"\"\"prints: {'name': 'Leopard Gecko', 'conservation_status': 'Least Concern'}\n{'name': 'Philippine Eagle', 'conservation_status': 'Threatened'}\"\"\"
```
"""
import csv # Used to read and write to CSV files
import logging # Used to log data for debugging and transparency
import datetime # Used to validate type assertions for datetime instances
from collections import namedtuple # Used to deserailize classes from spreadsheets and instances
from typing import Any, Union, Iterable # Used for type hinting and type assertions on various class methods
# Third party dependencies
import colored # Colours terminal output for emphasis
from openpyxl import Workbook, load_workbook # Used to open and operate with xlsx files
from openpyxl.styles import Font, Alignment # Used to style various output to xlsx files
class Spreadsheet():
"""A class that allows serialization/deserialization of python objects to csv or xlsx files
Parameters
----------
file_name : (str)
The name of the .xlsx or .csv file that will be saved out to or loaded in
class_identifier : (object or bool)
The class object for instances you want to store, see example(s) for details
If not specified (left as False), it's assumed you only want to load values
Raises
------
ValueError
In three cases:
1. If instances provided to Spreadsheet.store() do not match type used to construct Spreadsheet instance
2. If class provided has more than 51 attributes (see limitations section of docs for details)
3. The provided file is not a .xlsx file or a .csv file
Examples
--------
## Store some animal instances in a spreadsheet called 'animals.xlsx'
```
from ezspreadsheet import Spreadsheet
class Animal():
def __init__(self, name:str, conservation_status:str):
self.name = name
self.conservation_status = conservation_status
leopard_gecko = Animal('Leopard Gecko', 'Least Concern')
philippine_eagle = Animal('Philippine Eagle', 'Threatened')
with Spreadsheet('animals.xlsx', Animal) as output_sheet:
output_sheet.store(leopard_gecko, philippine_eagle)
```
"""
def __init__(self, file_name:str, class_identifier:object=False):
if file_name.endswith(".xlsx"):
self.spreadsheet = _XLSX_Spreadsheet(file_name, class_identifier)
elif file_name.endswith(".csv"):
self.spreadsheet = _CSV_Spreadsheet(file_name, class_identifier)
else:
raise ValueError(f"Provided file {file_name} is not a csv or xlsx file")
def __enter__(self):
"""entrypoint for the context manager"""
return self.spreadsheet.__enter__()
def __exit__(self, exc_type, exc_value, traceback):
"""Exitpoint for the context manager
Returns
-------
bool
True if the context manager ran into no issues saving files
"""
return self.spreadsheet.__exit__(exc_type, exc_value, traceback)
def store(self, *instances:Union[object, Iterable[object]], readable:bool = False):
"""Takes in instance(s) of the specified class to store
Parameters
----------
instances : (Iterable[object] or arbitrary number of isntances)
The instances with the data you want to store
readable : bool
If True iterable attributes are written as readable values instead of directly storing iterables, by default False
Notes
-----
- iterables stored while readable == true cannot be deserialized to their original type
Raises
------
ValueError
If an instance is not the correct type
Notes
-----
- Any methods are not serialized, only attribtues
Examples
--------
## Store some animal instances in a spreadsheet called 'animals.xlsx'
```
from ezspreadsheet import Spreadsheet
class Animal():
def __init__(self, name:str, conservation_status:str):
self.name = name
self.conservation_status = conservation_status
leopard_gecko = Animal('Leopard Gecko', 'Least Concern')
philippine_eagle = Animal('Philippine Eagle', 'Threatened')
with Spreadsheet('animals.xlsx', Animal) as output_sheet:
output_sheet.store(leopard_gecko, philippine_eagle)
```
"""
return self.spreadsheet.store(*instances, readable=readable)
def load(self, name:str) -> tuple:
"""Loads the class, and instances stored inside Spreadsheet at self.file_name
Parameters
----------
name : str
The name you want to assign the class that is returned
Notes
-----
- if self.class_identifier is specified on Spreadsheet instantiation then that class is used instead of instantiating a new one
- if self.class_identifier is not specified a subclass of namedtuple is instantiated and passed back
Returns
-------
tuple
First return value is the constructor used to create instances (class if class_identifier is specified, else custom derived class), and second all the found instances
Raises
------
ValueError
If file is empty, or header of file is not equivalent to provided class attributes
Notes
-----
- If you didn't specify a class identifier when opening the spreadsheet the returned values are namedtuples and not full class instances
Examples
--------
## Loading some stored values of the Animal class from animals.xlsx
```
with Spreadsheet('animals.xlsx') as loaded_sheet:
Animal, instances = loaded_sheet.load('Animal')
# NOTE: Animal at this point is a namedtuple constructor, not a full python class
print(Animal) # Prints: <class '__main__.Animal'>
print(instances) # Prints: [Animal(name='Leopard Gecko', conservation_status='Least Concern'), Animal(name='Philippine Eagle', conservation_status='Threatened')]
```
## Loading some stored values of the Animal class from animals.xlsx with the class identifier specified
```
class Animal():
def __init__(self, name:str, conservation_status:str):
self.name = name
self.conservation_status = conservation_status
with Spreadsheet('animals.xlsx', Animal) as loaded_sheet:
Animal, instances = loaded_sheet.load('Animal')
print(Animal) # Prints: <class '__main__.Animal'>
for instance in instances:
print(vars(instance)) # Since these are real class instances we can use vars()
'''prints:
{'name': 'Leopard Gecko', 'conservation_status': 'Least Concern'}
{'name': 'Philippine Eagle', 'conservation_status': 'Threatened'}
'''
```
"""
self.spreadsheet.load(name)
def _get_values_from_instance(self, instance:object) -> list:
"""Get's the instance's attribute values
Parameters
----------
instance : object
The instance to pull the attribute values from
Returns
-------
list
The values for the attributes from the instance
"""
logging.debug(f"Attributes are {self.class_attributes}")
values = [] # All the values of the attributes in order
for attribute in self.class_attributes:
logging.debug(f"Looking for attribute {attribute} found value {instance.__dict__[attribute]}")
values.append(instance.__dict__[attribute])
return values
class _XLSX_Spreadsheet(Spreadsheet):
"""A class that takes in instances of objects and serializes them to xlsx files
Parameters
----------
file_name : (str)
The name of the .xlsx file that will be saved out (extension can be included or excluded)
class_identifier : (object or bool)
The class object for instances you want to store, see example(s) for details
If not specified (left as False), it's assumed you only want to load values
Raises
------
ValueError
In two cases:
1. If instances provided to Spreadsheet.store() do not match type used to construct Spreadsheet instance
2. If class provided has more than 51 attributes (see limitations section of docs for details)
"""
def __init__(self, file_name:str, class_identifier:object=False):
self.file_name = file_name
self.workbook = None
self.worksheet = None
self.class_identifier = class_identifier
self.class_attributes = None
if class_identifier:
# Get all attributes of class defined in __init__
self.class_attributes = class_identifier.__init__.__code__.co_varnames[1::] # Skip the self
if len(self.class_attributes) > 51:
raise ValueError(f"Provided class {class_identifier.__name__} has more than 51 attributes")
def __enter__(self):
"""Entrypoint for the context manager
Returns
-------
Spreadsheet
Reference to self
"""
self.workbook = Workbook()
self.worksheet = self.workbook.active
self.worksheet.page_setup.fitToWidth = 1
return self
def __exit__(self, exc_type, exc_value, traceback):
"""Exitpoint for the context manager
Returns
-------
bool
True if the context manager ran into no issues saving files
"""
if exc_type is None and exc_value is None:
try:
self.workbook.save(self.file_name)
print(f"{self.file_name} successfully saved")
except PermissionError:
input(f"{colored.fg(1)}File {self.file_name} is currently open{colored.fg(15)}\nPlease close it and hit enter to save file: ")
self.workbook.save(self.file_name)
return True
else:
print(f"{colored.fg(1)}Ran into exception {exc_type.__name__}, with value {exc_value} here is traceback{colored.fg(15)}")
return False
def _add_row_to_spreadsheet(self, data:list, row:int, style:Font = False, readable:bool = False):
"""Take in some data, and an int for a row and add that data to that row
Parameters
----------
data : list
The data you want to store in the row
row : int
The index of the row to store the data to
style : Font, optional
If you want to supply custom formatting for the row, by default False
readable : bool
If True iterable attributes are written as readable values instead of directly storing iterables, by default False
"""
# The value that will be converted using chr() for column identifiers i.e. A1 B1 etc.
column_identifier = 65 # Initialize to ord() value of 'A'
for value in data:
if column_identifier == 91: # Roll over to Ax column identifiers from x column identifiers
label = f"AA{row}"
elif column_identifier > 91: # If beyond Z in column identifiers
label = f"A{chr(column_identifier-26)}{row}"
else: # If before or at Z in column identifiers
label = f"{chr(column_identifier)}{row}"
logging.debug(f"{value} will be written to {label}")
# Apply styles if specified
if style:
self.worksheet[label].font = style
# Add value to worksheet
if type(value) not in [str, int, float, datetime.datetime]:
if type(value) == dict and readable:
print("Serializing dictionary in readable format") # TODO: remove
logging.debug("Serializing dictionary in readable format")
flattened_value = ""
for key in value:
flattened_value += f"- {key}: {value[key]}\n"
self.worksheet[label] = flattened_value
elif readable:
# If value is an Iterable that's not a str, int or float then flatten it to a str
logging.debug(f"Serializing {type(value)} in readable format")
flattened_value = ""
for sub_value in value:
flattened_value += f"- {str(sub_value)}\n"
self.worksheet[label] = flattened_value
else:
# Value is not a str, int, float or datetime object (all can be natively serialized)
self.worksheet[label] = str(value)
else: # If value is a string, int, float or datetime object
self.worksheet[label] = value
# Apply wrap text formatting to all rows that aren't the heading
if not row == 1:
self.worksheet[label].alignment = Alignment(wrapText=True)
# Increment the column identifiers variable to move to next column letter
column_identifier += 1
def store(self, *instances:Union[object, Iterable[object]], readable:bool = False):
"""Takes in instance(s) of the specified class to store
Parameters
----------
instances : (Iterable[object] or arbitrary number of isntances)
The instances with the data you want to store
readable : bool
If True iterable attributes are written as readable values instead of directly storing iterables, by default False
Raises
------
ValueError
If an instance is not the correct type
"""
print(f"Beginning to store {self.class_identifier.__name__} instances to {self.file_name}")
current_row = 1 # The current row that the iteration is at
# Add heading with the list of class attributes to A1
if not self.class_attributes:
raise ValueError("No class constructor provided, cannot store instances")
self._add_row_to_spreadsheet(self.class_attributes, current_row, Font(bold=True, size=14))
current_row += 1 # Increment row to start with row right after heading
logging.debug(f"Instances are {instances}")
# Check if instance provided is a class of correct type, or an Iterable
for current_instance in instances:
logging.debug(f"Instance is {str(current_instance)}")
if isinstance(current_instance, Iterable): # If argument is an Iterable (i.e. list, tuple etc.)
for sub_instance in current_instance:
if not isinstance(sub_instance, self.class_identifier): # Validate sub-instance is correct type
raise ValueError(f"Provided instance: {sub_instance} is not of type {self.class_identifier}")
else:
self._add_row_to_spreadsheet(self._get_values_from_instance(sub_instance), current_row, readable=readable)
current_row += 1
elif not isinstance(current_instance, self.class_identifier): # If argument is not correct type
raise ValueError(f"Provided instance: {current_instance} is not of type {self.class_identifier}")
else: # If argument is a single class instance of the correct type
logging.debug(f"Adding values from {str(current_instance)}: {self._get_values_from_instance(current_instance)}")
self._add_row_to_spreadsheet(self._get_values_from_instance(current_instance), current_row, readable=readable)
current_row += 1
def _load_values(self) -> list:
"""Yields each row of values to be consumed inside self.load()
Yields
-------
list
The values for a given row
"""
for values in self.worksheet.values:
values = list(values)
for index, value in enumerate(values):
# Deserialize iterables like lists, tuples and dicts
if type(value) == str:
if value.startswith("["): # Deserialize lists that were not stored with self.store(readable=True)
value = value[1:-2].replace("\'", "").replace('\"', "").split(',')
values[index] = [v.strip() for v in value]
elif value.startswith("("): # Deserialize tuples that were not stored with self.store(readable=True)
value = value[1:-2].replace("\'", "").replace('\"', "").split(',')
values[index] = tuple(v.strip() for v in value)
elif value.startswith("{"): # Deserialize dicts that were not stored with self.store(readable=True)
key_value_pairs = value[1:-2].replace("\'", "").replace('\"', "").split(',')
result = {}
for pair in key_value_pairs:
key, value = pair.split(":")
key = key.strip()
if type(value) == str:
result[key] = value.strip()
else:
result[key] = value
values[index] = result
yield values
def load(self, name:str) -> tuple:
"""Loads the class, and instances stored inside Spreadsheet at self.file_name
Parameters
----------
name : str
The name you want to assign the class that is returned
Returns
-------
tuple
First return value is the constructor used to create instances (class if class_identifier is specified, else namedtuple), and second all the found instances
Raises
------
ValueError
In 2 cases:
1. If spreadsheet file is empty
2. If header does not match provided class attributes
"""
self.workbook = load_workbook(self.file_name)
self.worksheet = self.workbook.active
values = self._load_values()
instances = []
try:
header = tuple(next(values)) # skip the attributes
except StopIteration:
raise ValueError("Provided spreadsheet is empty")
if self.class_identifier: # If class was specified
logging.debug(f"Class identifier {self.class_identifier} specified")
if self.class_attributes != header: # Validate the file header is the same as the attributes provided
raise ValueError(f"Provided header {header} is not the same as class attribues {self.class_attributes}")
constructor = self.class_identifier
for instance_values in values:
instances.append(self.class_identifier(*instance_values))
else:
logging.debug("No class identifier specified, generating namedtuple")
# Get attributes from first row
constructor = namedtuple(name, header)
for instance_values in values:
instances.append(constructor._make(instance_values))
logging.debug(f"Returning: {constructor}\n\n{instances}")
return constructor, instances
class _CSV_Spreadsheet(Spreadsheet):
def __init__(self, file_name:str, class_identifier:object=False):
self.file_name = file_name
self.spreadsheet_file = None
self.reader = None
self.writer = None
self.read = False
self.written = False
self.class_identifier = class_identifier
self.class_attributes = None
if class_identifier:
# Get all attributes of class defined in __init__
self.class_attributes = class_identifier.__init__.__code__.co_varnames[1::] # Skip the self
if len(self.class_attributes) > 51:
raise ValueError(f"Provided class {class_identifier.__name__} has more than 51 attributes")
def __enter__(self):
"""entrypoint for the context manager"""
try:
self.spreadsheet_file = open(self.file_name, 'r+', newline='\n')
except PermissionError:
input(f"{colored.fg(1)}File {self.file_name} is currently open{colored.fg(15)}\nPlease close it and hit enter to open file: ")
self.spreadsheet_file = open(self.file_name, 'r+', newline='\n')
except FileNotFoundError:
temp = open(self.file_name, 'w+', newline='\n')
temp.close()
self.spreadsheet_file = open(self.file_name, 'r+', newline='\n')
return self
def __exit__(self, exc_type, exc_value, traceback):
if exc_type is None and exc_value is None:
try:
self.spreadsheet_file.close()
if self.written:
print(f"{self.file_name} successfully saved")
if self.read:
print(f"{self.file_name} successfully read")
except PermissionError:
input(f"{colored.fg(1)}File {self.file_name} is currently open{colored.fg(15)}\nPlease close it and hit enter to save file: ")
self.spreadsheet_file.close()
return True
else:
print(f"{colored.fg(1)}Ran into exception {exc_type.__name__}, with value {exc_value} here is traceback{colored.fg(15)}")
return False
def store(self, *instances:Union[object, Iterable[object]], readable:bool = False):
"""Takes in instance(s) of the specified class to store
Parameters
----------
instances : (Iterable[object] or arbitrary number of isntances)
The instances with the data you want to store
readable : bool
If True iterable attributes are written as readable values instead of directly storing iterables, by default False
Raises
------
ValueError
If an instance is not the correct type, or no class constructor is provided
"""
if not self.writer:
self.writer = csv.writer(self.spreadsheet_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
# Write header
if not self.class_attributes:
raise ValueError("No class constructor provided, cannot store instances")
self.writer.writerow(self.class_attributes)
all_instance_values = [] # A list that will contain just the values of all instances
for instance in instances: # Grab each instances' attribute values and store them in all_instance_values
logging.debug(f"Instance is {str(instance)}")
if isinstance(instance, Iterable): # If argument is an Iterable (i.e. list, tuple etc.)
for sub_instance in instance:
if not isinstance(sub_instance, self.class_identifier): # Validate sub-instance is correct type
raise ValueError(f"Provided instance: {sub_instance} is not of type {self.class_identifier}")
else:
instance_values = self._get_values_from_instance(sub_instance)
all_instance_values.append(instance_values)
elif not isinstance(instance, self.class_identifier): # If instance is not correct type
raise ValueError(f"Provided instance: {instance} is not of type {self.class_identifier}")
else: # If instance provided is a single instance of correct type
instance_values = self._get_values_from_instance(instance)
all_instance_values.append(instance_values)
if readable: # Write iterables as readable forms if flag is specified
for instance_values in all_instance_values:
for index, value in enumerate(instance_values):
if isinstance(value, Iterable): # If the current instance attribute is an iterable
if type(value) not in [str, int, float, datetime.datetime]:
if type(value) == dict and readable:
logging.debug("Serializing dictionary in readable format")
flattened_value = ""
for key in value:
flattened_value += f"- {key}: {value[key]} \t"
instance_values[index] = flattened_value
else:
# If value is an Iterable that's not a str, int or float then flatten it to a str
logging.debug(f"Serializing {type(value)} in readable format")
flattened_value = ""
for sub_value in value:
flattened_value += f"- {str(sub_value)} \t"
instance_values[index] = flattened_value
self.written = True
self.writer.writerows(all_instance_values)
def _load_values(self):
for row in self.reader:
values = list(row)
for index, value in enumerate(values):
# Deserialize iterables like lists, tuples and dicts
if type(value) == str:
if value.startswith("["): # Deserialize lists that were not stored with self.store(readable=True)
value = value[1:-2].replace("\'", "").replace('\"', "").split(',')
values[index] = [v.strip() for v in value]
elif value.startswith("("): # Deserialize tuples that were not stored with self.store(readable=True)
value = value[1:-2].replace("\'", "").replace('\"', "").split(',')
values[index] = tuple(v.strip() for v in value)
elif value.startswith("{"): # Deserialize dicts that were not stored with self.store(readable=True)
key_value_pairs = value[1:-2].replace("\'", "").replace('\"', "").split(',')
result = {}
for pair in key_value_pairs:
key, value = pair.split(":")
key = key.strip()
if type(value) == str:
result[key] = value.strip()
else:
result[key] = value
values[index] = result
elif value.isdigit():
values[index] = int(value)
elif value.isdecimal():
values[index] = float(value)
elif value.startswith("-"): # possible negative integer
if value[1::].isdigit():
values[index] = int(value[1::]) * -1
if value[1::].isdecimal():
values[index] = float(value[1::]) * -1
yield values
def load(self, name:str) -> tuple:
"""Loads values from provided spreadsheet file
Parameters
----------
name : str
The name you want to give to the returned class if no class constructor is provided
Returns
-------
tuple
First the class constructor, second the instances retrieved from the file
Raises
------
ValueError
In 2 cases:
1. If spreadsheet file is empty
2. If header does not match provided class attributes
"""
if not self.reader:
self.reader = csv.reader(self.spreadsheet_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
values = self._load_values()
try:
header = next(values)
logging.debug(f"Found header: {header}")
except StopIteration:
raise ValueError(f"File {self.file_name} is empty")
if self.class_attributes:
if not tuple(header) == self.class_attributes:
raise ValueError(f"Header: {header} is not equivalent to privided class attributes: {self.class_attributes}")
else:
self.class_attributes = header
instances = []
if self.class_identifier: # If class was specified
logging.debug(f"Class identifier {self.class_identifier} specified")
constructor = self.class_identifier
for instance_values in values:
if len(instance_values) == len(self.class_attributes): # Sometimes a nonetype is deserialized
instances.append(self.class_identifier(*instance_values))
elif instance_values:
print(f"Row {instance_values} was skipped")
else:
logging.debug("No class identifier specified, generating namedtuple")
# Get attributes from first row
base_named_tuple = namedtuple(name, header)
class constructor(base_named_tuple):
__dict__ = property(base_named_tuple._asdict)
__name__ = property(base_named_tuple.__name__)
constructor.__name__ = name
for instance_values in values:
if len(instance_values) == len(self.class_attributes):
instances.append(constructor._make(instance_values))
logging.debug(f"Returning: {constructor}\n\n{instances}")
self.read = True
return constructor, instances
if __name__ == "__main__": # local test code to play around with
class Animal():
def __init__(self, name:str, conservation_status:str):
self.name = name
self.conservation_status = conservation_status
leopard_gecko = Animal('Leopard Gecko', 'Least Concern')
philippine_eagle = Animal('Philippine Eagle', 'Threatened')
with Spreadsheet('animals.csv', Animal) as output_sheet:
output_sheet.store(leopard_gecko, philippine_eagle)
with Spreadsheet('animals.csv') as loaded_sheet:
animals, instances = loaded_sheet.load('animals')
Classes
class Spreadsheet (file_name: str, class_identifier: object = False)
-
A class that allows serialization/deserialization of python objects to csv or xlsx files
Parameters
file_name
:(str)
- The name of the .xlsx or .csv file that will be saved out to or loaded in
class_identifier
:(object
orbool)
- The class object for instances you want to store, see example(s) for details If not specified (left as False), it's assumed you only want to load values
Raises
ValueError
- In three cases:
1. If instances provided to Spreadsheet.store() do not match type used to construct Spreadsheet instance 2. If class provided has more than 51 attributes (see limitations section of docs for details) 3. The provided file is not a .xlsx file or a .csv file
Examples
Store some animal instances in a spreadsheet called 'animals.xlsx'
from ezspreadsheet import Spreadsheet class Animal(): def __init__(self, name:str, conservation_status:str): self.name = name self.conservation_status = conservation_status leopard_gecko = Animal('Leopard Gecko', 'Least Concern') philippine_eagle = Animal('Philippine Eagle', 'Threatened') with Spreadsheet('animals.xlsx', Animal) as output_sheet: output_sheet.store(leopard_gecko, philippine_eagle)
Expand source code
class Spreadsheet(): """A class that allows serialization/deserialization of python objects to csv or xlsx files Parameters ---------- file_name : (str) The name of the .xlsx or .csv file that will be saved out to or loaded in class_identifier : (object or bool) The class object for instances you want to store, see example(s) for details If not specified (left as False), it's assumed you only want to load values Raises ------ ValueError In three cases: 1. If instances provided to Spreadsheet.store() do not match type used to construct Spreadsheet instance 2. If class provided has more than 51 attributes (see limitations section of docs for details) 3. The provided file is not a .xlsx file or a .csv file Examples -------- ## Store some animal instances in a spreadsheet called 'animals.xlsx' ``` from ezspreadsheet import Spreadsheet class Animal(): def __init__(self, name:str, conservation_status:str): self.name = name self.conservation_status = conservation_status leopard_gecko = Animal('Leopard Gecko', 'Least Concern') philippine_eagle = Animal('Philippine Eagle', 'Threatened') with Spreadsheet('animals.xlsx', Animal) as output_sheet: output_sheet.store(leopard_gecko, philippine_eagle) ``` """ def __init__(self, file_name:str, class_identifier:object=False): if file_name.endswith(".xlsx"): self.spreadsheet = _XLSX_Spreadsheet(file_name, class_identifier) elif file_name.endswith(".csv"): self.spreadsheet = _CSV_Spreadsheet(file_name, class_identifier) else: raise ValueError(f"Provided file {file_name} is not a csv or xlsx file") def __enter__(self): """entrypoint for the context manager""" return self.spreadsheet.__enter__() def __exit__(self, exc_type, exc_value, traceback): """Exitpoint for the context manager Returns ------- bool True if the context manager ran into no issues saving files """ return self.spreadsheet.__exit__(exc_type, exc_value, traceback) def store(self, *instances:Union[object, Iterable[object]], readable:bool = False): """Takes in instance(s) of the specified class to store Parameters ---------- instances : (Iterable[object] or arbitrary number of isntances) The instances with the data you want to store readable : bool If True iterable attributes are written as readable values instead of directly storing iterables, by default False Notes ----- - iterables stored while readable == true cannot be deserialized to their original type Raises ------ ValueError If an instance is not the correct type Notes ----- - Any methods are not serialized, only attribtues Examples -------- ## Store some animal instances in a spreadsheet called 'animals.xlsx' ``` from ezspreadsheet import Spreadsheet class Animal(): def __init__(self, name:str, conservation_status:str): self.name = name self.conservation_status = conservation_status leopard_gecko = Animal('Leopard Gecko', 'Least Concern') philippine_eagle = Animal('Philippine Eagle', 'Threatened') with Spreadsheet('animals.xlsx', Animal) as output_sheet: output_sheet.store(leopard_gecko, philippine_eagle) ``` """ return self.spreadsheet.store(*instances, readable=readable) def load(self, name:str) -> tuple: """Loads the class, and instances stored inside Spreadsheet at self.file_name Parameters ---------- name : str The name you want to assign the class that is returned Notes ----- - if self.class_identifier is specified on Spreadsheet instantiation then that class is used instead of instantiating a new one - if self.class_identifier is not specified a subclass of namedtuple is instantiated and passed back Returns ------- tuple First return value is the constructor used to create instances (class if class_identifier is specified, else custom derived class), and second all the found instances Raises ------ ValueError If file is empty, or header of file is not equivalent to provided class attributes Notes ----- - If you didn't specify a class identifier when opening the spreadsheet the returned values are namedtuples and not full class instances Examples -------- ## Loading some stored values of the Animal class from animals.xlsx ``` with Spreadsheet('animals.xlsx') as loaded_sheet: Animal, instances = loaded_sheet.load('Animal') # NOTE: Animal at this point is a namedtuple constructor, not a full python class print(Animal) # Prints: <class '__main__.Animal'> print(instances) # Prints: [Animal(name='Leopard Gecko', conservation_status='Least Concern'), Animal(name='Philippine Eagle', conservation_status='Threatened')] ``` ## Loading some stored values of the Animal class from animals.xlsx with the class identifier specified ``` class Animal(): def __init__(self, name:str, conservation_status:str): self.name = name self.conservation_status = conservation_status with Spreadsheet('animals.xlsx', Animal) as loaded_sheet: Animal, instances = loaded_sheet.load('Animal') print(Animal) # Prints: <class '__main__.Animal'> for instance in instances: print(vars(instance)) # Since these are real class instances we can use vars() '''prints: {'name': 'Leopard Gecko', 'conservation_status': 'Least Concern'} {'name': 'Philippine Eagle', 'conservation_status': 'Threatened'} ''' ``` """ self.spreadsheet.load(name) def _get_values_from_instance(self, instance:object) -> list: """Get's the instance's attribute values Parameters ---------- instance : object The instance to pull the attribute values from Returns ------- list The values for the attributes from the instance """ logging.debug(f"Attributes are {self.class_attributes}") values = [] # All the values of the attributes in order for attribute in self.class_attributes: logging.debug(f"Looking for attribute {attribute} found value {instance.__dict__[attribute]}") values.append(instance.__dict__[attribute]) return values
Subclasses
- ezspreadsheet._CSV_Spreadsheet
- ezspreadsheet._XLSX_Spreadsheet
Methods
def load(self, name: str) ‑> tuple
-
Loads the class, and instances stored inside Spreadsheet at self.file_name
Parameters
name
:str
- The name you want to assign the class that is returned
Notes
- if self.class_identifier is specified on Spreadsheet instantiation then that class is used instead of instantiating a new one
- if self.class_identifier is not specified a subclass of namedtuple is instantiated and passed back
Returns
tuple
- First return value is the constructor used to create instances (class if class_identifier is specified, else custom derived class), and second all the found instances
Raises
ValueError
- If file is empty, or header of file is not equivalent to provided class attributes
Notes
- If you didn't specify a class identifier when opening the spreadsheet the returned values are namedtuples and not full class instances
Examples
Loading some stored values of the Animal class from animals.xlsx
with Spreadsheet('animals.xlsx') as loaded_sheet: Animal, instances = loaded_sheet.load('Animal') # NOTE: Animal at this point is a namedtuple constructor, not a full python class print(Animal) # Prints: <class '__main__.Animal'> print(instances) # Prints: [Animal(name='Leopard Gecko', conservation_status='Least Concern'), Animal(name='Philippine Eagle', conservation_status='Threatened')]
Loading some stored values of the Animal class from animals.xlsx with the class identifier specified
class Animal(): def __init__(self, name:str, conservation_status:str): self.name = name self.conservation_status = conservation_status with Spreadsheet('animals.xlsx', Animal) as loaded_sheet: Animal, instances = loaded_sheet.load('Animal') print(Animal) # Prints: <class '__main__.Animal'> for instance in instances: print(vars(instance)) # Since these are real class instances we can use vars() '''prints: {'name': 'Leopard Gecko', 'conservation_status': 'Least Concern'} {'name': 'Philippine Eagle', 'conservation_status': 'Threatened'} '''
Expand source code
def load(self, name:str) -> tuple: """Loads the class, and instances stored inside Spreadsheet at self.file_name Parameters ---------- name : str The name you want to assign the class that is returned Notes ----- - if self.class_identifier is specified on Spreadsheet instantiation then that class is used instead of instantiating a new one - if self.class_identifier is not specified a subclass of namedtuple is instantiated and passed back Returns ------- tuple First return value is the constructor used to create instances (class if class_identifier is specified, else custom derived class), and second all the found instances Raises ------ ValueError If file is empty, or header of file is not equivalent to provided class attributes Notes ----- - If you didn't specify a class identifier when opening the spreadsheet the returned values are namedtuples and not full class instances Examples -------- ## Loading some stored values of the Animal class from animals.xlsx ``` with Spreadsheet('animals.xlsx') as loaded_sheet: Animal, instances = loaded_sheet.load('Animal') # NOTE: Animal at this point is a namedtuple constructor, not a full python class print(Animal) # Prints: <class '__main__.Animal'> print(instances) # Prints: [Animal(name='Leopard Gecko', conservation_status='Least Concern'), Animal(name='Philippine Eagle', conservation_status='Threatened')] ``` ## Loading some stored values of the Animal class from animals.xlsx with the class identifier specified ``` class Animal(): def __init__(self, name:str, conservation_status:str): self.name = name self.conservation_status = conservation_status with Spreadsheet('animals.xlsx', Animal) as loaded_sheet: Animal, instances = loaded_sheet.load('Animal') print(Animal) # Prints: <class '__main__.Animal'> for instance in instances: print(vars(instance)) # Since these are real class instances we can use vars() '''prints: {'name': 'Leopard Gecko', 'conservation_status': 'Least Concern'} {'name': 'Philippine Eagle', 'conservation_status': 'Threatened'} ''' ``` """ self.spreadsheet.load(name)
def store(self, *instances: Union[object, Iterable[object]], readable: bool = False)
-
Takes in instance(s) of the specified class to store
Parameters
instances
:(Iterable[object]
orarbitrary number
ofisntances)
- The instances with the data you want to store
readable
:bool
- If True iterable attributes are written as readable values instead of directly storing iterables, by default False
Notes
- iterables stored while readable == true cannot be deserialized to their original type
Raises
ValueError
- If an instance is not the correct type
Notes
- Any methods are not serialized, only attribtues
Examples
Store some animal instances in a spreadsheet called 'animals.xlsx'
from ezspreadsheet import Spreadsheet class Animal(): def __init__(self, name:str, conservation_status:str): self.name = name self.conservation_status = conservation_status leopard_gecko = Animal('Leopard Gecko', 'Least Concern') philippine_eagle = Animal('Philippine Eagle', 'Threatened') with Spreadsheet('animals.xlsx', Animal) as output_sheet: output_sheet.store(leopard_gecko, philippine_eagle)
Expand source code
def store(self, *instances:Union[object, Iterable[object]], readable:bool = False): """Takes in instance(s) of the specified class to store Parameters ---------- instances : (Iterable[object] or arbitrary number of isntances) The instances with the data you want to store readable : bool If True iterable attributes are written as readable values instead of directly storing iterables, by default False Notes ----- - iterables stored while readable == true cannot be deserialized to their original type Raises ------ ValueError If an instance is not the correct type Notes ----- - Any methods are not serialized, only attribtues Examples -------- ## Store some animal instances in a spreadsheet called 'animals.xlsx' ``` from ezspreadsheet import Spreadsheet class Animal(): def __init__(self, name:str, conservation_status:str): self.name = name self.conservation_status = conservation_status leopard_gecko = Animal('Leopard Gecko', 'Least Concern') philippine_eagle = Animal('Philippine Eagle', 'Threatened') with Spreadsheet('animals.xlsx', Animal) as output_sheet: output_sheet.store(leopard_gecko, philippine_eagle) ``` """ return self.spreadsheet.store(*instances, readable=readable)