Quick start: working with pandas

Serenytics lets you fill a pandas DataFrame from a data query and then upload a DataFrame to a Serenytics storage. This is useful for many ETL processes.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import serenytics
import pandas as pd

# Example script: load rows into a Serenytics storage, read them back as a
# pandas DataFrame, split the 'name' column into first/last name, and upload
# the result to a second storage.
client = serenytics.Client()

# get or create a storage
# Note: open the Data menu to see this new data source
# (you need to reload the page to refresh the list of sources)
source = client.get_or_create_storage_data_source_by_name(name='example 2 storage')

# fill it with 4 rows of data
input_data = [
    {'country': 'US', 'name': 'John Doe', 'quantity': 10},
    {'country': 'US', 'name': 'Mark Harvard', 'quantity': 2},
    {'country': 'HG', 'name': 'John von Neumann', 'quantity': 1},
    {'country': 'UK', 'name': 'David Wheeler', 'quantity': 8},
]

print('loading data in the storage...')
source.reload_data(new_data=input_data)

# get all the data from a source, as a pandas dataframe
df = source.get_data().get_as_dataframe()

# use pandas for some transformations
# Split on the first space only, so 'John von Neumann' becomes
# ('John', 'von Neumann'). `n` must be passed by keyword: pandas 2.0 made
# every argument of str.split except `pat` keyword-only.
# expand=True returns a DataFrame that keeps df's index, so the columns
# below line up row by row.
name_parts = df['name'].str.split(' ', n=1, expand=True)
df_clean = pd.DataFrame({
    'first name': name_parts[0],
    'last name': name_parts[1],
    'country': df['country'],
    'quantity': df['quantity'],
})

# load the DataFrame in a new source
source = client.get_or_create_storage_data_source_by_name(name='example 2 clean names')
source.reload_data_from_dataframe(df_clean)