# Read, query and write parquet files
# Import the sandbox object
from shapelets.data import sandbox
# Create a sandbox
# `playground` is the handle the later steps call to register parquet data
# (from_parquet) and to run SQL (from_sql).
playground = sandbox()
# Load data into the sandbox
# Register the parquet files matched by the glob pattern under the relation
# name "taxis" (presumably the name SQL statements use to reach this data).
taxis = playground.from_parquet(
    paths=["../Benchmarks/nyc-taxi/2009/**/*.parquet"],
    rel_name="taxis",
)
# Execute a query
# Build the query object first, then run it. The statement averages
# passenger_count for each (day, hour) pair extracted from dropoff_at.
# NOTE(review): the grouping keys are not selected, so the rows carry only
# the averages — confirm this is intended.
avg_passengers_query = playground.from_sql("""
SELECT
AVG(passenger_count)
FROM
taxis
GROUP BY
EXTRACT('day' from dropoff_at), EXTRACT('hour' from dropoff_at)
""")
result = avg_passengers_query.execute()
# Visualize / Export data
# Convert the executed result via to_pandas() (presumably a pandas DataFrame).
# As a bare expression, the value is only displayed in an interactive session
# or notebook; in a plain script it is discarded.
result.to_pandas()