import pandas as pd
from elementary_python_sdk.core.cloud.cloud_client import ElementaryCloudClient
from elementary_python_sdk.core.tests import (
boolean_test,
elementary_test_context,
expected_range,
)
from elementary_python_sdk.core.types.asset import TableAsset
# Define "unique ids" test
@boolean_test(
    name="unique_ids",
    description="All user IDs must be unique",
    column_name="id",
)
def test_unique_ids(df: pd.DataFrame) -> bool:
    """Report whether the non-null values of the ``id`` column are distinct.

    Args:
        df: Frame that must contain an ``id`` column.

    Returns:
        ``True`` when no non-null ``id`` value occurs more than once,
        ``False`` otherwise. Missing values are dropped before comparing,
        so repeated nulls never count as duplicates.

    Note:
        The function is wrapped by ``boolean_test``; how the result is
        recorded is defined by the SDK and is not visible in this file.
    """
    non_null_ids = df["id"].dropna().tolist()
    # A set keeps one copy of each value, so equal sizes mean no repeats.
    distinct_count = len(set(non_null_ids))
    return distinct_count == len(non_null_ids)
# Define "average age" test
@expected_range(
    name="average_age",
    min=18,
    max=50,
    description="Average age should be between 18 and 50",
    column_name="age",
)
def test_average_age(df: pd.DataFrame) -> float:
    """Compute the mean of the ``age`` column.

    Args:
        df: Frame that must contain an ``age`` column.

    Returns:
        The result of ``Series.mean()`` on ``df["age"]``.

    Note:
        The 18-50 bounds are passed to the ``expected_range`` decorator;
        the comparison itself happens inside the SDK, not in this function.
    """
    age_column = df["age"]
    return age_column.mean()
def main():
    """Run the sample checks on an in-memory users table and upload the results.

    Builds a small demo frame, describes the ``users`` table as a
    ``TableAsset``, runs both tests inside ``elementary_test_context`` and
    hands the resulting context to ``ElementaryCloudClient.send_to_cloud``.
    """
    # Create sample data.
    # The mean age of these rows is 56.375, which lies above the 18-50 range
    # declared on test_average_age -- presumably to show a failing check.
    sample_rows = {
        "id": [1, 2, 3, 4, 5, 6, 7, 8],
        "age": [23, 30, 46, 76, 76, 123, 45, 32],
        "country": ["Germany", "France", "Germany", "France", "", "Italy", "France", "Germany"],
    }
    users_df = pd.DataFrame(sample_rows)

    # Define the tested asset.
    asset = TableAsset(
        name="users",
        database_name="prod",
        schema_name="public",
        table_name="users",
        description="Users table",
        owners=["data-team"],
        tags=["pii", "production"],
        depends_on=["prod.public.customers", "prod.public.orders"],
    )

    # Connection settings. These are placeholders to be replaced by the user.
    # Python project identifier (used to deduplicate and identify assets).
    project_id = "my-python-project"
    api_key = "your-api-key"
    # "{env_id}" is never substituted in this function; the literal text is
    # passed to the client as-is, so it has to be edited by hand.
    ingest_url = "https://app.elementary-data.com/sdk-ingest/{env_id}/batch"

    # Run tests and report the results.
    with elementary_test_context(asset=asset) as ctx:
        test_average_age(users_df)
        test_unique_ids(users_df)

    # Initialize client and send results.
    # NOTE(review): the original indentation was lost; the upload is assumed
    # to happen after the context manager exits -- confirm against the SDK docs.
    client = ElementaryCloudClient(project_id, api_key, ingest_url)
    client.send_to_cloud(ctx)
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()