# Full Example
# Fundamentals: what this data contract is about.
# Domain
domain: 'seller'
# Data product name
dataProduct: 'my quantum'
# Version (follows semantic versioning)
version: '1.1.0'
status: 'active'
id: '53581432-6c55-4ba2-a65f-72344a91553a'
# Lots of information
# Description of the contract: purpose, limitations and intended usage.
description:
  purpose: Views built on top of the seller tables.
  limitations: Data based on seller perspective, no buyer information
  usage: Predict sales over time
  # Links to external documents; each entry carries a type and a URL.
  authoritativeDefinitions:
    - type: privacy-statement
      url: https://example.com/gdpr.pdf
# Tenant and document kind
tenant: 'ClimateQuantumInc'
kind: 'DataContract'
# Standard version (follows semantic versioning)
apiVersion: 'v3.0.1'
# Infrastructure & servers
# List of servers; this contract declares a single entry of type postgres.
servers:
  - server: my-postgres
    type: postgres
    host: localhost
    port: 5432
    database: pypl-edw
    # Schema on the server; must stay nested inside the server entry so it
    # is distinct from the contract's top-level `schema` section.
    schema: pp_access_views
# Dataset, schema and quality
# One schema object (`tbl`) with three properties, a property-level quality
# rule on `rcvr_cntry_code`, and an object-level quality rule.
schema:
  - name: tbl
    physicalName: tbl_1
    physicalType: table
    description: Provides core payment metrics
    authoritativeDefinitions:
      - url: https://catalog.data.gov/dataset/air-quality
        type: businessDefinition
      - url: https://youtu.be/jbY1BKFj9ec
        type: videoTutorial
    tags: ['finance', 'payments']
    dataGranularityDescription: Aggregation on columns txn_ref_dt, pmt_txn_id
    # Properties (columns) of the object.
    # NOTE(review): -1 appears to mean "not applicable" for
    # primaryKeyPosition / partitionKeyPosition — confirm against the standard.
    properties:
      - name: txn_ref_dt
        primaryKey: false
        primaryKeyPosition: -1
        businessName: transaction reference date
        logicalType: date
        physicalType: date
        required: false
        description: Reference date for transaction
        partitioned: true
        partitionKeyPosition: 1
        criticalDataElement: false
        tags: []
        classification: public
        transformSourceObjects:
          - table_name_1
          - table_name_2
          - table_name_3
        # Folded scalar: the three lines below are joined with single spaces
        # into one line of transformation logic.
        transformLogic: >-
          sel t1.txn_dt as txn_ref_dt
          from table_name_1 as t1, table_name_2 as t2, table_name_3 as t3
          where t1.txn_dt=date-3
        transformDescription: defines the logic in business terms; logic for dummies
        # Quoted so the sample values stay strings instead of parsed dates.
        examples:
          - "2022-10-03"
          - "2020-01-28"
        customProperties:
          - property: anonymizationStrategy
            value: 'none'  # the string "none", not a YAML null
      - name: rcvr_id
        primaryKey: true
        primaryKeyPosition: 1
        businessName: receiver id
        logicalType: string
        physicalType: varchar(18)
        required: false
        description: A description for column rcvr_id.
        partitioned: false
        partitionKeyPosition: -1
        criticalDataElement: false
        tags: ['uid']
        classification: restricted
      - name: rcvr_cntry_code
        primaryKey: false
        primaryKeyPosition: -1
        businessName: receiver country code
        logicalType: string
        physicalType: varchar(2)
        required: false
        description: Country code
        partitioned: false
        partitionKeyPosition: -1
        criticalDataElement: false
        tags: []
        classification: public
        authoritativeDefinitions:
          - url: https://collibra.com/asset/742b358f-71a5-4ab1-bda4-dcdba9418c25
            type: businessDefinition
          - url: https://github.com/myorg/myrepo
            type: transformationImplementation
          - url: jdbc:postgresql://localhost:5432/adventureworks/tbl_1/rcvr_cntry_code
            type: implementation
        encryptedName: rcvr_cntry_code_encrypted
        # Property-level quality rules (attached to rcvr_cntry_code only).
        quality:
          - rule: nullCheck
            description: column should not contain null values
            dimension: completeness  # dropdown 7 values
            type: library
            severity: error
            businessImpact: operational
            schedule: '0 20 * * *'
            scheduler: cron
            customProperties:
              # FIELD_NAME and COMPARE_TO carry no value in this example;
              # written as explicit nulls instead of bare empty values.
              - property: FIELD_NAME
                value: null
              - property: COMPARE_TO
                value: null
              - property: COMPARISON_TYPE
                value: Greater than
    # Object-level quality rules (attached to `tbl` as a whole).
    quality:
      - rule: countCheck
        type: library
        description: Ensure row count is within expected volume range
        dimension: completeness
        method: reconciliation
        severity: error
        businessImpact: operational
        schedule: '0 20 * * *'
        scheduler: cron
        customProperties:
          - property: business-key
            value:
              - txn_ref_dt
              - rcvr_id
# Pricing
# Amount, currency and unit are attributes of the single `price` mapping.
# NOTE(review): reads as 9.95 USD per megabyte — confirm.
price:
  priceAmount: 9.95
  priceCurrency: USD
  priceUnit: megabyte
# Team
# One entry per team member. Dates are quoted so they stay strings.
team:
  # ceastwood has a dateOut and names mhopper as replacement.
  - username: ceastwood
    role: Data Scientist
    dateIn: "2022-08-02"
    dateOut: "2022-10-01"
    replacedByUsername: mhopper
  - username: mhopper
    role: Data Scientist
    dateIn: "2022-10-01"
  - username: daustin
    role: Owner
    comment: Keeper of the grail
    dateIn: "2022-10-01"
# Roles
# One entry per role, with its access level and first/second level approvers.
roles:
  - role: microstrategy_user_opr
    access: read
    firstLevelApprovers: Reporting Manager
    secondLevelApprovers: 'mandolorian'
  - role: bq_queryman_user_opr
    access: read
    firstLevelApprovers: Reporting Manager
    secondLevelApprovers: 'na'
  - role: risk_data_access_opr
    access: read
    firstLevelApprovers: Reporting Manager
    secondLevelApprovers: 'dathvador'
  - role: bq_unica_user_opr
    access: write
    firstLevelApprovers: Reporting Manager
    secondLevelApprovers: 'mickey'
# SLA
# `slaDefaultElement` is a top-level key; `slaProperties` is a list in which
# each entry has a property name, a value and optional unit/element/driver.
# NOTE(review): the schema object in this contract is named `tbl`, while the
# elements below reference `tab1` — confirm which name is intended.
slaDefaultElement: tab1.txn_ref_dt
slaProperties:
  - property: latency  # Property, see list of values in DP QoS
    value: 4
    unit: d  # d, day, days for days; y, yr, years for years
    # This would not be needed as it is the same table.column as the
    # default one
    element: tab1.txn_ref_dt
  - property: generalAvailability
    value: "2022-05-12T09:30:10-08:00"
  - property: endOfSupport
    value: "2032-05-12T09:30:10-08:00"
  - property: endOfLife
    value: "2042-05-12T09:30:10-08:00"
  - property: retention
    value: 3
    unit: y
    element: tab1.txn_ref_dt
  - property: frequency
    value: 1
    valueExt: 1
    unit: d
    element: tab1.txn_ref_dt
  - property: timeOfAvailability
    # Quoted so the digits-and-colons value is always read as a string.
    value: "09:00-08:00"
    element: tab1.txn_ref_dt
    # Describes the importance of the SLA:
    # [regulatory|analytics|operational|...]
    driver: regulatory
  - property: timeOfAvailability
    value: "08:00-08:00"
    element: tab1.txn_ref_dt
    driver: analytics
# Support
# One entry per support channel.
support:
  # Channel name is quoted because a leading `#` would start a comment.
  - channel: '#product-help'  # Simple Slack communication channel
    tool: slack
    url: https://aidaug.slack.com/archives/C05UZRSBKLY
  - channel: datacontract-ann  # Simple distribution list
    tool: email
    url: mailto:datacontract-ann@bitol.io
  - channel: Feedback  # Product Feedback
    description: General Product Feedback (Public)
    url: https://product-feedback.com
# Tags (top-level list with a single entry)
tags: ['transactions']
# Custom properties
# List of property/value pairs; each `value` belongs to the entry above it.
customProperties:
  - property: refRulesetName
    value: gcsc.ruleset.name
  - property: somePropertyName
    value: property.value
  - property: dataprocClusterName  # Used for specific applications like Elevate
    # One-element list containing the string "cluster name".
    value: ['cluster name']
# Top-level key, not a custom property; quoted to stay a string.
contractCreatedTs: "2022-11-15T02:59:43+00:00"