POST /datasets
Create a new dataset
curl --request POST \
  --url https://app.getcensus.com/api/v1/datasets \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '{
  "type": "entity_resolution",
  "output_type": "merged",
  "match_rules": {
    "type": "group",
    "operator": "or",
    "rules": [
      {
        "type": "rule",
        "column": "email",
        "match_method": {
          "type": "fuzzy",
          "threshold": "med"
        }
      },
      {
        "type": "rule",
        "column": "name",
        "match_method": {
          "type": "exact"
        }
      }
    ]
  },
  "merge_rules": {
    "default_row_rules": [
      {
        "type": "string",
        "column": "email",
        "condition": {
          "type": "longest"
        }
      },
      {
        "type": "number",
        "column": "age",
        "condition": {
          "type": "highest"
        }
      }
    ],
    "column_overrides": [
      {
        "type": "row_waterfall_condition",
        "column": "name",
        "rules": [
          {
            "type": "string",
            "column": "name",
            "condition": {
              "type": "lexicographically_first"
            }
          }
        ]
      }
    ]
  },
  "source_dataset_ids": [
    123,
    456
  ],
  "schema_mapping": {
    "email": [
      {
        "column_name": "email",
        "dataset_id": 123
      },
      {
        "column_name": "Email",
        "dataset_id": 456
      }
    ],
    "name": [
      {
        "column_name": "name",
        "dataset_id": 456
      }
    ],
    "id": [
      {
        "column_name": "id",
        "dataset_id": 456
      },
      {
        "column_name": "ID",
        "dataset_id": 123
      }
    ]
  },
  "mode": {
    "type": "triggered",
    "triggers": {
      "schedule": {
        "frequency": "daily",
        "hour": 10,
        "minute": 30
      }
    }
  },
  "name": "Users",
  "description": "This dataset contains golden user records"
}'
{
  "status": "created",
  "data": {
    "type": "entity_resolution",
    "output_type": "merged",
    "match_rules": {
      "type": "group",
      "operator": "or",
      "rules": [
        {
          "type": "rule",
          "column": "email",
          "match_method": {
            "type": "fuzzy",
            "threshold": "med"
          }
        },
        {
          "type": "rule",
          "column": "name",
          "match_method": {
            "type": "exact"
          }
        }
      ]
    },
    "merge_rules": {
      "default_row_rules": [
        {
          "type": "string",
          "column": "email",
          "condition": {
            "type": "longest"
          }
        },
        {
          "type": "number",
          "column": "age",
          "condition": {
            "type": "highest"
          }
        }
      ],
      "column_overrides": [
        {
          "type": "row_waterfall_condition",
          "column": "name",
          "rules": [
            {
              "type": "string",
              "column": "name",
              "condition": {
                "type": "lexicographically_first"
              }
            }
          ]
        }
      ]
    },
    "source_dataset_ids": [
      123,
      456
    ],
    "schema_mapping": {
      "email": [
        {
          "column_name": "email",
          "dataset_id": 123
        },
        {
          "column_name": "Email",
          "dataset_id": 456
        }
      ],
      "name": [
        {
          "column_name": "name",
          "dataset_id": 456
        }
      ],
      "id": [
        {
          "column_name": "id",
          "dataset_id": 456
        },
        {
          "column_name": "ID",
          "dataset_id": 123
        }
      ]
    },
    "mode": {
      "type": "triggered",
      "triggers": {
        "schedule": {
          "frequency": "daily",
          "hour": 10,
          "minute": 30
        }
      }
    },
    "census_store": {
      "namespace": "datasets",
      "table_name": "deduplicated_users"
    },
    "status": {
      "overall_status": "Failed",
      "last_refresh": "2023-11-07T05:31:56Z",
      "sync_configurations": [
        {
          "sync_id": 123
        }
      ],
      "error": {
        "title": "Sync failed",
        "details": "The sync encountered an error while processing data."
      }
    },
    "name": "Users",
    "id": 157,
    "resource_identifier": "dataset:deduplicated_users",
    "description": "This dataset contains golden user records",
    "created_at": "2023-11-07T05:31:56Z",
    "updated_at": "2023-11-07T05:31:56Z",
    "cached_record_count": 123
  }
}

Authorizations

Authorization (string, header, required)

Bearer token for the current workspace. It can be retrieved from the workspace's settings page.

Body

application/json

Response

201
application/json

Dataset created successfully

The response is of type object.