Web crawler job

Control Web Crawler Jobs

Changes the state of a web crawler job: pause a running job, resume a paused job so that it returns to running, or abort it.

POST
/web-crawler-jobs.control
Authorization: Bearer <token>

All the DevRev APIs require a token to authenticate the user. Provide Authorization: Bearer <TOKEN> as a header to every API request. How do I find my token?

In: header

action: web-crawler-jobs-control-request-action
Value in: "abort" | "pause" | "resume"
id: string

The ID of the job.

Format: id

Response Body

curl -X POST "https://api.devrev.ai/web-crawler-jobs.control" \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -d '{
    "action": "abort",
    "id": "string"
  }'
{
  "web_crawler_job": {
    "created_by": {
      "type": "dev_user",
      "display_id": "string",
      "id": "string",
      "display_name": "string",
      "display_picture": {
        "display_id": "string",
        "id": "string",
        "file": {
          "type": "string",
          "name": "string",
          "size": 0
        }
      },
      "email": "string",
      "full_name": "string",
      "state": "active"
    },
    "created_date": "2023-01-01T12:00:00.000Z",
    "display_id": "string",
    "id": "string",
    "modified_by": {
      "type": "dev_user",
      "display_id": "string",
      "id": "string",
      "display_name": "string",
      "display_picture": {
        "display_id": "string",
        "id": "string",
        "file": {
          "type": "string",
          "name": "string",
          "size": 0
        }
      },
      "email": "string",
      "full_name": "string",
      "state": "active"
    },
    "modified_date": "2023-01-01T12:00:00.000Z",
    "accept_regexs": [
      "string"
    ],
    "description": "string",
    "domain_names": [
      "string"
    ],
    "frequency": 0,
    "max_depth": 0,
    "no_parent": true,
    "notify_on_complete": true,
    "num_bytes": 0,
    "num_timeout_urls": 0,
    "num_urls_scraped": 0,
    "reject_regexs": [
      "string"
    ],
    "sitemap_index_urls": [
      "string"
    ],
    "sitemap_urls": [
      "string"
    ],
    "state": "aborted",
    "urls": [
      "string"
    ],
    "user_agent": "string"
  }
}
{
  "detail": "string",
  "message": "string",
  "type": "artifact_already_attached_to_a_parent",
  "existing_parent": "string",
  "is_same": true
}
{
  "detail": "string",
  "message": "string",
  "type": "unauthenticated"
}
{
  "detail": "string",
  "message": "string",
  "type": "forbidden"
}
{
  "detail": "string",
  "message": "string",
  "type": "not_found"
}
{
  "detail": "string",
  "message": "string",
  "type": "too_many_requests",
  "retry_after": 0
}
{
  "detail": "string",
  "message": "string",
  "type": "internal_error",
  "reference_id": "string"
}
{
  "detail": "string",
  "message": "string",
  "type": "service_unavailable"
}