[components] Scrapeless - update actions #17493

Merged

61 changes: 32 additions & 29 deletions components/scrapeless/actions/crawler/crawler.mjs
@@ -4,7 +4,7 @@ export default {
   key: "scrapeless-crawler",
   name: "Crawler",
   description: "Crawl any website at scale and say goodbye to blocks. [See the documentation](https://apidocs.scrapeless.com/api-17509010).",
-  version: "0.0.2",
+  version: "0.0.3",
   type: "action",
   props: {
     scrapeless,
@@ -26,9 +26,34 @@ export default {
       reloadProps: true,
     },
   },
+  additionalProps() {
+    const props = {
+      url: {
+        type: "string",
+        label: "URL to Crawl",
+        description: "If you want to crawl in batches, please refer to the SDK of the document",
+      },
+    };
+
+    if (this.apiServer === "crawl") {
+      return {
+        ...props,
+        limitCrawlPages: {
+          type: "integer",
+          label: "Number Of Subpages",
+          default: 5,
+          description: "Max number of results to return",
+        },
+      };
+    }
+
+    return props;
+  },
   async run({ $ }) {
     const {
-      scrapeless, apiServer, ...inputProps
+      scrapeless,
+      apiServer,
+      ...inputProps
     } = this;
 
     const browserOptions = {
@@ -40,50 +65,28 @@ export default {
 
     let response;
 
+    const client = await scrapeless._scrapelessClient();
+
     if (apiServer === "crawl") {
       response =
-        await scrapeless._scrapelessClient().scrapingCrawl.crawl.crawlUrl(inputProps.url, {
+        await client.scrapingCrawl.crawl.crawlUrl(inputProps.url, {
           limit: inputProps.limitCrawlPages,
           browserOptions,
         });
     }
 
     if (apiServer === "scrape") {
       response =
-        await scrapeless._scrapelessClient().scrapingCrawl.scrape.scrapeUrl(inputProps.url, {
+        await client.scrapingCrawl.scrape.scrapeUrl(inputProps.url, {
           browserOptions,
         });
     }
 
     if (response?.status === "completed" && response?.data) {
-      $.export("$summary", `Successfully retrieved crawling results for ${inputProps.url}`);
+      $.export("$summary", `Successfully retrieved crawling results for \`${inputProps.url}\``);
       return response.data;
     } else {
       throw new Error(response?.error || "Failed to retrieve crawling results");
     }
   },
-  additionalProps() {
-    const { apiServer } = this;
-
-    const props = {};
-
-    if (apiServer === "crawl" || apiServer === "scrape") {
-      props.url = {
-        type: "string",
-        label: "URL to Crawl",
-        description: "If you want to crawl in batches, please refer to the SDK of the document",
-      };
-    }
-
-    if (apiServer === "crawl") {
-      props.limitCrawlPages = {
-        type: "integer",
-        label: "Number Of Subpages",
-        default: 5,
-        description: "Max number of results to return",
-      };
-    }
-
-    return props;
-  },
 };
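
Reviewer note: the relocated additionalProps() now always exposes url, and adds limitCrawlPages only when apiServer is "crawl", replacing the old bottom-of-file version that gated url behind an apiServer check. A minimal standalone sketch of how it resolves for each mode (prop shapes copied from the diff; the harness itself is illustrative and not part of the PR):

// Illustrative harness, not PR code: mirrors the new additionalProps().
function additionalProps() {
  const props = {
    url: {
      type: "string",
      label: "URL to Crawl",
      description: "If you want to crawl in batches, please refer to the SDK of the document",
    },
  };

  if (this.apiServer === "crawl") {
    return {
      ...props,
      limitCrawlPages: {
        type: "integer",
        label: "Number Of Subpages",
        default: 5,
        description: "Max number of results to return",
      },
    };
  }

  return props;
}

// "crawl" mode surfaces both props; "scrape" mode surfaces url only.
console.log(Object.keys(additionalProps.call({ apiServer: "crawl" })));  // [ 'url', 'limitCrawlPages' ]
console.log(Object.keys(additionalProps.call({ apiServer: "scrape" }))); // [ 'url' ]
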
components/scrapeless/actions/get-scrape-result/get-scrape-result.mjs
@@ -4,7 +4,7 @@ export default {
   key: "scrapeless-get-scrape-result",
   name: "Get Scrape Result",
   description: "Retrieve the result of a completed scraping job. [See the documentation](https://apidocs.scrapeless.com/api-11949853)",
-  version: "0.0.2",
+  version: "0.0.3",
   type: "action",
   props: {
     scrapeless,
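
Reviewer note on the run() change in crawler.mjs above: _scrapelessClient() is now called once and the resulting client is reused by both branches, instead of being re-created inside each call chain. A condensed sketch of the new control flow (method names taken from the diff; the standalone wrapper and placeholder browserOptions are illustrative only):

// Illustrative wrapper, not PR code: shows the single-client pattern.
async function runCrawler({ scrapeless, apiServer, url, limitCrawlPages }) {
  const client = await scrapeless._scrapelessClient(); // created once, shared below
  const browserOptions = {}; // elided in the diff; placeholder here

  if (apiServer === "crawl") {
    return client.scrapingCrawl.crawl.crawlUrl(url, {
      limit: limitCrawlPages,
      browserOptions,
    });
  }
  return client.scrapingCrawl.scrape.scrapeUrl(url, { browserOptions });
}
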