ubercrawler/index.js

81 lines
2.0 KiB
JavaScript
Raw Normal View History

2023-08-30 22:23:10 +00:00
import puppeteer from "puppeteer-core"
/**
 * Pauses an async flow for a given duration.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to `setTimeout`.
 * @returns {Promise<void>} Promise that fulfils (with no value) once the timer fires.
 */
function sleep(ms) {
  const timerElapsed = new Promise((done) => {
    setTimeout(done, ms)
  })
  return timerElapsed
}
2023-08-30 23:02:26 +00:00
/**
 * Waits for every entry of `promiseArray` to settle and returns the fulfilled
 * values in input order.
 *
 * @param {Iterable<unknown>} promiseArray - Promises (or plain values) to wait on.
 * @returns {Promise<unknown[]>} Fulfilled values, in the same order as the input.
 * @throws {PromiseRejectedResult[]} If at least one entry rejected: the array of
 *   `{ status: "rejected", reason }` records produced by `Promise.allSettled`.
 */
const settlePromises = async (promiseArray) => {
  const settledArray = await Promise.allSettled(promiseArray)
  // Check `status` instead of the truthiness of `reason`: a promise may reject
  // with a falsy reason (undefined, 0, ""), which must still count as a failure.
  const errors = settledArray.filter((settled) => settled.status === "rejected")
  if (errors.length) {
    // The array of rejection records is thrown as-is (not wrapped in an Error)
    // so that any caller already catching this array keeps working.
    throw errors
  }
  return settledArray.map((settled) => settled.value)
}
2023-08-30 22:23:10 +00:00
/**
 * Launches Chromium, opens the Uber driver activities page and waits until a
 * request whose URL contains "getWebActivityFeed" has been seen, capturing
 * that request's headers.
 *
 * While the page loads, requests whose URL ends in .png/.jpg/.ico are aborted
 * and every other request is logged and allowed to continue. Response bodies
 * for URLs containing "desiredrequest.json" are logged.
 *
 * Failures after the browser has launched are logged as "Critical failure"
 * and not rethrown; the browser is always closed afterwards.
 *
 * @returns {Promise<void>} Rejects only if launching or closing the browser fails.
 */
async function main() {
  // Launch stays outside the try block: if it rejects there is no browser to
  // close, and the rejection propagates to the caller.
  const browser = await puppeteer.launch({
    headless: false,
    ignoreHTTPSErrors: true,
    executablePath: "/usr/bin/chromium",
  })
  try {
    console.log("Opened Browser")
    const page = await browser.newPage()
    console.log("Opened Page")
    await page.setRequestInterception(true)

    // Reassigned by the request handler once the target request is observed.
    let usefulRequestHeaders = {}

    page.on("request", (request) => {
      const url = request.url()
      if (url.includes("getWebActivityFeed")) {
        usefulRequestHeaders = { ...request.headers() }
        console.log("hello!", usefulRequestHeaders)
      }
      // Another handler may already have resolved this interception.
      if (request.isInterceptResolutionHandled()) {
        return
      }
      // abort()/continue() return promises; attach a handler so a rejection
      // is logged rather than surfacing as an unhandled rejection.
      const reportInterceptFailure = (err) => {
        console.error("Request interception failed", url, err)
      }
      const isImage = [".png", ".jpg", ".ico"].some((ext) => url.endsWith(ext))
      if (isImage) {
        request.abort().catch(reportInterceptFailure)
      } else {
        console.log("request url", url)
        request.continue().catch(reportInterceptFailure)
      }
    })

    page.on("response", async (response) => {
      const request = response.request()
      if (!request.url().includes("desiredrequest.json")) {
        return
      }
      // This handler's promise is not awaited by the emitter, so body-read
      // failures must be caught here.
      try {
        const text = await response.text()
        console.log(text)
      } catch (err) {
        console.error("Failed to read response body", request.url(), err)
      }
    })

    await page.goto("https://drivers.uber.com/earnings/activities")
    console.log("Went to Page")

    // Poll up to 100 times, 500 ms apart, for the captured headers; the
    // presence of a "content-type" header is the signal that they were captured.
    const maxAttempts = 100
    const pollIntervalMs = 500
    let captured = false
    for (let attempt = 0; attempt < maxAttempts && !captured; attempt++) {
      await sleep(pollIntervalMs)
      captured = Boolean(usefulRequestHeaders["content-type"])
    }
    if (!captured) {
      throw new Error(
        "Timed out waiting for getWebActivityFeed request headers"
      )
    }
    console.log("success")
    console.log("after sleep loop")
  } catch (err) {
    console.error("Critical failure", err)
  } finally {
    await browser.close()
  }
}
// main() can still reject (browser launch or close failure), so handle the
// returned promise instead of leaving it floating.
main().catch((err) => {
  console.error("Unhandled failure in main", err)
  process.exitCode = 1
})