ubercrawler/index.js
2023-08-30 16:24:08 -07:00

81 lines
2.0 KiB
JavaScript

import puppeteer from "puppeteer-core"
/**
 * Pauses an async flow for a given amount of time.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<undefined>} Promise that fulfills (with no value) once the
 *   timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms)
  })
}
/**
 * Waits for every entry of `promiseArray` to settle and returns the fulfilled
 * values in input order.
 *
 * Unlike Promise.all, nothing is reported until all inputs have settled, so
 * every failure is collected rather than just the first one.
 *
 * @param {Iterable<unknown>} promiseArray - Promises (or plain values) to wait on.
 * @returns {Promise<unknown[]>} Fulfilled values, in the same order as the input.
 * @throws {Array<{status: "rejected", reason: unknown}>} When at least one input
 *   rejected: the array of rejected settlement records, one per rejection.
 */
const settlePromises = async (promiseArray) => {
  const settledArray = await Promise.allSettled(promiseArray)
  // Filter on `status`, not on the truthiness of `reason`: a promise may reject
  // with undefined, null, 0 or "" and must still be reported as a failure.
  const errors = settledArray.filter((settled) => settled.status === "rejected")
  if (errors.length) {
    // The array of rejected records is thrown as-is; that is the shape this
    // function has always thrown, so existing catch sites keep working.
    throw errors
  }
  return settledArray.map((settled) => settled.value)
}
/**
 * Opens the Uber driver activity page in Chromium and waits until a request
 * whose URL contains "getWebActivityFeed" has been seen, capturing a copy of
 * that request's headers.
 *
 * While the page loads, requests for .png/.jpg/.ico URLs are aborted and every
 * other request is logged and allowed through. The body of any response whose
 * request URL contains "desiredrequest.json" is printed.
 *
 * Failures after the browser has launched are logged as "Critical failure"
 * rather than rethrown, and the browser is always closed. A failure to launch
 * the browser, or to close it, rejects the returned promise.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Polling budget for the header capture: 100 checks, 500 ms apart.
  const pollIntervalMs = 500
  const maxPollAttempts = 100

  const browser = await puppeteer.launch({
    headless: false,
    ignoreHTTPSErrors: true,
    executablePath: "/usr/bin/chromium",
  })
  try {
    console.log("Opened Browser")
    const page = await browser.newPage()
    console.log("Opened Page")
    await page.setRequestInterception(true)

    let usefulRequestHeaders = {}
    page.on("request", (request) => {
      const url = request.url()
      if (url.includes("getWebActivityFeed")) {
        // Keep a copy so the polling loop below can see the headers.
        usefulRequestHeaders = { ...request.headers() }
        console.log("hello!", usefulRequestHeaders)
      }
      // With interception enabled a request may only be resolved once; skip it
      // if something else has already continued/aborted/responded to it.
      if (request.isInterceptResolutionHandled()) {
        return
      }
      const isImage = [".png", ".jpg", ".ico"].some((ext) => url.endsWith(ext))
      if (isImage) {
        request.abort()
      } else {
        console.log("request url", url)
        request.continue()
      }
    })

    page.on("response", async (response) => {
      if (!response.request().url().includes("desiredrequest.json")) {
        return
      }
      // Nothing awaits this handler, so a failed body read must be caught here
      // or it becomes an unhandled promise rejection.
      try {
        console.log(await response.text())
      } catch (err) {
        console.error("Failed to read response body", err)
      }
    })

    await page.goto("https://drivers.uber.com/earnings/activities")
    console.log("Went to Page")

    // The headers count as captured once they include a content-type entry.
    let isCaptured = false
    for (let attempt = 0; attempt < maxPollAttempts; attempt++) {
      await sleep(pollIntervalMs)
      if (usefulRequestHeaders["content-type"]) {
        console.log("success")
        isCaptured = true
        break
      }
    }
    if (!isCaptured) {
      // Throw a real Error (not a bare string) so the log carries a stack trace.
      throw new Error("fail")
    }
    console.log("after sleep loop")
  } catch (err) {
    console.error("Critical failure", err)
  } finally {
    await browser.close()
  }
}
// main() logs failures that happen after the browser has launched, but
// puppeteer.launch() and the browser.close() in its finally block run outside
// that catch, so the returned promise can still reject. Handle it here instead
// of leaving the promise floating.
main().catch((err) => {
  console.error("Critical failure", err)
  process.exitCode = 1
})