最新消息: 电脑我帮您提供丰富的电脑知识,编程学习,软件下载,win7系统下载。

如何在Javascript for循环中添加服务器端延迟?

IT培训 admin 6浏览 0评论

如何在Javascript for循环中添加服务器端延迟?

我正在摆弄使用Node.js从电子商务网站获取数据。我使用Request来检索页面的DOM,并使用Cheerio来执行服务器端DOM选择。

const cheerio = require('cheerio');
const request = require('request');

// takes a URL, scrapes the page, and returns an object with the data
let scrapePage = (url) => {

    return new Promise((resolve, reject) => {

        request(url, (error, resp, body) => {

            if(error){
                reject(error);
            };

            let $ = cheerio.load(body); 
            let $url = url;
            let $price = $('#rt-mainbody > div > div.details > div.Data > div:nth-child(4) > div.description').text();

            let obj = {
                url: $url,
                price: $price
            }

            resolve(obj);

        });

    });

};

// Runs scrapePage in a loop
// There is a variable called arrayOfURLs defined elsewhere that contains 100s of URLs

for( let i = 0; i < arrayOfURLs.length; i++){
    scrapePage(arrayOfURLs[i])
        .then((obj) => {
            //write to a file
        })
        .catch((error) => {
        })
};

问题是我发送请求的服务器有时会发送回空白数据,我假设是因为我发送了太多请求而没有任何停顿。由于JS的异步性质,我很难弄清楚如何在循环的每次迭代之间添加有效延迟。仅仅以同步方式添加setTimeOut是不够的,因为setTimeOut本身是异步的,我在服务器上运行它,所以没有Window对象。

编辑

上面的代码是我正在处理的简化版本。整个代码是这样的:

app.js

const fs = require('fs');
const path = 'urls.txt';
const path2 = 'results.txt';
const scraper = require('./scraper');

let scrapePage = (url) => {
    scraper.scrapePage(url)
        .then((obj) => {
            // console.log('obj from the scraper with Promises was received');
            // console.log(obj);
            // console.log('writing obj to a file');
            fs.appendFile(path2, JSON.stringify(obj) + ', ', (error) => {
                if(error){
                    console.log(error);
                } else {
                    // console.log('Successfully wrote to ' + path2);
                }
            })
        })
        .catch((error) => {
            console.log('There was an error scraping obj: ');
            console.log(error);
        })  
}

fs.readFile(path, 'utf8', (err, data) => {

  if (err){
    throw err;
  };

  var urlArray = JSON.parse(data);

  // this returns an Unexpected Identifier error    
  // const results = await Promise.all(urlArray.map(scrapePage));

  // this returns an Unexpected Token Function error
  // async function scrapePages(){
  //    const results = await Promise.all(urlArray.map(scrapePage));
  // };

});

scraper.js

const request = require('request');
const cheerio = require('cheerio');

exports.scrapePage = (url) => {
    return new Promise((resolve, reject) => {
        request(url, (error, resp, body) => {
            if(error){
                reject(error);
            };

            let $ = cheerio.load(body); 
            let $url = url;

            let $price = $('#rt-mainbody > div > div.details > div.itemData > div:nth-child(4) > div.description').text();

            let obj = {
                url: $url,
                price: $price
            }

            resolve(obj);

        })
    })
}
回答如下:

在您发送服务器响应之前,我认为您不会等待您的承诺解决。您可以使用async / await完全消除for循环,例如

const results = await Promise.all(arrayOfURLs.map(scrapePage));

如何在Javascript for循环中添加服务器端延迟?

我正在摆弄使用Node.js从电子商务网站获取数据。我使用Request来检索页面的DOM,并使用Cheerio来执行服务器端DOM选择。

const cheerio = require('cheerio');
const request = require('request');

// takes a URL, scrapes the page, and returns an object with the data
let scrapePage = (url) => {

    return new Promise((resolve, reject) => {

        request(url, (error, resp, body) => {

            if(error){
                reject(error);
            };

            let $ = cheerio.load(body); 
            let $url = url;
            let $price = $('#rt-mainbody > div > div.details > div.Data > div:nth-child(4) > div.description').text();

            let obj = {
                url: $url,
                price: $price
            }

            resolve(obj);

        });

    });

};

// Runs scrapePage in a loop
// There is a variable called arrayOfURLs defined elsewhere that contains 100s of URLs

for( let i = 0; i < arrayOfURLs.length; i++){
    scrapePage(arrayOfURLs[i])
        .then((obj) => {
            //write to a file
        })
        .catch((error) => {
        })
};

问题是我发送请求的服务器有时会发送回空白数据,我假设是因为我发送了太多请求而没有任何停顿。由于JS的异步性质,我很难弄清楚如何在循环的每次迭代之间添加有效延迟。仅仅以同步方式添加setTimeOut是不够的,因为setTimeOut本身是异步的,我在服务器上运行它,所以没有Window对象。

编辑

上面的代码是我正在处理的简化版本。整个代码是这样的:

app.js

const fs = require('fs');
const path = 'urls.txt';
const path2 = 'results.txt';
const scraper = require('./scraper');

let scrapePage = (url) => {
    scraper.scrapePage(url)
        .then((obj) => {
            // console.log('obj from the scraper with Promises was received');
            // console.log(obj);
            // console.log('writing obj to a file');
            fs.appendFile(path2, JSON.stringify(obj) + ', ', (error) => {
                if(error){
                    console.log(error);
                } else {
                    // console.log('Successfully wrote to ' + path2);
                }
            })
        })
        .catch((error) => {
            console.log('There was an error scraping obj: ');
            console.log(error);
        })  
}

fs.readFile(path, 'utf8', (err, data) => {

  if (err){
    throw err;
  };

  var urlArray = JSON.parse(data);

  // this returns an Unexpected Identifier error    
  // const results = await Promise.all(urlArray.map(scrapePage));

  // this returns an Unexpected Token Function error
  // async function scrapePages(){
  //    const results = await Promise.all(urlArray.map(scrapePage));
  // };

});

scraper.js

const request = require('request');
const cheerio = require('cheerio');

exports.scrapePage = (url) => {
    return new Promise((resolve, reject) => {
        request(url, (error, resp, body) => {
            if(error){
                reject(error);
            };

            let $ = cheerio.load(body); 
            let $url = url;

            let $price = $('#rt-mainbody > div > div.details > div.itemData > div:nth-child(4) > div.description').text();

            let obj = {
                url: $url,
                price: $price
            }

            resolve(obj);

        })
    })
}
回答如下:

在您发送服务器响应之前,我认为您不会等待您的承诺解决。您可以使用async / await完全消除for循环,例如

const results = await Promise.all(arrayOfURLs.map(scrapePage));
发布评论

评论列表 (0)

  1. 暂无评论