Introduction

I had some SEO backlink work done by a freelancer. It came to around 3000 links, and the links freelancers provide are often broken. So I wanted to test every single one of them, to check that each URL is actually live and contains my URL or backlink.

NodeJs automation

I wrote a simple Node.js automation that reads a list of URLs from a text file and, one by one, checks that each URL is reachable and contains the backlink.

Input

  1. A text file containing the list of URLs (one per line)
  2. My website name: xyz.com

Code

Following is the directory structure:

project
    - app.js
    - src/http/url_checker.js
    - package.json

package.json

{
  "name": "check_links_seo",
  "version": "1.0.0",
  "description": "For checking link validity work given by freelancers",
  "main": "app.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "Gorav Singal",
  "license": "ISC",
  "dependencies": {
    "async": "^3.2.0",
    "cheerio": "^1.0.0-rc.3",
    "request": "^2.88.2",
    "request-promise": "^4.2.5"
  }
}

app.js

const urlChecker = require('./src/http/url_checker');
const fs = require('fs');

// urls.txt holds one URL per line. Trim every line and drop the empty ones so
// that a trailing newline or Windows "\r\n" line endings do not produce
// bogus entries that would be reported as failed links.
const urls = fs.readFileSync('urls.txt', 'utf8')
    .split('\n')
    .map(line => line.trim())
    .filter(line => line !== '');

//remember to put your website here
const myWeb = 'XYZ.com';

// No top-level `return` needed: the chain ends in .catch(), so the promise is
// fully handled and the process exits once the checks are done.
urlChecker.checkYourLinkInUrls(urls, myWeb)
    .then(() => {
        console.log('Successful finished...');
    })
    .catch(err => {
        console.error(err);
    });

url_checker.js

const rp = require('request-promise');
const cheerio = require('cheerio');
const async = require('async');

/**
 * Checks whether a set of web pages mention a given website (backlink check).
 */
class UrlChecker {
    /**
     * Fetches every URL, strictly one at a time, and logs `success <url>` or
     * `failed <url>` depending on whether the page mentions `desiredWebsite`.
     *
     * Entries are trimmed first and blank entries are skipped, so a list
     * produced by splitting a text file on newlines can be passed as-is.
     *
     * @param {Iterable<string>} urls - Page URLs to check.
     * @param {string} desiredWebsite - Website to look for, e.g. `xyz.com`.
     * @returns {Promise<void>} Resolves once every URL has been checked.
     */
    async checkYourLinkInUrls(urls, desiredWebsite) {
        for (const rawUrl of urls) {
            const url = String(rawUrl).trim();
            if (url === '') {
                continue;
            }

            const found = await this.__checkYourLinkInUrl(url, desiredWebsite);
            console.log(found ? 'success' : 'failed', url);
        }
    }

    /**
     * Downloads one page and reports whether its body mentions the website.
     *
     * The comparison is case-insensitive because host names are: a page
     * linking to `xyz.com` must match a configured `XYZ.com`.
     *
     * @param {string} url - Page to download.
     * @param {string} desiredWebsite - Website to look for in the page body.
     * @returns {Promise<boolean>} `true` if the page mentions the website;
     *     `false` if it does not or if the page could not be fetched.
     */
    async __checkYourLinkInUrl(url, desiredWebsite) {
        try {
            const html = await rp(url);

            // Cheap check: plain substring search over the raw page body.
            //
            // Stricter (but more expensive) alternative: parse the page and
            // inspect the href of every anchor instead.
            //
            // const $ = cheerio.load(html);
            // let found = false;
            // $('a').each(function (i, link) {
            //     const web = $(link).attr('href');
            //     if (web && web.startsWith(desiredWebsite)) {
            //         found = true;
            //         return false; // stop iterating
            //     }
            // });
            // return found;
            return String(html).toLowerCase()
                .includes(String(desiredWebsite).toLowerCase());
        } catch (err) {
            // Best effort: an unreachable page counts as a failed link, but say
            // why so it can be told apart from a page that lacks the backlink.
            console.error('Error in url', url, err && err.message ? err.message : err);
            return false;
        }
    }
}

// Export a ready-made instance rather than the class, so that requiring this
// module yields an object whose methods can be called directly.
module.exports = new UrlChecker();

Note: In the code above, I'm only checking whether the given web page mentions my website. The commented-out code goes further and checks the actual links, but that approach is more expensive in both computation and memory.

Run code

node app.js

Thanks for reading…