123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190 |
- "use strict";
- var fs = require("fs");
- var crypto = require("crypto");
- var http = require("http");
- var https = require("https");
/**
 * Rewrites a Web Archive snapshot URL so that its numeric snapshot id is
 * followed by `if_`.
 *
 * Only URLs of the form
 * `http(s)://web.archive.org/web/<digits>/http(s)://...` are changed; the
 * result is `http(s)://web.archive.org/web/<digits>if_/http(s)://...`.
 * Any other URL (including one that already carries `if_`) is returned
 * unchanged.
 *
 * NOTE(review): presumably the `if_` form makes the archive serve the raw
 * captured file instead of an HTML wrapper page — confirm against the
 * Web Archive documentation.
 *
 * @param {string} url - The URL that is about to be downloaded.
 * @returns {string} The rewritten URL, or `url` itself when it does not match.
 */
function rewriteWebArchiveUrl(url) {
  // No `g` flag: the pattern is matched exactly once, and a global regex
  // would carry `lastIndex` state between `exec` calls if it were ever
  // hoisted out of this function.
  const webArchiveRegex =
    /(^https?:\/\/web\.archive\.org\/web\/)(\d+)(\/https?:\/\/.+)/;
  const urlParts = webArchiveRegex.exec(url);
  if (!urlParts) {
    return url;
  }
  const [, prefix, snapshotId, target] = urlParts;
  return `${prefix}${snapshotId}if_${target}`;
}
/**
 * Downloads `url` into `file` over HTTP or HTTPS, following 301/302/307/308
 * redirects.
 *
 * The URL is first passed through `rewriteWebArchiveUrl`. The protocol module
 * is chosen from the URL scheme (`https://` -> `https`, anything else ->
 * `http`).
 *
 * @param {string} file - Path of the file to write the response body to.
 * @param {string} url - The URL to fetch.
 * @param {function} callback - Invoked exactly once: with no argument on
 *   success, or with an error (a string such as `"HTTP 404"` /
 *   `"Too many redirects"`, or the `Error` emitted by the request, response
 *   or write stream) on failure.
 * @param {number} [redirects] - Number of redirects followed so far; used
 *   internally by the recursive calls and normally omitted by callers.
 */
function downloadFile(file, url, callback, redirects) {
  const MAX_REDIRECTS = 10;
  const redirectCount = redirects || 0;

  // The request, the response and the write stream can each fail; only the
  // first outcome is reported so the caller never sees a second callback.
  let isSettled = false;
  const settle = (err) => {
    if (isSettled) {
      return;
    }
    isSettled = true;
    callback(err);
  };

  const targetUrl = rewriteWebArchiveUrl(url);
  const protocol = /^https:\/\//.test(targetUrl) ? https : http;
  protocol
    .get(targetUrl, (response) => {
      const { statusCode } = response;
      const isRedirect =
        statusCode === 301 ||
        statusCode === 302 ||
        statusCode === 307 ||
        statusCode === 308;

      if (isRedirect) {
        // The redirect body is never read; drain it so the socket is freed.
        response.resume();
        if (redirectCount > MAX_REDIRECTS) {
          settle("Too many redirects");
          return;
        }
        const { location } = response.headers;
        if (!location) {
          settle(`HTTP ${statusCode} redirect without Location header`);
          return;
        }
        let redirectTo;
        try {
          // `Location` may be relative; resolve it against the current URL.
          redirectTo = new URL(location, targetUrl).href;
        } catch (err) {
          settle(`Invalid redirect location: ${location}`);
          return;
        }
        downloadFile(file, redirectTo, settle, redirectCount + 1);
        return;
      }

      if (statusCode !== 200) {
        response.resume();
        settle("HTTP " + statusCode);
        return;
      }

      const stream = fs.createWriteStream(file);
      stream.on("error", settle);
      // A connection dropped mid-body surfaces on the response, not on the
      // request, so it needs its own handler.
      response.on("error", (err) => {
        stream.destroy();
        settle(err);
      });
      stream.on("finish", () => {
        settle();
      });
      response.pipe(stream);
    })
    .on("error", settle);
}
/**
 * Downloads, one at a time, every manifest entry that is marked as a link and
 * whose target file does not exist yet.
 *
 * For each such entry the URL is read from the sibling `<file>.link` file
 * (trailing whitespace removed) and fetched with `downloadFile`. When a
 * download fails, an empty `<file>` is written together with a
 * `<file>.error` file holding the error text, and processing continues with
 * the next entry.
 *
 * @param {Array<Object>} manifest - Entries with a `file` path and an
 *   optional truthy `link` flag.
 * @param {function} callback - Called with no arguments once every pending
 *   download has been attempted.
 */
function downloadManifestFiles(manifest, callback) {
  const links = manifest
    .filter((item) => item.link && !fs.existsSync(item.file))
    .map(({ file }) => {
      const url = fs.readFileSync(`${file}.link`).toString().trimEnd();
      return { file, url };
    });

  let index = 0;

  function downloadNext() {
    if (index >= links.length) {
      callback();
      return;
    }
    const { file, url } = links[index++];
    console.log(`Downloading ${url} to ${file}...`);
    downloadFile(file, url, (err) => {
      if (err) {
        console.error(`Error during downloading of ${url}: ${err}`);
        fs.writeFileSync(file, "");
        // `err` may be an Error object (stream/socket failures);
        // writeFileSync only accepts strings and buffers, so stringify it.
        fs.writeFileSync(`${file}.error`, String(err));
      }
      downloadNext();
    });
  }

  downloadNext();
}
/**
 * Streams `file` through an MD5 hash and reports the hex digest.
 *
 * @param {string} file - Path of the file to hash.
 * @param {function} callback - Node-style callback: `callback(err)` when the
 *   read stream emits an error, `callback(null, hexDigest)` once the whole
 *   file has been read.
 */
function calculateMD5(file, callback) {
  const md5 = crypto.createHash("md5");
  fs.createReadStream(file)
    .on("data", (chunk) => {
      md5.update(chunk);
    })
    .on("error", (err) => {
      callback(err);
    })
    .on("end", () => {
      callback(null, md5.digest("hex"));
    });
}
/**
 * Checks every manifest entry, one at a time, and reports whether any of
 * them failed verification.
 *
 * An entry fails (and a warning is printed to stderr) when:
 *  - a `<file>.error` file exists,
 *  - the entry has a truthy `link` flag but no `<file>.link` file exists,
 *  - the file cannot be read for hashing,
 *  - the entry has no `md5` value, or
 *  - the computed MD5 differs from the entry's `md5`.
 *
 * @param {Array<Object>} manifest - Entries with `file`, and optionally
 *   `id`, `link` and `md5`.
 * @param {function} callback - Called once with `true` if at least one entry
 *   failed, `false` otherwise.
 */
function verifyManifestFiles(manifest, callback) {
  let index = 0;
  let hasError = false;

  // Prints a warning and records that verification has failed.
  function warn(message) {
    console.error(message);
    hasError = true;
  }

  function verifyNext() {
    // Entries that fail without hashing are handled in a loop rather than by
    // recursion, so a long run of them cannot grow the call stack.
    while (index < manifest.length) {
      const item = manifest[index++];

      if (fs.existsSync(`${item.file}.error`)) {
        warn(
          `WARNING: File was not downloaded. See "${item.file}.error" file.`
        );
        continue;
      }
      if (item.link && !fs.existsSync(`${item.file}.link`)) {
        warn(
          `WARNING: Unneeded \`"link": true\`-entry for the "${item.id}" test.`
        );
        continue;
      }

      calculateMD5(item.file, (err, md5) => {
        if (err) {
          warn(`WARNING: Unable to open file for reading "${err}".`);
        } else if (!item.md5) {
          warn(
            `WARNING: Missing md5 for file "${item.file}". ` +
              `Hash for current file is "${md5}"`
          );
        } else if (md5 !== item.md5) {
          warn(
            `WARNING: MD5 of file "${item.file}" does not match file. ` +
              `Expected "${item.md5}" computed "${md5}"`
          );
        }
        verifyNext();
      });
      // The hash callback resumes the walk.
      return;
    }
    callback(hasError);
  }

  verifyNext();
}
- exports.downloadManifestFiles = downloadManifestFiles;
- exports.verifyManifestFiles = verifyManifestFiles;
|