aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorevansiroky <evan.siroky@yahoo.com>2020-10-25 14:27:29 -0700
committerevansiroky <evan.siroky@yahoo.com>2020-10-25 14:27:29 -0700
commit62278f0d8361650f683f16a1204c3fc10cd29fab (patch)
treeff3356b82c93ae405e3931113fd865fed41c7e02
parentb0442caf358d2a813d8ae3bd2db8b5a99f2dadf8 (diff)
downloadtimezone-boundary-builder-62278f0d8361650f683f16a1204c3fc10cd29fab.tar.gz
Add ability to diff zones generated from current config against latest release
Fixes #83
-rw-r--r--CHANGELOG.md5
-rw-r--r--README.md9
-rw-r--r--index.js294
-rw-r--r--package-lock.json5
-rw-r--r--package.json1
-rw-r--r--util/featureWriterStream.js25
-rw-r--r--util/progressStats.js (renamed from progressStats.js)0
7 files changed, 257 insertions, 82 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4889400..6d2e14d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,10 +8,11 @@
### Other Changes
-* Switch command line flag processing to use the yargs library. Existing flags have changed: --no-validation and --filtered-zones have been renamed to --no_validation and --included_zones respectively. --included_zones now takes a list without quotes or commas.
-* Addition of new flags: --excluded_zones, --dist_dir, --downloads_dir, --skip_zip, --skip_shapefile. See --help and README.md for details.
+* Switch command line flag processing to use the yargs library. Existing flags have changed: `--no-validation` and `--filtered-zones` have been renamed to `--skip_validation` and `--included_zones` respectively. `--included_zones` now takes a list without quotes or commas.
+* Addition of new flags: `--excluded_zones`, `--dist_dir`, `--downloads_dir`, `--skip_analyze_diffs`, `--skip_shapefile`, `--skip_zip`. See `--help` and README.md for details.
* Remove unneeded downloaded files from downloads directory before creating input data zipfile ([#82](https://github.com/evansiroky/timezone-boundary-builder/issues/82)).
* Junk directory names when zipping data for releases
+* Add ability to generate a difference of the zone boundaries between the current config and the latest release ([#83](https://github.com/evansiroky/timezone-boundary-builder/issues/83)).
## 2020a
diff --git a/README.md b/README.md
index b582024..598bba6 100644
--- a/README.md
+++ b/README.md
@@ -75,19 +75,22 @@ node --max-old-space-size=8192 index.js --downloads_dir ./downloads2 --dist_dir
Other command line flags:
+ `--help` - show some basic usage information
- + `--no_validation` - do not validate the time zone boundaries
- + `--skip_zip` - do not zip the generated geojson files
+ + `--skip_analyze_diffs` - do not analyze differences between the current output and the latest release
+ `--skip_shapefile` - do not create the shapefile from the geojson file
+ + `--skip_validation` - do not validate the time zone boundaries
+ + `--skip_zip` - do not zip the generated geojson files
### What the script does
There are three config files that describe the boundary building process. The `osmBoundarySources.json` file lists all of the needed boundaries to extract via queries to the Overpass API. The `timezones.json` file lists all of the timezones and various operations to perform to build the boundaries. The `expectedZoneOverlaps.json` file lists all timezones that are allowed to overlap each other and the acceptable bounds of a particular overlap.
-The `index.js` file downloads all of the required geometries, builds the specified geometries, validates that there aren't large areas of overlap (other than those that are expected), outputs one huge geojson file, and finally zips up the geojson file using the `zip` cli and also converts the geojson to a shapefile using the `ogr2ogr` cli. The script has only been verified to run with Node.js 10 on the MacOS platform.
+The `index.js` file downloads all of the required geometries, builds the specified geometries, validates that there aren't large areas of overlap (other than those that are expected), analyzes the difference between the current output and the last release, outputs one huge geojson file, and finally zips up the geojson file using the `zip` cli and also converts the geojson to a shapefile using the `ogr2ogr` cli. The script has only been verified to run with Node.js 10 on the MacOS platform.
The code does query the publicly available Overpass API, but it self-throttles the making of requests to have a minimum of 4 seconds gap between requests. If the Overpass API throttles the download, then the gap will be increased exponentially.
+The validation and difference analysis can take a really long time to compute. If these tasks are not needed, be sure to add the `--skip_analyze_diffs` and `--skip_validation` flags.
+
As of release 2020a, it is possible to run the script with the underlying input data that was used to build the timezone geometries at the time of the release. In the release files, the `input-data.zip` will have all of the necessary input data including the downloaded files from overpass, the `timezones.json` file and the `osmBoundarySources.json` file as well.
## Limitations of this project
diff --git a/index.js b/index.js
index 52ed37b..0caf598 100644
--- a/index.js
+++ b/index.js
@@ -9,26 +9,20 @@ var helpers = require('@turf/helpers')
var multiPolygon = helpers.multiPolygon
var polygon = helpers.polygon
var asynclib = require('async')
+var https = require('follow-redirects').https
var jsts = require('jsts')
var rimraf = require('rimraf')
var overpass = require('query-overpass')
var yargs = require('yargs')
-const ProgressStats = require('./progressStats')
+const FeatureWriterStream = require('./util/featureWriterStream')
+const ProgressStats = require('./util/progressStats')
var osmBoundarySources = require('./osmBoundarySources.json')
var zoneCfg = require('./timezones.json')
var expectedZoneOverlaps = require('./expectedZoneOverlaps.json')
const argv = yargs
- .option('included_zones', {
- description: 'Include specified zones',
- type: 'array'
- })
- .option('excluded_zones', {
- description: 'Exclude specified zones',
- type: 'array'
- })
.option('downloads_dir', {
description: 'Set the download location',
default: './downloads',
@@ -39,18 +33,30 @@ const argv = yargs
default: './dist',
type: 'string'
})
- .option('no_validation', {
- description: 'Skip validation',
- type: 'boolean'
+ .option('excluded_zones', {
+ description: 'Exclude specified zones',
+ type: 'array'
})
- .option('skip_zip', {
- description: 'Skip zip creation',
+ .option('included_zones', {
+ description: 'Include specified zones',
+ type: 'array'
+ })
+ .option('skip_analyze_diffs', {
+ description: 'Skip analysis of diffs between versions',
type: 'boolean'
})
.option('skip_shapefile', {
description: 'Skip shapefile creation',
type: 'boolean'
})
+ .option('skip_validation', {
+ description: 'Skip validation',
+ type: 'boolean'
+ })
+ .option('skip_zip', {
+ description: 'Skip zip creation',
+ type: 'boolean'
+ })
.help()
.strict()
.alias('help', 'h')
@@ -104,6 +110,7 @@ var distZones = {}
var lastReleaseJSONfile
var minRequestGap = 4
var curRequestGap = 4
+const bufferDistance = 0.01
var safeMkdir = function (dirname, callback) {
fs.mkdir(dirname, function (err) {
@@ -115,7 +122,13 @@ var safeMkdir = function (dirname, callback) {
})
}
-var debugGeo = function (op, a, b, reducePrecision) {
+var debugGeo = function (
+ op,
+ a,
+ b,
+ reducePrecision,
+ bufferAfterPrecisionReduction
+) {
var result
if (reducePrecision) {
@@ -143,8 +156,23 @@ var debugGeo = function (op, a, b, reducePrecision) {
}
} catch (e) {
if (e.name === 'TopologyException') {
- console.log('Encountered TopologyException, retry with GeometryPrecisionReducer')
- return debugGeo(op, a, b, true)
+ if (reducePrecision) {
+ if (bufferAfterPrecisionReduction) {
+ console.log('Encountered TopologyException, retry with buffer increase')
+ return debugGeo(
+ op,
+ a.buffer(bufferDistance),
+ b.buffer(bufferDistance),
+ true,
+ bufferAfterPrecisionReduction
+ )
+ } else {
+ throw new Error('Encountered TopologyException after reducing precision')
+ }
+ } else {
+ console.log('Encountered TopologyException, retry with GeometryPrecisionReducer')
+ return debugGeo(op, a, b, true, bufferAfterPrecisionReduction)
+ }
}
console.log('op err')
console.log(e)
@@ -671,85 +699,195 @@ var addOceans = function (callback) {
}
var combineAndWriteZones = function (callback) {
- var stream = fs.createWriteStream(distDir + '/combined.json')
- var streamWithOceans = fs.createWriteStream(distDir + '/combined-with-oceans.json')
+ const regularWriter = new FeatureWriterStream(distDir + '/combined.json')
+ const oceanWriter = new FeatureWriterStream(distDir + '/combined-with-oceans.json')
var zones = Object.keys(zoneCfg)
- stream.write('{"type":"FeatureCollection","features":[')
- streamWithOceans.write('{"type":"FeatureCollection","features":[')
-
- for (var i = 0; i < zones.length; i++) {
- if (i > 0) {
- stream.write(',')
- streamWithOceans.write(',')
- }
- var feature = {
+ zones.forEach(zoneName => {
+ const feature = {
type: 'Feature',
- properties: { tzid: zones[i] },
- geometry: geomToGeoJson(getDistZoneGeom(zones[i]))
+ properties: { tzid: zoneName },
+ geometry: geomToGeoJson(getDistZoneGeom(zoneName))
}
const stringified = JSON.stringify(feature)
- stream.write(stringified)
- streamWithOceans.write(stringified)
- }
+ regularWriter.add(stringified)
+ oceanWriter.add(stringified)
+ })
oceanZoneBoundaries.forEach(boundary => {
- streamWithOceans.write(',')
var feature = {
type: 'Feature',
properties: { tzid: boundary.tzid },
geometry: boundary.geom
}
- streamWithOceans.write(JSON.stringify(feature))
+ oceanWriter.add(JSON.stringify(feature))
})
asynclib.parallel([
- cb => {
- stream.end(']}', cb)
- },
- cb => {
- streamWithOceans.end(']}', cb)
- }
+ cb => regularWriter.end(cb),
+ cb => oceanWriter.end(cb)
], callback)
}
-var cleanDownloadsDir = function (cb) {
- // TODO:
-
- // list all files in downloads dir
- // for each file
- // if file does not exist in osmBoundarySources.json file, then remove
- cb()
-}
-
var downloadLastRelease = function (cb) {
- // TODO:
-
// download latest release info
- // determine last release version name
- lastReleaseJSONfile = `./dist/${lastReleaseName}.json`
-
- // check if file already downloaded, if so immediately callback
- fetchIfNeeded(lastReleaseJSONfile, cb, cb, function () {
- // find download link for geojson with oceans
- // download the latest release data into the dist directory
- // unzip geojson
- cb()
- })
+ https.get(
+ {
+ headers: { 'user-agent': 'timezone-boundary-builder' },
+ host: 'api.github.com',
+ path: '/repos/evansiroky/timezone-boundary-builder/releases/latest'
+ },
+ function (res) {
+ var data = ''
+ res.on('data', function (chunk) {
+ data += chunk
+ })
+ res.on('end', function () {
+ data = JSON.parse(data)
+ // determine last release version name and download link
+ const lastReleaseName = data.name
+ lastReleaseJSONfile = `./dist/${lastReleaseName}.json`
+ let lastReleaseDownloadUrl
+ for (var i = 0; i < data.assets.length; i++) {
+ if (data.assets[i].browser_download_url.indexOf('timezones-with-oceans.geojson') > -1) {
+ lastReleaseDownloadUrl = data.assets[i].browser_download_url
+ }
+ }
+ if (!lastReleaseDownloadUrl) {
+ return cb(new Error('geojson not found'))
+ }
+
+ // check for file that got downloaded
+ fs.stat(lastReleaseJSONfile, function (err) {
+ if (!err) {
+ // file found, skip download steps
+ return cb()
+ }
+ // file not found, download
+ console.log(`Downloading latest release to ${lastReleaseJSONfile}.zip`)
+ https.get({
+ headers: { 'user-agent': 'timezone-boundary-builder' },
+ host: 'github.com',
+ path: lastReleaseDownloadUrl.replace('https://github.com', '')
+ }, function (response) {
+ var file = fs.createWriteStream(`${lastReleaseJSONfile}.zip`)
+ response.pipe(file)
+ file.on('finish', function () {
+ file.close((err) => {
+ if (err) return cb(err)
+ // unzip file
+ console.log('unzipping latest release')
+ exec(
+ `unzip -o ${lastReleaseJSONfile} -d dist`,
+ err => {
+ if (err) { return cb(err) }
+ console.log('unzipped file')
+ console.log('moving unzipped file')
+ // might need to change this after changes to how files are
+ // zipped after 2020a
+ fs.copyFile(
+ path.join(
+ 'dist',
+ 'dist',
+ 'combined-with-oceans.json'
+ ),
+ lastReleaseJSONfile,
+ cb
+ )
+ }
+ )
+ })
+ })
+ }).on('error', cb)
+ })
+ })
+ }
+ )
}
var analyzeChangesFromLastRelease = function (cb) {
- // TODO
-
// load last release data into memory
+ console.log('loading previous release into memory')
+ const lastReleaseData = require(lastReleaseJSONfile)
+
+ // load each feature's geojson into JSTS format and then organized by tzid
+ const lastReleaseZones = {}
+ lastReleaseData.features.forEach(
+ feature => {
+ lastReleaseZones[feature.properties.tzid] = feature
+ }
+ )
// generate set of keys from last release and current
+ const zoneNames = new Set()
+ Object.keys(distZones).forEach(zoneName => zoneNames.add(zoneName))
+ Object.keys(lastReleaseZones).forEach(zoneName => zoneNames.add(zoneName))
+
+ // create diff for each zone
+ const analysisProgress = new ProgressStats(
+ 'Analyzing diffs',
+ zoneNames.size
+ )
+ const additionsWriter = new FeatureWriterStream(distDir + '/additions.json')
+ const removalsWriter = new FeatureWriterStream(distDir + '/removals.json')
+ zoneNames.forEach(zoneName => {
+ analysisProgress.beginTask(zoneName, true)
+ if (distZones[zoneName] && lastReleaseZones[zoneName]) {
+ // some zones take forever to diff unless they are buffered, so buffer by
+ // just a small amount
+ const lastReleaseGeom = geoJsonToGeom(
+ lastReleaseZones[zoneName].geometry
+ ).buffer(bufferDistance)
+ const curDataGeom = getDistZoneGeom(zoneName).buffer(bufferDistance)
+
+ // don't diff equal geometries
+ if (curDataGeom.equals(lastReleaseGeom)) return
+
+ // diff current - last = additions
+ const addition = debugGeo(
+ 'diff',
+ curDataGeom,
+ lastReleaseGeom,
+ false,
+ true
+ )
+ if (addition.getArea() > 0.0001) {
+ additionsWriter.add(JSON.stringify({
+ type: 'Feature',
+ properties: { tzid: zoneName },
+ geometry: geomToGeoJson(addition)
+ }))
+ }
- // for each zone
- // diff current - last = additions
- // diff last - current = removals
+ // diff last - current = removals
+ const removal = debugGeo(
+ 'diff',
+ lastReleaseGeom,
+ curDataGeom,
+ false,
+ true
+ )
+ if (removal.getArea() > 0.0001) {
+ removalsWriter.add(JSON.stringify({
+ type: 'Feature',
+ properties: { tzid: zoneName },
+ geometry: geomToGeoJson(removal)
+ }))
+ }
+ } else if (distZones[zoneName]) {
+ additionsWriter.add(JSON.stringify({
+ type: 'Feature',
+ properties: { tzid: zoneName },
+ geometry: geomToGeoJson(getDistZoneGeom(zoneName))
+ }))
+ } else {
+ removalsWriter.add(JSON.stringify(lastReleaseZones[zoneName]))
+ }
+ })
- // write file of additions
- // write file of removals
- cb()
+ // write files
+ asynclib.parallel([
+ wcb => additionsWriter.end(wcb),
+ wcb => removalsWriter.end(wcb)
+ ], cb)
}
const autoScript = {
@@ -788,11 +926,12 @@ const autoScript = {
'/* timezones.json osmBoundarySources.json expectedZoneOverlaps.json', cb)
}],
downloadLastRelease: ['makeDistDir', function (results, cb) {
- if (process.argv.indexOf('analyze-changes') > -1) {
+ if (argv.skip_analyze_diffs) {
+ overallProgress.beginTask('WARNING: Skipping download of last release for analysis!')
+ cb()
+ } else {
overallProgress.beginTask('Downloading last release for analysis')
downloadLastRelease(cb)
- } else {
- overallProgress.beginTask('WARNING: Skipping download of last release for analysis!')
}
}],
createZones: ['makeDistDir', 'getOsmBoundaries', function (results, cb) {
@@ -802,7 +941,7 @@ const autoScript = {
validateZones: ['createZones', function (results, cb) {
overallProgress.beginTask('Validating timezone boundaries')
loadDistZonesIntoMemory()
- if (argv.no_validation) {
+ if (argv.skip_validation) {
console.warn('WARNING: Skipping validation!')
cb()
} else {
@@ -894,11 +1033,12 @@ const autoScript = {
)
},
analyzeChangesFromLastRelease: ['downloadLastRelease', 'mergeZones', function (results, cb) {
- if (process.argv.indexOf('analyze-changes') > -1) {
+ if (argv.skip_analyze_diffs) {
+ overallProgress.beginTask('WARNING: Skipping analysis of changes from last release!')
+ cb()
+ } else {
overallProgress.beginTask('Analyzing changes from last release')
analyzeChangesFromLastRelease(cb)
- } else {
- overallProgress.beginTask('WARNING: Skipping analysis of changes from last release!')
}
}]
}
diff --git a/package-lock.json b/package-lock.json
index d5c0e59..c8d64e1 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1082,6 +1082,11 @@
"integrity": "sha512-r5wGx7YeOwNWNlCA0wQ86zKyDLMQr+/RB8xy74M4hTphfmjlijTSSXGuH8rnvKZnfT9i+75zmd8jcKdMR4O6jA==",
"dev": true
},
+ "follow-redirects": {
+ "version": "1.13.0",
+ "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.13.0.tgz",
+ "integrity": "sha512-aq6gF1BEKje4a9i9+5jimNFIpq4Q1WiwBToeRK5NvZBd/TRsmW8BsJfOEGkr76TbOyPVD3OVDN910EcUNtRYEA=="
+ },
"forever-agent": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz",
diff --git a/package.json b/package.json
index 37adbb9..a83ada9 100644
--- a/package.json
+++ b/package.json
@@ -25,6 +25,7 @@
"@turf/bbox": "^6.0.1",
"@turf/helpers": "^6.1.4",
"async": "^3.2.0",
+ "follow-redirects": "^1.13.0",
"jsts": "^2.5.1",
"query-overpass": "^1.5.5",
"rimraf": "^3.0.2",
diff --git a/util/featureWriterStream.js b/util/featureWriterStream.js
new file mode 100644
index 0000000..7f0e7b8
--- /dev/null
+++ b/util/featureWriterStream.js
@@ -0,0 +1,25 @@
+const fs = require('fs')
+
+class FeatureWriterStream {
+ constructor (file) {
+ this.file = file
+ this.stream = fs.createWriteStream(file)
+ this.stream.write('{"type":"FeatureCollection","features":[')
+ this.numFeatures = 0
+ }
+
+ add (stringifiedFeature) {
+ if (this.numFeatures > 0) {
+ this.stream.write(',')
+ }
+ this.stream.write(stringifiedFeature)
+ this.numFeatures++
+ }
+
+ end (cb) {
+ console.log(`Closing out file ${this.file}`)
+ this.stream.end(']}', cb)
+ }
+}
+
+module.exports = FeatureWriterStream
diff --git a/progressStats.js b/util/progressStats.js
index f9794ed..f9794ed 100644
--- a/progressStats.js
+++ b/util/progressStats.js