diff --git a/core/src/main/scala/HFileStorageService.scala b/core/src/main/scala/HFileStorageService.scala index c2c974781..5d1417da5 100644 --- a/core/src/main/scala/HFileStorageService.scala +++ b/core/src/main/scala/HFileStorageService.scala @@ -117,7 +117,7 @@ class HFileStorageService(originalBasepath: String, shouldPreload: Boolean) exte try { (StoredFeatureId.fromLegacyObjectId(new ObjectId(parts(0))).get, parts(1).toInt) } catch { - case _: Exception => throw new Exception("malformed boost line: %s --> %s".format(l, parts.toList)) + case _ => throw new Exception("malformed boost line: %s --> %s".format(l, parts.toList)) } }).toMap } else { diff --git a/eval.py b/eval.py index 09e09d6cc..da59f3172 100755 --- a/eval.py +++ b/eval.py @@ -217,8 +217,6 @@ def evallog(title, old = None, new = None): geomB = interpB['feature']['geometry'] centerA = geomA['center'] centerB = geomB['center'] - scoresA = interpA.get('scores', defaultdict) - scoresB = interpB.get('scores', defaultdict) distance = earthDistance( centerA['lat'], centerA['lng'], diff --git a/indexer/src/main/scala/OutputHFile.scala b/indexer/src/main/scala/OutputHFile.scala index 1594e9687..0fbc9f73e 100644 --- a/indexer/src/main/scala/OutputHFile.scala +++ b/indexer/src/main/scala/OutputHFile.scala @@ -340,20 +340,14 @@ class FeatureIndexer(override val basepath: String, override val fidMap: FidMap) ) ) - val geom = - if (fullFeature.feature.geometryOption.flatMap(_.wkbGeometryOption).isDefined) { - fullFeature.feature.geometryOption.map(_.wkbGeometryByteArray) - } else { - None - } - makeGeocodeServingFeature(partialFeature, geom) + makeGeocodeServingFeature(partialFeature) } def makeGeocodeRecord(g: GeocodeRecord) = { makeGeocodeServingFeature(g.toGeocodeServingFeature()) } - def makeGeocodeServingFeature(f: GeocodeServingFeature, polygon: Option[Array[Byte]] = None) = { + def makeGeocodeServingFeature(f: GeocodeServingFeature) = { val parents = for { parentLongId <- f.scoringFeatures.parentIds parentFid <- StoredFeatureId.fromLong(parentLongId) @@ -362,20 +356,9 @@ class FeatureIndexer(override val basepath: String, override val fidMap: FidMap) parentFid } - val wkbReader = new WKBReader() - - val featureGeom = if (f.feature.geometryOption.flatMap(_.wkbGeometryOption).isDefined) { - f.feature.geometryOption.map(_.wkbGeometryByteArray) - } else { - None - } - - val area = polygon.orElse(featureGeom).map(p => wkbReader.read(p).getArea()) - - val scoringFeatures = f.scoringFeatures.mutableCopy - scoringFeatures.parentIds = parents.map(_.longId) - area.foreach(a => scoringFeatures.areaInDegrees = a) - f.copy(scoringFeatures = scoringFeatures) + f.copy( + scoringFeatures = f.scoringFeatures.copy(parentIds = parents.map(_.longId)) + ) } def writeFeatures() { @@ -450,11 +433,9 @@ class RevGeoIndexer(override val basepath: String, override val fidMap: FidMap) ) } - val totalArea = geom.getArea() - logDuration("clipped and outputted cover for %d cells (%s)".format(cells.size, record.featureId)) { val recordShape = geom.buffer(0) - val preparedRecordShape = PreparedGeometryFactory.prepare(recordShape) + val preparedRecordShape = PreparedGeometryFactory.prepare(recordShape) cells.foreach( (cellid: S2CellId) => { val bucket = s2map.getOrElseUpdate(cellid.id, new ListBuffer[CellGeometry]()) @@ -466,6 +447,7 @@ class RevGeoIndexer(override val basepath: String, override val fidMap: FidMap) cellGeometryBuilder.wkbGeometry(ByteBuffer.wrap(wkbWriter.write(s2shape.intersection(recordShape)))) } cellGeometryBuilder.woeType(record.woeType) + cellGeometryBuilder.oid(ByteBuffer.wrap(record.featureId.legacyObjectId.toByteArray())) cellGeometryBuilder.longId(record._id) bucket += cellGeometryBuilder.result } diff --git a/interface/src/main/thrift/geocoder.thrift b/interface/src/main/thrift/geocoder.thrift index 011e760b1..193db3f96 100644 --- a/interface/src/main/thrift/geocoder.thrift +++ b/interface/src/main/thrift/geocoder.thrift @@ -159,7 +159,7 @@ struct ScoringFeatures { 5: optional bool canGeocode = 1 7: optional bool hasPoly = 0 8: optional list extraRelationIds = [], - 9: optional double areaInDegrees = 0.0 + 3: optional list DEPRECATED_parents = [], } @@ -349,7 +349,7 @@ struct BulkReverseGeocodeResponse { // this list will be the same size as the input array of latlngs // intepretationIndexes[0] is a list of indexes into interpretations that represent the // reverse geocodes of latlng[0] - // interpretationIndexes[1] are the interpretations that revgeo latlng[1] + // interpretationIndexes[1] are the interpretations that revgeo latlng[1] // latlngs that had no revgeo matches will have an empty array in the corresponding position // ... and so on 4: list> interpretationIndexes @@ -372,7 +372,7 @@ struct BulkSlugLookupResponse { // this list will be the same size as the input array of slugs // intepretationIndexes[0] is a list of indexes into interpretations that represent the // lookup of slugs[0] - // interpretationIndexes[1] are the interpretations that map to slugs[1] + // interpretationIndexes[1] are the interpretations that map to slugs[1] // ... and so on 2: list> interpretationIndexes diff --git a/interface/src/main/thrift/side_tables.thrift b/interface/src/main/thrift/side_tables.thrift index 1550c7423..a9c14bcdd 100644 --- a/interface/src/main/thrift/side_tables.thrift +++ b/interface/src/main/thrift/side_tables.thrift @@ -3,8 +3,9 @@ namespace java com.foursquare.twofishes include "geocoder.thrift" struct CellGeometry { - 2: optional binary wkbGeometry - 3: optional geocoder.YahooWoeType woeType + 1: optional binary oid, + 2: optional binary wkbGeometry, + 3: optional geocoder.YahooWoeType woeType, 4: optional bool full 5: optional i64 longId } diff --git a/server/src/main/scala/GeocodeRequestUtils.scala b/server/src/main/scala/GeocodeRequestUtils.scala index 01e97272e..aab1d5c55 100644 --- a/server/src/main/scala/GeocodeRequestUtils.scala +++ b/server/src/main/scala/GeocodeRequestUtils.scala @@ -14,6 +14,7 @@ object GeocodeRequestUtils { def shouldFetchPolygon(req: CommonGeocodeRequestParams) = responseIncludes(req, ResponseIncludes.WKB_GEOMETRY) || responseIncludes(req, ResponseIncludes.WKT_GEOMETRY) || + responseIncludes(req, ResponseIncludes.REVGEO_COVERAGE) || responseIncludes(req, ResponseIncludes.WKB_GEOMETRY_SIMPLIFIED) || responseIncludes(req, ResponseIncludes.WKT_GEOMETRY_SIMPLIFIED) diff --git a/server/src/main/scala/ReverseGeocoderImpl.scala b/server/src/main/scala/ReverseGeocoderImpl.scala index c27dd8ac5..3137fdbde 100644 --- a/server/src/main/scala/ReverseGeocoderImpl.scala +++ b/server/src/main/scala/ReverseGeocoderImpl.scala @@ -1,11 +1,9 @@ // Copyright 2012 Foursquare Labs Inc. All Rights Reserved package com.foursquare.twofishes -import com.foursquare.geo.shapes.ShapefileS2Util import com.foursquare.twofishes.Identity._ import com.foursquare.twofishes.util.{GeoTools, GeometryUtils, StoredFeatureId, TwofishesLogger} import com.foursquare.twofishes.util.Lists.Implicits._ -import com.google.common.geometry.S2CellId import com.twitter.ostrich.stats.Stats import com.twitter.util.Duration import com.vividsolutions.jts.geom.{Coordinate, Geometry, GeometryFactory, Point => JTSPoint} @@ -14,11 +12,9 @@ import com.vividsolutions.jts.util.GeometricShapeFactory import com.weiglewilczek.slf4s.Logging import org.apache.thrift.TBaseHelper import org.bson.types.ObjectId -import scala.collection.mutable.HashMap import scala.collection.mutable.ListBuffer import scalaj.collection.Implicits._ - class ReverseGeocodeParseOrdering extends Ordering[Parse[Sorted]] { def compare(a: Parse[Sorted], b: Parse[Sorted]): Int = { val comparisonOpt = for { @@ -56,47 +52,37 @@ trait TimeResponseHelper { } } -case class CellGeometryWrapper(cell: CellGeometry, cellid: Long) { - def wkbGeometryOption = cell.wkbGeometryOption - def woeTypeOption = cell.woeTypeOption - def full = cell.full - def longIdOption = cell.longIdOption - lazy val s2cellid = new S2CellId(cellid) - lazy val geomOption: Option[Geometry] = { - val wkbReader = new WKBReader() - if (full) { - Some(ShapefileS2Util.fullGeometryForCell(s2cellid)) - } else { - wkbGeometryOption.map(wkbGeometry => wkbReader.read(TBaseHelper.byteBufferToByteArray(wkbGeometry))) - } - } -} - class ReverseGeocoderHelperImpl( store: GeocodeStorageReadService, req: CommonGeocodeRequestParams, queryLogger: MemoryLogger ) extends GeocoderImplTypes with TimeResponseHelper with BulkImplHelpers with Logging { - def computeOverlapArea( - featureGeometries: Seq[CellGeometryWrapper], + def featureGeometryIntersections(wkbGeometry: Array[Byte], otherGeom: Geometry) = { + val wkbReader = new WKBReader() + val geom = wkbReader.read(wkbGeometry) + try { + (geom, geom.intersects(otherGeom)) + } catch { + case e => + Stats.addMetric("intersects_exception", 1) + logger.error("failed to calculate intersection: %s".format(otherGeom), e) + (geom, false) + } + } + + def computeCoverage( + featureGeometry: Geometry, requestGeometry: Geometry ): Double = { - // there's still some redundant work here -- we've already determined if our request geom doesn't touch - // the geometry in some of the cells when we computed findMatches. - val areas = for { - cellGeometry <- featureGeometries - geom <- cellGeometry.geomOption - } yield { - try { - geom.intersection(requestGeometry).getArea() - } catch { - case e: Exception => - Stats.addMetric("intersection_exception", 1) - logger.error("failed to calculate intersection: %s x %s".format(geom, requestGeometry), e) - 0.0 - } + try { + val intersection = featureGeometry.intersection(requestGeometry) + math.min(1, intersection.getArea() / requestGeometry.getArea()) + } catch { + case e => + Stats.addMetric("intersection_exception", 1) + logger.error("failed to calculate intersection: %s x %s".format(featureGeometry, requestGeometry), e) + 0.0 } - areas.sum } def responseIncludes(include: ResponseIncludes): Boolean = @@ -126,7 +112,7 @@ class ReverseGeocoderHelperImpl( def findMatches( otherGeom: Geometry, - cellGeometries: Seq[CellGeometryWrapper] + cellGeometries: Seq[CellGeometry] ): Seq[StoredFeatureId] = { if (req.debug > 0) { queryLogger.ifDebug("had %d candidates", cellGeometries.size) @@ -138,25 +124,18 @@ class ReverseGeocoderHelperImpl( for { cellGeometry <- cellGeometries if (req.woeRestrict.isEmpty || cellGeometry.woeTypeOption.exists(req.woeRestrict.has)) - longId <- cellGeometry.longIdOption - fid <- StoredFeatureId.fromLong(longId) + oid <- cellGeometry.oidOption.map(bb => new ObjectId(TBaseHelper.byteBufferToByteArray(bb))) + fid <- StoredFeatureId.fromLegacyObjectId(oid) } yield { if (!matches.has(fid)) { if (cellGeometry.full) { queryLogger.ifDebug("was full: %s", fid) matches.append(fid) } else { - cellGeometry.geomOption match { - case Some(geom) => - val intersects = queryLogger.logDuration("intersectionCheck", "intersectionCheck") { - try { - geom.intersects(otherGeom) - } catch { - case e: Exception => - Stats.addMetric("intersects_exception", 1) - logger.error("failed to calculate intersection: %s".format(otherGeom), e) - false - } + cellGeometry.wkbGeometryOption match { + case Some(wkbGeometry) => + val (geom, intersects) = queryLogger.logDuration("intersectionCheck", "intersectionCheck") { + featureGeometryIntersections(TBaseHelper.byteBufferToByteArray(wkbGeometry), otherGeom) } if (intersects) { matches.append(fid) @@ -172,25 +151,17 @@ class ReverseGeocoderHelperImpl( def doBulkReverseGeocode(otherGeoms: Seq[Geometry]): (Seq[Seq[Int]], Seq[GeocodeInterpretation], Seq[GeocodeFeature]) = { - // For each incoming geometry, get its complete list of s2 cells val geomIndexToCellIdMap: Map[Int, Seq[Long]] = (for { (g, index) <- otherGeoms.zipWithIndex } yield { index -> s2CoverGeometry(g) }).toMap - // Map of cellid -> Seq[CellGeometry] - val cellGeometryMap: Map[Long, Seq[CellGeometryWrapper]] = + val cellGeometryMap: Map[Long, Seq[CellGeometry]] = (for { cellid: Long <- geomIndexToCellIdMap.values.flatten.toSet } yield { - cellid -> store.getByS2CellId(cellid).map(cell => CellGeometryWrapper(cell, cellid)) + cellid -> store.getByS2CellId(cellid) }).toMap - // map from FeatureId -> Seq[candidate cells+CellGeometry] - val featureIdToCellGeometryMap: Map[StoredFeatureId, Seq[CellGeometryWrapper]] = { - cellGeometryMap.toList.flatMap({case (cellid, geometries) => geometries }) - .groupBy(cell => StoredFeatureId.fromLong(cell.longIdOption.getOrElse(0)).get) - } - val geomToMatches = (for { (otherGeom, index) <- otherGeoms.zipWithIndex } yield { @@ -203,6 +174,7 @@ class ReverseGeocoderHelperImpl( val matchedIds = geomToMatches.flatMap(_._2).toSet.toList + // need to get polygons if we need to calculate coverage val polygonMap: Map[StoredFeatureId, Geometry] = ( if (GeocodeRequestUtils.shouldFetchPolygon(req)) { store.getPolygonByFeatureIds(matchedIds) @@ -217,6 +189,10 @@ class ReverseGeocoderHelperImpl( val parsesAndOtherGeomToFids: Seq[(SortedParseSeq, (Geometry, Seq[StoredFeatureId]))] = (for { ((otherGeom, featureIds), index) <- geomToMatches.zipWithIndex } yield { + val cellGeometries = geomIndexToCellIdMap(index).flatMap(cellid => cellGeometryMap(cellid)) + + val featureIds = findMatches(otherGeom, cellGeometries) + val servingFeaturesMap: Map[StoredFeatureId, GeocodeServingFeature] = store.getByFeatureIds(featureIds.toSet.toList) @@ -226,26 +202,14 @@ class ReverseGeocoderHelperImpl( f <- servingFeaturesMap.get(fid) } yield { val parse = Parse[Sorted](List(FeatureMatch(0, 0, "", f))) - if (responseIncludes(ResponseIncludes.REVGEO_COVERAGE)) { - for { - cellGeoms <- featureIdToCellGeometryMap.get(fid) - totalArea <- f.scoringFeaturesOption.flatMap(_.areaInDegreesOption) - // request overlap for now only makes sense for non-bulk lookups. - // otherwise, overlap of which request is it describing? - if otherGeoms.size == 1 - geom <- otherGeoms.headOption - } { - queryLogger.logDuration("intersectionCheck", "intersectionCheck") { - val overlapArea = computeOverlapArea(cellGeoms, geom) - val requestArea = geom.getArea() - - // coverage is undefined when the request is a point - if (requestArea > 0) { - parse.scoringFeatures.percentOfRequestCovered(math.min(1, overlapArea / requestArea)) - parse.scoringFeatures.percentOfFeatureCovered(math.min(1, overlapArea / totalArea)) - } + if (responseIncludes(ResponseIncludes.REVGEO_COVERAGE) && + otherGeom.getNumPoints > 2) { + polygonMap.get(fid).foreach(geom => { + if (geom.getNumPoints > 2) { + parse.scoringFeatures.percentOfRequestCovered(computeCoverage(geom, otherGeom)) + parse.scoringFeatures.percentOfFeatureCovered(computeCoverage(otherGeom, geom)) } - } + }) } parse } diff --git a/server/src/test/scala/GeocoderTest.scala b/server/src/test/scala/GeocoderTest.scala index bf78131eb..13ee84da9 100644 --- a/server/src/test/scala/GeocoderTest.scala +++ b/server/src/test/scala/GeocoderTest.scala @@ -72,6 +72,7 @@ class MockGeocodeStorageReadService extends GeocodeStorageReadService { ByteBuffer.wrap(wkbWriter.write(s2shape.intersection(recordShape)))) } cellGeometryBuilder.woeType(woeType) + cellGeometryBuilder.oid(ByteBuffer.wrap(id.legacyObjectId.toByteArray())) cellGeometryBuilder.longId(id.longId) bucket += cellGeometryBuilder.result })