*.iml
.idea/
target/
+db/
<version>1.0</version>
<dependencies>
<dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-simple</artifactId>
+ <version>1.7.21</version>
+ </dependency>
+ <dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<version>1.4.192</version>
<target>1.8</target>
</configuration>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>2.4.3</version>
+ <executions>
+ <execution>
+ <id>exec-jar</id>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <shadedArtifactAttached>true</shadedArtifactAttached>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+ <mainClass>com.viseo.xerox.elastic.Main</mainClass>
+ </transformer>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
</project>
\ No newline at end of file
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;
+import lombok.Getter;
+import lombok.Setter;
import java.io.IOException;
import java.io.StringWriter;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
+import java.util.*;
/**
* @see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/search-uri-request.html">search-uri-request</a>
private JsonFactory factory;
private ObjectWriter writer;
+ @Getter
+ @Setter
+ private SearchMode searchMode;
+ @Getter
+ @Setter
+ private boolean fuzziness;
public ESJsonConfig() {
factory = new JsonFactory();
writer = new ObjectMapper(factory).writer();
+ searchMode = SearchMode.BY_SCORE;
+ fuzziness = false;
}
public String getJsonIndex() throws IOException {
jg.writeStringField("tokenizer","standard");
jg.writeArrayFieldStart("filter");
jg.writeString("trim");
- jg.writeString("french_elision");
jg.writeString("lowercase");
+ jg.writeString("french_elision");
jg.writeString("asciifolding");
jg.writeString("synonym");
jg.writeString("french_stop");
// @formatter:on
}
+ /**
+  * Builds the action/metadata JSON object that precedes a document in a bulk request:
+  * {@code {"index":{"_index":…,"_type":…,"_id":…}}}.
+  *
+  * @param indexName value written to {@code _index}
+  * @param typeName value written to {@code _type}
+  * @param id document identifier, written to {@code _id} as a string; must not be {@code null}
+  * @return the metadata object serialized as a JSON string
+  * @throws IOException if the JSON generator fails
+  */
+ public String getJsonMetadataForIndexing(String indexName, String typeName, Long id) throws IOException {
+     final StringWriter buffer = new StringWriter();
+     final JsonGenerator generator = factory.createGenerator(buffer);
+     final String documentId = id.toString();
+     // @formatter:off
+     generator.writeStartObject();
+         generator.writeFieldName("index");
+         generator.writeStartObject();
+             generator.writeStringField("_index", indexName);
+             generator.writeStringField("_type", typeName);
+             generator.writeStringField("_id", documentId);
+         generator.writeEndObject();
+     generator.writeEndObject();
+     // @formatter:on
+     generator.close();
+     return buffer.toString();
+ }
+
public String getJsonForIndexing(final Point point) throws IOException {
StringWriter stringWriter = new StringWriter();
writer.writeValue(stringWriter, point);
+ /**
+  * Builds the JSON body of the search request for the currently configured {@code searchMode}.
+  * <p>
+  * Every mode runs the same {@code multi_match} query (options from {@code writeQueryOptions},
+  * fields from {@code writeSearchFields}) and enables {@code track_scores}. The modes differ in
+  * the {@code sort} array and in whether the query is wrapped in a {@code function_score}.
+  * NOTE(review): the switch has no {@code default} branch; a {@code null} search mode throws
+  * a NullPointerException.
+  *
+  * @param queryString full-text search terms
+  * @param latitude latitude of the reference position
+  * @param longitude longitude of the reference position
+  * @return the request body serialized as a JSON string
+  * @throws IOException if the JSON generator fails
+  */
public String getJsonQuery(String queryString, double latitude, double longitude) throws IOException {
StringWriter stringWriter = new StringWriter();
JsonGenerator jg = factory.createGenerator(stringWriter);
- // @formatter:off
- jg.writeStartObject();
- jg.writeObjectFieldStart("query");
- jg.writeObjectFieldStart("multi_match");
- jg.writeStringField("type", "best_fields");
- jg.writeStringField("query", queryString);
- jg.writeArrayFieldStart("fields");
- jg.writeString("name");
- jg.writeString("name.french");
- jg.writeString("town");
- jg.writeString("town.french");
+ switch (searchMode) {
+ // relevance first: sort = [_score, geo distance]
+ case BY_SCORE:
+ // @formatter:off
+ jg.writeStartObject();
+ jg.writeObjectFieldStart("query");
+ jg.writeObjectFieldStart("multi_match");
+ writeQueryOptions(jg, queryString);
+ writeSearchFields(jg);
+ jg.writeEndObject();
+ jg.writeEndObject();
+ jg.writeArrayFieldStart("sort");
+ jg.writeString("_score");
+ // geo distance is only the secondary sort key here; it is listed so that each hit reports its distance
+ writeJsonGeoSorting(latitude, longitude, jg);
jg.writeEndArray();
- jg.writeStringField("fuzziness", "AUTO");
+ jg.writeBooleanField("track_scores", true);
jg.writeEndObject();
- jg.writeEndObject();
- jg.writeArrayFieldStart("sort");
+ // @formatter:on
+ break;
+ // distance first: sort = [geo distance]
+ case BY_PROXIMITY:
+ // @formatter:off
jg.writeStartObject();
- jg.writeObjectFieldStart("_geo_distance");
- jg.writeObjectFieldStart("location");
- jg.writeNumberField("lat", latitude);
- jg.writeNumberField("lon", longitude);
+ jg.writeObjectFieldStart("query");
+ jg.writeObjectFieldStart("multi_match");
+ writeQueryOptions(jg, queryString);
+ writeSearchFields(jg);
jg.writeEndObject();
- jg.writeStringField("unit", "km");
jg.writeEndObject();
+ jg.writeArrayFieldStart("sort");
+ writeJsonGeoSorting(latitude, longitude, jg);
+ jg.writeEndArray();
+ jg.writeBooleanField("track_scores", true);
jg.writeEndObject();
+ // @formatter:on
+ break;
+ // relevance multiplied by a distance decay: the query is wrapped in function_score
+ case BY_SCORE_AND_PROXIMITY:
+ // @formatter:off
jg.writeStartObject();
- jg.writeObjectFieldStart("_type");
- jg.writeStringField("order", "asc"); // InterrestPoint before MobilityPoint
+ jg.writeObjectFieldStart("query");
+ jg.writeObjectFieldStart("function_score");
+ jg.writeObjectFieldStart("query");
+ jg.writeObjectFieldStart("multi_match");
+ writeQueryOptions(jg, queryString);
+ writeSearchFields(jg);
+ jg.writeEndObject();
+ jg.writeEndObject();
+ jg.writeArrayFieldStart("functions");
+ jg.writeStartObject();
+ // exponential decay: offset 10 km, scale 50 km, decay 0.33
+ writeExpDistanceDecayScore(jg, latitude, longitude, 10, 50, 0.33 );
+ jg.writeEndObject();
+ jg.writeEndArray();
+ jg.writeStringField("boost_mode", "multiply");
+ jg.writeEndObject();
jg.writeEndObject();
+ jg.writeArrayFieldStart("sort");
+ jg.writeString("_score");
+ // geo distance is only the secondary sort key here; it is listed so that each hit reports its distance
+ writeJsonGeoSorting(latitude, longitude, jg);
+ jg.writeEndArray();
+ jg.writeBooleanField("track_scores", true);
jg.writeEndObject();
- jg.writeEndArray();
- jg.writeBooleanField("track_scores", true);
- jg.writeEndObject();
- // @formatter:on
+ // @formatter:on
+ break;
+ }
jg.close();
return stringWriter.toString();
}
+ /**
+  * Writes the options of the {@code multi_match} query into the object currently open on
+  * {@code jg}: the query text, the match type, the {@code and} operator and
+  * {@code zero_terms_query=all}.
+  * <p>
+  * With fuzziness enabled the type is {@code best_fields} and {@code fuzziness=AUTO} is added;
+  * otherwise the type is {@code cross_fields} and no fuzziness field is written.
+  *
+  * @param jg generator positioned inside the {@code multi_match} object
+  * @param queryString full-text search terms
+  * @throws IOException if the JSON generator fails
+  */
+ private void writeQueryOptions(JsonGenerator jg, String queryString) throws IOException {
+     final String matchType;
+     if (fuzziness) {
+         matchType = "best_fields";
+     } else {
+         matchType = "cross_fields";
+     }
+     jg.writeStringField("query", queryString);
+     jg.writeStringField("type", matchType);
+     jg.writeStringField("operator", "and");
+     jg.writeStringField("zero_terms_query", "all");
+     if (!fuzziness) {
+         return;
+     }
+     jg.writeStringField("fuzziness", "AUTO");
+ }
+
+ /**
+  * Writes the {@code fields} array of the {@code multi_match} query. The two name fields
+  * carry a {@code ^3} boost; the two town fields are unboosted.
+  *
+  * @param jg generator positioned inside the {@code multi_match} object
+  * @throws IOException if the JSON generator fails
+  */
+ private void writeSearchFields(JsonGenerator jg) throws IOException {
+     final String[] searchedFields = {"name^3", "name.french^3", "town", "town.french"};
+     jg.writeArrayFieldStart("fields");
+     for (String searchedField : searchedFields) {
+         jg.writeString(searchedField);
+     }
+     jg.writeEndArray();
+ }
+
+ /**
+  * Writes one {@code _geo_distance} entry of a {@code sort} array, measuring the distance in
+  * kilometres from the given position to the {@code location} field:
+  * {@code {"_geo_distance":{"location":{"lat":…,"lon":…},"unit":"km"}}}.
+  *
+  * @param latitude latitude of the reference position; must not be {@code null}
+  * @param longitude longitude of the reference position; must not be {@code null}
+  * @param jg generator positioned inside the {@code sort} array
+  * @throws IOException if the JSON generator fails
+  */
+ private void writeJsonGeoSorting(Double latitude, Double longitude, JsonGenerator jg) throws IOException {
+     // @formatter:off
+     jg.writeStartObject();
+         jg.writeFieldName("_geo_distance");
+         jg.writeStartObject();
+             jg.writeFieldName("location");
+             jg.writeStartObject();
+                 jg.writeNumberField("lat", latitude);
+                 jg.writeNumberField("lon", longitude);
+             jg.writeEndObject();
+             jg.writeStringField("unit", "km");
+         jg.writeEndObject();
+     jg.writeEndObject();
+     // @formatter:on
+ }
+
+ /**
+  * Writes an {@code exp} decay function on the {@code location} field into the object
+  * currently open on {@code jg}:
+  * {@code "exp":{"location":{"origin":{"lat":…,"lon":…},"offset":"Nkm","scale":"Nkm","decay":…}}}.
+  * <p>
+  * The distance strings are built by plain concatenation rather than {@code String.format},
+  * so the digits are always ASCII whatever the JVM default locale is.
+  *
+  * @param jg generator positioned inside a function object
+  * @param latitude latitude of the decay origin
+  * @param longitude longitude of the decay origin
+  * @param offsetInKm value written as the {@code offset}, in kilometres
+  * @param scaleInKm value written as the {@code scale}, in kilometres
+  * @param decay value written as the {@code decay}
+  * @throws IOException if the JSON generator fails
+  */
+ private void writeExpDistanceDecayScore(JsonGenerator jg, double latitude, double longitude, int offsetInKm, int scaleInKm, double decay) throws IOException {
+     // @formatter:off
+     jg.writeObjectFieldStart("exp");
+         jg.writeObjectFieldStart("location");
+             jg.writeObjectFieldStart("origin");
+                 jg.writeNumberField("lat", latitude);
+                 jg.writeNumberField("lon", longitude);
+             jg.writeEndObject();
+             jg.writeStringField("offset", offsetInKm + "km");
+             jg.writeStringField("scale", scaleInKm + "km");
+             jg.writeNumberField("decay", decay);
+         jg.writeEndObject();
+     jg.writeEndObject();
+     // @formatter:on
+ }
+
public List<Point> parseJsonResult(String json) throws IOException {
ObjectMapper mapper = new ObjectMapper();
List<Point> points = new ArrayList<>();
JsonNode hitNode = elements.next();
String type = hitNode.get("_type").asText();
final Point point;
- Iterator<JsonNode> sortItems;
switch (type) {
case "InterrestPoint":
point = new InterrestPoint();
}
if (point != null) {
point.setId(hitNode.get("_id").asLong());
- sortItems = hitNode.get("sort").elements();
- point.setDistanceKm(sortItems.next().asDouble());
+ point.setScore(hitNode.get("_score").asDouble());
+ List<JsonNode> sortItems = getListFromIterator(hitNode.get("sort").elements());
+ point.setDistanceKm(sortItems.get(searchMode == SearchMode.BY_PROXIMITY ? 0 : 1).asDouble());
points.add(point);
}
}
return points;
}
+
+ /**
+  * Drains an iterator into a new list, preserving iteration order.
+  *
+  * @param it iterator to consume; it is exhausted when this method returns
+  * @param <T> element type
+  * @return a new mutable list holding the remaining elements of {@code it}
+  */
+ private <T> List<T> getListFromIterator(Iterator<T> it) {
+     final List<T> drained = new ArrayList<>();
+     for (; it.hasNext(); ) {
+         final T element = it.next();
+         drained.add(element);
+     }
+     return drained;
+ }
}
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
+import java.util.List;
public class ESRequest {
private final String url;
+ private static final Logger LOG = LoggerFactory.getLogger(ESRequest.class);
+ private String description;
public ESRequest(String url) {
this.url = url;
+ this.description = "";
}
private HttpClient getClient() {
}
private void printResultStatus(HttpResponse response) {
- System.out.println(" → " + response.getStatusLine());
+ LOG.info(description + " → " + response.getStatusLine());
+ description = "";
}
private void printAction(String description) {
- System.out.print(description);
+ this.description = description;
}
private StringEntity getEntity(String json) {
printResultStatus(response);
}
+ /**
+  * Sends a batch of documents to the {@code _bulk} endpoint in a single POST.
+  * <p>
+  * Each tuple supplies two JSON strings: element 0 is the action/metadata object and
+  * element 1 the document source. They are written one per line, each followed by a newline.
+  * An empty batch is skipped without issuing a request.
+  * NOTE(review): only the HTTP status line is logged; the response body is not inspected,
+  * so failures of individual items would go unnoticed — confirm whether that is acceptable.
+  *
+  * @param jsonTuples metadata/source pairs to index
+  * @throws IOException if the HTTP call fails
+  */
+ public void bulkIndex(List<String[]> jsonTuples) throws IOException {
+     if (jsonTuples.isEmpty()) {
+         // nothing to index: avoid posting an empty bulk body
+         return;
+     }
+     printAction(String.format("Bulk indexing %d documents…", jsonTuples.size()));
+     final StringBuilder payload = new StringBuilder();
+     for (String[] jsonTuple : jsonTuples) {
+         payload.append(jsonTuple[0]).append('\n');
+         payload.append(jsonTuple[1]).append('\n');
+     }
+     final HttpPost post = new HttpPost(String.format("%s/_bulk", url));
+     post.setEntity(getEntity(payload.toString()));
+     final HttpResponse response = getClient().execute(post);
+     printResultStatus(response);
+ }
+
public String find(String jsonRequest, String indexName, String queryString) throws URISyntaxException, IOException {
printAction(String.format("Querying %s with « %s »…", indexName, queryString));
HttpGet get = new HttpGet(String.format("%s/%s/_search", url, indexName));
+ /**
+  * Returns a one-line summary of this point: id, score, name, town and distance in km.
+  * NOTE(review): the score is wrapped in single quotes here — confirm that is intended.
+  */
public String toString() {
final StringBuilder sb = new StringBuilder("InterrestPoint{");
sb.append("id=").append(id);
- sb.append(", funcId=").append(funcId);
+ sb.append(", score='").append(getScore()).append('\'');
sb.append(", name='").append(name).append('\'');
- sb.append(", latitude=").append(latitude);
- sb.append(", longitude=").append(longitude);
- sb.append(", distance(km)=").append(distanceKm);
- sb.append(", extId='").append(extId).append('\'');
- sb.append(", networkId=").append(networkId);
sb.append(", town='").append(town).append('\'');
+ sb.append(", distance(km)=").append(distanceKm);
sb.append('}');
return sb.toString();
}
import org.apache.commons.io.IOUtils;
import org.hibernate.Session;
import org.hibernate.Transaction;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import javax.persistence.EntityManager;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
public class Main {
+ private static final double VISEO_LYON_LAT = 45.760399;
+ private static final double VISEO_LYON_LON = 4.851713;
private final EntityManager em;
private final ESJsonConfig esJsonConfig;
+ private static final Logger LOG = LoggerFactory.getLogger(Main.class);
public static void main(String[] args) {
+ System.setProperty("org.jboss.logging.provider", "slf4j");
try {
final String action;
if (args.length > 0) {
}
new Main().run(action);
} catch (Exception e) {
- e.printStackTrace();
+ LOG.error(e.getMessage(), e);
} finally {
EntityManagerProvider.getInstance().getEmf().close();
}
case "index":
indexDb();
break;
- default:
+ case "query":
queryDb();
+ break;
+ default:
+ help();
}
}
+ /**
+  * Logs the list of accepted command-line actions at INFO level, one line per action.
+  */
+ private void help() {
+     final String[] usageLines = {
+         "Utiliser un des paramètres suivants:",
+         " create",
+         " index",
+         " query",
+     };
+     for (String usageLine : usageLines) {
+         LOG.info(usageLine);
+     }
+ }
+
private void createDb() throws IOException {
String sql = IOUtils.toString(getClass().getResourceAsStream("/insert_data.sql"), StandardCharsets.UTF_8.name());
Session session = ((Session) em.getDelegate()).getSession();
dropIndex(esRequest);
createIndexAndAnalyzer(esRequest);
paramMappings(esRequest, InterrestPoint.class, MobilityPoint.class);
+ int bulkCount = 100;
+ List<Point> points = new ArrayList<>(bulkCount);
for (InterrestPoint point : em.createQuery("select p from InterrestPoint p", InterrestPoint.class).getResultList()) {
- indexPoint(esRequest, point);
+ points.add(point);
+ if (points.size() == bulkCount) {
+ bulkIndexPoints(esRequest, points);
+ points.clear();
+ }
+ }
+ if (points.size() != 0) {
+ bulkIndexPoints(esRequest, points);
+ points.clear();
}
for (MobilityPoint point : em.createQuery("select p from MobilityPoint p", MobilityPoint.class).getResultList()) {
indexPoint(esRequest, point);
esRequest.index(esJsonConfig.getJsonForIndexing(point), ESJsonConfig.INDEX_NAME, point.getClass().getSimpleName(), point.getId());
}
+ /**
+  * Indexes a batch of points with a single bulk request.
+  * <p>
+  * For each point, a metadata line (index name, the point's simple class name as type, its id)
+  * is paired with the point's JSON document; all pairs are then handed to
+  * {@code ESRequest.bulkIndex}.
+  *
+  * @param esRequest client used to send the bulk request
+  * @param points points to index
+  * @throws IOException if JSON generation or the HTTP call fails
+  */
+ private void bulkIndexPoints(ESRequest esRequest, List<Point> points) throws IOException {
+     final List<String[]> bulkLines = new ArrayList<>(points.size());
+     for (Point current : points) {
+         final String typeName = current.getClass().getSimpleName();
+         final String action = esJsonConfig.getJsonMetadataForIndexing(ESJsonConfig.INDEX_NAME, typeName, current.getId());
+         final String document = esJsonConfig.getJsonForIndexing(current);
+         bulkLines.add(new String[]{action, document});
+     }
+     esRequest.bulkIndex(bulkLines);
+ }
+
/**
* @see <a href="https://www.elastic.co/guide/en/elasticsearch/guide/2.x/asciifolding-token-filter.html">ascii folding</a>
*/
+ /**
+  * Interactive console loop: reads a position and search options from standard input, then
+  * runs queries until an empty query is entered, and repeats until the user chooses to quit.
+  *
+  * @throws IOException if building or sending a query fails
+  * @throws URISyntaxException if the search URI cannot be built
+  */
private void queryDb() throws IOException, URISyntaxException {
try (Scanner scanner = new Scanner(System.in, "UTF-8")) {
- System.out.print("Latitude: ");
- final double latitude = scanner.nextDouble();
- System.out.print("Longitude: ");
- final double longitude = scanner.nextDouble();
- scanner.nextLine();
- String queryString = "-";
- while (!queryString.isEmpty()) {
- System.out.print("Query: ");
- queryString = scanner.nextLine();
- List<Point> points = queryES(queryString, latitude, longitude);
- getAndPrintPoints(points);
+ // one iteration = one search session (position + options), followed by any number of queries
+ while (true) {
+ final double latitude = getDouble(scanner, "Latitude", VISEO_LYON_LAT);
+ final double longitude = getDouble(scanner, "Longitude", VISEO_LYON_LON);
+ // the second question is only asked when the first one was answered "no"
+ final boolean scoreByGeo = getBoolean(scanner, "Privilégier la recherche par proximité géographique", false);
+ final boolean useGeo = !scoreByGeo && getBoolean(scanner, "Affiner la recherche par proximité géographique", false);
+ // scoreByGeo -> BY_PROXIMITY, useGeo -> BY_SCORE_AND_PROXIMITY, neither -> BY_SCORE
+ esJsonConfig.setSearchMode(useGeo ? SearchMode.BY_SCORE_AND_PROXIMITY : scoreByGeo ? SearchMode.BY_PROXIMITY : SearchMode.BY_SCORE);
+ final boolean fuzziness = getBoolean(scanner, "Avec fuzziness", false);
+ esJsonConfig.setFuzziness(fuzziness);
+ String queryString;
+ // an empty line ends the current session
+ while (true) {
+ System.out.print("Query : ");
+ queryString = scanner.nextLine();
+ if (queryString.isEmpty()) {
+ break;
+ }
+ List<Point> points = queryES(queryString, latitude, longitude);
+ getAndPrintPoints(points);
+ }
+ // quitting is the default answer; otherwise a separator is printed and a new session starts
+ if (getBoolean(scanner, "Quitter", true)) {
+ break;
+ } else {
+ System.out.println("**********");
+ }
}
}
}
+ /**
+  * Prompts for a yes/no answer on standard output and reads one line from {@code scanner}.
+  * <p>
+  * A blank line selects the default. {@code o}, {@code oui}, {@code y}, {@code yes} and
+  * {@code true} (any case, surrounding whitespace ignored) mean yes; any other answer means no.
+  * The previous implementation replaced every {@code o} with {@code "true"} before parsing,
+  * so "oui" became "trueui" and was read as no.
+  *
+  * @param scanner source of the user's answer
+  * @param label question shown to the user
+  * @param def value returned when the answer is blank; also selects which choice is capitalised in the prompt
+  * @return the user's choice, or {@code def} for a blank answer
+  */
+ private boolean getBoolean(Scanner scanner, String label, boolean def) {
+     System.out.print(String.format("%s [%s] ? ", label, def ? "O/n" : "o/N"));
+     final String answer = scanner.nextLine().trim();
+     if (answer.isEmpty()) {
+         return def;
+     }
+     final String[] affirmatives = {"o", "oui", "y", "yes", "true"};
+     for (String affirmative : affirmatives) {
+         if (affirmative.equalsIgnoreCase(answer)) {
+             return true;
+         }
+     }
+     return false;
+ }
+
+ /**
+  * Prompts for a decimal number on standard output and reads one line from {@code scanner}.
+  * <p>
+  * A blank line selects the default. Both {@code .} and {@code ,} are accepted as decimal
+  * separator: commas are converted to dots before parsing, because
+  * {@code Double.parseDouble} only understands a dot. The previous implementation converted
+  * dots to commas, which made every decimal input fail.
+  *
+  * @param scanner source of the user's answer
+  * @param label name of the value shown to the user
+  * @param def value shown in the prompt and returned when the answer is blank
+  * @return the parsed number, or {@code def} for a blank answer
+  * @throws NumberFormatException if the answer is not a valid number
+  */
+ private double getDouble(Scanner scanner, String label, double def) {
+     System.out.print(String.format("%s [%f] : ", label, def));
+     final String answer = scanner.nextLine().trim().replace(',', '.');
+     if (answer.isEmpty()) {
+         return def;
+     }
+     return Double.parseDouble(answer);
+ }
+
+ /**
+  * Builds the search request for the given terms and position, sends it to the index and
+  * parses the response into points. Request and response bodies are logged at DEBUG level.
+  *
+  * @param queryString full-text search terms
+  * @param latitude latitude of the reference position
+  * @param longitude longitude of the reference position
+  * @return the points parsed from the search response
+  * @throws IOException if building, sending or parsing fails
+  * @throws URISyntaxException if the search URI cannot be built
+  */
private List<Point> queryES(String queryString, double latitude, double longitude) throws IOException, URISyntaxException {
String jsonRequest = esJsonConfig.getJsonQuery(queryString, latitude, longitude);
- ESRequest esRequest = new ESRequest(ESJsonConfig.ES_BASE_URL);
- String jsonResult = esRequest.find(jsonRequest, ESJsonConfig.INDEX_NAME, queryString);
+ LOG.debug(jsonRequest);
+ String jsonResult = new ESRequest(ESJsonConfig.ES_BASE_URL).find(jsonRequest, ESJsonConfig.INDEX_NAME, queryString);
+ LOG.debug(jsonResult);
return esJsonConfig.parseJsonResult(jsonResult);
}
+ /**
+  * Reloads each search hit from the database by id and logs it at INFO level.
+  * <p>
+  * The score and distance come from the search result, not from the database, so they are
+  * saved before the reload and copied onto the loaded entity.
+  *
+  * @param points search hits carrying id, score and distance
+  */
private void getAndPrintPoints(List<Point> points) {
for (Point point : points) {
Class<? extends Point> pointClass = point.getClass();
+ double score = point.getScore();
Double distanceKm = point.getDistanceKm();
+ // the entity name in the query is the hit's simple class name; the id is bound as a parameter
point = em.createQuery(String.format("select p from %s p where p.id = :id", pointClass.getSimpleName()), pointClass).setParameter("id", point.getId()).getSingleResult();
+ point.setScore(score);
point.setDistanceKm(distanceKm);
- System.out.println(point);
+ LOG.info(point.toString());
}
}
}
+ /**
+  * Returns a one-line summary of this point: id, score, name, town and distance in km.
+  */
public String toString() {
final StringBuilder sb = new StringBuilder("MobilityPoint{");
sb.append("id=").append(id);
- sb.append(", funcId=").append(funcId);
+ sb.append(", score=").append(getScore());
sb.append(", name='").append(name).append('\'');
- sb.append(", shortName='").append(shortName).append('\'');
- sb.append(", type=").append(type);
- sb.append(", stopPlaceType=").append(stopPlaceType);
- sb.append(", latitude=").append(latitude);
- sb.append(", longitude=").append(longitude);
- sb.append(", distance(km)=").append(distanceKm);
- sb.append(", networkId=").append(networkId);
sb.append(", town='").append(town).append('\'');
+ sb.append(", distance(km)=").append(distanceKm);
sb.append('}');
return sb.toString();
}
import com.fasterxml.jackson.annotation.JsonProperty;
-public abstract class Point {
+public abstract class Point extends Score {
public abstract Long getId();
public abstract void setId(Long id);
public abstract String getName();
public abstract Double getLongitude();
public abstract Double getDistanceKm();
public abstract void setDistanceKm(Double distanceKm);
+
@JsonProperty
public Location getLocation() {
return new Location(getLatitude(), getLongitude());
--- /dev/null
+package com.viseo.xerox.elastic;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * Holder for the score attached to a search hit.
+ * <p>
+ * Lombok generates the public getter and setter; the field is marked {@code @JsonIgnore}
+ * so Jackson leaves it out of the JSON it reads and writes.
+ */
+@Getter
+@Setter
+public class Score {
+    @JsonIgnore
+    private double score;
+}
--- /dev/null
+package com.viseo.xerox.elastic;
+
+/**
+ * Ranking strategies selectable for a search query.
+ */
+public enum SearchMode {
+    /** Sort by relevance score first; the geo distance is only the secondary sort key. */
+    BY_SCORE,
+    /** Sort by geo distance from the reference position. */
+    BY_PROXIMITY,
+    /** Sort by a relevance score that is multiplied by a distance-decay function. */
+    BY_SCORE_AND_PROXIMITY,
+}
<class>com.viseo.xerox.elastic.MobilityPoint</class>
<properties>
<property name="javax.persistence.jdbc.driver" value="org.h2.Driver" />
- <property name="javax.persistence.jdbc.url" value="jdbc:h2:file:./target/xerox;DB_CLOSE_DELAY=-1" />
+ <property name="javax.persistence.jdbc.url" value="jdbc:h2:file:./db/xerox;DB_CLOSE_DELAY=-1" />
<property name="javax.persistence.jdbc.user" value="sa" />
<property name="javax.persistence.jdbc.password" value="" />
<property name="hibernate.show_sql" value="false" />
--- /dev/null
+org.slf4j.simpleLogger.logFile=System.out
+org.slf4j.simpleLogger.defaultLogLevel=warn
+org.slf4j.simpleLogger.log.org.hibernate.orm.connections.pooling=error
+org.slf4j.simpleLogger.log.com.viseo.xerox.elastic=info
\ No newline at end of file
--- /dev/null
+hopital, clinique
+saint, st
+sainte, ste
+boulevard, bd, bvd
+avenue, av
+route, rte