Git Repositories

Meilleure recherche affinée même sans l'aide de la proximité géographique
[xerox-elastic-poc.git] / src / main / java / com / viseo / xerox / elastic / ESJsonConfig.java
1 package com.viseo.xerox.elastic;
2
3 import com.fasterxml.jackson.core.JsonFactory;
4 import com.fasterxml.jackson.core.JsonGenerator;
5 import com.fasterxml.jackson.databind.JsonNode;
6 import com.fasterxml.jackson.databind.ObjectMapper;
7 import com.fasterxml.jackson.databind.ObjectWriter;
8 import lombok.Getter;
9 import lombok.Setter;
10
11 import java.io.IOException;
12 import java.io.StringWriter;
13 import java.util.*;
14
15 /**
16  * @see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/search-uri-request.html">search-uri-request</a>
17  * @see <a href="https://www.elastic.co/guide/en/elasticsearch/guide/current/mapping-intro.html">mapping-intro</a>
18  * @see <a href="https://www.elastic.co/guide/en/elasticsearch/guide/2.x/unicode-normalization.html">unicode-normalization</a>
19  * @see <a href="https://www.elastic.co/guide/en/elasticsearch/guide/2.x/using-synonyms.html">using-synonyms</a>
20  */
21 public class ESJsonConfig {
22     public static final String ES_BASE_URL = "http://localhost:9200";
23     public static final String INDEX_NAME = "xerox";
24     /**
25      * Relative to Elastic-Search <code>config</code> directory
26      */
27     public static final String SYNONYM_TXT_FILE = "analysis/synonym.txt";
28
29     private JsonFactory factory;
30     private ObjectWriter writer;
31     @Getter
32     @Setter
33     private SearchMode searchMode;
34     @Getter
35     @Setter
36     private boolean fuzziness;
37
38     public ESJsonConfig() {
39         factory = new JsonFactory();
40         writer = new ObjectMapper(factory).writer();
41         searchMode = SearchMode.BY_SCORE;
42         fuzziness = false;
43     }
44
45     public String getJsonIndex() throws IOException {
46         StringWriter stringWriter = new StringWriter();
47         JsonGenerator jg = factory.createGenerator(stringWriter);
48         // @formatter:off
49         jg.writeStartObject();
50             jg.writeObjectFieldStart("settings");
51                 jg.writeObjectFieldStart("index");
52                     jg.writeObjectFieldStart("analysis");
53                         jg.writeObjectFieldStart("analyzer");
54                             jg.writeObjectFieldStart("custom_french_analyzer");
55                                 jg.writeStringField("type","custom");
56                                 jg.writeStringField("tokenizer","standard");
57                                 jg.writeArrayFieldStart("filter");
58                                     jg.writeString("trim");
59                                     jg.writeString("lowercase");
60                                     jg.writeString("french_elision");
61                                     jg.writeString("asciifolding");
62                                     jg.writeString("synonym");
63                                     jg.writeString("french_stop");
64                                     jg.writeString("french_stemmer");
65                                 jg.writeEndArray();
66                             jg.writeEndObject();
67                         jg.writeEndObject();
68                         jg.writeObjectFieldStart("filter");
69                             jg.writeObjectFieldStart("french_elision");
70                                 jg.writeStringField("type","elision");
71                                 jg.writeBooleanField("articles_case", true);
72                                 jg.writeArrayFieldStart("articles");
73                                     for (String article : new String[] {"l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"}) {
74                                         jg.writeString(article);
75                                     }
76                                 jg.writeEndArray();
77                             jg.writeEndObject();
78                             jg.writeObjectFieldStart("french_stop");
79                                 jg.writeStringField("type","stop");
80                                 jg.writeStringField("stopwords","_french_");
81                             jg.writeEndObject();
82                             jg.writeObjectFieldStart("french_stemmer");
83                                 jg.writeStringField("type","stemmer");
84                                 jg.writeStringField("language","light_french");
85                             jg.writeEndObject();
86                             jg.writeObjectFieldStart("synonym");
87                                 jg.writeStringField("type","synonym");
88                                 jg.writeStringField("synonyms_path",SYNONYM_TXT_FILE);
89                                 jg.writeBooleanField("ignore_case",true);
90                             jg.writeEndObject();
91                         jg.writeEndObject();
92                     jg.writeEndObject();
93                 jg.writeEndObject();
94             jg.writeEndObject();
95         jg.writeEndObject();
96         // @formatter:on
97         jg.close();
98         return stringWriter.toString();
99     }
100
101     /**
102      * @see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/2.4/geo-point.html">geo-point</a>
103      * @see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/2.4/multi-fields.html">multi-fields</a>
104      * @see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/2.4/analysis-lang-analyzer.html#french-analyzer">french-analyzer</a>
105      */
106     public String getJsonForMapping() throws IOException {
107         StringWriter stringWriter = new StringWriter();
108         JsonGenerator jg = factory.createGenerator(stringWriter);
109         // @formatter:off
110         jg.writeStartObject();
111             jg.writeObjectFieldStart("properties");
112                 addMapperForGeoLocation(jg, "geo_point");
113                 addMapperForFrenchText(jg, "name");
114                 addMapperForFrenchText(jg, "town");
115             jg.writeEndObject();
116         jg.writeEndObject();
117         // @formatter:on
118         jg.close();
119         return stringWriter.toString();
120     }
121
122     private void addMapperForGeoLocation(JsonGenerator jg, @SuppressWarnings("SameParameterValue") String fieldName) throws IOException {
123         jg.writeObjectFieldStart("location");
124         jg.writeStringField("type", fieldName);
125         jg.writeBooleanField("lat_lon", true);
126         jg.writeEndObject();
127     }
128
129     private void addMapperForFrenchText(JsonGenerator jg, String fieldName) throws IOException {
130         // @formatter:off
131         jg.writeObjectFieldStart(fieldName);
132             jg.writeStringField("type", "string");
133             jg.writeObjectFieldStart("fields");
134                 jg.writeObjectFieldStart("french");
135                     jg.writeStringField("type", "string");
136                     jg.writeStringField("analyzer", "custom_french_analyzer");
137                 jg.writeEndObject();
138             jg.writeEndObject();
139         jg.writeEndObject();
140         // @formatter:on
141     }
142
143     public String getJsonMetadataForIndexing(String indexName, String typeName, Long id) throws IOException {
144         StringWriter stringWriter = new StringWriter();
145         JsonGenerator jg = factory.createGenerator(stringWriter);
146         // @formatter:off
147         jg.writeStartObject();
148             jg.writeObjectFieldStart("index");
149                 jg.writeStringField("_index", indexName);
150                 jg.writeStringField("_type", typeName);
151                 jg.writeStringField("_id", id.toString());
152             jg.writeEndObject();
153         jg.writeEndObject();
154         // @formatter:on
155         jg.close();
156         return stringWriter.toString();
157     }
158
159     public String getJsonForIndexing(final Point point) throws IOException {
160         StringWriter stringWriter = new StringWriter();
161         writer.writeValue(stringWriter, point);
162         return stringWriter.toString();
163     }
164
165     public String getJsonQuery(String queryString, double latitude, double longitude) throws IOException {
166         StringWriter stringWriter = new StringWriter();
167         JsonGenerator jg = factory.createGenerator(stringWriter);
168         jg.writeStartObject();
169         switch (searchMode) {
170             case BY_SCORE:
171                 writeByScore(jg, queryString, latitude, longitude);
172                 break;
173             case BY_PROXIMITY:
174                 writeByProximity(jg, queryString, latitude, longitude);
175                 break;
176             case BY_SCORE_AND_PROXIMITY:
177                 writeByScoreAndProximity(jg, queryString, latitude, longitude);
178                 break;
179         }
180         jg.writeEndObject();
181         jg.close();
182         return stringWriter.toString();
183     }
184
185     private void writeByScore(JsonGenerator jg, String queryString, double latitude, double longitude) throws IOException {
186         // @formatter:off
187         jg.writeObjectFieldStart("query");
188             writeDisMaxQuery(jg, queryString);
189         jg.writeEndObject();
190         jg.writeArrayFieldStart("sort");
191             jg.writeString("_score");
192             // this will not sort by geo distance but will compute it nevertheless
193             writeJsonGeoSorting(latitude, longitude, jg);
194         jg.writeEndArray();
195         jg.writeBooleanField("track_scores", true);
196         // @formatter:on
197     }
198
199     private void writeByProximity(JsonGenerator jg, String queryString, double latitude, double longitude) throws IOException {
200         // @formatter:off
201         jg.writeObjectFieldStart("query");
202             writeDisMaxQuery(jg, queryString);
203         jg.writeEndObject();
204         jg.writeArrayFieldStart("sort");
205             writeJsonGeoSorting(latitude, longitude, jg);
206         jg.writeEndArray();
207         jg.writeBooleanField("track_scores", true);
208         // @formatter:on
209     }
210
211     private void writeByScoreAndProximity(JsonGenerator jg, String queryString, double latitude, double longitude) throws IOException {
212         // @formatter:off
213         jg.writeObjectFieldStart("query");
214             jg.writeObjectFieldStart("function_score");
215                 jg.writeObjectFieldStart("query");
216                     writeDisMaxQuery(jg, queryString);
217                 jg.writeEndObject();
218                 jg.writeArrayFieldStart("functions");
219                     jg.writeStartObject();
220                         writeExpDistanceDecayScore(jg, latitude, longitude, 10, 50, 0.33 );
221                     jg.writeEndObject();
222                 jg.writeEndArray();
223                 jg.writeStringField("boost_mode", "multiply");
224             jg.writeEndObject();
225         jg.writeEndObject();
226         jg.writeArrayFieldStart("sort");
227             jg.writeString("_score");
228             // this will not sort by geo distance but will output it
229             writeJsonGeoSorting(latitude, longitude, jg);
230         jg.writeEndArray();
231         jg.writeBooleanField("track_scores", true);
232         // @formatter:on
233     }
234
235     private void writeDisMaxQuery(JsonGenerator jg, String queryString) throws IOException {
236         // @formatter:off
237         jg.writeObjectFieldStart("dis_max");
238             jg.writeArrayFieldStart("queries");
239                 writeMultiMatch(jg, queryString, "most_fields", false, "name^10", "town^10");
240                 writeMultiMatch(jg, queryString, "most_fields", fuzziness, "name.french", "town.french");
241                 writeMultiMatch(jg, queryString, "cross_fields", false, "name.french", "town.french");
242             jg.writeEndArray();
243         jg.writeEndObject();
244         // @formatter:on
245     }
246
247     private void writeMultiMatch(JsonGenerator jg, String queryString, String type, boolean fuzziness, String... fields) throws IOException {
248         // @formatter:off
249         jg.writeStartObject();
250             jg.writeObjectFieldStart("multi_match");
251                 writeQueryOptions(jg, queryString, type,fuzziness);
252                 writeSearchFields(jg, fields);
253             jg.writeEndObject();
254         jg.writeEndObject();
255         // @formatter:on
256     }
257
258     private void writeQueryOptions(JsonGenerator jg, String queryString, String type, boolean fuzziness) throws IOException {
259         jg.writeStringField("query", queryString);
260         jg.writeStringField("type", type);
261         jg.writeStringField("operator", "and");
262         jg.writeStringField("zero_terms_query", "all");
263         if (fuzziness) {
264             jg.writeStringField("fuzziness", "AUTO");
265         }
266     }
267
268     private void writeSearchFields(JsonGenerator jg, String... fields) throws IOException {
269         jg.writeArrayFieldStart("fields");
270         for (String field : fields) {
271             jg.writeString(field);
272         }
273         jg.writeEndArray();
274     }
275
276     private void writeJsonGeoSorting(Double latitude, Double longitude, JsonGenerator jg) throws IOException {
277         // @formatter:off
278         jg.writeStartObject();
279             jg.writeObjectFieldStart("_geo_distance");
280                 jg.writeObjectFieldStart("location");
281                     jg.writeNumberField("lat", latitude);
282                     jg.writeNumberField("lon", longitude);
283                 jg.writeEndObject();
284                 jg.writeStringField("unit", "km");
285             jg.writeEndObject();
286         jg.writeEndObject();
287         // @formatter:on
288     }
289
290     @SuppressWarnings("SameParameterValue")
291     private void writeExpDistanceDecayScore(JsonGenerator jg, double latitude, double longitude, int offsetInKm, int scaleInKm, double decay) throws IOException {
292         // @formatter:off
293         jg.writeObjectFieldStart("exp");
294             jg.writeObjectFieldStart("location");
295                 jg.writeObjectFieldStart("origin");
296                     jg.writeNumberField("lat", latitude);
297                     jg.writeNumberField("lon", longitude);
298                 jg.writeEndObject();
299                 jg.writeStringField("offset", String.format("%dkm", offsetInKm));
300                 jg.writeStringField("scale", String.format("%dkm", scaleInKm));
301                 jg.writeNumberField("decay", decay);
302             jg.writeEndObject();
303         jg.writeEndObject();
304         // @formatter:on
305     }
306
307     public List<Point> parseJsonResult(String json) throws IOException {
308         ObjectMapper mapper = new ObjectMapper();
309         List<Point> points = new ArrayList<>();
310         JsonNode rootNode = mapper.readValue(json, JsonNode.class);
311         JsonNode hitsNode = rootNode.get("hits");
312         JsonNode hits = hitsNode.get("hits");
313         Iterator<JsonNode> elements = hits.elements();
314         while (elements.hasNext()) {
315             JsonNode hitNode = elements.next();
316             String type = hitNode.get("_type").asText();
317             final Point point;
318             switch (type) {
319                 case "InterrestPoint":
320                     point = new InterrestPoint();
321                     break;
322                 case "MobilityPoint":
323                     point = new MobilityPoint();
324                     break;
325                 default:
326                     point = null;
327             }
328             if (point != null) {
329                 point.setId(hitNode.get("_id").asLong());
330                 point.setScore(hitNode.get("_score").asDouble());
331                 List<JsonNode> sortItems = getListFromIterator(hitNode.get("sort").elements());
332                 point.setDistanceKm(sortItems.get(searchMode == SearchMode.BY_PROXIMITY ? 0 : 1).asDouble());
333                 points.add(point);
334             }
335         }
336         return points;
337     }
338
339     private <T> List<T> getListFromIterator(Iterator<T> it) {
340         List<T> ret = new ArrayList<>();
341         while (it.hasNext()) {
342             ret.add(it.next());
343         }
344         return ret;
345     }
346 }