Git Repositories

2017be91019742cb59a93d3f4cbe401c12c1759c
[xerox-elastic-poc.git] / src / main / java / com / viseo / xerox / elastic / ESJsonConfig.java
1 package com.viseo.xerox.elastic;
2
3 import com.fasterxml.jackson.core.JsonFactory;
4 import com.fasterxml.jackson.core.JsonGenerator;
5 import com.fasterxml.jackson.databind.JsonNode;
6 import com.fasterxml.jackson.databind.ObjectMapper;
7 import com.fasterxml.jackson.databind.ObjectWriter;
8 import lombok.Getter;
9 import lombok.Setter;
10
11 import java.io.IOException;
12 import java.io.StringWriter;
13 import java.util.*;
14
15 /**
16  * @see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/search-uri-request.html">search-uri-request</a>
17  * @see <a href="https://www.elastic.co/guide/en/elasticsearch/guide/current/mapping-intro.html">mapping-intro</a>
18  * @see <a href="https://www.elastic.co/guide/en/elasticsearch/guide/2.x/unicode-normalization.html">unicode-normalization</a>
19  * @see <a href="https://www.elastic.co/guide/en/elasticsearch/guide/2.x/using-synonyms.html">using-synonyms</a>
20  */
21 public class ESJsonConfig {
22     public static final String ES_BASE_URL = "http://localhost:9200";
23     public static final String INDEX_NAME = "xerox";
24     /**
25      * Relative to Elastic-Search <code>config</code> directory
26      */
27     public static final String SYNONYM_TXT_FILE = "analysis/synonym.txt";
28
29     private JsonFactory factory;
30     private ObjectWriter writer;
31     @Getter
32     @Setter
33     private SearchMode searchMode;
34     @Getter
35     @Setter
36     private boolean fuzziness;
37
38     public ESJsonConfig() {
39         factory = new JsonFactory();
40         writer = new ObjectMapper(factory).writer();
41         searchMode = SearchMode.BY_SCORE;
42         fuzziness = false;
43     }
44
45     public String getJsonIndex() throws IOException {
46         StringWriter stringWriter = new StringWriter();
47         JsonGenerator jg = factory.createGenerator(stringWriter);
48         // @formatter:off
49         jg.writeStartObject();
50             jg.writeObjectFieldStart("settings");
51                 jg.writeObjectFieldStart("index");
52                     jg.writeObjectFieldStart("analysis");
53                         jg.writeObjectFieldStart("analyzer");
54                             jg.writeObjectFieldStart("custom_french_analyzer");
55                                 jg.writeStringField("type","custom");
56                                 jg.writeStringField("tokenizer","standard");
57                                 jg.writeArrayFieldStart("filter");
58                                     jg.writeString("trim");
59                                     jg.writeString("lowercase");
60                                     jg.writeString("french_elision");
61                                     jg.writeString("asciifolding");
62                                     jg.writeString("synonym");
63                                     jg.writeString("french_stop");
64                                     jg.writeString("french_stemmer");
65                                 jg.writeEndArray();
66                             jg.writeEndObject();
67                         jg.writeEndObject();
68                         jg.writeObjectFieldStart("filter");
69                             jg.writeObjectFieldStart("french_elision");
70                                 jg.writeStringField("type","elision");
71                                 jg.writeBooleanField("articles_case", true);
72                                 jg.writeArrayFieldStart("articles");
73                                     for (String article : new String[] {"l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"}) {
74                                         jg.writeString(article);
75                                     }
76                                 jg.writeEndArray();
77                             jg.writeEndObject();
78                             jg.writeObjectFieldStart("french_stop");
79                                 jg.writeStringField("type","stop");
80                                 jg.writeStringField("stopwords","_french_");
81                             jg.writeEndObject();
82                             jg.writeObjectFieldStart("french_stemmer");
83                                 jg.writeStringField("type","stemmer");
84                                 jg.writeStringField("language","light_french");
85                             jg.writeEndObject();
86                             jg.writeObjectFieldStart("synonym");
87                                 jg.writeStringField("type","synonym");
88                                 jg.writeStringField("synonyms_path",SYNONYM_TXT_FILE);
89                                 jg.writeBooleanField("ignore_case",true);
90                             jg.writeEndObject();
91                         jg.writeEndObject();
92                     jg.writeEndObject();
93                 jg.writeEndObject();
94             jg.writeEndObject();
95         jg.writeEndObject();
96         // @formatter:on
97         jg.close();
98         return stringWriter.toString();
99     }
100
101     /**
102      * @see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/2.4/geo-point.html">geo-point</a>
103      * @see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/2.4/multi-fields.html">multi-fields</a>
104      * @see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/2.4/analysis-lang-analyzer.html#french-analyzer">french-analyzer</a>
105      */
106     public String getJsonForMapping() throws IOException {
107         StringWriter stringWriter = new StringWriter();
108         JsonGenerator jg = factory.createGenerator(stringWriter);
109         // @formatter:off
110         jg.writeStartObject();
111             jg.writeObjectFieldStart("properties");
112                 addMapperForGeoLocation(jg, "geo_point");
113                 addMapperForFrenchText(jg, "name");
114                 addMapperForFrenchText(jg, "town");
115             jg.writeEndObject();
116         jg.writeEndObject();
117         // @formatter:on
118         jg.close();
119         return stringWriter.toString();
120     }
121
122     private void addMapperForGeoLocation(JsonGenerator jg, @SuppressWarnings("SameParameterValue") String fieldName) throws IOException {
123         jg.writeObjectFieldStart("location");
124         jg.writeStringField("type", fieldName);
125         jg.writeBooleanField("lat_lon", true);
126         jg.writeEndObject();
127     }
128
129     private void addMapperForFrenchText(JsonGenerator jg, String fieldName) throws IOException {
130         // @formatter:off
131         jg.writeObjectFieldStart(fieldName);
132             jg.writeStringField("type", "string");
133             jg.writeObjectFieldStart("fields");
134                 jg.writeObjectFieldStart("french");
135                     jg.writeStringField("type", "string");
136                     jg.writeStringField("analyzer", "custom_french_analyzer");
137                 jg.writeEndObject();
138             jg.writeEndObject();
139         jg.writeEndObject();
140         // @formatter:on
141     }
142
143     public String getJsonMetadataForIndexing(String indexName, String typeName, Long id) throws IOException {
144         StringWriter stringWriter = new StringWriter();
145         JsonGenerator jg = factory.createGenerator(stringWriter);
146         // @formatter:off
147         jg.writeStartObject();
148             jg.writeObjectFieldStart("index");
149                 jg.writeStringField("_index", indexName);
150                 jg.writeStringField("_type", typeName);
151                 jg.writeStringField("_id", id.toString());
152             jg.writeEndObject();
153         jg.writeEndObject();
154         // @formatter:on
155         jg.close();
156         return stringWriter.toString();
157     }
158
159     public String getJsonForIndexing(final Point point) throws IOException {
160         StringWriter stringWriter = new StringWriter();
161         writer.writeValue(stringWriter, point);
162         return stringWriter.toString();
163     }
164
165     public String getJsonQuery(String queryString, double latitude, double longitude) throws IOException {
166         StringWriter stringWriter = new StringWriter();
167         JsonGenerator jg = factory.createGenerator(stringWriter);
168         switch (searchMode) {
169             case BY_SCORE:
170                 // @formatter:off
171                 jg.writeStartObject();
172                     jg.writeObjectFieldStart("query");
173                         jg.writeObjectFieldStart("multi_match");
174                             writeQueryOptions(jg, queryString);
175                             writeSearchFields(jg);
176                         jg.writeEndObject();
177                     jg.writeEndObject();
178                     jg.writeArrayFieldStart("sort");
179                         jg.writeString("_score");
180                         // this will not sort by geo distance but will compute it nevertheless
181                         writeJsonGeoSorting(latitude, longitude, jg);
182                     jg.writeEndArray();
183                     jg.writeBooleanField("track_scores", true);
184                 jg.writeEndObject();
185                 // @formatter:on
186                 break;
187             case BY_PROXIMITY:
188                 // @formatter:off
189                 jg.writeStartObject();
190                     jg.writeObjectFieldStart("query");
191                         jg.writeObjectFieldStart("multi_match");
192                             writeQueryOptions(jg, queryString);
193                             writeSearchFields(jg);
194                         jg.writeEndObject();
195                     jg.writeEndObject();
196                     jg.writeArrayFieldStart("sort");
197                         writeJsonGeoSorting(latitude, longitude, jg);
198                     jg.writeEndArray();
199                     jg.writeBooleanField("track_scores", true);
200                 jg.writeEndObject();
201                 // @formatter:on
202                 break;
203             case BY_SCORE_AND_PROXIMITY:
204                 // @formatter:off
205                 jg.writeStartObject();
206                     jg.writeObjectFieldStart("query");
207                         jg.writeObjectFieldStart("function_score");
208                             jg.writeObjectFieldStart("query");
209                                 jg.writeObjectFieldStart("multi_match");
210                                     writeQueryOptions(jg, queryString);
211                                     writeSearchFields(jg);
212                                 jg.writeEndObject();
213                             jg.writeEndObject();
214                             jg.writeArrayFieldStart("functions");
215                                 jg.writeStartObject();
216                                     writeExpDistanceDecayScore(jg, latitude, longitude, 10, 50, 0.33 );
217                                 jg.writeEndObject();
218                             jg.writeEndArray();
219                             jg.writeStringField("boost_mode", "multiply");
220                         jg.writeEndObject();
221                     jg.writeEndObject();
222                     jg.writeArrayFieldStart("sort");
223                         jg.writeString("_score");
224                         // this will not sort by geo distance but will output it
225                         writeJsonGeoSorting(latitude, longitude, jg);
226                     jg.writeEndArray();
227                     jg.writeBooleanField("track_scores", true);
228                 jg.writeEndObject();
229                 // @formatter:on
230                 break;
231         }
232         jg.close();
233         return stringWriter.toString();
234     }
235
236     private void writeQueryOptions(JsonGenerator jg, String queryString) throws IOException {
237         jg.writeStringField("query", queryString);
238         jg.writeStringField("type", fuzziness ? "best_fields" : "cross_fields");
239         jg.writeStringField("operator", "and");
240         jg.writeStringField("zero_terms_query", "all");
241         if (fuzziness) {
242             jg.writeStringField("fuzziness", "AUTO");
243         }
244     }
245
246     private void writeSearchFields(JsonGenerator jg) throws IOException {
247         jg.writeArrayFieldStart("fields");
248         jg.writeString("name^3");
249         jg.writeString("name.french^3");
250         jg.writeString("town");
251         jg.writeString("town.french");
252         jg.writeEndArray();
253     }
254
255     private void writeJsonGeoSorting(Double latitude, Double longitude, JsonGenerator jg) throws IOException {
256         // @formatter:off
257         jg.writeStartObject();
258             jg.writeObjectFieldStart("_geo_distance");
259                 jg.writeObjectFieldStart("location");
260                     jg.writeNumberField("lat", latitude);
261                     jg.writeNumberField("lon", longitude);
262                 jg.writeEndObject();
263                 jg.writeStringField("unit", "km");
264             jg.writeEndObject();
265         jg.writeEndObject();
266         // @formatter:on
267     }
268
269     private void writeExpDistanceDecayScore(JsonGenerator jg, double latitude, double longitude, int offsetInKm, int scaleInKm, double decay) throws IOException {
270         // @formatter:off
271         jg.writeObjectFieldStart("exp");
272             jg.writeObjectFieldStart("location");
273                 jg.writeObjectFieldStart("origin");
274                     jg.writeNumberField("lat", latitude);
275                     jg.writeNumberField("lon", longitude);
276                 jg.writeEndObject();
277                 jg.writeStringField("offset", String.format("%dkm", offsetInKm));
278                 jg.writeStringField("scale", String.format("%dkm", scaleInKm));
279                 jg.writeNumberField("decay", decay);
280             jg.writeEndObject();
281         jg.writeEndObject();
282         // @formatter:on
283     }
284
285     public List<Point> parseJsonResult(String json) throws IOException {
286         ObjectMapper mapper = new ObjectMapper();
287         List<Point> points = new ArrayList<>();
288         JsonNode rootNode = mapper.readValue(json, JsonNode.class);
289         JsonNode hitsNode = rootNode.get("hits");
290         JsonNode hits = hitsNode.get("hits");
291         Iterator<JsonNode> elements = hits.elements();
292         while (elements.hasNext()) {
293             JsonNode hitNode = elements.next();
294             String type = hitNode.get("_type").asText();
295             final Point point;
296             switch (type) {
297                 case "InterrestPoint":
298                     point = new InterrestPoint();
299                     break;
300                 case "MobilityPoint":
301                     point = new MobilityPoint();
302                     break;
303                 default:
304                     point = null;
305             }
306             if (point != null) {
307                 point.setId(hitNode.get("_id").asLong());
308                 point.setScore(hitNode.get("_score").asDouble());
309                 List<JsonNode> sortItems = getListFromIterator(hitNode.get("sort").elements());
310                 point.setDistanceKm(sortItems.get(searchMode == SearchMode.BY_PROXIMITY ? 0 : 1).asDouble());
311                 points.add(point);
312             }
313         }
314         return points;
315     }
316
317     private <T> List<T> getListFromIterator(Iterator<T> it) {
318         List<T> ret = new ArrayList<>();
319         while (it.hasNext()) {
320             ret.add(it.next());
321         }
322         return ret;
323     }
324 }