Skip to content

Commit 8d56271

Browse files
Merge pull request #250 from KosukeOkamoto/add-discovery-scripts-for-4.8.0
Add scripts to reindex old ElasticSearch index for Watson Discovery 4.8.0 on CP4D
2 parents 4074402 + 409c660 commit 8d56271

1 file changed

Lines changed: 267 additions & 0 deletions

File tree

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
#!/usr/bin/env bash
#
# Rebuilds indices that were created by Elasticsearch 6 so that they are
# re-created by the running cluster: each such index is reindexed into
# "<index>_new", the original is removed, and the new index is cloned back
# under the original name.
#
# Environment read by this script:
#   ELASTIC_USER, ELASTIC_PASSWORD  credentials passed to curl (-u)
#   ELASTIC_ENDPOINT                base URL of the Elasticsearch REST API
#   TASK_CHECK_BASE_INTERVAL        optional, polling base interval (default 10)
#   MAX_TASK_CHECK_INTERVAL         optional, polling interval cap (default 300)
#
# External commands used: curl, jq.

# Abort on command failure, on use of unset variables, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# Scratch directory for downloaded settings/mappings and request bodies.
# The main logic wipes and re-creates it, then changes into it.
TMP_WORK_DIR="/tmp/elastic-workdir"
#######################################
# Derives the settings for a replacement index from an existing index's
# settings document: copies the original file and deletes the keys
# provided_name, creation_date, uuid and version from
# .<index>.settings.index.
# Arguments:
#   $1 - index name (top-level key of the settings document)
#   $2 - path of the original settings JSON (read only)
#   $3 - path of the new settings JSON (created/overwritten)
# Outputs:
#   Progress messages on stdout.
# Returns:
#   0 on success, non-zero if copying or jq fails.
#######################################
function generate_new_settings() {
  local index="$1"
  local org_settings_json="$2"
  local new_settings_json="$3"
  local -a unnecessary_settings=(provided_name creation_date uuid version)
  local filtered_json
  local key

  echo "Generating new settings"
  \cp "${org_settings_json}" "${new_settings_json}" || return 1

  echo "Removing unnecessary settings"
  # Unique scratch file next to the target instead of a fixed "tmp.json" in
  # whatever the current directory happens to be.
  filtered_json="$(mktemp "${new_settings_json}.XXXXXX")" || return 1
  for key in "${unnecessary_settings[@]}"; do
    # The index name and key are passed as jq variables, not spliced into the
    # jq program, so unusual characters in the name cannot alter the filter.
    if ! jq --arg index "${index}" --arg key "${key}" \
      'del(.[$index].settings.index[$key])' \
      "${new_settings_json}" > "${filtered_json}"; then
      \rm -f "${filtered_json}"
      return 1
    fi
    \cp "${filtered_json}" "${new_settings_json}"
  done
  \rm -f "${filtered_json}"
}
24+
25+
26+
#######################################
# Downloads the mappings of an index into a file.
# Globals:
#   ELASTIC_USER, ELASTIC_PASSWORD, ELASTIC_ENDPOINT (read)
# Arguments:
#   $1 - index name
#   $2 - path of the file that receives the _mappings response
# Outputs:
#   A progress message on stdout.
#######################################
function get_mappings() {
  local -r target_index="$1"
  local -r output_file="$2"
  local -r credentials="${ELASTIC_USER}:${ELASTIC_PASSWORD}"
  local -r mappings_url="${ELASTIC_ENDPOINT}/${target_index}/_mappings"

  printf '%s\n' "Getting mappings"
  curl -sSfk -u "${credentials}" "${mappings_url}" > "${output_file}"
}
33+
34+
35+
#######################################
# Creates a new index whose settings and mappings are taken from previously
# saved JSON documents of an existing index.
# Globals:
#   ELASTIC_USER, ELASTIC_PASSWORD, ELASTIC_ENDPOINT (read)
#   TMP_WORK_DIR (read; the request body is written to index_data.json there)
# Arguments:
#   $1 - name of the existing index (top-level key in both JSON documents)
#   $2 - path of the settings JSON to use
#   $3 - path of the mappings JSON to use
#   $4 - name of the index to create
# Outputs:
#   A progress message and the Elasticsearch response on stdout.
# Returns:
#   0 on success, non-zero if jq or curl fails.
#######################################
function create_new_index() {
  local index="$1"
  local new_settings_json="$2"
  local mappings_json="$3"
  local new_index="$4"
  local settings
  local mappings
  local index_data
  # Written to the same absolute path curl reads from, so the function does
  # not depend on the current directory being TMP_WORK_DIR.
  local index_data_json="${TMP_WORK_DIR}/index_data.json"

  echo "Creating new index ${new_index}"

  # Declared separately from the assignment: "local v=$(cmd)" would report
  # the status of "local" and hide a jq failure.
  settings="$(jq --arg index "${index}" '.[$index].settings' "${new_settings_json}")" || return 1
  mappings="$(jq --arg index "${index}" '.[$index].mappings' "${mappings_json}")" || return 1

  index_data="{\"settings\":${settings}, \"mappings\":${mappings}}"
  printf '%s\n' "${index_data}" > "${index_data_json}"

  curl -sSfk -u "${ELASTIC_USER}:${ELASTIC_PASSWORD}" -H "Content-Type: application/json" -XPUT "${ELASTIC_ENDPOINT}/${new_index}" -d@"${index_data_json}" || return 1
  echo ""
}
53+
54+
55+
#######################################
# Starts an asynchronous _reindex from one index into another and blocks
# until the resulting task reports completion.
# Globals:
#   ELASTIC_USER, ELASTIC_PASSWORD, ELASTIC_ENDPOINT (read)
#   TMP_WORK_DIR (read; the request body is written to reindex_body.json there)
#   TASK_CHECK_BASE_INTERVAL (read, optional; default 10)
#   MAX_TASK_CHECK_INTERVAL (read, optional; default 300)
# Arguments:
#   $1 - source index name
#   $2 - destination index name
# Outputs:
#   Progress messages (and the task response on failure) on stdout.
# Returns:
#   0 once the task completed without timing out and without failures.
#   Exits the script with status 1 if the task cannot be started, has no
#   task ID, timed out, or reported failures.
#######################################
function execute_reindex() {
  local index="$1"
  local new_index="$2"
  local reindex_body_json="${TMP_WORK_DIR}/reindex_body.json"
  local base_interval="${TASK_CHECK_BASE_INTERVAL:-10}"
  local max_interval="${MAX_TASK_CHECK_INTERVAL:-300}"
  local reindex_body
  local reindex_task
  local task_id
  local task_status
  local reindex_result
  local interval
  local count=0

  echo "Executing reindex index to ${new_index}"

  reindex_body='{"source": {"index": "'"${index}"'"}, "dest": {"index": "'"${new_index}"'"}}'
  printf '%s\n' "${reindex_body}" > "${reindex_body_json}"

  # "|| reindex_task=..." keeps errexit from aborting here so the explicit
  # diagnostic below is actually reached when curl fails.
  reindex_task="$(curl -sSfk -u "${ELASTIC_USER}:${ELASTIC_PASSWORD}" -H "Content-Type: application/json" -XPOST "${ELASTIC_ENDPOINT}/_reindex?wait_for_completion=false" -d@"${reindex_body_json}")" || reindex_task=""
  if [[ -z "${reindex_task}" ]]; then
    echo "Failed to launch reindex task"
    exit 1
  fi
  task_id="$(echo "${reindex_task}" | jq -r '.task')" || task_id=""
  if [[ -z "${task_id}" || "${task_id}" == "null" ]]; then
    echo "Failed to get task ID of reindex"
    exit 1
  fi
  echo "Reindex task ID: ${task_id}"

  # Poll until the task is completed. The loop condition must not be
  # "((count++))": with count=0 that expression evaluates to 0, which is a
  # false condition, so the body would never run and the function would
  # report success without waiting for the reindex.
  while true; do
    count=$((count + 1))
    # Ignore failure: a poll that fails is treated as "not completed yet"
    # and retried after the back-off below.
    task_status="$(curl -sSk -u "${ELASTIC_USER}:${ELASTIC_PASSWORD}" "${ELASTIC_ENDPOINT}/_tasks/${task_id}")" || task_status=""
    if [[ "$(echo "${task_status}" | jq -r '.completed')" != "true" ]]; then
      echo "In Progress: $(echo "${task_status}" | jq -r '.task.description')"
      # Linear back-off, capped at max_interval seconds.
      interval=$((base_interval * count))
      sleep $((interval < max_interval ? interval : max_interval))
      continue
    fi
    reindex_result="$(echo "${task_status}" | jq -r '.response')"
    if [[ "$(echo "${reindex_result}" | jq -r '.timed_out')" == "true" ]] \
      || [[ "$(echo "${reindex_result}" | jq -r '.failures')" != "[]" ]]; then
      echo "Failed to reindex: ${new_index}"
      echo "${reindex_result}"
      exit 1
    fi
    break
  done

  echo "Reindexed: ${new_index}"
}
101+
102+
103+
#######################################
# Blocks writes to an index by setting index.blocks.write to true.
# Globals:
#   ELASTIC_USER, ELASTIC_PASSWORD, ELASTIC_ENDPOINT (read)
#   TMP_WORK_DIR (read; the request body is written to
#                 readonly_true_body.json there)
# Arguments:
#   $1 - index name
# Outputs:
#   A progress message and the Elasticsearch response on stdout.
#######################################
function set_index_readonly() {
  local index="$1"
  local readonly_true_body='{"settings": {"index.blocks.write": true}}'
  # Same absolute path for writing and for curl's -d@, so the call works
  # regardless of the current directory. Lower-case file name matches the
  # one used by unset_index_readonly.
  local readonly_true_body_json="${TMP_WORK_DIR}/readonly_true_body.json"

  echo "Setting index ${index} to read-only"

  printf '%s\n' "${readonly_true_body}" > "${readonly_true_body_json}"

  curl -sSfk -u "${ELASTIC_USER}:${ELASTIC_PASSWORD}" -H "Content-Type: application/json" -XPUT "${ELASTIC_ENDPOINT}/${index}/_settings" -d@"${readonly_true_body_json}"
  echo ""
}
115+
116+
117+
#######################################
# Re-enables writes to an index by setting index.blocks.write to false.
# Globals:
#   ELASTIC_USER, ELASTIC_PASSWORD, ELASTIC_ENDPOINT (read)
#   TMP_WORK_DIR (read; the request body is written to
#                 readonly_false_body.json there)
# Arguments:
#   $1 - index name
# Outputs:
#   A progress message and the Elasticsearch response on stdout.
#######################################
function unset_index_readonly() {
  local index="$1"
  local readonly_false_body='{"settings": {"index.blocks.write": false}}'
  # Same absolute path for writing and for curl's -d@, so the call works
  # regardless of the current directory.
  local readonly_false_body_json="${TMP_WORK_DIR}/readonly_false_body.json"

  echo "Unsetting index ${index} to read-only"

  printf '%s\n' "${readonly_false_body}" > "${readonly_false_body_json}"

  curl -sSfk -u "${ELASTIC_USER}:${ELASTIC_PASSWORD}" -H "Content-Type: application/json" -XPUT "${ELASTIC_ENDPOINT}/${index}/_settings" -d@"${readonly_false_body_json}"
  echo ""
}
129+
130+
131+
#######################################
# Clones an index under a new name, keeping the source's replica count.
# The source is write-blocked for the duration of the clone request, and the
# write block is lifted from both indices afterwards.
# Globals:
#   ELASTIC_USER, ELASTIC_PASSWORD, ELASTIC_ENDPOINT (read)
#   TMP_WORK_DIR (read; the request body is written to clone_body.json there)
# Arguments:
#   $1 - source index name
#   $2 - destination index name
# Outputs:
#   Progress messages and Elasticsearch responses on stdout.
# Returns:
#   0 on success. Exits the script with status 1 if the replica count of the
#   source index cannot be determined.
#######################################
function clone_index() {
  local src_index="$1"
  local dst_index="$2"
  local replica_num
  local clone_body
  local clone_body_json="${TMP_WORK_DIR}/clone_body.json"

  # An empty value would produce an invalid JSON body below; fail before the
  # source index is made read-only rather than after.
  replica_num="$(get_replica_num "${src_index}")" || replica_num=""
  if [[ -z "${replica_num}" ]]; then
    echo "Failed to get number of replicas of index: ${src_index}"
    exit 1
  fi

  set_index_readonly "${src_index}"
  echo "Renaming index from ${src_index} to ${dst_index}"

  clone_body='{"settings": {"index.number_of_replicas": '"${replica_num}"'}}'
  printf '%s\n' "${clone_body}" > "${clone_body_json}"

  # NOTE(review): the Elasticsearch clone API is documented to return once
  # the target index has been added to the cluster state, not when the clone
  # has finished — confirm that callers deleting the source right after this
  # function returns are safe.
  curl -sSfk -u "${ELASTIC_USER}:${ELASTIC_PASSWORD}" -H "Content-Type: application/json" -XPOST "${ELASTIC_ENDPOINT}/${src_index}/_clone/${dst_index}" -d@"${clone_body_json}"
  echo ""

  unset_index_readonly "${src_index}"
  unset_index_readonly "${dst_index}"
}
150+
151+
152+
#######################################
# Prints the number_of_replicas setting of an index.
# Globals:
#   ELASTIC_USER, ELASTIC_PASSWORD, ELASTIC_ENDPOINT (read)
# Arguments:
#   $1 - index name
# Outputs:
#   The value of .<index>.settings.index.number_of_replicas on stdout.
# Returns:
#   0 on success, non-zero if the settings cannot be fetched or parsed.
#######################################
function get_replica_num() {
  local index="$1"
  local settings

  # This function is meant to be called as "$(get_replica_num ...)". Bash
  # clears errexit inside command substitutions, so a curl failure has to be
  # propagated explicitly or the caller would silently receive an empty value.
  # All expansions are quoted so credentials containing spaces or glob
  # characters reach curl as a single argument.
  settings="$(curl -sSfk -u "${ELASTIC_USER}:${ELASTIC_PASSWORD}" "${ELASTIC_ENDPOINT}/${index}/_settings")" || return 1

  printf '%s\n' "${settings}" \
    | jq -r --arg index "${index}" '.[$index].settings.index.number_of_replicas'
}
159+
160+
161+
#######################################
# Deletes an index.
# Globals:
#   ELASTIC_USER, ELASTIC_PASSWORD, ELASTIC_ENDPOINT (read)
# Arguments:
#   $1 - index name
# Outputs:
#   A progress message and the Elasticsearch response on stdout.
#######################################
function remove_index() {
  local -r doomed_index="$1"
  local -r credentials="${ELASTIC_USER}:${ELASTIC_PASSWORD}"
  local -r index_url="${ELASTIC_ENDPOINT}/${doomed_index}"

  printf 'Removing index %s\n' "${doomed_index}"
  curl -sSfk -u "${credentials}" -XDELETE "${index_url}"
  # Terminate the response line, which curl prints without a newline.
  printf '\n'
}
168+
169+
#######################################
# Deletes an index if it exists; does nothing if it does not.
# Existence is probed with a HEAD request: 200 means the index exists,
# 404 means it does not, anything else is treated as an error.
# Globals:
#   ELASTIC_USER, ELASTIC_PASSWORD, ELASTIC_ENDPOINT (read)
# Arguments:
#   $1 - index name
# Outputs:
#   Progress messages on stdout.
# Returns:
#   0 if the index was absent or has been removed. Exits the script with
#   status 1 if the existence check yields any status other than 200 or 404.
#######################################
function remove_index_if_exists(){
  local index="$1"
  local http_status

  # -I already issues a HEAD request, so no explicit -XHEAD is needed.
  # "|| true" lets a transport failure (curl then reports status 000) fall
  # through to the explicit diagnostic below instead of aborting silently
  # under errexit.
  http_status="$(curl -sSk -o /dev/null -w '%{http_code}' -I -u "${ELASTIC_USER}:${ELASTIC_PASSWORD}" "${ELASTIC_ENDPOINT}/${index}")" || true
  case "${http_status}" in
    200)
      echo "Remove existing index : ${index}"
      remove_index "${index}"
      ;;
    404)
      # Nothing to remove.
      ;;
    *)
      echo "Failed to check if index exist: ${index}"
      exit 1
      ;;
  esac
}
180+
181+
# Main logic starts here.
#
# When the script exits with a non-zero status (or one of HUP/INT/QUIT/TERM is
# caught right after a failing command), tell the operator not to re-run it.
trap '[ $? -eq 0 ] || echo "Error: Please contact support. Do not run this scripts again."' EXIT HUP INT QUIT TERM

# Start from an empty scratch directory and work inside it; the helper
# functions write their request-body files relative to the current directory.
rm -rf "${TMP_WORK_DIR}"
mkdir -p "${TMP_WORK_DIR}"
cd "${TMP_WORK_DIR}"
186+
187+
# Pre-flight check: refuse to touch anything unless cluster stats can be read
# and the cluster is not "red"; finish early when there are no indices.
echo "Checking status of ElasticSearch"
# "|| cluster_stats=..." keeps errexit from aborting on a curl failure so the
# explicit diagnostic below is printed.
cluster_stats="$(curl -sSfk -u "${ELASTIC_USER}:${ELASTIC_PASSWORD}" "${ELASTIC_ENDPOINT}/_cluster/stats")" || cluster_stats=""
if [[ -z "${cluster_stats}" ]]; then
  echo "Failed to get stats"
  exit 1
fi

# "// empty" makes jq print nothing for a missing or null field; plain
# "jq -r" would print the string "null", which the emptiness checks below
# would accept as a valid value.
cluster_status="$(printf '%s\n' "${cluster_stats}" | jq -r '.status // empty')" || cluster_status=""
if [[ -z "${cluster_status}" || "${cluster_status}" == "red" ]]; then
  echo "Unhealthy cluster status: ${cluster_status}"
  exit 1
fi

index_count="$(printf '%s\n' "${cluster_stats}" | jq -r '.indices.count // empty')" || index_count=""
if [[ -z "${index_count}" ]]; then
  echo "Failed to get index count"
  exit 1
fi

if [[ "${index_count}" == "0" ]]; then
  echo "ElasticSearch has no index"
  echo "Completed!"
  exit 0
fi
210+
211+
# List every index and rebuild the ones created by Elasticsearch 6.
echo "Getting index list"
# "|| indices=..." keeps errexit from aborting on a curl failure so the
# explicit diagnostic below is printed.
indices="$(curl -sSkf -u "${ELASTIC_USER}:${ELASTIC_PASSWORD}" "${ELASTIC_ENDPOINT}/_cat/indices?h=index")" || indices=""

if [[ -z "${indices}" ]]; then
  echo "Failed to get index list"
  exit 1
fi

# Arithmetic expansion strips the padding some wc implementations emit.
total=$(( $(printf '%s\n' "${indices}" | wc -w) ))
echo "Total number of indices: ${total}"

count=1
# ${indices} is intentionally unquoted: the response is split on whitespace
# into one index name per iteration.
for index in ${indices}; do
  org_settings_json="${index}.settings.json"
  new_settings_json="${index}.settings.new.json"
  mappings_json="${index}.mappings.json"

  # Download the settings once; the saved copy serves both the version check
  # and, for indices that need rebuilding, generate_new_settings.
  curl -sSkf -u "${ELASTIC_USER}:${ELASTIC_PASSWORD}" "${ELASTIC_ENDPOINT}/${index}/_settings" > "${org_settings_json}"
  # The jq filter is quoted so the shell never treats ".[]" as a glob.
  version="$(jq -r '.[].settings.index.version.created' "${org_settings_json}")"

  case "${version}" in
    7*)
      echo "[${count} / ${total}] Skip ElasticSearch 7 index: ${index}"
      ;;
    6*)
      echo "[${count} / ${total}] ElasticSearch 6 index found: ${index}"
      echo "----------------------------"
      echo "Updating index - ${index} ..."
      generate_new_settings "${index}" "${org_settings_json}" "${new_settings_json}"

      get_mappings "${index}" "${mappings_json}"

      new_index="${index}_new"

      # Drop a leftover "<index>_new" before creating a fresh one.
      remove_index_if_exists "${new_index}"

      create_new_index "${index}" "${new_settings_json}" "${mappings_json}" "${new_index}"

      execute_reindex "${index}" "${new_index}"

      # Disabled in the original script: clone the source to a temporary
      # index before removing it.
      # TMP_index=${index}_tmp
      # clone_index ${index} ${TMP_index}

      # Replace the original: delete it, clone the rebuilt index back under
      # the original name, then delete the intermediate index.
      remove_index "${index}"

      clone_index "${new_index}" "${index}"

      remove_index "${new_index}"
      echo "----------------------------"
      ;;
    *)
      echo "Failed to get version of index: ${index}"
      exit 1
      ;;
  esac
  # Plain assignment: unlike "((count++))" its exit status never depends on
  # the counter's value, so it cannot trip errexit.
  count=$((count + 1))
done
263+
264+
# All indices processed: discard the scratch directory.
rm -rf "${TMP_WORK_DIR}"

# Restore default handling for EXIT (0), HUP (1), INT (2), QUIT (3) and
# TERM (15) so the "contact support" handler no longer fires.
trap - EXIT HUP INT QUIT TERM
printf '%s\n' "Completed!"

0 commit comments

Comments
 (0)