Backbrain   Archive

Ceph & BLUESTORE_SLOW_OP_ERROR

https://www.unixe.de/ceph-bluestore-slow-op-error/

HEALTH_WARN: 8 OSD(s) experiencing slow operations in BlueStore
 osd.32 observed slow operation indications in BlueStore
 osd.33 observed slow operation indications in BlueStore
 osd.37 observed slow operation indications in BlueStore
 osd.39 observed slow operation indications in BlueStore
 osd.40 observed slow operation indications in BlueStore
 osd.41 observed slow operation indications in BlueStore
 osd.42 observed slow operation indications in BlueStore
 osd.46 observed slow operation indications in BlueStore
# Set two BlueStore slow-op warning options for the class:hdd config mask.
# NOTE(review): 21600 is presumably a lifetime in seconds (6 h) — confirm against the Ceph docs.
ceph config set class:hdd bluestore_slow_ops_warn_lifetime 21600
# NOTE(review): 5 is presumably the slow-op count that triggers the warning — confirm.
ceph config set class:hdd bluestore_slow_ops_warn_threshold 5

# Scan every OSD for non-zero BlueStore slow-operation perf counters.
# For each id from `ceph osd ls`: look up the host via `ceph osd find`, query
# the OSD's admin socket over SSH on that host, and print the matching counter
# lines only when at least one value starts with a digit 1-9.
OSDS=$(ceph osd ls)
# $OSDS is deliberately unquoted: word splitting yields one OSD id per iteration.
for id in $OSDS; do
  # First "host" value found in the JSON printed by `ceph osd find`.
  host=$(ceph osd find "$id" | awk -F'"' '/"host"/{print $4; exit}')
  # No host found for this OSD: skip it silently (best effort).
  [ -z "$host" ] && continue

  # -n detaches ssh from stdin so it cannot swallow pending input (the rest of
  # a pasted snippet, or the input of a surrounding `while read` loop).
  # BatchMode=yes makes ssh fail instead of prompting for credentials.
  out=$(ssh -n -o BatchMode=yes -o ConnectTimeout=2 "root@${host}" \
    "ceph --admin-daemon /var/run/ceph/ceph-osd.${id}.asok perf dump 2>/dev/null | grep -i -E 'slow_ops_count|slow_read_wait_aio_count|slow_aio_wait_count'")

  # Report the OSD only if some counter line has a value beginning with 1-9.
  if printf '%s\n' "$out" | grep -q -E ':[[:space:]]*[1-9]'; then
    echo
    printf '%s\n' "osd.$id @ $host"
    printf '%s\n' "$out"
  fi
done
# Scan every OSD for non-zero BlueStore slow-operation perf counters
# (aio wait, read-wait aio, committed kv and onode-meta read counters).
# For each id from `ceph osd ls`: look up the host via `ceph osd find`, query
# the OSD's admin socket over SSH on that host, and print the matching counter
# lines only when at least one value starts with a digit 1-9.
OSDS=$(ceph osd ls)
# $OSDS is deliberately unquoted: word splitting yields one OSD id per iteration.
for id in $OSDS; do
  # First "host" value found in the JSON printed by `ceph osd find`.
  host=$(ceph osd find "$id" | awk -F'"' '/"host"/{print $4; exit}')
  # No host found for this OSD: skip it silently (best effort).
  [ -z "$host" ] && continue

  # -n detaches ssh from stdin so it cannot swallow pending input (the rest of
  # a pasted snippet, or the input of a surrounding `while read` loop).
  # BatchMode=yes makes ssh fail instead of prompting for credentials.
  out=$(ssh -n -o BatchMode=yes -o ConnectTimeout=2 "root@${host}" \
    "ceph --admin-daemon /var/run/ceph/ceph-osd.${id}.asok perf dump 2>/dev/null | grep -i -E 'slow_read_wait_aio_count|slow_aio_wait_count|slow_committed_kv_count|slow_read_onode_meta_count'")

  # Report the OSD only if some counter line has a value beginning with 1-9.
  if printf '%s\n' "$out" | grep -q -E ':[[:space:]]*[1-9]'; then
    echo
    printf '%s\n' "osd.$id @ $host"
    printf '%s\n' "$out"
  fi
done