Sum aggregation

edit

A single-value metrics aggregation that sums up numeric values that are extracted from the aggregated documents. These values can be extracted from either specific numeric fields or histogram fields.

Assuming the data consists of documents representing sales records, we can sum the sale price of all hats with:

response = client.search(
  index: 'sales',
  size: 0,
  body: {
    query: {
      constant_score: {
        filter: {
          match: {
            type: 'hat'
          }
        }
      }
    },
    aggregations: {
      hat_prices: {
        sum: {
          field: 'price'
        }
      }
    }
  }
)
puts response
POST /sales/_search?size=0
{
  "query": {
    "constant_score": {
      "filter": {
        "match": { "type": "hat" }
      }
    }
  },
  "aggs": {
    "hat_prices": { "sum": { "field": "price" } }
  }
}

Resulting in:

{
  ...
  "aggregations": {
    "hat_prices": {
      "value": 450.0
    }
  }
}

The name of the aggregation (hat_prices above) also serves as the key by which the aggregation result can be retrieved from the returned response.

Script

edit

If you need to get the sum for something more complex than a single field, run the aggregation on a runtime field.

response = client.search(
  index: 'sales',
  size: 0,
  body: {
    runtime_mappings: {
      "price.weighted": {
        type: 'double',
        script: "\n        double price = doc['price'].value;\n        if (doc['promoted'].value) {\n          price *= 0.8;\n        }\n        emit(price);\n      "
      }
    },
    query: {
      constant_score: {
        filter: {
          match: {
            type: 'hat'
          }
        }
      }
    },
    aggregations: {
      hat_prices: {
        sum: {
          field: 'price.weighted'
        }
      }
    }
  }
)
puts response
POST /sales/_search?size=0
{
  "runtime_mappings": {
    "price.weighted": {
      "type": "double",
      "script": """
        double price = doc['price'].value;
        if (doc['promoted'].value) {
          price *= 0.8;
        }
        emit(price);
      """
    }
  },
  "query": {
    "constant_score": {
      "filter": {
        "match": { "type": "hat" }
      }
    }
  },
  "aggs": {
    "hat_prices": {
      "sum": {
        "field": "price.weighted"
      }
    }
  }
}

Missing value

edit

The missing parameter defines how documents that are missing a value should be treated. By default, documents missing the value are ignored, but it is also possible to treat them as if they had a value. For example, this treats all hat sales without a price as having a price of 100.

response = client.search(
  index: 'sales',
  size: 0,
  body: {
    query: {
      constant_score: {
        filter: {
          match: {
            type: 'hat'
          }
        }
      }
    },
    aggregations: {
      hat_prices: {
        sum: {
          field: 'price',
          missing: 100
        }
      }
    }
  }
)
puts response
POST /sales/_search?size=0
{
  "query": {
    "constant_score": {
      "filter": {
        "match": { "type": "hat" }
      }
    }
  },
  "aggs": {
    "hat_prices": {
      "sum": {
        "field": "price",
        "missing": 100 
      }
    }
  }
}

Histogram fields

edit

When sum is computed on histogram fields, the result of the aggregation is the sum of the products obtained by multiplying each element in the values array by the number in the same position in the counts array.

For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks:

response = client.indices.create(
  index: 'metrics_index',
  body: {
    mappings: {
      properties: {
        latency_histo: {
          type: 'histogram'
        }
      }
    }
  }
)
puts response

response = client.index(
  index: 'metrics_index',
  id: 1,
  refresh: true,
  body: {
    "network.name": 'net-1',
    latency_histo: {
      values: [
        0.1,
        0.2,
        0.3,
        0.4,
        0.5
      ],
      counts: [
        3,
        7,
        23,
        12,
        6
      ]
    }
  }
)
puts response

response = client.index(
  index: 'metrics_index',
  id: 2,
  refresh: true,
  body: {
    "network.name": 'net-2',
    latency_histo: {
      values: [
        0.1,
        0.2,
        0.3,
        0.4,
        0.5
      ],
      counts: [
        8,
        17,
        8,
        7,
        6
      ]
    }
  }
)
puts response

response = client.search(
  index: 'metrics_index',
  size: 0,
  filter_path: 'aggregations',
  body: {
    aggregations: {
      total_latency: {
        sum: {
          field: 'latency_histo'
        }
      }
    }
  }
)
puts response
PUT metrics_index
{
  "mappings": {
    "properties": {
      "latency_histo": { "type": "histogram" }
    }
  }
}

PUT metrics_index/_doc/1?refresh
{
  "network.name" : "net-1",
  "latency_histo" : {
      "values" : [0.1, 0.2, 0.3, 0.4, 0.5],
      "counts" : [3, 7, 23, 12, 6]
   }
}

PUT metrics_index/_doc/2?refresh
{
  "network.name" : "net-2",
  "latency_histo" : {
      "values" :  [0.1, 0.2, 0.3, 0.4, 0.5],
      "counts" : [8, 17, 8, 7, 6]
   }
}

POST /metrics_index/_search?size=0&filter_path=aggregations
{
  "aggs" : {
    "total_latency" : { "sum" : { "field" : "latency_histo" } }
  }
}

For each histogram field, the sum aggregation will add each number in the values array, multiplied by its associated count in the counts array.

Finally, it adds up these weighted values across all histograms (16.4 for net-1 plus 12.4 for net-2) and returns the following result:

{
  "aggregations": {
    "total_latency": {
      "value": 28.8
    }
  }
}