'Extract ranges from sequential values [closed]

Task 1:

I have my collection with documents in mongodb with value from sequential ranges as follow :

 {x:1}
 {x:2}
 {x:3}
 {x:5}
 {x:6}
 {x:7}
 {x:8}
 {x:20}
 {x:21}

I need to extract a list of sequential ranges in the form(the count is not compulsory , but I need at least the first and last values from the range):

 {x:[1,3] , count:3}
 {x:[5,8], count:4}
 {x:[20,21],count:2}

or

 { min:1 , max:3 , count:3}
 { min:5 , max:8 , count:4}
 { min:20 , max:21 , count:2}

Please, advice suitable solution , collection has ~100M docs , some of the values are in 10 digit ranges others in 15 digit ranges , but they are all sequentially incremental in their range?

Task 2:

Same think like in Task 1 , but taken based on custom sequence step , for example if the sequence step is 3:

{y:1}
{y:3}
{y:5}
{y:20}
{y:22}

need to produce:

{y:[1,5] ,count:3}
{y:[20,22]} , count:2}

Thanks!

P.S. I succeeded partially to get some ranges picture by fetch distribution by number of digits range , but this seems to be very general:

 db.collection.aggregate([
   {
     $addFields: {
       range: {
         $strLenCP: {
           $toString: "$x"
         }
       }
     }
   },
   {
     $group: {
       _id: "$range",
       minValue: {
         $min: "$x"
       },
       maxValue: {
         $max: "$x"
       },
       Count: {
         $sum: 1
       }
     }
   },
   {
     $addFields: {
       x: [
         {
           $toString: "$minValue"
         },
         {
           $toString: "$maxValue"
         }
       ]
     }
   },
   {
     $project: {
       range: "$_id",
       "_id": 0,
       x: 1,
       Count: 1
     }
   },
   {
     $sort: {
       range: 1
     }
   }
 ])

playground



Solution 1:[1]

Here is another way of querying - produces result with format [ { min: 1 , max: 3 , count: 3 }, ... ]:

db.collection.aggregate([
 { 
    $sort: { x: 1 } 
 },
 { 
    $group: { 
        _id: null, 
        docs: { $push: "$x" }, 
        firstVal: { $first: "$x" }, 
        lastVal: { $last: "$x" } 
    } 
  },
  { 
    $project: {
        _id: 0,
        output: {
            $let: {
                vars: {
                    result: {
                        $reduce: {
                            input: "$docs",
                            initialValue: { 
                                 prev: { $add: [ "$firstVal", -1 ] }, 
                                 val: { min: "$firstVal", max: 0, count: 0 }, 
                                 vals: [ ] 
                            },
                            in: {
                                 $cond: [
                                     { $eq: [ { $subtract: [ "$$this", "$$value.prev" ] }, 1 ] },
                                     {
                                        prev: "$$this",
                                        val: { 
                                            min : "$$value.val.min", 
                                            max: "$$value.val.max", 
                                            count: { $add: [ "$$value.val.count", 1 ] } 
                                        },
                                        vals: "$$value.vals"
                                     },
                                     {
                                        vals: {
                                            $concatArrays: [
                                                "$$value.vals",
                                                [ { min : "$$value.val.min", max: "$$value.prev", count: "$$value.val.count" } ] 
                                            ]                                
                                         },
                                        val: { min: "$$this", max: "$lastVal", count: 1 },
                                        prev: "$$this"
                                     },
                                 ]
                            }
                        }
                     }
                 },
                 in: {
                     $concatArrays: [ "$$result.vals", [ "$$result.val" ] ]
                 }
            }
        }
    }
  }, 
])

Solution 2:[2]

Use $setWindowFields instead of $group all data

db.collection.aggregate([
  {
    $setWindowFields: {
      partitionBy: "",
      sortBy: { x: 1 },
      output: {
        c: {
          $push: "$x",
          window: {
            range: [ -3, 0 ]
          }
        }
      }
    }
  },
  {
    $set: {
      "c": {
        "$cond": {
          "if": { "$gt": [ { "$size": "$c" }, 1 ] },
          "then": 0,
          "else": 1
        }
      }
    }
  },
  {
    $setWindowFields: {
      partitionBy: "",
      sortBy: { x: 1 },
      output: {
        g: {
          $sum: "$c",
          window: {
            documents: [ "unbounded", "current" ]
          }
        }
      }
    }
  },
  {
    $group: {
      _id: "$g",
      count: { $sum: 1 },
      max: { "$max": "$x" },
      min: { "$min": "$x" }
    }
  }
])

mongoplayground


In PostgreSQL

CREATE TABLE test (
  id INT,
  x INT
);
INSERT INTO test VALUES (1, 1);
INSERT INTO test VALUES (2, 3);
INSERT INTO test VALUES (3, 5);
INSERT INTO test VALUES (4, 20);
INSERT INTO test VALUES (5, 22);
SELECT 
  MAX(x) AS max, MIN(x) AS min, COUNT(*) AS count
FROM (
  SELECT *, SUM(inc) OVER(ORDER BY x) AS grp
  FROM (
    SELECT *, CASE WHEN x - LAG(x) OVER(ORDER BY x) < 4 THEN NULL ELSE 1 END AS inc
    FROM test
  ) q
) q
GROUP BY grp

db-fiddle

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 prasad_
Solution 2