'Extract ranges from sequential values [closed]
Task 1:
I have my collection with documents in mongodb with value from sequential ranges as follow :
{x:1}
{x:2}
{x:3}
{x:5}
{x:6}
{x:7}
{x:8}
{x:20}
{x:21}
I need to extract a list of sequential ranges in the form(the count is not compulsory , but I need at least the first and last values from the range):
{x:[1,3] , count:3}
{x:[5,8], count:4}
{x:[20,21],count:2}
or
{ min:1 , max:3 , count:3}
{ min:5 , max:8 , count:4}
{ min:20 , max:21 , count:2}
Please, advice suitable solution , collection has ~100M docs , some of the values are in 10 digit ranges others in 15 digit ranges , but they are all sequentially incremental in their range?
Task 2:
Same think like in Task 1 , but taken based on custom sequence step , for example if the sequence step is 3:
{y:1}
{y:3}
{y:5}
{y:20}
{y:22}
need to produce:
{y:[1,5] ,count:3}
{y:[20,22]} , count:2}
Thanks!
P.S. I succeeded partially to get some ranges picture by fetch distribution by number of digits range , but this seems to be very general:
db.collection.aggregate([
{
$addFields: {
range: {
$strLenCP: {
$toString: "$x"
}
}
}
},
{
$group: {
_id: "$range",
minValue: {
$min: "$x"
},
maxValue: {
$max: "$x"
},
Count: {
$sum: 1
}
}
},
{
$addFields: {
x: [
{
$toString: "$minValue"
},
{
$toString: "$maxValue"
}
]
}
},
{
$project: {
range: "$_id",
"_id": 0,
x: 1,
Count: 1
}
},
{
$sort: {
range: 1
}
}
])
Solution 1:[1]
Here is another way of querying - produces result with format [ { min: 1 , max: 3 , count: 3 }, ... ]
:
db.collection.aggregate([
{
$sort: { x: 1 }
},
{
$group: {
_id: null,
docs: { $push: "$x" },
firstVal: { $first: "$x" },
lastVal: { $last: "$x" }
}
},
{
$project: {
_id: 0,
output: {
$let: {
vars: {
result: {
$reduce: {
input: "$docs",
initialValue: {
prev: { $add: [ "$firstVal", -1 ] },
val: { min: "$firstVal", max: 0, count: 0 },
vals: [ ]
},
in: {
$cond: [
{ $eq: [ { $subtract: [ "$$this", "$$value.prev" ] }, 1 ] },
{
prev: "$$this",
val: {
min : "$$value.val.min",
max: "$$value.val.max",
count: { $add: [ "$$value.val.count", 1 ] }
},
vals: "$$value.vals"
},
{
vals: {
$concatArrays: [
"$$value.vals",
[ { min : "$$value.val.min", max: "$$value.prev", count: "$$value.val.count" } ]
]
},
val: { min: "$$this", max: "$lastVal", count: 1 },
prev: "$$this"
},
]
}
}
}
},
in: {
$concatArrays: [ "$$result.vals", [ "$$result.val" ] ]
}
}
}
}
},
])
Solution 2:[2]
Use $setWindowFields
instead of $group
all data
db.collection.aggregate([
{
$setWindowFields: {
partitionBy: "",
sortBy: { x: 1 },
output: {
c: {
$push: "$x",
window: {
range: [ -3, 0 ]
}
}
}
}
},
{
$set: {
"c": {
"$cond": {
"if": { "$gt": [ { "$size": "$c" }, 1 ] },
"then": 0,
"else": 1
}
}
}
},
{
$setWindowFields: {
partitionBy: "",
sortBy: { x: 1 },
output: {
g: {
$sum: "$c",
window: {
documents: [ "unbounded", "current" ]
}
}
}
}
},
{
$group: {
_id: "$g",
count: { $sum: 1 },
max: { "$max": "$x" },
min: { "$min": "$x" }
}
}
])
In PostgreSQL
CREATE TABLE test (
id INT,
x INT
);
INSERT INTO test VALUES (1, 1);
INSERT INTO test VALUES (2, 3);
INSERT INTO test VALUES (3, 5);
INSERT INTO test VALUES (4, 20);
INSERT INTO test VALUES (5, 22);
SELECT
MAX(x) AS max, MIN(x) AS min, COUNT(*) AS count
FROM (
SELECT *, SUM(inc) OVER(ORDER BY x) AS grp
FROM (
SELECT *, CASE WHEN x - LAG(x) OVER(ORDER BY x) < 4 THEN NULL ELSE 1 END AS inc
FROM test
) q
) q
GROUP BY grp
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | prasad_ |
Solution 2 |