MAPREDUCE basics: Difference between revisions
From NoSQLZoo
No edit summary |
|||
| (6 intermediate revisions by 2 users not shown) | |||
| Line 11: | Line 11: | ||
#PRETTY | #PRETTY | ||
import pprint | import pprint | ||
pp = pprint.PrettyPrinter(indent=4) | pp = pprint.PrettyPrinter(indent=4, width=160) | ||
</pre> | </pre> | ||
| Line 18: | Line 18: | ||
[[MapReduce]] examples are available. | [[MapReduce]] examples are available. | ||
<div class='extra_space' style='width:1em; height:6em;'></div> | <div class='extra_space' style='width:1em; height:6em;'></div> | ||
<div class=q data-lang="py3"> | <div class=q data-lang="py3"> | ||
| Line 44: | Line 45: | ||
<div class=q data-lang="py3"> | <div class=q data-lang="py3"> | ||
<p class=strong> | <p class=strong>Use the previous answer to find the population of the world to the nearest million</p> | ||
<div class=hint title="How to round to the nearest million">Use the JavaScript round function : Math.round(population/1000000)*1000000 </div> | |||
<pre class=def> | |||
</pre> | |||
<div class=ans> | |||
from bson.code import Code | |||
temp = db.world.map_reduce( | |||
map=Code("function(){emit('World Population in Millions', this.population)}"), | |||
reduce=Code("""function(key, values){ | |||
return Math.round(Array.sum(values)/1000000)*1000000; | |||
}"""), | |||
out={"inline":1}) | |||
pp.pprint(temp["results"]) | |||
</div> | |||
</div> | |||
<div class=q data-lang="py3"> | |||
<p class=strong>Count number of countries by first letter</p> | |||
<pre class=def> | <pre class=def> | ||
</pre> | |||
<div class=ans> | |||
from bson.code import Code | from bson.code import Code | ||
temp = db.world.map_reduce( | temp = db.world.map_reduce( | ||
map=Code("function(){emit(this. | map=Code("""function(){ emit((this.name).substring(0,1), 1)}"""), | ||
reduce=Code("""function(key, values){ | reduce=Code("""function(key, values){ | ||
return Array.sum(values); | |||
}"""), | }"""), | ||
out={"inline":1}) | out={"inline":1}) | ||
pp.pprint(temp["results"]) | pp.pprint(temp["results"]) | ||
</div> | |||
</div> | |||
<div class=q data-lang="py3"> | |||
<p class=strong>Show the number of countries on each continent</p> | |||
<pre class=def> | |||
</pre> | </pre> | ||
<div class=ans> | <div class=ans> | ||
| Line 68: | Line 94: | ||
<div class=q data-lang="py3"> | <div class=q data-lang="py3"> | ||
<p class=strong>Show the smallest 3 countries (ignore areas of 0 or None)</p> | <p class=strong>Show the smallest 3 countries name and area (ignore areas of 0 or None)</p> | ||
<pre class=def> | <pre class=def> | ||
</pre> | </pre> | ||
| Line 95: | Line 121: | ||
from bson.code import Code | from bson.code import Code | ||
temp = db.world.map_reduce( | temp = db.world.map_reduce( | ||
map=Code("function(){emit(this.continent, { | map=Code("function(){emit(this.continent, {first:this.name,last:this.name})}"), | ||
reduce=Code("""function(key, values){ | reduce=Code("""function(key, values){ | ||
var ret = { | var ret = {first:'ZZZ',last:'AAA'}; | ||
for(var i=0;i<values.length;i++){ | for(var i=0;i<values.length;i++){ | ||
if (ret. | if (ret.first>values[i].first) ret.first=values[i].first; | ||
if (ret. | if (ret.last<values[i].last) ret.last=values[i].last; | ||
} | } | ||
return ret; | return ret; | ||
| Line 110: | Line 136: | ||
<div class=q data-lang="py3"> | <div class=q data-lang="py3"> | ||
<p class=strong>Return the first and last | <p class=strong>Return country name or capital city that starts with a letter 'M'</p> | ||
<pre class=def> | |||
</pre> | |||
<div class=ans> | |||
from bson.code import Code | |||
temp = db.world.map_reduce( | |||
map=Code("""function(){ | |||
if((this.name).startsWith('M')) | |||
emit(this.name,null); | |||
if((this.capital).startsWith('M')) | |||
emit(this.capital,null); | |||
}"""), | |||
reduce=Code("""function(key, values){ | |||
return values; | |||
}"""), | |||
out={"inline":1}) | |||
pp.pprint(temp["results"]) | |||
</div> | |||
</div> | |||
<div class=q data-lang="py3"> | |||
<p class=strong>Show the first and last city for each letter and the count of cities</p> | |||
<pre class=def> | <pre class=def> | ||
</pre> | </pre> | ||
| Line 116: | Line 164: | ||
from bson.code import Code | from bson.code import Code | ||
temp = db.world.map_reduce( | temp = db.world.map_reduce( | ||
map=Code("function(){emit(this. | map=Code("""function(){ if(this.capital)emit((this.capital).substring(0,1), {first:this.capital, last:this.capital})}"""), | ||
reduce=Code("""function(key, values){ | reduce=Code("""function(key, values){ | ||
var ret = {first:'ZZZ',last:'AAA', count:0}; | |||
for(var i=0;i<values.length;i++){ | for(var i=0;i<values.length;i++){ | ||
if (ret. | if (ret.first>values[i].first) ret.first=values[i].first; | ||
if (ret. | if (ret.last<values[i].last) ret.last=values[i].last; | ||
ret.count += 1; | |||
} | } | ||
return ret; | return ret; | ||
}"""), | }"""), | ||
finalize=Code("""function(key, val){ | |||
if(!val.count){ | |||
val.count = 1; | |||
return val; | |||
}else | |||
return val; | |||
}"""), | |||
out={"inline":1}) | |||
pp.pprint(temp["results"]) | |||
</div> | |||
</div> | |||
<div class=q data-lang="py3"> | |||
<p class=strong>Show country count for countries in the ranges</p> | |||
0 to 1000000 | |||
1000000 to 2000000 | |||
2000000 to 3000000 | |||
3000000 to 5000000 | |||
5000000 to 10000000 | |||
10000000 to 15000000 | |||
More than 15000000 | |||
<pre class=def> | |||
</pre> | |||
<div class=ans> | |||
from bson.code import Code | |||
temp = db.world.map_reduce( | |||
map=Code("""function(){ | |||
var pop = this.population; | |||
switch(true){ | |||
case pop<1000000: | |||
emit("0 TO 1000000", 1); | |||
break; | |||
case pop<2000000: | |||
emit("1000000 TO 2000000", 1); | |||
break; | |||
case pop<3000000: | |||
emit("2000000 TO 3000000", 1); | |||
break; | |||
case pop<5000000: | |||
emit("3000000 TO 5000000", 1); | |||
break; | |||
case pop<10000000: | |||
emit("5000000 TO 10000000", 1); | |||
break; | |||
case pop<15000000: | |||
emit("10000000 TO 15000000", 1); | |||
break | |||
case pop>15000000: | |||
emit("MORE THAN 15000000", 1); | |||
break; | |||
} | |||
}"""), | |||
reduce=Code("""function(key, values){ return Array.sum(values); }"""), | |||
out={"inline":1}) | out={"inline":1}) | ||
pp.pprint(temp["results"]) | pp.pprint(temp["results"]) | ||
</div> | </div> | ||
</div> | </div> | ||
Latest revision as of 11:18, 27 June 2016
#ENCODING
# Replace sys.stdout with a text wrapper over the same underlying byte
# stream so that everything printed afterwards is encoded as UTF-16.
import io
import sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16')
#MONGO
# Connect using MongoClient's default settings, authenticate against the
# 'progzoo' database and keep a handle to that database in `db`.
# NOTE(review): the credentials are hard-coded in the source.
from pymongo import MongoClient
client = MongoClient()
client.progzoo.authenticate('scott','tiger')
db = client['progzoo']
#PRETTY
# Shared pretty-printer `pp`: 4-space indent, output wrapped at 160 columns.
import pprint
pp = pprint.PrettyPrinter(indent=4, width=160)
MapReduce the basics
This tutorial introduces the MapReduce command.
MapReduce examples are available.
Find the total population of each continent
from bson.code import Code

# Map step: emit each country's population keyed by its continent.
continent_population_map = Code("function(){emit(this.continent, this.population)}")
# Reduce step: add up all the populations emitted for one continent.
continent_population_reduce = Code("""function(key, values){
return Array.sum(values);
}""")
# Inline output returns the results in the response instead of a collection.
temp = db.world.map_reduce(
    continent_population_map,
    continent_population_reduce,
    out={"inline": 1},
)
pp.pprint(temp["results"])
from bson.code import Code
# Total population per continent: map emits (continent, population) for
# every country and reduce sums the populations emitted for each continent.
temp = db.world.map_reduce(
    map=Code("function(){emit(this.continent, this.population)}"),
    reduce=Code("""function(key, values){
return Array.sum(values);
}"""),
    out={"inline":1})
pp.pprint(temp["results"])
Use the previous answer to find the population of the world to the nearest million
from bson.code import Code
# World population to the nearest million: every country is emitted under
# one constant key, so a single total is produced.
# The rounding lives in finalize, not reduce: MongoDB may call reduce
# several times on partial results, and rounding each partial sum would
# make the final figure depend on how the input happened to be split.
temp = db.world.map_reduce(
    map=Code("function(){emit('World Population in Millions', this.population)}"),
    reduce=Code("""function(key, values){
        return Array.sum(values);
    }"""),
    finalize=Code("""function(key, total){
        return Math.round(total/1000000)*1000000;
    }"""),
    out={"inline":1})
pp.pprint(temp["results"])
Count number of countries by first letter
from bson.code import Code
# Count countries by initial: map emits 1 keyed by the first character of
# the country name, reduce adds up the 1s for each key.
temp = db.world.map_reduce(
    map=Code("""function(){ emit((this.name).substring(0,1), 1)}"""),
    reduce=Code("""function(key, values){
return Array.sum(values);
}"""),
    out={"inline":1})
pp.pprint(temp["results"])
Show the number of countries on each continent
from bson.code import Code
# Count countries per continent: map emits 1 keyed by continent, reduce
# adds up the 1s for each continent.
temp = db.world.map_reduce(
    map=Code("function(){emit(this.continent, 1)}"),
    reduce=Code("""function(key, values){
return Array.sum(values);
}"""),
    out={"inline":1})
pp.pprint(temp["results"])
Show the smallest 3 countries name and area (ignore areas of 0 or None)
from bson.code import Code
# Smallest three countries by area. query, sort and limit select the input
# documents before the map step runs: drop areas that are None or 0, order
# by ascending area, keep the first three.
# The reduce body is intentionally empty: MongoDB does not call reduce for
# a key that has only one value (presumably country names are unique --
# verify against the data set).
temp = db.world.map_reduce(
    query={"$and":[{"area":{"$ne":None}}, {"area":{"$ne":0}}]},
    sort={"area":1},
    limit=3,
    map=Code("function(){emit(this.name, this.area)}"),
    reduce=Code("function(key, values){}"),
    out={"inline":1},
)
pp.pprint(temp["results"])
Return the first and last country based on name order for each continent
from bson.code import Code
# First and last country name (in string order) for each continent.
# Each country is emitted as a {first, last} pair holding its own name, so
# reduce receives and returns values of the same shape and stays correct
# when MongoDB feeds earlier reduce results back in.
temp = db.world.map_reduce(
    map=Code("function(){emit(this.continent, {first:this.name,last:this.name})}"),
    reduce=Code("""function(key, values){
        // Seed from the first value rather than fixed sentinels: a name
        // such as 'Zimbabwe' sorts after 'ZZZ' and would never replace it.
        var ret = {first:values[0].first, last:values[0].last};
        for(var i=1;i<values.length;i++){
            if (ret.first>values[i].first) ret.first=values[i].first;
            if (ret.last<values[i].last) ret.last=values[i].last;
        }
        return ret;
    }"""),
    out={"inline":1})
pp.pprint(temp["results"])
Return country name or capital city that starts with a letter 'M'
from bson.code import Code temp = db.world.map_reduce(
map=Code("""function(){
if((this.name).startsWith('M'))
emit(this.name,null);
if((this.capital).startsWith('M'))
emit(this.capital,null);
}"""),
reduce=Code("""function(key, values){
return values;
}"""),
out={"inline":1})
pp.pprint(temp["results"])
Show the first and last city for each letter and the count of cities
from bson.code import Code
# First and last capital city plus the number of capitals, grouped by the
# capital's first character. Countries without a capital are skipped.
# map emits count:1 with every value and reduce sums the counts, so the
# total stays correct when MongoDB re-reduces partial results and when a
# letter has only one capital (reduce is not called for a single value).
# That makes a finalize step to patch in a missing count unnecessary.
temp = db.world.map_reduce(
    map=Code("""function(){
        if(this.capital)
            emit((this.capital).substring(0,1), {first:this.capital, last:this.capital, count:1});
    }"""),
    reduce=Code("""function(key, values){
        // Seed from the first value rather than fixed sentinels: a name
        // such as 'Zagreb' sorts after 'ZZZ' and would never replace it.
        var ret = {first:values[0].first, last:values[0].last, count:0};
        for(var i=0;i<values.length;i++){
            if (ret.first>values[i].first) ret.first=values[i].first;
            if (ret.last<values[i].last) ret.last=values[i].last;
            ret.count += values[i].count;
        }
        return ret;
    }"""),
    out={"inline":1})
pp.pprint(temp["results"])
Show country count for countries in the ranges
0 to 1000000, 1000000 to 2000000, 2000000 to 3000000, 3000000 to 5000000, 5000000 to 10000000, 10000000 to 15000000, More than 15000000
from bson.code import Code
# Count countries per population band. map picks the first band whose
# upper bound the population is below and emits 1 under that band's label;
# reduce adds up the 1s for each label.
temp = db.world.map_reduce(
    map=Code("""function(){
        var pop = this.population;
        // switch(true) runs the first case whose comparison is true, so
        // the cases must stay in ascending order of their upper bound.
        switch(true){
            case pop<1000000:
                emit("0 TO 1000000", 1);
                break;
            case pop<2000000:
                emit("1000000 TO 2000000", 1);
                break;
            case pop<3000000:
                emit("2000000 TO 3000000", 1);
                break;
            case pop<5000000:
                emit("3000000 TO 5000000", 1);
                break;
            case pop<10000000:
                emit("5000000 TO 10000000", 1);
                break;
            case pop<15000000:
                emit("10000000 TO 15000000", 1);
                break;
            // >= rather than >: a population of exactly 15000000 would
            // otherwise match no case and go uncounted.
            case pop>=15000000:
                emit("MORE THAN 15000000", 1);
                break;
        }
    }"""),
    reduce=Code("""function(key, values){ return Array.sum(values); }"""),
    out={"inline":1})
pp.pprint(temp["results"])