首先是安装
yum install -y mongodb-server mongodb
Installing for dependencies:
boost-filesystem
boost-iostreams
boost-program-options
boost-system
boost-thread
gperftools-libs
libicu
libunwind
v8
会装一堆依赖包,boost库,icu库,v8库,gperftools库,都是很厉害的库啊!
启动:
service mongod start
导入海量数据:
[root@ovs-16-11-2 ~]# mongoimport -d mydb -c prj01 --type csv --file opendata_projects.csv --headerline
connected to: 127.0.0.1
Thu Jul 28 09:50:47.002 Progress: 39118658/470754183 8%
Thu Jul 28 09:50:47.002 74400 24800/second
Thu Jul 28 09:50:50.033 Progress: 80042213/470754183 17%
Thu Jul 28 09:50:50.033 150700 25116/second
Thu Jul 28 09:50:53.145 Progress: 108143323/470754183 22%
Thu Jul 28 09:50:53.145 202700 22522/second
Thu Jul 28 09:50:56.004 Progress: 149781879/470754183 31%
Thu Jul 28 09:50:56.004 280000 23333/second
Thu Jul 28 09:50:59.001 Progress: 179705162/470754183 38%
Thu Jul 28 09:50:59.001 336200 22413/second
Thu Jul 28 09:51:03.385 Progress: 212197023/470754183 45%
Thu Jul 28 09:51:03.385 396400 20863/second
Thu Jul 28 09:51:06.015 Progress: 236552399/470754183 50%
Thu Jul 28 09:51:06.015 441700 20077/second
Thu Jul 28 09:51:09.299 Progress: 264365847/470754183 56%
Thu Jul 28 09:51:09.299 493300 19732/second
Thu Jul 28 09:51:12.001 Progress: 304790148/470754183 64%
Thu Jul 28 09:51:12.001 568500 20303/second
Thu Jul 28 09:51:15.033 Progress: 323508057/470754183 68%
Thu Jul 28 09:51:15.033 603300 19461/second
Thu Jul 28 09:51:18.607 Progress: 361610334/470754183 76%
Thu Jul 28 09:51:18.607 674200 19829/second
Thu Jul 28 09:51:21.000 Progress: 393748962/470754183 83%
Thu Jul 28 09:51:21.000 733700 19829/second
Thu Jul 28 09:51:24.007 Progress: 427667505/470754183 90%
Thu Jul 28 09:51:24.007 796900 19922/second
Thu Jul 28 09:51:27.001 Progress: 459658299/470754183 97%
Thu Jul 28 09:51:27.001 857300 19937/second
Thu Jul 28 09:51:27.793 check 9 878853
Thu Jul 28 09:51:27.979 imported 878852 objects
进入命令行,看看库的整体情况:
mongo
use mydb
show collections
db.prj01.findOne()
完整结果如下:
mongo
MongoDB shell version: 2.4.14
connecting to: test
> use mydb
switched to db mydb
> show collections
prj01
system.indexes
> db.prj01.findOne()
{
"_id" : ObjectId("579964f41d36d69d1752f82b"),
"_projectid" : "7342bd01a2a7725ce033a179d22e382d",
"_teacher_acctid" : "5c43ef5eac0f5857c266baa1ccfa3d3f",
"_schoolid" : "9e72d6f2f1e9367b578b6479aa5852b7",
"school_ncesid" : NumberLong("360009702803"),
"school_latitude" : 40.688454,
"school_longitude" : -73.910432,
"school_city" : "Brooklyn",
"school_state" : "NY",
"school_zip" : 11207,
"school_metro" : "urban",
"school_district" : "New York City Dept Of Ed",
"school_county" : "Kings (Brooklyn)",
"school_charter" : "f",
"school_magnet" : "t",
"school_year_round" : "f",
"school_nlns" : "f",
"school_kipp" : "f",
"school_charter_ready_promise" : "f",
"teacher_prefix" : "Mr.",
"teacher_teach_for_america" : "f",
"teacher_ny_teaching_fellow" : "f",
"primary_focus_subject" : "Other",
"primary_focus_area" : "Applied Learning",
"secondary_focus_subject" : "",
"secondary_focus_area" : "",
"resource_type" : "Supplies",
"poverty_level" : "highest poverty",
"grade_level" : "Grades 6-8",
"vendor_shipping_charges" : "",
"sales_tax" : "",
"payment_processing_charges" : "",
"fulfillment_labor_materials" : "",
"total_price_excluding_optional_support" : 229,
"total_price_including_optional_support" : 279.27,
"students_reached" : 0,
"total_donations" : 251,
"num_donors" : 1,
"eligible_double_your_impact_match" : "f",
"eligible_almost_home_match" : "f",
"funding_status" : "completed",
"date_posted" : "2002-09-13 00:00:00",
"date_completed" : "2002-09-23 00:00:00",
"date_thank_you_packet_mailed" : "2003-01-27 00:00:00",
"date_expiration" : "2003-12-31 00:00:00"
}
>
太多字段了,如果我们就想要其中的6个字段:
> db.prj01.findOne({}, {school_state:1, resource_type:1, poverty_level:1, date_posted:1, total_donations:1, funding_status:1, _id:0})
{
"school_state" : "NY",
"resource_type" : "Supplies",
"poverty_level" : "highest poverty",
"total_donations" : 251,
"funding_status" : "completed",
"date_posted" : "2002-09-13 00:00:00"
}
装个pymongo
python -m pip install pymongo
测一下,进入python命令行:
python
...
from pymongo import MongoClient

# Connection settings for the mongod instance started earlier.
MONGODB_HOST = 'localhost'
MONGODB_PORT = 27017
DBS_NAME = 'mydb'
COLLECTION_NAME = 'prj01'
# Projection: keep only the listed fields and drop the ObjectId `_id`.
FIELDS = {'school_state': True, 'resource_type': True, 'poverty_level': True, 'date_posted': True, 'total_donations': True, '_id': False}

connection = MongoClient(MONGODB_HOST, MONGODB_PORT)
collection = connection[DBS_NAME][COLLECTION_NAME]
# No filter is given, so the cursor walks every document in the collection.
projects = collection.find(projection=FIELDS)
for project in projects:
    # The parenthesised form prints the same text on Python 2 and also
    # runs on Python 3.
    print(project)
...
{u'school_state': u'MO', u'date_posted': u'2015-08-18 00:00:00', u'poverty_level': u'highest poverty', u'resource_type': u'Books', u'total_donations': 0}
...
数据会疯狂显示一阵子,其实这就是一个完整的python访问mongodb的程序了。
下面我们来完成flask的部分
装个flask
python -m pip install flask
建立个文件夹flask01,建立目录templates
flask01
├── run.py
└── templates
    └── index.html
准备一个首页文件index.html放到目录templates下
cat index.html
<h1>Hello World</h1>
准备主程序run.py
from flask import Flask
from flask import render_template

app = Flask(__name__)


@app.route("/")
def index():
    """Serve the landing page by rendering the index.html template."""
    return render_template("index.html")


if __name__ == "__main__":
    # NOTE(review): debug=True combined with host='0.0.0.0' exposes the
    # interactive debugger to every machine that can reach port 5000.
    # Keep this for local experiments only; turn debug off before exposing
    # the server to an untrusted network.
    app.run(host='0.0.0.0', port=5000, debug=True)
在run.py里增加一个url(/mydb/prj01)来读取mongodb,完整的run.py如下:
from flask import Flask
from flask import Response
from flask import render_template
from pymongo import MongoClient
import json
from bson import json_util
from bson.json_util import dumps

app = Flask(__name__)

# Connection settings for the local mongod instance.
MONGODB_HOST = 'localhost'
MONGODB_PORT = 27017
DBS_NAME = 'mydb'
COLLECTION_NAME = 'prj01'
# Projection: keep only the listed fields and drop the ObjectId `_id`.
FIELDS = {'school_state': True, 'resource_type': True, 'poverty_level': True, 'date_posted': True, 'total_donations': True, '_id': False}


@app.route("/")
def index():
    """Serve the landing page by rendering the index.html template."""
    return render_template("index.html")


@app.route("/mydb/prj01")
def mydb_prj01():
    """Return the projected fields of every prj01 document as a JSON array.

    A new MongoClient is opened for each request and is always closed,
    even when the query fails.
    """
    connection = MongoClient(MONGODB_HOST, MONGODB_PORT)
    try:
        collection = connection[DBS_NAME][COLLECTION_NAME]
        # Drain the cursor while the connection is still open.
        json_projects = list(collection.find(projection=FIELDS))
    finally:
        connection.close()
    # json_util.default lets json.dumps handle BSON-specific values.
    json_projects = json.dumps(json_projects, default=json_util.default)
    # Label the body as JSON so clients do not treat it as HTML.
    return Response(json_projects, mimetype='application/json')


if __name__ == "__main__":
    # NOTE(review): debug=True combined with host='0.0.0.0' exposes the
    # interactive debugger to every machine that can reach port 5000.
    # Keep this for local experiments only.
    app.run(host='0.0.0.0', port=5000, debug=True)
运行一下,在浏览器打开这个url:
python run.py
* Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)
* Restarting with stat
* Debugger is active!
* Debugger pin code: 373-144-494
172.16.8.1 - - [28/Jul/2016 10:13:27] "GET /mydb/prj01 HTTP/1.1" 200 -
结果很明显是个json
[{"school_state": "NY", "date_posted": "2002-09-13 00:00:00", "poverty_level": "highest poverty", "resource_type": "Supplies", "total_donations": 251},
{"school_state": "NY", "date_posted": "2002-09-16 00:00:00", "poverty_level": "moderate poverty", "resource_type": "Supplies", "total_donations": 125},
...
]