Commit 0323797a authored by Kaifu Ji's avatar Kaifu Ji
Browse files

finall

parent cc41ba26
import json

# Build IGA.json: one record per LGA feature with its outer-ring boundary
# and a pre-computed bounding box for fast point-in-LGA lookups.
with open('alallaalal.json', 'r') as f:
    dictionary = json.load(f)

result = []
for item in dictionary['features']:
    props = item['properties']
    # Outer ring of the (multi)polygon: first polygon, first ring.
    boundary = item['geometry']['coordinates'][0][0]
    xs = [p[0] for p in boundary]
    ys = [p[1] for p in boundary]
    result.append({
        'IGA_name1': props['vic_lga__2'],
        'IGA_name2': props['vic_lga__3'],
        'IGA_num1': props['lga_pid'],
        'IGA_num2': props['lg_ply_pid'],
        'boundary': boundary,
        # min/max over the ring -- the original sentinel scan started at
        # x_max = 0, which silently fails for negative longitudes.
        'range': [min(xs), max(xs), min(ys), max(ys)],
    })

print(len(result))
# `with` guarantees the output handle is flushed and closed (the original
# left it open).
with open('IGA.json', 'w') as f:
    json.dump({'result': result}, f)
from __future__ import print_function
import json
from matplotlib.path import Path
with open('IGA.json', 'r') as f:
dictionary = json.load(f)
dictionary = dictionary['result']
for item in dictionary:
path = Path(item['boundary'])
item['boundary'] = path
def detect_IGA(point, suburbs=None):
    """Find the LGA record whose polygon contains `point`.

    point: [longitude, latitude].
    suburbs: optional list of LGA records (dicts with 'range' bounding box
        [x_min, x_max, y_min, y_max] and a 'boundary' Path); defaults to the
        module-level `dictionary` loaded from IGA.json.

    Returns a copy of the matching record with 'boundary' nulled out (so it
    stays JSON-serializable), or {'name': 'not found'}.
    """
    if suburbs is None:
        suburbs = dictionary
    x, y = point[0], point[1]
    # Cheap bounding-box prefilter before the exact polygon test.
    candidates = [s for s in suburbs
                  if s['range'][0] <= x < s['range'][1]
                  and s['range'][2] <= y < s['range'][3]]
    if not candidates:
        return {'name': 'not found'}
    if len(candidates) == 1:
        match = candidates[0].copy()
        match['boundary'] = None
        return match
    # Several boxes overlap the point: fall back to exact containment.
    # (The original had an unreachable `break` after `return` and a dead
    # print("error") tail; both removed.)
    for item in candidates:
        if item['boundary'].contains_point(point):
            match = item.copy()
            match['boundary'] = None
            return match
    return {'name': 'not found'}
{"organisation":"VIC_Govt_VCGLR","name":"VCGLR:alcohol_outlet","title":"Licensed Alcohol Outlets 2013 for Victoria","legal":{"copyrightYear":"2009 - 2013","attributionYear":"2013","attribution":"{datasource.organisationTitle}, ({dataset.attributionYear}): {dataset.title}; accessed from AURIN Portal on {dateOfAccess}.","clickthrough":false,"licenceType":"Creative Commons Attribution 3.0 Australia (CC BY 3.0 AU)","copyrightNotice":"&copy; {datasource.organisationTitle} {dataset.copyrightYear}"},"referenceSystemIdentifier":"EPSG:4283","geoLevel":"NA","key":"ogc_fid","keyRegex":null,"availability":"true","geomField":"wkb_geometry","keyword":"alcohol outlet, liquor, health, well being, quality of life","theme":null,"temporalExtent":{"beginDate":"2013-11-15T00:00:00","endDate":"2013-11-15T00:00:00","selectTime":false},"type":"dataset","_id":"dataset-VIC_Govt_VCGLR-VCGLR:alcohol_outlet-NA","bbox":null,"abstract":"This dataset contains the locations of active liquor licenses in Victoria. It includes licence types, venue names and addresses. 
Further information relating to the datasource is available from here: <a href='http://www.geomaps.vcglr.vic.gov.au/' >Additional Information</a>","selectedAttributes":[{"name":"ogc_fid","type":"java.lang.Integer","title":"Feature Identifier","description":"Feature Identifier","stype":"Nominal","isDimension":false},{"name":"wkb_geometry","type":"com.vividsolutions.jts.geom.Geometry","title":"Geometry","description":"Geometry","stype":"Nominal","isDimension":false},{"name":"objectid_1","type":"java.math.BigDecimal","title":"objectid_1","description":"objectid_1","isDimension":false},{"name":"licenceno","type":"java.lang.Double","title":"Licence Number","description":"The unique code assigned to the licence","stype":"Nominal","isDimension":false},{"name":"lictypes","type":"java.lang.String","title":"Licence Type","description":"The type of licence","stype":"Nominal","isDimension":false},{"name":"premname","type":"java.lang.String","title":"Venue Name","description":"The name of the venue","stype":"Nominal","isDimension":false},{"name":"address1","type":"java.lang.String","title":"Address Part 1","description":"The first part of the Address","stype":"Nominal","isDimension":false},{"name":"address2","type":"java.lang.String","title":"Address Part 2","description":"The second part of the Address","stype":"Nominal","isDimension":false},{"name":"suburb","type":"java.lang.String","title":"Suburb","description":"Suburb","stype":"Nominal","isDimension":false},{"name":"postcode","type":"java.lang.Integer","title":"Postcode","description":"Postcode","stype":"Nominal","isDimension":false}],"filter":{"filterType":"tabular","spatial":{"bbox":[140.961681984,-39.1591895275,149.976679008,-33.9806475865]},"states":["2"]}}
\ No newline at end of file
# coding=utf-8
import json
import sys
import mpi4py.MPI as MPI
import numpy as np
def point_in_polygon(x, y, verts):
"""
- PNPoly算法
- xyverts [(x1, y1), (x2, y2), (x3, y3), ...]
"""
vertx = [xyvert[0] for xyvert in verts]
verty = [xyvert[1] for xyvert in verts]
# 上一步通过后,核心算法部分
nvert = len(verts)
is_in = False
for i in range(nvert):
j = nvert - 1 if i == 0 else i - 1
if ((verty[i] > y) != (verty[j] > y)) and (
x < (vertx[j] - vertx[i]) * (y - verty[i]) / (verty[j] - verty[i]) + vertx[i]):
is_in = not is_in
return is_in
# --- MPI setup -------------------------------------------------------------
comm = MPI.COMM_WORLD
comm_rank = comm.Get_rank()      # this node's rank
comm_size = comm.Get_size()      # total number of MPI workers

# Suburb polygons with pre-computed bounding boxes.
with open('bigjson.json', 'r') as f:
    dictionary = json.load(f)
suburbs = dictionary['result']

# Split the AU longitude span [112.8, 154] evenly across the workers; each
# rank rasterizes its own vertical strip at 0.0025-degree resolution.
ran = np.linspace(112.8, 154, comm_size + 1)
array = [ran[comm_rank], ran[comm_rank + 1], -43.7, -10]

result = []
x = ran[comm_rank]
while array[1] - x > 0.00001:
    y = -43.7
    resultx = []
    while y < -10.0:
        point = [x, y]
        # Cheap bounding-box prefilter before the exact polygon test.
        tmp = [s for s in suburbs
               if s['range'][0] <= x < s['range'][1]
               and s['range'][2] <= y < s['range'][3]]
        if len(tmp) == 0:
            resultx.append(None)
        elif len(tmp) == 1:
            resultx.append({'name': tmp[0]['state'] + '|' + tmp[0]['suburb'], 'point': point})
        else:
            find = False
            for suburb in tmp:
                if point_in_polygon(x, y, suburb['boundary']):
                    # BUG FIX: the original appended tmp[0] here instead of
                    # the suburb whose polygon actually contains the point.
                    resultx.append({'name': suburb['state'] + '|' + suburb['suburb'], 'point': point})
                    find = True
                    break
            if not find:
                resultx.append(None)
        y += 0.0025
    result.append(resultx)
    print("this is rank %d this line done %f" % (comm_rank, x))
    x += 0.0025

# Each rank writes its own strip; `with` closes the handle (the original
# left it open for the whole run).
with open('dic' + str(comm_rank) + '.json', 'w') as f:
    json.dump({'result': result, 'range': array}, f)
# coding=utf-8
import json
import sys
import numpy as np
import mpi4py.MPI as MPI
def point_in_polygon(x, y, verts):
"""
- PNPoly算法
- xyverts [(x1, y1), (x2, y2), (x3, y3), ...]
"""
vertx = [xyvert[0] for xyvert in verts]
verty = [xyvert[1] for xyvert in verts]
# 上一步通过后,核心算法部分
nvert = len(verts)
is_in = False
for i in range(nvert):
j = nvert - 1 if i == 0 else i - 1
if ((verty[i] > y) != (verty[j] > y)) and (
x < (vertx[j] - vertx[i]) * (y - verty[i]) / (verty[j] - verty[i]) + vertx[i]):
is_in = not is_in
return is_in
# --- MPI setup -------------------------------------------------------------
comm = MPI.COMM_WORLD
comm_rank = comm.Get_rank()      # this node's rank
comm_size = comm.Get_size()      # total number of MPI workers

# VIC suburb polygons with pre-computed bounding boxes.
with open('VICB.json', 'r') as f:
    dictionary = json.load(f)
suburbs = dictionary['result']

# Split VIC's longitude span evenly across the workers.
ran = np.linspace(140.96, 149.98, comm_size + 1)
array = [ran[comm_rank], ran[comm_rank + 1], -39.12, -33.98]
# NOTE(review): the inner loop below still scans latitude [-43.7, -10) even
# though `array` records VIC's [-39.12, -33.98] bounds; preserved as-is so
# the output grid shape matches whatever downstream consumers expect --
# confirm before tightening.

result = []
x = ran[comm_rank]
while array[1] - x > 0.00001:
    y = -43.7
    resultx = []
    while y < -10.0:
        point = [x, y]
        # Cheap bounding-box prefilter before the exact polygon test.
        tmp = [s for s in suburbs
               if s['range'][0] <= x < s['range'][1]
               and s['range'][2] <= y < s['range'][3]]
        if len(tmp) == 0:
            resultx.append(None)
        elif len(tmp) == 1:
            resultx.append({'name': tmp[0]['state'] + '|' + tmp[0]['suburb'], 'point': point})
        else:
            find = False
            for suburb in tmp:
                if point_in_polygon(x, y, suburb['boundary']):
                    # BUG FIX: the original appended tmp[0] here instead of
                    # the suburb whose polygon actually contains the point.
                    resultx.append({'name': suburb['state'] + '|' + suburb['suburb'], 'point': point})
                    find = True
                    break
            if not find:
                resultx.append(None)
        y += 0.0025
    result.append(resultx)
    print("this is rank %d this line done %f" % (comm_rank, x))
    x += 0.0025

# Each rank writes its own strip; `with` closes the handle.
with open('dic' + str(comm_rank) + '.json', 'w') as f:
    json.dump({'result': result, 'range': array}, f)
import json

# Join per-LGA crime counts (GeoJSON features) with per-LGA tweet sentiment,
# producing crime_happy.json with crime_number and happy_rate per LGA.
with open('crime_iga.json', 'r') as f:
    a = json.load(f)['features']
with open('IGA_Tweet.json', 'r') as f:
    b = json.load(f)['result']

result = {}
for item1 in a:
    name = item1['properties']['lga_name11'].lower()
    for item2 in b:
        if item2['name'] == name:
            if name in result:
                # Same LGA appears in multiple crime features: accumulate.
                result[name]['crime_number'] += item1['properties']['grand_tot']
            else:
                item2['crime_number'] = item1['properties']['grand_tot']
                # BUG FIX: force float division -- under Python 2 (which this
                # codebase still targets, cf. `print len(...)` elsewhere) the
                # plain `/` truncated happy_rate to 0 or 1. Matches the
                # explicit float() convention used by the hourly-plot script.
                item2['happy_rate'] = float(item2['positive']) / item2['total']
                result[name] = item2.copy()
            break

tmp = [result[key] for key in result]
with open('crime_happy.json', 'w') as f:
    json.dump({'result': tmp}, f)
\ No newline at end of file
# Scratch script: scatter-plot a 2-column dataset and overlay a fitted
# linear-regression line.
# NOTE(review): all three data-building loops below are commented out, so
# `data` stays an empty list and `np.array(data)` has no second axis -- the
# slice on the `x = data[...]` line will raise IndexError as-is. Re-enable
# one of the loops (or otherwise populate `data`) before running.
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import explained_variance_score
import json
import numpy as np
# File handle opened inline and never closed -- presumably acceptable for a
# one-shot analysis script.
suburb = json.load(open('sentimentPerHourPositive.txt','r'))
data = []
# Candidate data sources (each builds [x, y] pairs for the regression):
# for item in suburb:
# if 100 <item[1] < 100000:
# data.append(item)
# for item in suburb:
# total = item['data']['total']
# if total < 100:
# continue
# positive = item['data']['positive']
# if 'income' in item['data']:
# income = item['data']['income']
# rate = positive / total
# data.append([income,rate])
# for item in suburb:
# if item['total'] > 200:
# data.append([item['crime_number'],item['happy_rate']])
data = np.array(data)
# Column 0 as a 2-D feature matrix (sklearn expects shape (n, 1)),
# column 1 as the target vector.
x = data[:,np.newaxis,0]
y = data[:,1]
model = LinearRegression()
model.fit(x,y)
plt.plot(x,model.predict(x),color = 'black')
plt.scatter(x,y,color='red')
# print(model.get_params())
# print(model.score([[1],[2],[3]],[2,4,6]))
# score = explained_variance_score([[2],[4],[6]],model.predict([[1],[2],[3]]))
# print(score)
plt.show()
\ No newline at end of file
import json
import matplotlib.pyplot as plt

# Plot hourly tweet volume against the hourly positivity rate over a
# 24-hour day.
with open('sentimentPerHour.txt', 'r') as fh:  # `with` closes the handle (original left it open)
    a = json.load(fh)

x = []
y1 = []  # total tweets per hour
y2 = []  # positive rate, scaled by 50000 so both series share one axis
for hour in range(24):
    key = str(hour)  # JSON object keys are strings
    x.append(hour)
    y1.append(a[key]['total'])
    y2.append(float(a[key]['positive']) / a[key]['total'] * 50000)

plt.plot(x, y1, color='red')
plt.plot(x, y2, color='green')
plt.show()
\ No newline at end of file
from wsgiref.simple_server import make_server
from w2v import TextSimilarity
import json
a = TextSimilarity()
def application(environ, start_response):
    """WSGI entry point: answer word-similarity queries as JSON.

    Query-string format: repeated ``pos=<word>`` / ``neg=<word>`` pairs,
    e.g. ``?pos=king&neg=man``. Responds with ``{"result": [...], "code": 0}``
    on success, or ``{"code": 1}`` when the model lookup fails.
    """
    start_response('200 OK', [('Content-Type', 'text/html')])
    query = environ['QUERY_STRING'].split('&')
    pos = []
    neg = []
    print(query)
    for item in query:
        parts = item.split('=')
        if parts[0] == 'pos':
            pos.append(parts[1])
        else:
            print(item)
            neg.append(parts[1])
    try:
        # `a` is the module-level TextSimilarity wrapper around word2vec.
        result = a.similarity_words(pos, neg)
        # Keep only the words, dropping the similarity scores.
        body = json.dumps({'result': [pair[0] for pair in result], 'code': 0})
    except Exception:  # narrowed from bare `except:`, which also swallowed SystemExit
        body = json.dumps({'code': 1})
    return [body.encode('utf-8')]
# Serve the similarity API on port 8000 and block handling requests.
httpd = make_server('', 8000, application)
print('Serving HTTP on port 8000...')
# Start listening for HTTP requests (blocks forever).
httpd.serve_forever()
\ No newline at end of file
import json

# Build VICB.json: one record per suburb with its outer-ring boundary and a
# pre-computed bounding box. Only VIC is processed here, but the
# property-key fixup below supports the other states too.
names = ['VIC']

result = []
for name in names:
    with open(name + '.json', 'r') as fr:  # `with` closes the handle (original left it open)
        a = json.load(fr)['features']
    # The suburb-name property key differs per state, e.g. 'vic_loca_2'
    # vs 'sa_local_2'.
    fix = '_local_2' if name in ('SA', 'WA', 'NT') else '_loca_2'
    for item in a:
        suburb_name = item['properties'][name.lower() + fix]
        # Outer ring of the (multi)polygon: first polygon, first ring.
        boundary = item['geometry']['coordinates'][0][0]
        xs = [p[0] for p in boundary]
        ys = [p[1] for p in boundary]
        # min/max over the ring -- the original sentinel scan started at
        # x_max = 0, which silently fails for negative longitudes.
        result.append({'suburb': suburb_name, 'state': name, 'boundary': boundary,
                       'range': [min(xs), max(xs), min(ys), max(ys)]})
        print('%s is finish' % suburb_name)
    print('%s is done' % name)

print(len(result))
with open('VICB.json', 'w') as f:
    json.dump({'result': result}, f)
import json
from matplotlib.path import Path

# Sanity check: does a known Melbourne CBD coordinate fall inside the
# MELBOURNE locality polygon from VIC.json?
with open('VIC.json', 'r') as f:  # `with` closes the handle (original left it open)
    a = json.load(f)['features']

boundary = []
for item in a:
    if item['properties']['vic_loca_2'] == 'MELBOURNE':
        # Outer ring of the (multi)polygon: first polygon, first ring.
        boundary = item['geometry']['coordinates'][0][0]

path = Path(boundary)
print(path.contains_point([144.977, -37.8351]))
\ No newline at end of file
import matplotlib.pyplot as plt
import json

# Stitch the 8 per-rank rasterized grid strips back into one lookup table.
grid_dictionary = []
grid_range = [140.96, -43.7]  # grid origin: (min longitude, min latitude)
for i in range(8):
    # `with` guarantees each handle is closed (the original leaked all 8).
    with open('dic' + str(i) + '.json', 'r') as f:
        grid_dictionary += json.load(f)['result']
def which_suburb(point, grid=None, origin=None, step=0.0025):
    """Map a (longitude, latitude) point to its pre-rasterized grid cell.

    point: [longitude, latitude]. For this dataset longitude spans
        roughly [140.96, 149.98] and latitude [-43.7, -10].
    grid: 2-D lookup table (rows indexed by longitude step, columns by
        latitude step); defaults to the module-level `grid_dictionary`.
    origin: [min_longitude, min_latitude] of the grid; defaults to the
        module-level `grid_range`.
    step: grid resolution in degrees (must match the rasterization step).

    Returns whatever record the grid stores for that cell. Raises
    IndexError for points outside the gridded area.
    """
    if grid is None:
        grid = grid_dictionary
    if origin is None:
        origin = grid_range
    col = int(round((point[0] - origin[0]) / step))
    row = int(round((point[1] - origin[1]) / step))
    return grid[col][row]
from gensim import corpora
import json
# Toy corpus used to exercise gensim's corpora utilities.
documents = [
    "Human machine interface for lab abc computer applications",
    "A survey of user opinion of computer system response time",
    "The EPS user interface management system",
    "System and human system engineering testing of EPS",
    "The generation of random binary unordered trees",
    "The intersection graph of paths in trees",
    "Graph minors IV Widths of trees and well quasi ordering",
    "Graph minors A survey",
]

# Common function words to drop during tokenisation.
stoplist = set('for a of the and to in'.split())

# Lower-case and whitespace-tokenise each document, filtering stopwords.
texts = []
for document in documents:
    tokens = document.lower().split()
    texts.append([token for token in tokens if token not in stoplist])
\ No newline at end of file
# Trim the trailing row from rank 7's grid strip and re-save it.
with open('mpi_dic7.json', 'r') as f:
    dictionary = json.load(f)
rang = dictionary['range']
result = dictionary['result']
print(len(result))
# NOTE(review): 516 rows appears to be one more than the expected strip
# width (np.linspace strip boundaries overlap between adjacent ranks, so
# the last column is presumably duplicated) -- confirm before reuse.
if len(result) == 516:
    result = result[:-1]
# `with` guarantees the output handle is closed (the original left it open).
with open('nmpi_dic7.json', 'w') as f:
    json.dump({'result': result, 'range': rang}, f)
from gensim.models import word2vec
import gensim
sentences = word2vec.Text8Corpus("./text8")  # load the training corpus
# One-off training/saving of the model (now loaded from disk instead);
# NOTE(review): `sentences` is unused while these stay commented out.
# model = word2vec.Word2Vec(sentences,size=100)
# model.save('model')
class TextSimilarity():
    """Thin wrapper around a pre-trained word2vec model for similarity queries."""

    def __init__(self):
        # Loads the model previously trained and saved as 'model' in the
        # working directory.
        self.model = word2vec.Word2Vec.load('model')

    def similarity_words(self, pos, nag=None):
        """Return the words most similar to `pos`, optionally repelled by `nag`.

        pos: list of words contributing positively to the similarity query.
        nag: optional list of words contributing negatively.

        BUG FIX: the original class defined `similarity_words` twice, so the
        one-argument variant was silently shadowed. Merged into a single
        method with `nag` defaulting to None; both call shapes keep working.
        """
        if nag is None:
            return self.model.wv.most_similar(pos)
        return self.model.wv.most_similar(pos, nag)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment