Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@
/data/geojson
/data/tiles
/data/labdata.key
/data/macademia_people
map.xml

*.geojson
*.geoJSON

.DS_Store
/data/.DS_Store
/install/.DS_Store
Expand Down
1 change: 1 addition & 0 deletions cartograph/Config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

from ConfigParser import SafeConfigParser

EXTERNAL_FILES = 'ExternalFiles'
Expand Down
62 changes: 62 additions & 0 deletions knn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Place "interest" points on the map by interpolating coordinates of
their k nearest "people" neighbours in embedding space.

Reads people vectors/names/coordinates and interest vectors/names via the
project config, finds the 5 nearest people for every interest vector with
a KD-tree, and writes a cosine-similarity-weighted average (x, y) per
interest to config.FILE_NAME_MORE_COORDINATES as a TSV.
"""
import luigi
import os
import cartograph
import numpy as np
from sklearn.neighbors import KDTree
from sklearn.metrics.pairwise import cosine_similarity
from pprint import pprint

from cartograph import Config
from cartograph import Util

config = Config.BAD_GET_CONFIG()


# People: known map points that have both an embedding vector and (x, y).
peopleDict = Util.read_features(config.FILE_NAME_NUMBERED_VECS,
                                config.FILE_NAME_NUMBERED_NAMES,
                                config.FILE_NAME_ARTICLE_COORDINATES)
peopleKeys = list(peopleDict.keys())
peopleVectors = np.array([peopleDict[k]["vector"] for k in peopleKeys])
peopleNames = [peopleDict[k]["name"] for k in peopleKeys]
x = [float(peopleDict[k]["x"]) for k in peopleKeys]
y = [float(peopleDict[k]["y"]) for k in peopleKeys]


# Interests: embedding vectors only; their coordinates are computed below.
interestDict = Util.read_features(config.FILE_NAME_MORE_VECS,
                                  config.FILE_NAME_MORE_NAMES)
interestKeys = list(interestDict.keys())
interestVectors = np.array([interestDict[k]["vector"] for k in interestKeys])
interestNames = [interestDict[k]["name"] for k in interestKeys]


kdt = KDTree(peopleVectors, leaf_size=30)

K_NEIGHBORS = 5  # number of nearest people used to place each interest

x_lst = []
y_lst = []
for i in range(len(interestVectors)):
    dist, ind = kdt.query([interestVectors[i]], k=K_NEIGHBORS)
    neighbors = ind[0]
    # One vectorized cosine-similarity call per interest (shape (1, k)),
    # flattened to a 1-D weight array. This replaces a per-neighbour
    # cosine_similarity call wrapped in float(), which is both slower and
    # deprecated (float() on a 1x1 ndarray errors on NumPy >= 2.x).
    weights = cosine_similarity([interestVectors[i]],
                                peopleVectors[neighbors]).ravel()
    neighbor_x = [x[j] for j in neighbors]
    neighbor_y = [y[j] for j in neighbors]
    # NOTE(review): np.average raises ZeroDivisionError if the weights sum
    # to zero, and cosine similarity can be negative — assumed not to occur
    # for these embeddings; confirm against real data.
    x_lst.append(np.average(neighbor_x, weights=weights))
    y_lst.append(np.average(neighbor_y, weights=weights))


Util.write_tsv(config.FILE_NAME_MORE_COORDINATES,
               ("index", "x", "y"), interestKeys, x_lst, y_lst)
5 changes: 2 additions & 3 deletions workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def run(self):
name = featureDict[featureID]["name"]
popularityList.append(nameDict[name])

Util.write_tsv(config.FILE_NAME_NUMBERED_POPULARITY,
Util.write_tsv(config.get("PreprocessingFiles", "popularity_with_id"),
("id", "popularity"),
idList, popularityList)

Expand Down Expand Up @@ -287,7 +287,7 @@ def run(self):
X = [float(points[k]['x']) for k in keys]
Y = [float(points[k]['y']) for k in keys]
maxVal = max(abs(v) for v in X + Y)
scaling = config.MAX_COORDINATE / maxVal
scaling = config.get("MapConstants", "max_coordinate") / maxVal
X = [x * scaling for x in X]
Y = [y * scaling for y in Y]
Util.write_tsv(config.get("PreprocessingFiles",
Expand Down Expand Up @@ -434,7 +434,6 @@ def run(self):
range(1, len(regionList) + 1),
regionList)


class CreateContours(MTimeMixin, luigi.Task):
'''
Make contours based on density of points inside the map
Expand Down