-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathJSONparser.py
More file actions
93 lines (61 loc) · 1.62 KB
/
JSONparser.py
File metadata and controls
93 lines (61 loc) · 1.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#JSONparser.py
import json
import requests
from scrapfunctions import *
# Path of the JSON configuration file that the script entry point passes to
# parseJSON when this module is run directly.
jsonfile = 'atom.json'
def parseJSON(f):
    """Read the file at path ``f`` and return its content decoded from JSON.

    The file is opened with the platform default text settings and closed
    before decoding starts. Errors from ``open`` (missing file) and from the
    JSON decoder (malformed content) propagate to the caller unchanged.
    """
    with open(f) as source:
        raw = source.read()
    return json.loads(raw)
if __name__ == '__main__':
    # Script entry point: load the scraping configuration from `jsonfile`,
    # run every task listed under its 'do' key and print what was scraped at
    # each level. `reqHeaders` and `getContent` are not defined in this file;
    # they are expected to come from the star-import of scrapfunctions.

    # Parse JSON and assign variables
    jsondata = parseJSON(jsonfile)
    startlink = jsondata['target']
    searchLink = jsondata['searchQuery']
    searchSeed = jsondata['searchSeed']

    scrapedData = {}
    # Do each task in 'do'
    for jsonDo in jsondata['do']:
        # Every top-level task starts from the same search URL.
        linkfeed = [searchLink + searchSeed]
        while True:
            # The script compares these flags against the strings
            # "False" / "True", so they are matched as strings here too.
            stopAfterFirstMatch = jsonDo['repeated'] == "False"
            taskIsThereASubTask = jsonDo['isThereASubtask'] == "True"

            # Values scraped from every page of this level. A subtask uses
            # them as its links, so results of all pages are followed and
            # not only those of the page that happened to be fetched last.
            nextfeed = []
            for sublink in linkfeed:
                # if the link is not complete
                if not sublink.startswith('http'):
                    sublink = startlink + sublink
                # now, get the page
                # NOTE(review): no timeout is passed, so a server that never
                # answers blocks the script indefinitely.
                page = requests.get(sublink, headers=reqHeaders).text
                # Prepare tuple input for getContent function: the page text,
                # each 'afterSequence' item, then the 'bet' and 'ween' values.
                tup = (page,) + tuple(jsonDo['afterSequence']) + (jsonDo['bet'], jsonDo['ween'])
                off = 0
                ln = len(page)
                dat = []
                # Scrap !
                while off < ln:
                    try:
                        val, off = getContent(tup, offset=off)
                    except ValueError:
                        # A ValueError ends the scan of this page.
                        break
                    dat.append(val)
                    if stopAfterFirstMatch:
                        break
                scrapedData[sublink] = dat
                nextfeed.extend(dat)
            # Parenthesised form prints the same on Python 2 and Python 3.
            print(scrapedData)

            if not taskIsThereASubTask:
                break
            # Descend into the subtask: its pages are the values just scraped,
            # and its results are reported in a fresh mapping.
            jsonDo = jsonDo['thenDo']
            linkfeed = nextfeed
            scrapedData = {}