-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathJSONparser01.py
More file actions
96 lines (65 loc) · 1.87 KB
/
JSONparser01.py
File metadata and controls
96 lines (65 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#JSONparser.py v0.2
import json
import requests
from scrapfunctions import *
# Path of the JSON job description that is loaded when this file is run as a script.
jsonfile = 'atom2.json'
def parseJSON(f):
    """Load the JSON document stored at path *f* and return the decoded object.

    The file is opened explicitly as UTF-8 so the result does not depend on
    the platform's default text encoding.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(f, encoding='utf-8') as data_file:
        return json.load(data_file)
if __name__ == '__main__':
    # Script entry point: load the job description from ``jsonfile`` and run
    # every scrape job it lists, following sub-tasks through a work queue.
    #
    # Keys read from the description: 'search', 'targetRoot', 'do' and, when
    # 'search' is 1, 'searchQuery' and 'searchSeed'.
    # Keys read from each job: 'repeated', 'isThereASubtask', 'saveThis',
    # 'name', 'afterSequence', 'bet', 'ween' and, for a sub-task, 'do'.
    #
    # NOTE(review): getContent and reqHeaders are not defined in this file;
    # they are assumed to be provided by ``from scrapfunctions import *`` -
    # confirm before running.
    from collections import deque

    # Seconds to wait for a response; requests waits forever without a timeout.
    REQUEST_TIMEOUT = 30

    # Parse JSON and assign variables
    jsondata = parseJSON(jsonfile)

    # The root link is either the search URL or the plain target root.
    isSearch = int(jsondata['search'])
    targetRoot = jsondata['targetRoot']
    if isSearch == 1:
        rootLink = jsondata['searchQuery'] + jsondata['searchSeed']
    else:
        rootLink = targetRoot

    # Work queue of (level, link, jobs) entries, processed first-in first-out.
    # NOTE(review): ``level`` is taken to be the depth below the root page
    # (root = 0); the meaning was left as a TODO - confirm.
    queue = deque([(0, rootLink, jsondata['do'])])

    # Saved scrape results: job name -> list of scraped values.
    dictionary = {}

    # The loop ends once no link is waiting; only sub-tasks add new entries.
    while queue:
        level, link, jobs = queue.popleft()
        if not jobs:
            # Nothing to run on this page, so do not download it.
            continue

        page = requests.get(
            link, headers=reqHeaders, timeout=REQUEST_TIMEOUT
        ).text
        ln = len(page)

        for job in jobs:
            # Get the requirements and type of job
            doRepeatedly = int(job['repeated'])
            isThereASubtask = int(job['isThereASubtask'])
            wannaSave = int(job['saveThis'])
            name = job['name']

            # Argument tuple handed to getContent: the page text, then every
            # 'afterSequence' item, then the 'bet' and 'ween' values.
            tup = (page,) + tuple(job['afterSequence']) + (
                job['bet'],
                job['ween'],
            )

            # Scrape: call getContent from the current offset until the page
            # is exhausted, it raises ValueError, or (for a non-repeated job)
            # the first value has been collected.
            off = 0
            dat = []
            while off < ln:
                try:
                    val, off = getContent(tup, offset=off)
                except ValueError:
                    break
                dat.append(val)
                if doRepeatedly == 0:
                    break
            print(dat)  # Log scraped things

            # If there is a subtask for the corresponding scrape, every
            # scraped value is treated as a link and queued with the job's
            # nested jobs. A tuple is used because the nested job list could
            # not be stored in a set.
            # NOTE(review): links are queued exactly as scraped; relative
            # URLs are not resolved against the current page - confirm.
            if isThereASubtask == 1:
                for sublink in dat:
                    queue.append((level + 1, sublink, job['do']))

            # Save or not, depending on wannaSave
            if wannaSave == 1:
                dictionary.setdefault(name, []).extend(dat)

    # TODO: decide what to do with ``dictionary`` (the collected scrape data);
    # it is only held in memory for now.