-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathCEMSDataDownloadForValidation.py
More file actions
101 lines (92 loc) · 4.3 KB
/
CEMSDataDownloadForValidation.py
File metadata and controls
101 lines (92 loc) · 4.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#Michael Craig
#November 7, 2016
#Script downloads hourly CEMS data for given years and states from FTP site:
#ftp://ftp.epa.gov/dmdnload/emissions/hourly/monthly/
from ftplib import FTP
import os, time, zipfile
######### SET PARAMETERS #######################################################
#Set key parameters
def setInputParameters():
    """Return the user-editable settings for a run.

    Returns:
        Tuple (states, years, dirToDownloadTo, dirToExtractTo, downloadFiles):
        states is a list of lowercase 2-letter state abbreviations, years is
        a list of years, the two directories are sub-folders of the root CEMS
        storage folder ('ZipFiles' and 'CSVFiles'), and downloadFiles says
        whether the CEMS files should be downloaded.
    """
    # Root folder that holds both the downloaded archives and the extracted CSVs.
    rootCEMSDir = 'C:\\Users\\mtcraig\\Desktop\\EPP Research\\Databases\\CEMSDataStorageValidation'
    zipFolder = os.path.join(rootCEMSDir, 'ZipFiles')
    csvFolder = os.path.join(rootCEMSDir, 'CSVFiles')

    stateCodes = ['tx']
    yearList = list(range(2015, 2016))  # end of range is exclusive, so only 2015
    doDownload = True
    return (stateCodes, yearList, zipFolder, csvFolder, doDownload)
#Set generic non-user-defined inputs
def setOtherParameters():
    """Return the fixed FTP location of the data.

    Returns:
        Tuple (ftpSite, baseDir): the FTP host name and the directory on that
        host under which the monthly hourly-emissions files are stored.
    """
    return ('ftp.epa.gov', '/dmdnload/emissions/hourly/monthly/')
################################################################################
######### MASTER FUNCTION #######################################################
def masterFunction():
    """Run the whole workflow: optionally download the zip files, then extract them."""
    userSettings = setInputParameters()
    stateList, yearList, zipDir, csvDir, wantDownload = userSettings
    ftpHost, ftpBaseDir = setOtherParameters()

    # The flag only controls the download step; extraction always runs on
    # whatever zip files are present in the download folder.
    if wantDownload == True:
        downloadCEMSData(stateList, yearList, zipDir, ftpHost, ftpBaseDir)
    extractCSVFiles(csvDir, zipDir, yearList)
################################################################################
######### DOWNLOAD CEMS ZIP FILES #######################################################
#Download .zip files
def downloadCEMSData(states,years,dirToDownloadTo,ftpSite,baseDir):
    """Download one zip file per year/state/month combination from the FTP site.

    Each file is requested as '<year><state><month>.zip' from the '<year>/'
    sub-directory of baseDir and saved to dirToDownloadTo/<year>/. A file
    that cannot be downloaded is reported and skipped, so one missing month
    does not stop the run.

    Args:
        states: iterable of state abbreviation strings used in the file names.
        years: iterable of years; each must exist as a directory on the server.
        dirToDownloadTo: local folder that receives one sub-folder per year.
        ftpSite: FTP host name.
        baseDir: directory on the host that contains the per-year directories.

    Raises:
        Any FTP error raised while connecting or while changing into a year
        directory; the connection is closed before the error propagates.
    """
    # Local import: the file only imports FTP from ftplib at module level.
    from ftplib import all_errors

    epaFtp = connectToFTP(ftpSite, baseDir)
    try:
        months = getMonthStrs()
        parentDir = epaFtp.pwd()
        for year in years:
            epaFtp.cwd(str(year) + '/')
            dirToSaveYr = os.path.join(dirToDownloadTo, str(year))
            if not os.path.exists(dirToSaveYr):
                os.makedirs(dirToSaveYr)
            for state in states:
                for month in months:
                    currFile = str(year) + state + month + '.zip'
                    ftpInput = 'RETR ' + currFile
                    filenameToSave = os.path.join(dirToSaveYr, currFile)
                    try:
                        with open(filenameToSave, 'wb') as fobj:
                            epaFtp.retrbinary(ftpInput, fobj.write)
                    except all_errors:
                        # all_errors covers FTP protocol errors, socket/file
                        # errors and EOF, but lets KeyboardInterrupt through.
                        print("Could not download:\t" + currFile)
                        # open() already created the file; drop the empty or
                        # partial archive so it is not mistaken for real data.
                        try:
                            os.remove(filenameToSave)
                        except OSError:
                            pass
                    else:
                        print('Downloaded ' + currFile)
                    time.sleep(2) #pause 2 seconds after every attempt
                print('Finished state ' + state)
            print('Finished year ' + str(year))
            # Return to the base directory so the next year's relative cwd works.
            epaFtp.cwd(parentDir)
        print('Finished downloads')
    finally:
        # Always release the connection, even when an error escapes the loops.
        try:
            epaFtp.quit()
        except all_errors:
            # quit() fails on an already-broken connection; just close the socket.
            epaFtp.close()
#Connect to FTP
def connectToFTP(ftpSite,baseDir):
    """Open an anonymous FTP session and change into baseDir.

    Args:
        ftpSite: FTP host name to connect to.
        baseDir: directory on the host to make the working directory.

    Returns:
        The connected ftplib.FTP object; the caller is responsible for
        closing it.

    Raises:
        Whatever ftplib raises when connecting, logging in or changing
        directory fails.
    """
    epaFtp = FTP(ftpSite)
    try:
        epaFtp.login()  # no arguments: anonymous login
        epaFtp.cwd(baseDir)
    except Exception:
        # Do not leak the open socket when login or cwd is rejected.
        epaFtp.close()
        raise
    return epaFtp
#Create list of month suffixes on CEMS data files w/ leading zeros
def getMonthStrs():
    """Return the twelve month numbers as 2-character strings, '01' through '12'."""
    return [str(monthNum).zfill(2) for monthNum in range(1, 13)]
################################################################################
######### EXTRACT .ZIP DATA INTO .CSV FILES ####################################
#Extract CSV files from zip files
def extractCSVFiles(dirToExtractTo,dirToDownloadTo,years):
    """Extract every .zip file in dirToDownloadTo/<year>/ into dirToExtractTo/<year>/.

    Files without a '.zip' extension are ignored. An archive that cannot be
    opened or extracted is reported and skipped. A year with no zip-file
    folder is reported and skipped instead of raising.

    Args:
        dirToExtractTo: folder that receives one sub-folder of extracted
            files per year (created if missing).
        dirToDownloadTo: folder holding one sub-folder of zip files per year.
        years: iterable of years to process.
    """
    for year in years:
        dirToExtractToYr = os.path.join(dirToExtractTo, str(year))
        dirWithZipFiles = os.path.join(dirToDownloadTo, str(year))
        if not os.path.exists(dirToExtractToYr):
            os.makedirs(dirToExtractToYr)
        if not os.path.isdir(dirWithZipFiles):
            # Nothing was downloaded for this year; os.listdir would raise.
            print('No zip file folder for ' + str(year))
            continue
        for cemsZipFile in os.listdir(dirWithZipFiles):
            if os.path.splitext(cemsZipFile)[1] != '.zip':
                continue  # make sure .zip file, otherwise skip
            zipPath = os.path.join(dirWithZipFiles, cemsZipFile)
            try:
                # The with-block closes the archive handle even if extraction fails.
                with zipfile.ZipFile(zipPath) as cemsZipFileObj:
                    cemsZipFileObj.extractall(dirToExtractToYr)
            except Exception:
                # A damaged archive can surface as several exception types;
                # Exception keeps the skip-and-report behaviour while still
                # letting KeyboardInterrupt/SystemExit stop the run.
                print('Bad zip file:\t' + cemsZipFile)
        print('Extracted ' + str(year))
################################################################################
# Script entry point: runs the full download-and-extract workflow.
# NOTE(review): this call is unguarded, so it also runs when the file is
# imported; consider an `if __name__ == '__main__':` guard if that is not intended.
masterFunction()