From f91cf3faa0e7beb0bfbd4a8756e2a9e6a598b46c Mon Sep 17 00:00:00 2001
From: Brandon Rose <rose.brandon.m@gmail.com>
Date: Mon, 27 Apr 2020 16:53:30 -0500
Subject: [PATCH] Added proxy support for Python3

---
 README.md                |  9 +++++++++
 googlesearch/__init__.py | 21 +++++++++++++++------
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index d3b8b30874..cc3711f485 100644
--- a/README.md
+++ b/README.md
@@ -17,3 +17,12 @@ Installing
 ----------
 
     pip install google
+
+
+Proxy support (Python 3+ only)
+------------------------------
+
+    # Get the first 20 hits for: "Breaking Code" WordPress blog
+    from googlesearch import search
+    for url in search('"Breaking Code" WordPress blog', stop=20, proxy='http://proxy.ip.address:proxy_port'):
+        print(url)
\ No newline at end of file
diff --git a/googlesearch/__init__.py b/googlesearch/__init__.py
index 0f21946ac8..e2a16a9260 100644
--- a/googlesearch/__init__.py
+++ b/googlesearch/__init__.py
@@ -35,7 +35,7 @@
 
 if sys.version_info[0] > 2:
     from http.cookiejar import LWPCookieJar
-    from urllib.request import Request, urlopen
+    from urllib.request import Request, ProxyHandler, urlopen, build_opener
     from urllib.parse import quote_plus, urlparse, parse_qs
 else:
     from cookielib import LWPCookieJar
@@ -153,13 +153,15 @@ def get_tbs(from_date, to_date):
 
 # Request the given URL and return the response page, using the cookie jar.
 # If the cookie jar is inaccessible, the errors are ignored.
-def get_page(url, user_agent=None):
+def get_page(url, user_agent=None, proxy=None):
     """
     Request the given URL and return the response page, using the cookie jar.
 
     :param str url: URL to retrieve.
     :param str user_agent: User agent for the HTTP requests.
         Use None for the default.
+    :param str proxy: Proxy URL for HTTPS requests (e.g. http://localhost:8118).
+        Use None for no proxy.
 
     :rtype: str
     :return: Web page retrieved for the given URL.
@@ -173,7 +175,12 @@ def get_page(url, user_agent=None):
     request = Request(url)
     request.add_header('User-Agent', user_agent)
     cookie_jar.add_cookie_header(request)
-    response = urlopen(request)
+    if proxy:
+        proxy_handler = ProxyHandler({'https': proxy})
+        opener = build_opener(proxy_handler)
+        response = opener.open(request)
+    else:
+        response = urlopen(request)
     cookie_jar.extract_cookies(response, request)
     html = response.read()
     response.close()
@@ -209,7 +216,7 @@ def filter_result(link):
 # Returns a generator that yields URLs.
 def search(query, tld='com', lang='en', tbs='0', safe='off', num=10, start=0,
            stop=None, domains=None, pause=2.0, tpe='', country='',
-           extra_params=None, user_agent=None):
+           extra_params=None, user_agent=None, proxy=None):
     """
     Search the given query string using Google.
 
@@ -240,6 +247,8 @@ def search(query, tld='com', lang='en', tbs='0', safe='off', num=10, start=0,
         {'filter': '0'} which will append '&filter=0' to every query.
     :param str user_agent: User agent for the HTTP requests.
         Use None for the default.
+    :param str proxy: A URL for a proxy you wish to use (e.g. http://localhost:8118).
+        Use None for no proxy.
 
     :rtype: generator of str
     :return: Generator (iterator) that yields found URLs.
@@ -276,7 +285,7 @@ def search(query, tld='com', lang='en', tbs='0', safe='off', num=10, start=0,
             )
 
     # Grab the cookie from the home page.
-    get_page(url_home % vars(), user_agent)
+    get_page(url_home % vars(), user_agent, proxy)
 
     # Prepare the URL of the first request.
     if start:
@@ -309,7 +318,7 @@ def search(query, tld='com', lang='en', tbs='0', safe='off', num=10, start=0,
         time.sleep(pause)
 
         # Request the Google Search results page.
-        html = get_page(url, user_agent)
+        html = get_page(url, user_agent, proxy)
 
         # Parse the response and get every anchored URL.
         if is_bs4: