Pagination Not Working
# Imported here because the query string below is percent-encoded with urllib.
from urllib.parse import quote, urlencode

# assignee_type codes the query matches on.
# NOTE(review): presumably the PatentsView codes for US and foreign
# companies -- confirm against the PatentsView data dictionary.
assignee_type = [2, 3]

# Fields requested for every patent record returned by the API.
fields = [
    "patent_id",
    "patent_type",
    "application.filing_date",
    "assignees.assignee_organization",
    "assignees.assignee_type",
    "assignees.assignee_country",
    "assignees.assignee_sequence",
    "assignees.assignee_id",
    "ipcr.ipc_sequence",
    "ipcr.ipc_section",
    "patent_num_us_patents_cited",
    "inventors.inventor_country",
    "inventors.inventor_id",
]

# Utility patents with one of the assignee types above whose application
# filing date lies between 1989-01-01 and 2023-12-31 (both inclusive).
query = {
    "_and": [
        {"patent_type": "utility"},
        {"assignees.assignee_type": assignee_type},
        {"_gte": {"application.filing_date": "1989-01-01"}},
        {"_lte": {"application.filing_date": "2023-12-31"}},
    ]
}

field_list = json.dumps(fields)
sort_param = json.dumps([{"patent_id": "asc"}])
# Serialized separately: reusing double quotes inside a double-quoted
# f-string is a SyntaxError on Python versions before 3.12.
page_options = json.dumps({"size": 1000})

# Initial URL (first page, no "after" cursor). The JSON values are
# percent-encoded so quotes, braces and spaces cannot corrupt the query string.
url = "https://search.patentsview.org/api/v1/patent/?" + urlencode(
    {
        "q": json.dumps(query),
        "f": field_list,
        "s": sort_param,
        "o": page_options,
    },
    quote_via=quote,
)

# Client-side rate limiting state read and updated by fetch_patent_data():
# at most REQUEST_LIMIT requests are sent before it pauses, and
# REQUEST_INTERVAL is the length of that pause window in seconds.
REQUEST_LIMIT = 45
REQUEST_INTERVAL = 60
requests_made = 0
last_request_time = 0
def fetch_patent_data(url, api_key, timeout=120):
    """Fetch one page of patent records, throttling requests client-side.

    The module-level counters ``requests_made`` and ``last_request_time``
    track how many requests have been sent. Once ``REQUEST_LIMIT`` requests
    have gone out, the call sleeps for the remainder of ``REQUEST_INTERVAL``
    before sending the next one.

    Args:
        url: Fully built request URL, including the query string.
        api_key: Value sent in the ``X-Api-Key`` request header.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block forever.

    Returns:
        The list stored under ``"patents"`` in the JSON response. An empty
        list is returned when that key is missing or null, and also when
        the server answers with a non-200 status (details are printed), so
        an empty result does not by itself prove the data is exhausted.

    Raises:
        requests.RequestException: On connection failures or when
            ``timeout`` expires.
    """
    global requests_made, last_request_time

    time_since_last_request = time.time() - last_request_time
    if time_since_last_request >= REQUEST_INTERVAL:
        # Nothing was sent during the last full interval, so the budget is
        # fresh again; without this reset a stale counter would force a
        # needless sleep on a later call.
        requests_made = 0
    elif requests_made >= REQUEST_LIMIT:
        sleep_time = REQUEST_INTERVAL - time_since_last_request
        print(f"Rate limit reached. Sleeping for {sleep_time:.2f} seconds...")
        time.sleep(sleep_time)
        requests_made = 0  # Reset counter after sleep

    headers = {"X-Api-Key": api_key}
    response = requests.get(url, headers=headers, timeout=timeout)
    requests_made += 1
    last_request_time = time.time()

    if response.status_code == 200:
        data = response.json()
        # Normalize a missing or null "patents" entry to an empty list.
        return data.get("patents") or []

    # Non-200 answer: report the diagnostic headers and the body, then
    # return an empty page.
    status_reason = response.headers.get("X-Status-Reason")
    status_reason_code = response.headers.get("X-Status-Reason-Code")
    print("Error fetching data:")
    print(f" Status Code: {response.status_code}")
    print(f" X-Status-Reason: {status_reason}")
    print(f" X-Status-Reason-Code: {status_reason_code}")
    print(f" Response Text: {response.text}")
    return []
# Imported here because the next-page URL is percent-encoded with urllib.
from urllib.parse import quote, urlencode

# Download every page of results, following the "after" cursor.
# NOTE(review): the PatentSearch API documents a "pad_patent_id" option; if
# pages repeat or skip records while sorting on patent_id, check whether the
# cursor has to be the zero-padded id -- confirm against the API reference.
all_patent_data = []
page_count = 0  # was named "iter", which shadowed the builtin
previous_cursor = None
while True:
    patent_data = fetch_patent_data(url, API_KEY)
    if not patent_data:
        # Empty page: either the results are exhausted or the request
        # failed (fetch_patent_data prints the error details).
        break

    last_patent_id = patent_data[-1]["patent_id"]
    if last_patent_id == previous_cursor:
        # The server returned the page we already have, so the cursor was
        # not honoured. Stop instead of downloading duplicates forever.
        print("Pagination cursor did not advance; stopping.")
        break

    all_patent_data.extend(patent_data)
    page_count += 1
    print(page_count)

    # Fewer than 1000 results were returned, which indicates the last page.
    if len(patent_data) < 1000:
        break

    # Prepare the URL for the next page using the last patent_id.
    print(last_patent_id)
    previous_cursor = last_patent_id
    # Serialized separately: reusing double quotes inside a double-quoted
    # f-string is a SyntaxError on Python versions before 3.12.
    next_options = json.dumps({"after": last_patent_id, "size": 1000})
    url = "https://search.patentsview.org/api/v1/patent/?" + urlencode(
        {
            "q": json.dumps(query),
            "f": field_list,
            "s": sort_param,
            "o": next_options,
        },
        quote_via=quote,
    )

# Report the total on every exit path, not only after a short final page.
print(len(all_patent_data))
print("Patent data downloaded. Convert to csv file next")