!pip install simplejson
Requirement already satisfied: simplejson in c:\users\zarathu09\anaconda3\envs\zarathu\lib\site-packages (3.18.3)
Requirement already satisfied: simplejson in c:\users\zarathu09\anaconda3\envs\zarathu\lib\site-packages (3.18.3)
일일 허용 한도: 25,000건
일일 검색어 제한: 10,000개
추가하실 도메인을 넣어주세요
def Google_API(query, wanted_row):
    """Collect Google Custom Search results into a DataFrame.

    Parameters
    ----------
    query : str
        Search query; '|' is rewritten to 'OR' and PDF results are excluded.
    wanted_row : int
        Target number of result rows (hard-capped at 300).

    Returns
    -------
    pandas.DataFrame
        Columns 'Title', 'Link', 'Description', indexed by start_page + i.

    Notes
    -----
    Relies on module-level ``Google_API_KEY``, ``Google_SEARCH_ENGINE_ID``
    and ``Trash_Link`` (substrings identifying links to skip).
    """
    query = query.replace("|", "OR")
    query += "-filetype:pdf"
    df_google = pd.DataFrame(columns=['Title', 'Link', 'Description'])
    row_count = 0
    # The CSE API pages in steps of 10; over-request so that filtering
    # still leaves enough rows to reach wanted_row.
    for start_page in range(1, wanted_row + 1000, 10):
        url = (
            "https://www.googleapis.com/customsearch/v1"
            f"?key={Google_API_KEY}&cx={Google_SEARCH_ENGINE_ID}"
            f"&q={query}&start={start_page}"
        )
        data = requests.get(url).json()
        search_items = data.get("items")
        # BUG FIX: the original iterated this unconditionally and used a
        # bare `except:` to swallow the TypeError raised when the API
        # returns no "items" (end of results / quota error). Check
        # explicitly instead of masking every other error too.
        if not search_items:
            return df_google
        for i, search_item in enumerate(search_items, start=1):
            link = search_item.get("link")
            if any(trash in link for trash in Trash_Link):
                continue  # skip blacklisted domains
            title = search_item.get("title")
            description = search_item.get("snippet")  # typo `descripiton` fixed
            df_google.loc[start_page + i] = [title, link, description]
            row_count += 1
            # Stop at the caller's target, or at the 300-row hard cap.
            if row_count >= wanted_row or row_count == 300:
                return df_google
    return df_google
def Naver_API(query, wanted_row):
    """Collect Naver web-search ("webkr") results into a DataFrame.

    Parameters
    ----------
    query : str
        Raw search query; URL-quoted before the request.
    wanted_row : int
        Target number of result rows (hard-capped at 300).

    Returns
    -------
    pandas.DataFrame
        Columns 'Title', 'Link', 'Description'. Always a DataFrame —
        possibly empty — never ``None``.

    Notes
    -----
    Relies on module-level ``Naver_client_id``, ``Naver_client_secret``
    and ``Trash_Link``. Naver wraps match highlights in HTML tags,
    which are stripped from titles and descriptions.
    """
    query = urllib.parse.quote(query)
    display = 100  # max page size for the Naver API
    sort = 'sim'   # sort by relevance
    df = pd.DataFrame(columns=['Title', 'Link', 'Description'])
    idx = 0
    row_count = 0
    remove_tag = re.compile('<.*?>')  # hoisted out of the response loop
    for start_index in range(1, wanted_row + 10000, display):
        url = (
            "https://openapi.naver.com/v1/search/webkr?query=" + query
            + "&display=" + str(display)
            + "&start=" + str(start_index)
            + "&sort=" + sort
        )
        request = urllib.request.Request(url)
        request.add_header("X-Naver-Client-Id", Naver_client_id)
        request.add_header("X-Naver-Client-Secret", Naver_client_secret)
        try:
            response = urllib.request.urlopen(request)
            if response.getcode() == 200:
                items = json.loads(response.read().decode('utf-8'))['items']
                for item in items:
                    link = item['link']
                    if any(trash in link for trash in Trash_Link):
                        idx += 1
                        continue  # skip blacklisted domains
                    title = re.sub(remove_tag, '', item['title'])
                    description = re.sub(remove_tag, '', item['description'])
                    df.loc[idx] = [title, link, description]
                    idx += 1
                    row_count += 1
                    # Stop at the caller's target, or at the 300-row cap.
                    if row_count >= wanted_row or row_count == 300:
                        return df
        except Exception:
            # Quota exhausted, invalid start index, or malformed payload:
            # best-effort — return whatever was collected so far.
            return df
    # BUG FIX: the original fell off the end and returned None when the
    # loop exhausted without reaching wanted_row, which crashed callers
    # doing df['search_engine'] = ... on the result.
    return df
def Daum_API(query, wanted_row):
    """Collect Daum (Kakao) web-search results into a DataFrame.

    Parameters
    ----------
    query : str
        Search query, passed as-is to the Kakao API.
    wanted_row : int
        Target number of result rows (hard-capped at 300).

    Returns
    -------
    pandas.DataFrame
        Columns 'Title', 'Link', 'Description' with HTML highlight tags
        stripped from the text columns.

    Notes
    -----
    Relies on module-level ``Kakao_API_key`` and ``Trash_Link``.
    Results dated before 2020 are dropped; documents with a missing or
    unparsable datetime are assumed recent (2023) and kept.
    """
    remove_tag = re.compile('<.*?>')  # compiled once, reused everywhere

    def _strip_tags(frame):
        # Shared cleanup — the original duplicated these three lines at
        # both return sites.
        frame['Title'] = frame['Title'].apply(lambda x: re.sub(remove_tag, '', x))
        frame['Description'] = frame['Description'].apply(lambda x: re.sub(remove_tag, '', x))
        return frame

    url = "https://dapi.kakao.com/v2/search/web"
    header = {'authorization': f'KakaoAK {Kakao_API_key}'}
    df = pd.DataFrame(columns=['Title', 'Link', 'Description'])
    row_count = 0
    pages = wanted_row // 10
    # Over-request a few extra pages so filtering still reaches wanted_row.
    for page in range(1, pages + 10):
        params = {'query': query, 'page': page}
        request = requests.get(url, params=params, headers=header)
        for i, item in enumerate(request.json()["documents"], start=1):
            link = item['url']
            try:
                written_year = int(item['datetime'][:4])
            except (KeyError, TypeError, ValueError):
                written_year = 2023  # missing/unparsable date: keep the row
            if any(trash in link for trash in Trash_Link) or written_year < 2020:
                continue  # skip blacklisted or stale (pre-2020) results
            df.loc[10 * page + i] = [item["title"], link, item["contents"]]
            row_count += 1
            # Stop at the caller's target, or at the 300-row hard cap.
            if row_count >= wanted_row or row_count == 300:
                return _strip_tags(df)
    return _strip_tags(df)
def final(query, wanted_row=100):
    """Query Google, Naver and Daum and return one combined DataFrame.

    Each engine's frame is tagged with a 'search_engine' column, and the
    module-level ``today`` is recorded in 'search_date'. The index is
    reset on the concatenated result.
    """
    engines = (
        (Google_API, 'Google'),
        (Naver_API, 'Naver'),
        (Daum_API, 'Daum'),
    )
    frames = []
    for api, label in engines:
        frame = api(query, wanted_row)
        frame['search_engine'] = label
        frames.append(frame)
    df_final = pd.concat(frames)
    df_final['search_date'] = today
    df_final.reset_index(inplace=True, drop=True)
    return df_final
Title | Link | Description | search_engine | search_date | |
---|---|---|---|---|---|
0 | 외상에 의한 뇌 손상 | 질환백과 | 의료정보 | 건강정보 | 서울아산병원 | https://www.amc.seoul.kr/asan/mobile/healthinf... | 최근에는 광범위한 뇌진탕의 정의에 의식 소실이 없는 경우도 포함합니다. ... 교통... | 20230210 | |
1 | 외상성 뇌 손상 | Disability Rights California | https://www.disabilityrightsca.org/ko/publicat... | 팻비 프로그램 이 프로그램은 정보, 의뢰, 봉사 활동, 훈련, 자기 옹호 지원, 개... | 20230210 | |
2 | 외상성 뇌 손상: Let's Help You - Washington 211 | https://wa211.org/ko/%EC%99%B8%EC%83%81%EC%84%... | Washington 211은 워싱턴주 사회보건서비스부와 협력하여 외상성 뇌손상(TB... | 20230210 | |
3 | Traumatic Brain Injury / Concussion | Concussi... | https://www.cdc.gov/traumaticbraininjury/index... | A traumatic brain injury, or TBI, is an injury... | 20230210 | |
4 | 외상성 뇌손상: 위원회 결정의 근거 | https://www.fcps.edu/sites/default/files/media... | 학생 이름. 학생 번호. 회의 날짜. 정의: 외상성 뇌손상이란 외부의 물리적 힘에 ... | 20230210 | |
... | ... | ... | ... | ... | ... |
239 | 대한진단검사의학회 | https://www.kslm.org/sub07/studying_total.html... | 각각의 특징을 열거한다. 2621 (4) 기타 손상 뇌좌상을 정의하고, 각각의 특징... | Daum | 20230210 |
240 | 두통, 오심을 주소로 하는 뇌진탕후증후군 (postconcussion syndrom... | https://www.jikm.or.kr/journal/scholar_xmls.ph... | 부종을 일으킬 수 있다13. 뇌진탕(concussion) 혹은 경미한 두부 외상(M... | Daum | 20230210 |
241 | 신경외과 | http://gbh.or.kr/hosp/hosp03_03.html | 뇌졸중, 뇌출혈, 뇌경색 등 뇌혈관 질환 뇌, 척추의 외상성 손상 간질, 뇌종양 척... | Daum | 20230210 |
242 | 안내 | https://kangnam.hallym.or.kr/hallymuniv_sub.as... | 이름, 전문진료분야 이름 전문진료분야 이호국 교수상세보기 진료예약 진료예약 뇌졸중(... | Daum | 20230210 |
243 | 의료진/진료시간표 | https://yumc.ac.kr:8443/medical/timetable.do;j... | 및 외상성 척추질환, 척수신경종양, 척추감염, 척추관련통증 교수 오전 인터넷예약 간... | Daum | 20230210 |
244 rows × 5 columns
@online{untitled,
author = {},
url = {https://blog.zarathu.com/posts/2022-02-07-tableone/Custom_Search_Zarathu.html},
langid = {en}
}