디비 연결 후 삽입까지
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
from selenium import webdriver
import requests
import time
import sys
import pandas as pd
# Scrape the data.go.kr file-dataset listing, visit each dataset's detail page,
# and store every (header, value) pair of its metadata table in Oracle table
# `lim` together with the page title and URL.

# Oracle DB connection
import oracledb

# NOTE(review): credentials and DSN are hard-coded; consider loading them from
# the environment or a config file before sharing this script.
con = oracledb.connect(user="TEST",password="test",dsn="localhost:1521/orcl")
cursor = con.cursor()
print("!! connection complete !!")

# Listing page. The original literal contained "¤tPage", an HTML-entity
# mangling of "&currentPage"; restored so the paging parameter is sent.
url = 'https://www.data.go.kr/tcs/dss/selectDataSetList.do?dType=FILE&keyword=&operator=AND&detailKeyword=&publicDataPk=&recmSe=&detailText=&relatedKeyword=&commaNotInData=&commaAndData=&commaOrData=&must_not=&tabId=&dataSetCoreTf=&coreDataNm=&sort=&relRadio=&orgFullName=%EB%86%8D%EB%A6%BC%EC%B6%95%EC%82%B0%EC%8B%9D%ED%92%88%EB%B6%80&orgFilter=%EB%86%8D%EB%A6%BC%EC%B6%95%EC%82%B0%EC%8B%9D%ED%92%88%EB%B6%80&org=%EB%86%8D%EB%A6%BC%EC%B6%95%EC%82%B0%EC%8B%9D%ED%92%88%EB%B6%80&orgSearch=&currentPage=1&perPage=400&brm=&instt=&svcType=&kwrdArray=&extsn=&coreDataNmArray=&pblonsipScopeCode=#'

# Driver setup; "detach" keeps the browser window open after the script ends.
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
driver = webdriver.Chrome(options=chrome_options)
# The implicit wait is a session-wide setting for element lookups, not a sleep,
# so it is configured once instead of being re-issued between steps.
driver.implicitly_wait(6)

sql_insert = 'insert into lim (title,href,keys,value) values(:title,:href,:keys,:value)'

# Everything scraped so far, kept in parallel lists (one entry per table row).
keys = []
values = []
titles = []
hrefs = []

try:
    # Open the listing page
    driver.get(url)

    # Extract the detail-page hrefs BEFORE navigating anywhere: the WebElements
    # in `links` become stale as soon as the driver leaves the listing page.
    links = driver.find_elements(By.CSS_SELECTOR,"#fileDataList > div.result-list > ul > li > dl > dt > a")
    # Skips the first 39 results — presumably resuming an earlier partial run;
    # TODO confirm the offset is still wanted.
    detail_urls = [
        link_href
        for link_href in (link.get_attribute("href") for link in links[39:])
        if link_href
    ]

    for href in detail_urls:
        driver.get(href)
        print(href)

        table = driver.find_element(By.CLASS_NAME,"file-meta-table-pc")
        # Locate by XPath and by tag name
        title = driver.find_element(By.XPATH,'//*[@id="contents"]/div[2]/div[1]/div[1]/p')
        row = table.find_elements(By.TAG_NAME,"th")
        data = table.find_elements(By.TAG_NAME,"td")
        print('Rows --> {}'.format(len(row)))
        print('Data --> {}'.format(len(data)))
        title_text = title.text
        print('title -->'+ title_text)

        # One record per <th>/<td> pair; zip stops at the shorter sequence.
        records = [
            {"title": title_text, "href": href, "keys": key.text, "value": value.text}
            for key, value in zip(row, data)
        ]
        for record in records:
            keys.append(record["keys"])
            values.append(record["value"])
            titles.append(record["title"])
            hrefs.append(record["href"])

        # Insert the page's rows in one round trip and commit per page so a
        # failure on a later page does not lose what was already scraped.
        if records:
            cursor.executemany(sql_insert, records)
            con.commit()
finally:
    # Release the database resources even if scraping fails midway.
    cursor.close()
    con.close()
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
from selenium import webdriver
import requests
import time
import sys
import pandas as pd
# Scrape the metadata table of the page at `url` and store every
# (header, value) pair in Oracle table `lim` with the page title and URL.

# Oracle DB connection
import oracledb

# NOTE(review): credentials and DSN are hard-coded; consider loading them from
# the environment or a config file before sharing this script.
con = oracledb.connect(user="TEST",password="test",dsn="localhost:1521/orcl")
cursor = con.cursor()
print("!! connection complete !!")

# Driver setup; "detach" keeps the browser window open after the script ends.
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
driver = webdriver.Chrome(options=chrome_options)
# The implicit wait is a session-wide lookup timeout, not a sleep; set it once
# to the largest value the original used.
driver.implicitly_wait(7)

sql_insert = 'insert into lim (title,href,keys,value) values(:title,:href,:keys,:value)'

try:
    # Open the page.
    # NOTE(review): `url` is whatever was assigned earlier in the file (the
    # listing URL); the lookups below expect a page that contains the
    # "file-meta-table-pc" table — confirm the intended target page.
    driver.get(url)

    table = driver.find_element(By.CLASS_NAME,"file-meta-table-pc")
    # Locate by XPath and by tag name
    title = driver.find_element(By.XPATH,'//*[@id="contents"]/div[2]/div[1]/div[1]/p')
    row = table.find_elements(By.TAG_NAME,"th")
    data = table.find_elements(By.TAG_NAME,"td")
    print('Rows --> {}'.format(len(row)))
    print('Data --> {}'.format(len(data)))

    title_text = title.text
    # One record per <th>/<td> pair; zip stops at the shorter sequence.
    records = [
        {"title": title_text, "href": url, "keys": key.text, "value": value.text}
        for key, value in zip(row, data)
    ]

    # Insert all rows in one round trip, then commit once.
    if records:
        cursor.executemany(sql_insert, records)
        con.commit()
finally:
    # Release the database resources even if scraping fails.
    cursor.close()
    con.close()
'리눅스' 카테고리의 다른 글
| 리눅스 디렉토리 (0) | 2024.05.30 |
|---|---|
| 리눅스 디렉토리 확인 (0) | 2024.05.29 |
| DB 정보 조회 및 로그 확인 리눅스 명령어 (0) | 2024.05.20 |
| 오라클 리눅스 환경설정 및 설치 (0) | 2024.05.02 |
| [리눅스] Docker 사용하여 리눅스 환경으로 오라클 설치 (0) | 2024.04.18 |