-
Notifications
You must be signed in to change notification settings - Fork 0
/
naver.py
96 lines (75 loc) · 2.98 KB
/
naver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# -*- coding: UTF-8 -*-
import sys
# import requests
# import json
# import time
# from bs4 import BeautifulSoup
from selenium import webdriver
import logging
import time
from selenium.webdriver.support.ui import WebDriverWait # available since 2.4.0
from selenium.webdriver.support import expected_conditions as EC # available since 2.26.0
from selenium.webdriver.common.by import By
# Python 2 only: site.py removes sys.setdefaultencoding at interpreter startup;
# reload(sys) restores it so the process-wide implicit str/unicode codec can be
# switched to UTF-8 on the next line.
reload(sys)
sys.setdefaultencoding('utf-8')
# Module-level PhantomJS WebDriver instance; get_chat_list() reaches it through
# this global name.
driver = webdriver.PhantomJS()
def get_chat_list(ctr):
    """Scrape the comments currently shown on the Naver sports TV page.

    Loads the page with the module-level ``driver``, collects every element
    with class ``cbox_thumb_on`` and, for each one, reads the author name and
    the comment text. The collected comments are written to a new file under
    ``./output/`` named ``<day>.<hour>.<minute>.<second>.txt`` (local time,
    not zero-padded), one comment per line.

    Args:
        ctr: Iteration counter; only used as a prefix in progress messages.

    Returns:
        A list of dicts with keys ``'name'``, ``'text'`` (UTF-8 encoded) and
        ``'now'`` (``str(time.time())`` taken once per call). The list is
        empty when the comment list could not be located.

    Raises:
        IOError: if the output file cannot be created (for example when
            ``./output/`` does not exist).
        Exception: whatever ``driver.get`` raises; scraping errors after the
            page load are printed and swallowed (best effort).
    """
    # Take a single clock snapshot so every filename field describes the same
    # instant (separate localtime() calls can straddle a second boundary).
    stamp = time.localtime()
    out_path = './output/' + '.'.join(
        str(field)
        for field in (stamp.tm_mday, stamp.tm_hour, stamp.tm_min, stamp.tm_sec)
    ) + '.txt'
    url = 'http://sports.news.naver.com/tv/index.nhn?category=etc&gameId=20160310BADUK'
    comments = []

    # The file is created before the page load, as before; the context
    # manager guarantees it is closed even if the load or a write raises.
    with open(out_path, 'w') as f:
        driver.get(url)
        try:
            now = str(time.time())
            WebDriverWait(driver, 3).until(
                EC.visibility_of_element_located((By.CLASS_NAME, 'cbox_thumb_on'))
            )
            comment_list = driver.find_elements_by_class_name('cbox_thumb_on')
            print(str(ctr) + 'got comment list')
            for comment in comment_list:
                try:
                    # NOTE(review): these waits are page-wide (on ``driver``),
                    # not scoped to ``comment``, and the class names look like
                    # YouTube's -- confirm they exist on this page, otherwise
                    # every comment costs a 3 s timeout and is skipped.
                    WebDriverWait(driver, 3).until(
                        EC.visibility_of_element_located((By.CLASS_NAME, 'yt-user-name'))
                    )
                    # ``.text`` is already unicode; encode it directly rather
                    # than round-tripping through decode(), which only works
                    # when the default encoding has been forced to UTF-8.
                    name = comment.find_element_by_class_name('yt-user-name').text.encode('utf-8')
                    WebDriverWait(driver, 3).until(
                        EC.visibility_of_element_located((By.CLASS_NAME, 'comment-text'))
                    )
                    text = comment.find_element_by_class_name('comment-text').text.encode('utf-8')
                    comments.append({'name': name, 'text': text, 'now': now})
                except Exception as e:
                    # Best effort: one broken comment must not stop the rest.
                    print(str(ctr) + 'a comment have error', e)
            print(str(ctr) + 'comments are appended to list')
        except Exception as e:
            print(str(ctr) + 'failed to get comment list', e)

        for chat in comments:
            # NOTE(review): the two separators differ by one space; kept
            # byte-for-byte because existing output files use this format.
            f.write(
                chat['name'] + '\t//seperator//\t'
                + chat['text'] + '\t//seperator// \t'
                + chat['now'] + '\n'
            )
        print(str(ctr) + 'wrote to file')
    return comments
def crawl(ctr):
    """Run a single scrape pass for iteration ``ctr``.

    Delegates to ``get_chat_list`` and discards the list it returns.
    """
    get_chat_list(ctr)
    return None
def main():
    """Scrape the page over and over, printing a success line per pass.

    The loop has no exit condition, so this function only ends when an
    exception propagates out of ``crawl``. It does not shut the browser down.
    """
    iteration = 0
    while True:
        crawl(iteration)
        print('success ' + str(iteration))
        iteration = iteration + 1
# main() loops forever, so control only leaves it through an exception
# (for example Ctrl-C). The finally clause makes sure the PhantomJS process is
# shut down in that case instead of being left orphaned.
try:
    main()
finally:
    driver.quit()
test = 'test'