Scraping (crawling) movie box-office data in Korea

Enhold
Oct 22, 2020

This is Python code that crawls (scrapes) the Korean box-office rankings for 2005–2006.
I'm scraping the image URLs and movie names from the Naver website.

The sales and box-office data come from http://www.kobis.or.kr/

#=================================

import requests

from bs4 import BeautifulSoup

import pprint as ppr

from datetime import date
import pandas
import pandas as pd
import csv
import time
import re
#=================================

'''

https://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=cnt&date=20181019

https://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=cnt&date=20181019&tg=18

https://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=cnt&tg=18&date=20181017

#old_content > table > tbody > tr:nth-child(1)

'''

# Weekly sampling dates: every Monday between the start and end dates.
dt_index = pandas.date_range(start='20050524', end='20061230', freq='W-MON')
# DatetimeIndex -> list of "YYYYMMDD" strings, the format appended to the
# ranking URL's ``date=`` query parameter.
dt_list = dt_index.strftime("%Y%m%d").tolist()
url = "https://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=cnt&tg=0&date="

# Seconds to wait for any single HTTP response. Without a timeout,
# requests waits indefinitely and one stalled connection hangs the run.
REQUEST_TIMEOUT = 10

# Collected [movie name, poster image URL] pairs, one per ranking row scraped.
a = []


def _scrape_movie(title_cell):
    """Return ``[movie name, poster URL]`` for one ranking row, or ``None``.

    ``title_cell`` is the ``<td class="title">`` element of a ranking row.
    The movie name and the detail-page path are read from its ``div > a``
    link; the detail page is then fetched to find the poster ``<img>``.

    Scraping is best-effort: if the link, the poster element, or one of the
    expected attributes is missing, or the detail request fails, the error
    is printed and ``None`` is returned so the caller can skip the row.
    """
    try:
        link = title_cell.select_one("div > a")
        name = link.attrs['title']  # Movie name
        detail_path = link.attrs['href']

        each_raw = requests.get("https://movie.naver.com" + detail_path,
                                headers={"User-Agent": "Mozilla/5.0"},
                                timeout=REQUEST_TIMEOUT)
        each_html = BeautifulSoup(each_raw.text, 'html.parser')

        # poster : div.mv_info_area div.poster img
        poster = each_html.select_one("div.mv_info_area div.poster img")
        return [name, poster.attrs["src"]]
    except (AttributeError, KeyError, requests.RequestException) as err:
        # AttributeError: select_one() returned None (element not found).
        # KeyError: the element exists but lacks the expected attribute.
        print(err)
        return None


for i in dt_list:
    target_url = url + i
    print(target_url)

    try:
        html = requests.get(target_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        # Skip this week rather than abort the whole run.
        print(err)
    else:
        bs_obj = BeautifulSoup(html.text, "html.parser")

        for v in bs_obj.select("tbody > tr"):
            rank = v.find("td", {"class": "ac"})
            title = v.find("td", {"class": "title"})

            # Only rows that have both a rank cell and a title cell are movies.
            if rank and title:
                row = _scrape_movie(title)
                if row is not None:
                    a.append(row)

    # Pause between weekly ranking pages to avoid hammering the server.
    time.sleep(1)


data = pd.DataFrame(a)

--

--

Enhold
0 Followers

I like mechanical engineering and IT, and I'm most interested in the data and network fields.