-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdirect_insert
210 lines (192 loc) · 18.2 KB
/
direct_insert
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import pymysql
import requests
from bs4 import BeautifulSoup
# Scrape person pages from Naver Movie for every code in `director_code` and
# bulk-insert one row per listed work into the `director` table.

# Seconds to wait for each HTTP request; without a timeout a single stalled
# connection would block the whole run indefinitely.
REQUEST_TIMEOUT = 10

# CSS selectors for the parts of the person page that are stored.
NAME_KOREAN_SELECTOR = '#content > div.article > div.mv_info_area > div.mv_info.character > h3 > a'
NAME_ENGLISH_SELECTOR = '#content > div.article > div.mv_info_area > div.mv_info.character > strong'
WORKS_SELECTOR = '#content > div.article > div.section_group.section_group_frst > div > div > ul > li'
WORK_LINK_SELECTOR = 'div > strong > a'

# Column order must match the row layout built in _scrape_director().
sql = """insert into director(director_code, code_movie, name_korean, name_English, work_name, enter_date)
values (%s, %s , %s, %s, %s, now())"""

# Codes substituted into the `code` query parameter of the person page URL.
# The list contains repeated codes; they are de-duplicated before scraping.
director_code = [
    180101, 435871, 2049, 130535, 168441, 2610, 274061, 429911, 408379, 349923, 137200, 59767, 442791,
    315161, 631, 422873, 2167, 121624, 621, 87207, 3609, 1495, 244, 11816, 1345, 2134, 1347, 1347,
    1309, 621, 325345, 442432, 1348, 9479, 184363, 73, 5903, 543, 26503, 1347, 63843, 273854, 1495,
    1878, 1916, 1244, 39867, 28616, 45896, 36051, 6405, 1020, 3962, 244, 766, 392014, 392015, 519,
    31161, 171771, 144678, 1253, 1244, 121151, 168380, 78621, 51976, 68264, 543, 244, 61820, 244, 621,
    1063, 425918, 228356, 261, 52070, 1113, 7928, 7929, 6405, 2268, 37053, 432512, 432513, 390967,
    55772, 1457, 6680, 131267, 131268, 1365, 543, 68329, 67923, 45076, 37067, 35407, 173801, 144678,
    2462, 513, 762, 1495, 226, 5328, 171771, 867, 107831, 2000, 2224, 1495, 60101, 423298, 423302,
    366318, 1822, 2010, 769, 45076, 35407, 115612, 543, 244, 1244, 11528, 420143, 52584, 26503, 35407,
    3609, 300152, 246457, 316527, 426810, 242, 871, 20538, 18338, 1874, 1317, 420143, 11907, 37324,
    27537, 11907, 35080, 6395, 71915, 299156, 6705, 281724, 281725, 752, 2130, 863, 1958, 61886, 3816,
    48514, 2268, 165004, 2928, 173512, 55323, 440907, 49069, 27537, 4967, 86171, 305965, 363031,
    166599, 450, 543, 167, 72202, 222375, 1055, 2469, 1428, 174408, 621, 6214, 7798, 73583, 456064,
    1543, 86171, 762, 1220, 382855, 401, 408280, 1366, 37465, 2462, 420143, 2000, 407439, 1588, 1366,
    55772, 290, 76809, 1958, 1092, 3548, 30532, 3025, 339778, 6029, 1207, 261, 863, 5735, 114809,
    355328, 329476, 50806, 141401, 1574, 86008, 63676, 2795, 1495, 752, 8284, 1055, 1488, 148748, 6212,
    246952, 53288, 5516, 2469, 134, 441, 226, 1227, 61885, 40145, 375578, 375580, 375581, 1860, 45076,
    2057, 140359, 115881, 296535, 1495, 433880, 358871, 335087, 5266, 1347, 2167, 11816, 2369, 320599,
    108832, 170, 7804, 96347, 11693, 37534, 58385, 436315, 439, 1347, 2051, 241312, 367555, 173847,
    3562, 4892, 769, 13548, 3962, 5191, 79002, 1360, 6405, 42889, 121217, 46214, 12823, 163944, 439,
    135792, 62056, 337003, 153512, 62455, 2939, 334111, 5266, 424, 167451, 399151, 1113, 61820, 1866,
    1860, 149138, 317, 1925, 1366, 290, 281259, 7928, 7929, 379605, 379606, 379607, 1181, 33765,
    122278, 1317, 533, 1037, 129673, 4474, 102925, 199, 4922, 348111, 439, 1181, 320083, 241312,
    255339, 2146, 502, 68, 1037, 1574, 63332, 355328, 424356, 73647, 1366, 1114, 43013, 290, 388,
    36616, 36617, 4827, 20421, 2610, 4331, 6405, 410516, 1999, 108847, 376825, 88847, 4281, 147209,
    417028, 1495, 340826, 447, 7923, 326755, 1450, 2071, 7928, 7929, 8000, 242, 206044, 1084, 1588,
    189198, 8220, 62056, 9813, 1145, 11528, 1934, 45386, 1441, 1964, 1347, 135792, 162949, 4132, 1309,
    159895, 1309, 1192, 11960, 159786, 51993, 1317, 9922, 261, 307056, 5208, 48, 357044, 543, 5447,
    12180, 1884, 157085, 6476, 290, 120, 1569, 1292, 1456, 346230, 176758, 45386, 1830, 85949, 8973,
    1166, 79025, 13, 395441, 2688, 4892, 1419, 2089, 3168, 71351, 675, 1317, 11863, 1564, 1860, 1533,
    383435, 6439, 7272, 593, 35407, 5219, 290, 121151, 621, 1958, 1494, 352628, 631, 3564, 129611,
    136139, 3662, 57713, 252096, 631, 1543, 4867, 285747, 339678, 2610, 7791, 9825, 1181, 115912, 450,
    1299, 5331, 61820, 139821, 769, 8283, 966, 136335, 138925, 9648, 792, 1063, 1495, 1869, 924, 1889,
    358842, 363418, 1564, 446762, 446763, 68, 274061, 222375, 83993, 51307, 133, 2060, 6405, 9057,
    1522, 141394, 83991, 129673, 12664, 144678, 158007, 7798, 4145, 129980, 203937, 290, 6902, 441154,
    6485, 177939, 13563, 301387, 102925, 137197, 2928, 148075, 419870, 132864, 54784, 37086, 423358,
    6827, 47, 12664, 72818, 285737, 1780, 38137, 1495, 675, 121890, 167128, 261, 1366, 80677, 131531,
    343463, 277983, 1750, 11907, 401, 2462, 143134, 52591, 320083, 100565, 1428, 4210, 4867, 63401,
    410, 47099, 303944, 374037, 157085, 163659, 163660, 303685, 795, 1279, 194376, 1925, 116871, 4724,
    365531, 1309, 335077, 581, 61885, 387744, 136481, 1114, 48514, 543, 4090, 131592, 2565, 11637,
    72202, 318605, 90133, 841, 141436, 4175, 486, 42861, 1401, 316, 48166, 217559, 9837, 153599,
    164940, 8283, 1908, 1305, 73262, 12664, 871, 122, 1540, 285747, 170, 3939, 762, 285288, 133597,
    177943, 1495, 63843, 2692, 5707, 63401, 127382, 1792, 39813, 453058, 452949, 452950, 118470, 2268,
    2843, 92162, 6118, 42889, 5656, 4993, 1860, 329112, 165000, 1958, 487, 5191, 7605, 26505, 762,
    1244, 5328, 107891, 51307, 173, 261, 99420, 1958, 1336, 1362, 104193, 104194, 96510, 9922, 1860,
    105762, 67435, 424, 315203, 6984, 1084, 312115, 72343, 374438, 1220, 12180, 1884, 346590, 5137,
    1495, 519, 1404, 290, 1925, 72168, 134052, 91115, 2726, 102256, 86542, 1441, 46262, 1514, 581,
    72736, 44614, 40634, 134443, 1650, 613, 7356, 416031, 6325, 51589, 1067, 102768, 106193, 40228,
    371, 2653, 56086, 2765, 9599, 1543, 414138, 7834, 299007, 163281, 36109, 170, 320083, 791, 3175,
    306840, 4867, 339869, 133102, 12667, 9577, 884, 1495, 29766, 56738, 62886, 675, 160257, 6942, 6943,
    283214, 379827, 1514, 130549, 6214, 2695, 1524, 6439, 102277, 175503, 1980, 5331, 50395, 72, 1401,
    425866, 1507, 1931, 2369, 301779, 246952, 367461, 18822, 1024, 1347, 2838, 61854, 143393, 136118,
    244, 1309, 1488, 167475, 86137, 305845, 37320, 300292, 9062, 5989, 371, 6522, 3460, 52552, 172419,
    2039, 91587, 20916, 2369, 56086, 2047, 1574, 44379, 2355, 2134, 199, 11878, 1317, 67923, 46214,
    621, 9359, 1904, 7834, 299007, 1890, 2382, 621, 148075, 22287, 367508, 340460, 213995, 1573, 68580,
    5191, 144528, 10763, 1574, 199, 164380, 427828, 6212, 10397, 102499, 369667, 37251, 189198, 1063,
    315, 8180, 53411, 114565, 7289, 1881, 4277, 312552, 1450, 281259, 63332, 374535, 1913, 13125,
    148075, 176824, 72736, 2939, 1934, 1993, 1953, 1781, 145722, 290, 273910, 74533, 37053, 1220, 675,
    1925, 428550, 1220, 425122, 631, 454797, 73, 5903, 1754, 1534, 8095, 110255, 337731, 72343, 243029,
    153599, 37251, 2179, 2461, 134463, 330309, 315, 155537, 445448, 445449, 1554, 4098, 305, 147919,
    1063, 6472, 3939, 1404, 108426, 487, 3238, 953, 6421, 12667, 1866, 1102, 37393, 284769, 175108,
    424, 683, 1574, 5805, 439, 91115, 1564, 8222, 255339, 91479, 319, 318692, 36042, 76754, 1543, 2179,
    363882, 150546, 6476, 2656, 3223, 5805, 162365, 1711, 405682, 170103, 2726, 11476, 192802, 294687,
    72013, 1543, 4922, 317605, 92162, 72736, 391013, 110342, 43, 42889, 63401, 1495, 36244, 285747,
    1684, 97495, 47564, 150546, 243029, 153599, 125799, 1266, 45809, 2049, 2050, 166844, 56086, 28,
    305845, 1754, 72736, 317135, 5191, 40156, 177943, 316530, 312555, 162669, 7124, 1221, 384266,
    77150, 170103, 9695, 141470, 61885, 5172, 791, 82507, 1232, 1503, 45322, 41655, 1114, 5002, 415100,
    3131, 133597, 177943, 167, 160568, 909, 62900, 791, 103781, 1881, 408280, 4054, 11141, 355328,
    329476, 299854, 2021, 4513, 339999, 340460, 8220, 72736, 2259, 1113, 48, 48472, 4054, 393517,
    45076, 304267, 39813, 731, 167, 159873, 87796, 129, 163655, 6732, 13553, 1292, 46214, 344136, 518,
    5191, 298998, 268536, 56481, 1024, 52590, 450991, 61685, 1309, 1632, 68792, 68793, 304256, 2060,
    253828, 164515, 106861, 46214, 72736, 319, 2462, 2060, 131267, 131268, 74537, 39813, 1692, 7632,
    49993, 1173, 71574, 71690, 2385, 3937, 53288, 48166, 1598, 51247, 463, 8232, 3860, 1916, 148549,
    41845, 2378, 6394, 1869, 290, 5372, 177939, 162941, 5896, 2053, 4217, 581, 72343, 41656, 242,
    206044, 380898, 301426, 2236, 9489, 2565, 319, 1866, 166486, 2385, 215283, 1931, 285747, 68471,
    129226, 410, 447, 1959, 138008, 1993, 1574, 12613, 296076, 133597, 12180, 1884, 203937, 99549,
    85062, 148075, 2653, 1106, 317939, 52663, 47384, 1441, 154050, 5559, 151982, 1292, 1181, 168400,
    1225, 2048, 123, 60007, 63401, 49634, 3223, 1540, 769, 94854, 447442, 11426, 543, 160396, 37509,
    5329, 2437, 571, 2134, 114784, 388, 4778, 79078, 163090, 4807, 2173, 2726, 4087, 90873, 2179, 1904,
    298274, 108775, 8232, 105298, 101379, 1173, 160473, 1993, 11508, 1010, 52591, 164940, 49829, 43691,
    1055, 1904, 106861, 52235, 62913, 68024, 114810, 148581, 139816, 62056, 120420, 73, 5903, 2057,
    9479, 38847, 953, 8272, 67977, 4967, 3799, 5208, 2202, 318692, 6441, 71242, 47677, 94854, 46214,
    148046, 43942, 953, 38137, 379872, 2565, 1055, 59911, 156529, 148549, 314517, 1816, 1881, 2462,
    153599, 1931, 1309, 443, 175108, 3460, 12357, 171771, 252799, 1441, 1347, 203937, 11612, 307407,
    52235, 324, 15598, 7928, 7929, 487, 1554, 1063, 1631, 311187, 285747, 91488, 28664, 326768, 356002,
    46214, 365667, 127365, 53288, 1200, 1881, 200485, 137145, 70293, 229104, 255251, 888, 1646, 1253,
    12498, 3010, 105667, 148321, 62894, 58573, 650, 5805, 137137, 5002, 56086, 15161, 15162, 1904,
    1894, 29756, 58573, 78486, 1684, 392841, 392842, 27537, 1662, 5559, 78770, 22053, 22054, 333, 804,
    5133, 37053, 52058, 54817, 8136, 49581, 58690, 792, 1934, 39883, 1207, 72033, 9837, 44150, 177943,
    519, 2850, 12053, 411, 1103, 68792, 68793, 4179, 343463, 1622, 2019, 173487, 2356, 8272, 339999,
    360537, 334711, 44614, 52189, 172, 47677, 133861, 358871, 1299, 1266, 93377, 119842, 328080, 1366,
    199248, 1981, 129667, 1309, 49829, 381810, 2850, 72013, 50760, 2268, 56481, 47099, 3460, 1207,
    8377, 47202, 1272, 13553, 159300, 72736, 8, 1495, 15161, 15162, 39901, 146587, 258549, 53288, 1904,
    2939, 155537, 1043, 2049, 2050, 2850, 139707, 407260, 4883, 4922, 4806, 1554, 1469, 1640, 73583,
    317435, 130085, 135397, 9600, 6476, 52189, 298991, 1253, 333419, 157641, 13553, 273854, 137176,
    2565, 1071, 92777, 1640, 4175, 2610, 13548, 13548, 316769, 174604, 1244, 3135, 863, 167, 247761,
    1866, 121217, 808, 9479, 62056, 214572, 1309, 110342, 1219, 73432, 164940, 1244, 174819, 129282,
    92641, 101379, 295467, 9922, 1309, 170103, 3754, 6421, 238634, 9837, 2565, 1266, 171400, 128633,
    4891, 72168, 3939, 37440, 1214, 110342, 387557, 91115, 424, 332535, 334211, 316728, 336386, 129760,
    29766, 1107, 11426, 156529, 168400, 409100, 1225, 9479, 3253, 160396, 51979, 102495, 252924, 731,
    47384, 1543, 63676, 1145, 353170, 322459, 4792, 1866, 301869, 2665, 51764, 5002, 56086, 519, 1754,
    1084, 791, 100529, 383429, 171990, 268681, 125, 2432, 439, 841, 163636, 4815, 9665, 9667, 3106,
    131272, 1495, 309696, 47, 255339, 37604, 2150, 1225, 1506, 285747, 360516, 19004, 124598, 7304,
    841, 49829, 6439, 163944, 163281, 48321, 169767, 91949, 1309, 62863, 909, 137985, 631, 7923,
    135792, 37288, 68438, 130, 81850, 2605, 49861, 52552, 72745, 355315, 81852, 431196, 1450, 153961,
    1980, 165932, 163281, 5172, 339450, 112802, 1469, 1024, 170, 105491, 165567, 1484, 59466, 7264,
    4087, 69555, 71931, 1484, 1958, 92863, 330817, 89595, 2461, 40186, 39867, 170, 44379, 4806, 244,
    953, 52552, 37604, 2939, 1597, 1266, 4806, 795, 38365, 6476, 44379, 6395, 5172, 170103, 102256,
    369388, 31837, 417922, 7798, 4778, 1545, 63845, 75899, 2850, 259060, 1347, 6912, 12357, 61885,
    42672, 1495, 311040, 248774, 100565, 15439, 63056, 63055, 425116, 116915, 9057, 115494, 1173, 8000,
    1495, 10888, 6459, 334217, 5559, 363882, 150546, 102169, 9648, 50932, 46666, 4778, 29350, 371586,
    13125, 103183, 116871, 295230, 31837, 631, 6421, 81730, 605, 1834, 51993, 17509, 75887, 387100,
    37393, 284769, 385039, 152, 41657, 1981, 110653, 1543, 418214, 11528, 1545, 1219, 426115, 62913,
    8180, 149214, 71826, 5896, 311272, 862, 5328, 229400, 131245, 2930, 28316, 518, 9606, 5447, 438117,
    581, 180104, 22694, 162941, 27483, 300101, 44922, 2051, 86066, 381426, 1299, 72, 9691, 66510,
    121958, 412792, 318605, 381810, 174448, 78781, 159199, 130085, 333993, 282533, 467, 64060, 364581,
    5403, 301279, 165454, 335659, 62896, 9606, 4054, 8232, 4891, 8222, 605, 56481, 47, 102256, 148598,
    12361, 43899, 714, 2829, 3131, 351707, 369667, 63575, 7321, 1964, 1540, 1207, 98308, 1934, 2726,
    56086, 21918, 139816, 9732, 1266, 261, 41662, 4285, 322075, 29350, 264590, 290, 31837, 1366, 5516,
    204205, 4145, 136603, 88617, 6405, 1401, 1565, 72842, 363081, 118642, 40228, 52753, 1494, 11878,
    9779, 86465, 150546, 407413, 791, 9489, 1266, 16086, 322860, 1574, 2572, 4217, 1266, 324, 4054,
    68580, 3889, 7264, 4145, 168380, 66809, 69622, 93954, 173386, 201350, 299889, 22965, 311040, 123,
    3860, 144135, 9813, 163636, 4815, 2843, 129726, 71826, 47, 129452, 358689, 453739, 144678, 162941,
    381810, 260856, 38977, 1692, 16707, 3238, 334969, 334970, 93954, 7193, 1347, 22833, 173304, 518,
    6113, 163290, 174408, 73196, 50946, 73432, 179496, 72207, 394627, 394628, 56655, 40228, 60526,
    2497, 613, 22833, 4040, 68852, 127, 388, 166844, 121182, 348194, 3996, 2769, 4086, 12448, 12447,
    601, 62624, 841, 46262, 1913, 543, 73, 1565, 5903, 46160, 108775, 71697, 12487, 52189, 48954,
    12613, 54817, 27537, 142589, 12353, 51314, 7688, 15185, 146754, 50467, 168552, 102768, 1543, 11476,
    418, 727, 2427, 60526, 2695, 27537, 266220, 2134, 1214, 145805, 42889, 86316, 55772, 137176, 60653,
    4210, 102276, 255521, 5191, 37440, 86217, 44584, 290958, 165819, 133, 3860, 74556, 6494, 347321,
    108847, 958, 166844, 360516, 443, 1292, 1242, 13553, 22576, 91115, 3599, 1024, 1450, 1543, 1063,
    1543, 1830, 3599, 57, 58039, 4188, 2179, 148321, 166224, 1107, 137135, 1507, 3010, 1366, 137137,
    86316, 123, 58573, 2134, 71248, 37393, 2134, 1570, 12613, 114575, 121181, 121182, 2199, 9479, 5329,
    1185, 3010, 790, 8272, 2437, 9571,
]


def _text_or_none(node):
    """Return the stripped text of a parsed element, or None if it is missing."""
    return None if node is None else node.text.strip()


def _movie_code_from_href(href):
    """Return the value that follows the first '=' in a work link.

    Anything after a subsequent '&' is dropped so that extra query
    parameters do not leak into the stored code. Returns None when the link
    is missing or contains no '='.
    """
    if not href:
        return None
    _, separator, value = href.partition('=')
    if not separator:
        return None
    return value.split('&', 1)[0]


def _scrape_director(director):
    """Fetch one person page and build the rows to insert for it.

    Args:
        director: Code placed in the ``code`` query parameter of the page URL.

    Returns:
        A list of ``[director, movie_code, name_korean, name_english,
        work_name]`` rows, one per ``<li>`` matched by WORKS_SELECTOR. The
        list is empty when the request fails, the response status is not
        200, or no work entries are found.
    """
    url = f'https://movie.naver.com/movie/bi/pi/basic.naver?code={director}'
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # One unreachable page must not discard every row collected so far.
        print(f'skipping director {director}: {exc}')
        return []
    if response.status_code != 200:
        print(f'skipping director {director}: HTTP {response.status_code}')
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    name_korean = _text_or_none(soup.select_one(NAME_KOREAN_SELECTOR))
    name_english = _text_or_none(soup.select_one(NAME_ENGLISH_SELECTOR))

    rows = []
    # select() returns a (possibly empty) list, so no None check is needed.
    for work in soup.select(WORKS_SELECTOR):
        link = work.select_one(WORK_LINK_SELECTOR)
        if link is None:
            # Entry without a link: keep the row, but with no movie code/title.
            movie_code = None
            work_name = None
        else:
            movie_code = _movie_code_from_href(link.get('href'))
            work_name = link.text.strip()
        rows.append([director, movie_code, name_korean, name_english, work_name])
    return rows


# NOTE(review): database credentials are hard-coded; consider loading them
# from the environment or a config file.
conn = pymysql.connect(host="localhost", user="root", password="1019", db="final", charset="utf8")
try:
    insert_data = []
    # dict.fromkeys() removes repeated codes while preserving first-seen
    # order, so each page is fetched once and no identical rows are inserted.
    for director in dict.fromkeys(director_code):
        for row in _scrape_director(director):
            insert_data.append(row)
            print(row)

    if insert_data:
        # All rows are written in a single batch and committed together.
        with conn.cursor() as curs:
            curs.executemany(sql, insert_data)
        conn.commit()
finally:
    conn.close()