1 | #!/usr/bin/python |
---|
2 | # mediaprem.py by Ambrosa http://www.ambrosa.net |
---|
3 | # this module is used for download EPG data from Mediaset website |
---|
4 | # derived from E2_LOADEPG |
---|
5 | |
---|
6 | __author__ = "ambrosa http://www.ambrosa.net" |
---|
7 | __copyright__ = "Copyright (C) 2008-2011 Alessandro Ambrosini" |
---|
8 | __license__ = "CreativeCommons by-nc-sa http://creativecommons.org/licenses/by-nc-sa/3.0/" |
---|
9 | |
---|
10 | import gc |
---|
11 | import os |
---|
12 | import sys |
---|
13 | import time |
---|
14 | import codecs |
---|
15 | import socket |
---|
16 | import urllib |
---|
17 | import urllib2 |
---|
18 | import ConfigParser |
---|
19 | #from xml.dom import minidom |
---|
20 | |
---|
21 | # import CrossEPG functions |
---|
22 | import crossepg |
---|
23 | |
---|
# location of local python modules under "scripts/lib" dir.
# add it to sys.path()
crossepg_instroot = crossepg.epgdb_get_installroot()
if crossepg_instroot == False:
    # without a CrossEPG installation root nothing below can work: bail out
    sys.exit(1)
libdir = os.path.join(crossepg_instroot , 'scripts/lib')
sys.path.append(libdir)

# import local modules (shipped under scripts/lib, made importable just above)
# NOTE: sgmllib here is the bundled copy -- the module was removed from the
# standard library in Python 3, this script is Python 2 only
import sgmllib
import scriptlib
---|
35 | |
---|
36 | # ================================================================= |
---|
37 | # HTML PARSER used for parsing description |
---|
38 | |
---|
39 | |
---|
class Description_parser(sgmllib.SGMLParser):
    """SGML parser that collects the plain text found inside the
    description container of a Mediaset event page.

    Text is accumulated only while inside a <div class="txtBox_cms">
    element; retrieve it afterwards with get_descr().
    """

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        # flags tracking which description <div> we are currently inside
        self.start_div_box = False
        self.start_div_boxtxt = False
        # accumulated description text (unicode)
        self.description = ''

    def parse(self, s):
        """Feed the whole document *s* to the parser and finalize it."""
        self.feed(s)
        self.close()

    def start_div(self, attributes):
        # Inspect the class attribute to detect the description boxes.
        for name, value in attributes:
            if name != "class":
                continue
            if value == "box_Text":
                self.start_div_box = True
            elif value == "txtBox_cms":
                self.start_div_boxtxt = True

    def end_div(self):
        # Leaving the text box ends the collection of description text.
        if self.start_div_boxtxt:
            self.start_div_box = False
            self.start_div_boxtxt = False

    def handle_data(self, data):
        # Accumulate only the text located inside the description box;
        # the remote pages are encoded as ISO-8859-1.
        if self.start_div_boxtxt:
            self.description += data.decode('iso-8859-1')

    def get_descr(self):
        """Return the collected description, trimmed of surrounding blanks."""
        return self.description.strip(' \n\r')
---|
72 | |
---|
73 | |
---|
74 | |
---|
75 | # ================================================================= |
---|
76 | |
---|
77 | |
---|
class main(sgmllib.SGMLParser):
    """Mediaset Premium EPG grabber for CrossEPG.

    The class is itself an SGML parser: feeding it the remote XML schedule
    ("palinsesto") produces one cache file per (channel, day) under
    CONF_CACHEDIR, and process_cache() later reads those files back and
    injects the events into the CrossEPG database.
    """

    # main config file
    CONF_CONFIGFILENAME = "mediaprem.conf"

    # Network socket timeout (in seconds)
    CONF_SOCKET_TIMEOUT = 20

    # log text
    CONF_LOG_SCRIPT_NAME = "MediasetPremium (Italy)"
    CONF_LOG_PREFIX = ""

    # max chars in description
    CONF_DLDESCMAXCHAR = 250

    # retry number if HTTP error
    HTTP_ERROR_RETRY = 3
    # seconds to wait between retries
    HTTP_ERROR_WAIT_RETRY = 5

    # charset used in remote website epg data
    REMOTE_EPG_CHARSET = 'utf-8'

    # today's date in "%Y/%m/%d" form (set in __init__)
    TODAYMP = ''
    # dates worth caching: today .. today + MAX_DAY_EPG (set in __init__)
    DAYCACHEMP = []
    # separator used both inside cache file names and inside cache files
    FIELD_SEPARATOR = '###'
    # channel id -> "cacheopt[,name[,provider]]" string from mediaprem.conf
    CHANNELLIST = {}

    # hash(url) -> already downloaded description text (in-memory cache)
    DESCRIPTIONS_WEBCACHE = {}


    # -------- xml processing using SGMLLIB -----------
    # best way is use xml.minidom but it's very memory hungry (about 40MB memory for 2 MB XML file)
    # sgmllib can simple parse xml data
    SGML_PALINSESTO_INSIDE = False
    SGML_TITOLO_INSIDE = False
    SGML_LINKSCHEDA_INSIDE = False

    # current <giorno> date (None when the day is not in DAYCACHEMP)
    SGML_GIORNOMP = None
    # current <canale> channel id
    SGML_CHID = None
    # open cache file for the current channel/day; None means "skip events"
    SGML_FD = None
    SGML_TOTAL_EVENTS = 0

    SGML_EVENT_STARTHOUR = None
    SGML_EVENT_TITLE = None
    # NOTE(review): this attribute appears unused -- events actually use
    # SGML_EVENT_SUMMARIE_LINK, which is created in handle_data()
    SGML_EVENT_SUMMARIE = None

    def parse(self, s):
        """Parse the whole XML document *s* (feed it and close the parser)."""
        self.feed(s)
        self.close()

    def start_palinsesto(self, attr):
        # <palinsesto> opens the schedule section
        self.SGML_PALINSESTO_INSIDE = True

    def end_palinsesto(self):
        self.SGML_PALINSESTO_INSIDE = False
        self.SGML_GIORNOMP = None
        self.log("extracted %d events" % self.SGML_TOTAL_EVENTS)

    def start_giorno(self,attr):
        """<giorno data="YYYY/MM/DD">: remember the day only if cached."""
        if self.SGML_PALINSESTO_INSIDE == True :
            self.SGML_GIORNOMP = None
            for name,value in attr:
                if name == "data":
                    # keep the day only when it falls inside the wanted range
                    if str(value).strip(' \n\r') in self.DAYCACHEMP :
                        self.SGML_GIORNOMP = str(value).strip(' \n\r')
                    break

    def end_giorno(self):
        self.SGML_GIORNOMP = None

    def start_canale(self,attr):
        """<canale id="...">: decide whether to (re)write the cache file
        for this channel/day and open it in self.SGML_FD if so."""
        if self.SGML_GIORNOMP != None:
            for name,value in attr:
                if name == "id":
                    self.SGML_CHID = str(value).strip(' \n\r').lower()

                    # unknown channel: report it so the user can add it to the conf
                    if not self.CHANNELLIST.has_key(self.SGML_CHID) :
                        self.log("Warning: new channel id=%s found in XML data" % self.SGML_CHID )
                        break

                    # get cache option
                    # 0 : don't download/cache
                    # 1 : download and cache (optional 1,new_name )
                    # 2 : always download overwriting existing files (optional 2,new_name )
                    # 3 : always download overwriting existing files only for TODAY (optional 3,new_name )

                    cacheopt = int(self.CHANNELLIST[self.SGML_CHID].split(",")[0])

                    # if cacheopt == 0, do nothing
                    if cacheopt == 0:
                        break

                    channel_name = ''
                    if len(self.CHANNELLIST[self.SGML_CHID].split(",")) > 1 :
                        if self.CHANNELLIST[self.SGML_CHID].split(",")[1] != '' :
                            # channel renamed, new name provided by user
                            channel_name = self.CHANNELLIST[self.SGML_CHID].split(",")[1].strip(' \n\r').lower()

                    # if channel name is not present as option, quit with error
                    if channel_name == '':
                        self.log("ERROR ! ID=%s channel name not present" % self.SGML_CHID)
                        sys.exit(1)

                    channel_provider = self.CONF_DEFAULT_PROVIDER
                    if len(self.CHANNELLIST[self.SGML_CHID].split(",")) > 2 :
                        if self.CHANNELLIST[self.SGML_CHID].split(",")[2] != '' :
                            channel_provider = self.CHANNELLIST[self.SGML_CHID].split(",")[2].strip(' \n\r').lower()

                    # if channel name is not present as option in channel_list.conf , quit with error
                    # NOTE(review): unreachable -- an empty channel_name already
                    # caused sys.exit(1) a few lines above
                    if channel_name == '':
                        self.log("ERROR ! ID=" + self.SGML_CHID + " channel name not present. Skip !")
                        break

                    day = str(self.convert_daymp(self.SGML_GIORNOMP))
                    eventfilename = scriptlib.fn_escape(self.SGML_CHID + self.FIELD_SEPARATOR + channel_name + self.FIELD_SEPARATOR + day)
                    eventfilepath = os.path.join(self.CONF_CACHEDIR, eventfilename)
                    # cacheopt 1: keep an existing cache file untouched
                    if (cacheopt == 1) and os.path.exists(eventfilepath):
                        break
                    # cacheopt 3: only today's file is refreshed
                    if (cacheopt == 3) and os.path.exists(eventfilepath) and (self.SGML_GIORNOMP != self.TODAYMP):
                        break
                    if (cacheopt != 1) and (cacheopt != 2) and (cacheopt != 3):
                        self.log("Warning: unknown cache option " + str(cacheopt))
                        break

                    self.log(" Writing in cache \'" + eventfilename + "\'",2)
                    self.log2video(" extracting \"%s\" (%s)" % (channel_name, day))

                    # open the per-channel/day cache file; a non-None SGML_FD
                    # enables event writing in start_prg/end_prg
                    self.SGML_FD = codecs.open(eventfilepath,"w",'utf-8')

                    # first line: channel data; second line: column remark
                    self.SGML_FD.write(self.SGML_CHID + self.FIELD_SEPARATOR + channel_name + self.FIELD_SEPARATOR + channel_provider + self.FIELD_SEPARATOR + day + '\n')
                    self.SGML_FD.write("Local Time (human readeable)###Unix GMT Time###Event Title###Event Description\n")

                    break

    def end_canale(self):
        # close the cache file (if one was opened) and reset channel state
        if self.SGML_FD != None:
            self.SGML_FD.close()
            self.SGML_FD = None
        self.SGML_CHID = None

    def start_prg(self,attr):
        """<prg orainizio="HH:MM">: remember the event start hour."""
        if self.SGML_FD != None :
            self.SGML_EVENT_STARTHOUR = None
            for name,value in attr:
                if name == "orainizio":
                    self.SGML_EVENT_STARTHOUR = str(value).strip(' \n\r')
                    break

    def end_prg(self):
        """</prg>: write the collected event line into the cache file."""
        if self.SGML_FD != None :

            # events between 00:00 and 05:59 belong to the next calendar day
            # (string comparison works because HH:MM is zero padded)
            if (self.SGML_EVENT_STARTHOUR >='00:00') and (self.SGML_EVENT_STARTHOUR <= '05:59') :
                nextdayevent = 86400
            else:
                nextdayevent = 0

            event_starttime = self.SGML_GIORNOMP + '_' + self.SGML_EVENT_STARTHOUR
            # local website time -> unix GMT, shifted by one day when needed
            event_startime_unix_gmt = str(int(time.mktime(time.strptime(event_starttime,"%Y/%m/%d_%H:%M"))) - self.DELTA_UTC + nextdayevent)

            # normalize title: drop CR/LF and outer spaces
            event_title = unicode(self.SGML_EVENT_TITLE)
            event_title = event_title.replace('\r','')
            event_title = event_title.replace('\n','')
            event_title = event_title.strip(u' ')

            event_description = ''
            if self.CONF_DL_DESC == 1 :
                # optionally fetch the long description from the event web page
                event_description = unicode(self.get_description(self.SGML_EVENT_SUMMARIE_LINK.strip(' \n\r'), self.CONF_DLDESCMAXCHAR) )
                event_description = event_description.replace('\r','')
                event_description = event_description.replace('\n',u' ')
                event_description = event_description.strip(u' ')

            self.SGML_FD.write(event_starttime + self.FIELD_SEPARATOR + event_startime_unix_gmt + self.FIELD_SEPARATOR + event_title + self.FIELD_SEPARATOR + event_description + '\n')
            self.SGML_TOTAL_EVENTS += 1

    def start_titolo(self,attr):
        # <titolo>: following character data is the event title
        if self.SGML_FD != None:
            self.SGML_TITOLO_INSIDE = True

    def end_titolo(self):
        if self.SGML_FD != None:
            self.SGML_TITOLO_INSIDE = False

    def start_linkscheda(self,attr):
        # <linkscheda>: following character data is the description page URL
        if self.SGML_FD != None:
            self.SGML_LINKSCHEDA_INSIDE = True

    def end_linkscheda(self):
        if self.SGML_FD != None:
            self.SGML_LINKSCHEDA_INSIDE = False


    def handle_data(self, data):
        """Collect character data for the element we are currently inside."""
        if self.SGML_TITOLO_INSIDE == True:
            self.SGML_EVENT_TITLE = data.encode('utf-8')
            self.SGML_EVENT_TITLE = self.SGML_EVENT_TITLE.strip(' \n\r')

        if self.SGML_LINKSCHEDA_INSIDE == True:
            self.SGML_EVENT_SUMMARIE_LINK = data.encode('utf-8')
            self.SGML_EVENT_SUMMARIE_LINK = self.SGML_EVENT_SUMMARIE_LINK.strip(' \n\r')


    # -----------------------------------------------

    def log(self,s,video=0):
        """Write *s* to the CrossEPG log; with video == 1 echo it to the OSD
        too (callers also pass 2, which logs without the OSD echo)."""
        self.logging.log(self.CONF_LOG_PREFIX + str(s))
        if video == 1:
            self.log2video(str(s))

    def log2video(self,s):
        """Show *s* on the on-screen display status line."""
        self.logging.log2video_status(str(s))

    def convert_daymp(self,dmp):
        """Convert a website date "YYYY/MM/DD" to the cache form "YYYYMMDD"."""
        daystandard = time.strftime("%Y%m%d",time.strptime(dmp,"%Y/%m/%d"))
        return daystandard


    def get_description(self,url,maxchar=128):
        """Download the description page at *url* and return at most
        *maxchar* characters of its text, memoized in DESCRIPTIONS_WEBCACHE.

        Returns '' for non-http or non-html URLs and on download errors.
        """

        if url[:7] != 'http://':
            return('')

        if (url[-5:] != '.html') and (url[-4:] != '.htm') :
            return('')

        # serve repeated URLs from the in-memory cache
        url_hash = hash(url)
        if self.DESCRIPTIONS_WEBCACHE.has_key(url_hash):
            self.log(" cached description " + url)
            return(self.DESCRIPTIONS_WEBCACHE[url_hash])

        self.log(" downloading description " + url )
        # percent-encode the URL, keeping the scheme/path separators
        url_enc = str(urllib.quote(url,safe=":/"))
        try:
            sock = urllib2.urlopen(url_enc)
            data = sock.read()
        except IOError, e:
            # build a readable reason from URLError/HTTPError attributes
            serr = "unknown"
            if hasattr(e, 'reason'):
                serr = str(e.reason)
            elif hasattr(e, 'code'):
                serr = str(e.code)
                if hasattr(e, 'msg'):
                    serr += " , " + str(e.msg)

            self.log(" error, reason: " + serr + ". Skip it.")
            return('')

        else:
            sock.close()
            # extract the description text from the downloaded html
            dsparser = Description_parser()
            dsparser.parse(data)
            self.DESCRIPTIONS_WEBCACHE[url_hash] = dsparser.get_descr()[:maxchar]
            return(self.DESCRIPTIONS_WEBCACHE[url_hash])

        # NOTE(review): unreachable -- both try branches return above
        return('')



    def __init__(self, confdir, dbroot):
        """Read mediaprem.conf from *confdir*, prepare the cache directory
        under *dbroot* and initialize logging, timezone and parser state."""

        # initialize SGMLLIB
        sgmllib.SGMLParser.__init__(self, 0)

        # initialize logging
        self.logging = scriptlib.logging_class()
        # write to video OSD the script name
        self.logging.log2video_scriptname(self.CONF_LOG_SCRIPT_NAME)

        CONF_FILE = os.path.join(confdir,self.CONF_CONFIGFILENAME)
        if not os.path.exists(CONF_FILE) :
            self.log("ERROR: %s not present" % CONF_FILE,1)
            sys.exit(1)

        config = ConfigParser.ConfigParser()
        #config.optionxform = str # needed to return case sensitive index
        config.read(CONF_FILE)

        # reading [global] section options
        self.CONF_DEFAULT_PROVIDER = config.get("global","DEFAULT_PROVIDER")
        # save cache under dbroot
        self.CONF_CACHEDIR = os.path.join(dbroot,config.get("global","CACHE_DIRNAME"))

        self.CONF_DL_DESC = config.getint("global","DL_DESC")
        self.CONF_MAX_DAY_EPG = config.getint("global","MAX_DAY_EPG")
        self.CONF_URL = config.get("global","URL")

        # GMT_ZONE: 'equal' means the website publishes times in our local
        # timezone, otherwise it is the website's offset from UTC in hours
        self.CONF_GMT_ZONE = config.get("global","GMT_ZONE")
        if self.CONF_GMT_ZONE.strip(' ').lower() == 'equal':
            #self.DELTA_UTC = -scriptlib.delta_utc() # return negative if timezone is east of GMT (like Italy), invert sign
            self.DELTA_UTC = 0
        else:
            self.DELTA_UTC = float(self.CONF_GMT_ZONE)*3600.0
            # apply daylight saving correction in the offset's direction
            if self.DELTA_UTC >= 0:
                self.DELTA_UTC = self.DELTA_UTC + scriptlib.delta_dst()
            else:
                self.DELTA_UTC = self.DELTA_UTC - scriptlib.delta_dst()

        self.DELTA_UTC = int(self.DELTA_UTC)
        #self.log("Website timezone - UTC = %d seconds" % self.DELTA_UTC)

        if not os.path.exists(self.CONF_CACHEDIR):
            self.log("Creating \'%s\' directory for caching" % self.CONF_CACHEDIR)
            os.mkdir(self.CONF_CACHEDIR)

        # reading [channels] section
        temp = config.items("channels");

        # create a dictionary (Python array) with index = channel ID
        for i in temp:
            self.CHANNELLIST[i[0].strip(' \n\r').lower()] = unicode(i[1].strip(' \n\r').lower(),'utf-8')

        if len(self.CHANNELLIST) == 0 :
            self.log("ERROR: [channels] section empty ?",1)
            sys.exit(1)

        # set network socket timeout
        socket.setdefaulttimeout(self.CONF_SOCKET_TIMEOUT)

        self.TODAYMP = time.strftime("%Y/%m/%d")
        # create a list filled with dates (format AAAA/MM/DD) from today to today+ MAX_DAY_EPG
        self.DAYCACHEMP=[]
        for day in range(0,self.CONF_MAX_DAY_EPG):
            self.DAYCACHEMP.append(time.strftime("%Y/%m/%d",time.localtime(time.time()+86400*day)))



    # ----------------------------------------------------------------------


    def download_and_cache(self):
        """Download the schedule XML (with retries) and parse it, writing
        one cache file per channel/day via the SGML callbacks above."""
        self.log("--- START DOWNLOAD AND CACHE DATA ---")
        self.log2video("STARTING DOWNLOAD")

        self.log("Removing old cached files")
        scriptlib.cleanup_oldcachedfiles(self.CONF_CACHEDIR, self.FIELD_SEPARATOR)


        self.log("Start download XML data from \'" + self.CONF_URL+"\'")
        self.log2video("downloading XML data ...")

        # retry up to HTTP_ERROR_RETRY times; i == -99 flags success
        i = self.HTTP_ERROR_RETRY
        while i > 0:
            try:
                sock = urllib2.urlopen(self.CONF_URL)
                data = sock.read()
            except IOError, e:
                # build a readable reason from URLError/HTTPError attributes
                serr = "unknown"
                if hasattr(e, 'reason'):
                    serr = str(e.reason)
                elif hasattr(e, 'code'):
                    serr = str(e.code)
                    if hasattr(e, 'msg'):
                        serr += " , " + str(e.msg)

                self.log("\'" + self.CONF_URL + "\' connection error. Reason: "+serr+". Waiting "+str(self.HTTP_ERROR_WAIT_RETRY)+" sec. and retry ["+str(i)+"] ...")
                time.sleep(self.HTTP_ERROR_WAIT_RETRY) # add sleep
                i -= 1

            else:
                i = -99
                sock.close()

        if (i != -99):
            # all retries failed: nothing to parse, abort the script
            self.log("Cannot retrieve data from \'" + self.CONF_URL + "\'. Abort script")
            self.log2video("Error: cannot download XML data, abort")
            time.sleep(5)
            sys.exit(1)

        self.log("end download XML data, now processing")
        self.log2video("processing XML data, wait ...")

        # start SGMLLIB parsing
        self.parse(data)

        self.log("end process XML data",1)

    # ----------------------------------------------------------------------


    def process_cache(self):
        """Read the cache files back and inject their events into the
        CrossEPG database, grouping consecutive files by channel id."""
        self.log("--- START PROCESSING CACHE ---")
        self.log2video("START PROCESSING CACHE")
        if not os.path.exists(self.CONF_CACHEDIR):
            self.log("ERROR: %s not present" % self.CONF_CACHEDIR,1)
            sys.exit(1)

        self.log("Loading lamedb")
        lamedb = scriptlib.lamedb_class()

        self.log("Initialize CrossEPG database")
        crossdb = scriptlib.crossepg_db_class()
        crossdb.open_db()

        events = []
        previous_id = ''
        channels_name = ''
        total_events = 0

        self.log("Start data processing")
        # sorting groups the files of one channel together; the sentinel
        # entry forces a final flush of the last channel's events
        filelist = sorted(os.listdir(self.CONF_CACHEDIR))
        filelist.append('***END***')

        for f in filelist :
            id = f.split(self.FIELD_SEPARATOR)[0]
            if previous_id == '':
                previous_id = id

            # channel id changed: flush the accumulated events to the db
            if id != previous_id :
                total_events += len(events)
                self.log(" ...processing \'%s\' , nr. events %d" % (previous_id,len(events)))
                self.log2video("processed %d events ..." % total_events )

                for c in channels_name:
                    # a channel can have zero or more SID (different channel with same name)
                    # return the list [0e1f:00820000:0708:00c8:1:0 , 1d20:00820000:2fa8:013e:1:0 , ..... ]
                    # return [] if channel name is not in lamedb
                    sidbyname = lamedb.get_sid_byname(c.strip(' \n').lower())

                    # process every SID
                    for s in sidbyname:
                        # convert "0e1f:00820000:0708:00c8:1:0" to sid,tsid,onid
                        # return the list [sid,tsid,onid]
                        ch_sid = lamedb.convert_sid(s)
                        if len(ch_sid) == 0:
                            continue

                        # add channel into db
                        # doesn't matter if the channel already exist... epgdb do all the work
                        crossdb.add_channel(ch_sid)

                        i = 0
                        L = len(events) - 1

                        # process events
                        for e in events:

                            items = e.split(self.FIELD_SEPARATOR)
                            e_starttime = int(items[1])

                            # event length = gap to the next event's start
                            if i < L :
                                e_length = int(events[i+1].split(self.FIELD_SEPARATOR)[1]) - e_starttime
                            else:
                                # last event, dummy length 90 min.
                                e_length = 5400
                            i += 1

                            # extract title and encode Python Unicode with UTF-8
                            e_title = items[2].encode('utf-8')

                            # extract summarie and encode Python Unicode with UTF-8
                            e_summarie = items[3].encode('utf-8')

                            # add_event(start_time , duration , title , summarie , ISO639_language_code , strings_encoded_with_UTF-8)
                            crossdb.add_event(e_starttime, e_length, e_title, e_summarie, 'ita', True )

                if f == '***END***':
                    break

                # start accumulating the next channel's events
                events = []
                previous_id = id
                channels_name = ''

            if id == previous_id:
                self.log("Reading \'%s\'" % f)
                # read events from cache file using UTF-8 and insert them in events list
                fd = codecs.open(os.path.join(self.CONF_CACHEDIR, f),"r","utf-8")
                lines = fd.readlines()
                fd.close()
                if channels_name == '':
                    # first line has channel data (id,name,provider,date)
                    channels_name = lines[0].split(self.FIELD_SEPARATOR)[1].split('|')
                # the second line is only a remark
                # add events starting from third line
                events.extend(lines[2:])

        # end process, close CrossEPG DB saving data
        crossdb.close_db()
        self.log("TOTAL EPG EVENTS PROCESSED: %d" % total_events)
        self.log("--- END ---")
        self.log2video("END , events processed: %d" % total_events)
---|
558 | |
---|
559 | |
---|
560 | |
---|
561 | # **************************************************************************************************************************** |
---|
562 | |
---|
# MAIN CODE: SCRIPT START HERE

# increase this process niceness (other processes have higher priority)
os.nice(10)

# set Garbage Collector to do a "generational jump" more frequently than default 700
# memory saving: about 50% (!!), some performance loss (obviously)
gc.set_threshold(50,10,10)

# script home, relative to the CrossEPG installation root
SCRIPT_DIR = 'scripts/mediaprem/'

# get CrossEPG installation dir.
crossepg_instroot = crossepg.epgdb_get_installroot()
if crossepg_instroot == False:
    sys.exit(1)
scriptlocation = os.path.join(crossepg_instroot , SCRIPT_DIR)

# get where CrossEPG save data (dbroot) and use it as script cache repository
crossepg_dbroot = crossepg.epgdb_get_dbroot()
if crossepg_dbroot == False:
    sys.exit(1)

# initialize script class (reads mediaprem.conf, prepares cache dir)
script_class = main(scriptlocation , crossepg_dbroot)

# download data and cache them
script_class.download_and_cache()

# read cached data and inject into CrossEPG database
script_class.process_cache()
---|
593 | |
---|