#
#      Copyright (C) 2005-2008 Team XBMC
#      http://www.xbmc.org
#
#  This Program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2, or (at your option)
#  any later version.
#
#  This Program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with XBMC; see the file COPYING.  If not, write to
#  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
#  http://www.gnu.org/copyleft/gpl.html
#

""" IMDbLib Movie Information Library:
Movie Information parsing from http://www.imdb.com

Written By:
--------

No1CaNTeL
AllTheKillx

Special Thanks to AllTheKill for not just rewriting almost all of the code,
but for tutoring me through his coding, and making sure I have a full
understanding of what is being done.

Another special thanks to Donno for having enough patience for my 1001
questions over the last year as I've been learning Python.
(This doesn't mean he'll answer your 1001 questions! ;)

***Please open and read the "IMDbLib_Example1.py" for library usage***

To Do:
------

    - Trailer Function
    - Cast Icons
    - Title Page Thumbs 
    
"""

import os,sys,re,urllib,string

# Matches decimal HTML character references such as "&#38;" (1-3 digits).
reg = re.compile(r"&#[0-9]{1,3};")


def _decode_char_ref(match):
    """Return the character for one "&#NNN;" reference matched by ``reg``."""
    # Drop the leading "&#" and the trailing ";" to get the decimal code.
    return '%c' % int(match.group(0)[2:-1])


def fix(s):
    """Turn a scraped HTML fragment into plain display text.

    Decimal character references ("&#NNN;") are replaced by the character
    they encode, and the inline tags "<em>", "</em>" and "<br>" are removed.
    The tags are stripped whether or not the text contains any character
    reference.

    s -- the HTML fragment (a string).

    Returns the cleaned string.
    """
    # One pass over the text: each reference is decoded exactly once.
    s = reg.sub(_decode_char_ref, s)
    for tag in ('<em>', '</em>', '<br>'):
        s = s.replace(tag, '')
    return s

# Patterns applied to one line of a downloaded IMDb page at a time.

# Section markers: these only flag that a heading is present on the line;
# the values themselves are read from a nearby line by the parser.
directorsReg = re.compile(r'.*?Directed by.*')
writersReg = re.compile(r'.*?Writing credits.*')
genresReg = re.compile(r'.*?Genre:.*')
ratingReg = re.compile(r'.*?User Rating.*')
runtimeReg = re.compile(r'.*?Runtime.*')
countriesReg = re.compile(r'.*?Country:.*')
languagesReg = re.compile(r'.*?Language:.*')

# Value patterns: group(1) holds the text of interest on the matching line.
titleReg = re.compile(r'<title>(.*)<.*?')
yearReg = re.compile(r'.*?<a href="/Sections/Years/.*">(.*?)</a>.*')
taglineReg = re.compile(r'.*?Tagline:</b>(.*?)(<a href.*>)?$')
plotReg = re.compile(r'.*?Plot Outline:</b>(.*?)<a href.*')
mpaaReg = re.compile(r'.*?/mpaa">MPAA</a>:</b>(.*?)<br>')

# group(1) is the remainder of the line starting at the first "<tr>" that
# follows the cast heading.
castReg = re.compile(r'.*?(?:Cast overview, first billed only|Complete credited cast).*?(<tr>.*)')

# A "plotpar" paragraph; group(1) is the paragraph body.
details = re.compile(r'<p class="plotpar">(.*?)</p>')

class IMDb:
    """Scrape one movie's details from its IMDb title page.

    Creating an instance downloads ``url`` and ``url + 'plotsummary'`` and
    fills in the attributes below.  Every attribute starts out as the string
    'N/A' and is only overwritten when the corresponding markup is found.

    Attributes:
        Title, Year, Tagline, Plot, MPAA -- stripped strings taken from the
            line that matched.
        Rating    -- "<bold text> <rest of line>" built from the rating line.
        Directors, Writers, Genres, Runtime, Countries, Languages --
            lists produced by ``findall`` on a line following the section
            heading.
        Cast      -- dict built from the table rows on the cast line, mapping
                     the first linked name to the text of a later cell
                     (presumably actor -> role; confirm against live markup).
        CoverURL  -- URL of the last cover image matched.
        CoverSize -- dict mapping each matched image's ``height`` attribute
                     to its ``width`` attribute.
        Details   -- first "plotpar" paragraph of the plot-summary page.
                     It is [] when that page was read but held no such
                     paragraph, and 'N/A' when the page was empty or could
                     not be fetched.
        Searchresults -- never modified by this class; stays 'N/A'.

    An error while fetching the title page itself propagates to the caller.
    """

    def __init__(self, url):
        """Download and parse the title page at ``url``.

        url -- address of the title page.  'plotsummary' is appended to it
               verbatim to build the address of the plot-summary page.
        """
        self.Title = 'N/A'
        # Directors gets the same default as every other field so that it
        # exists even when the page has no "Directed by" heading.
        self.Directors = 'N/A'
        self.Writers = 'N/A'
        self.Genres = 'N/A'
        self.Details = 'N/A'
        self.Year = 'N/A'
        self.Tagline = 'N/A'
        self.Plot = 'N/A'
        self.Rating = 'N/A'
        self.MPAA = 'N/A'
        self.Runtime = 'N/A'
        self.Cast = 'N/A'
        self.Countries = 'N/A'
        self.Languages = 'N/A'
        self.Searchresults = 'N/A'
        self.CoverURL = 'N/A'
        self.CoverSize = 'N/A'

        self._parse_title_page(self._fetch_lines(url))

        # The plot summary is best-effort: a failed download must not
        # discard what was already parsed from the title page.
        try:
            summary = self._fetch_lines(url + 'plotsummary')
        except Exception:
            summary = []
        self._parse_plot_summary(summary)

    def _fetch_lines(self, url):
        """Return the lines of the page at ``url``, always closing the socket."""
        sock = urllib.urlopen(url)
        try:
            return sock.readlines()
        finally:
            sock.close()

    def _parse_title_page(self, page):
        """Fill the attributes from ``page``, the title page as a list of lines.

        Several sections keep their values a fixed number of lines below the
        heading, so the scan index is advanced inside the loop.  The checks
        that follow an advance are applied to the new line, which makes the
        order of the checks significant.  Parsing stops quietly if such a
        look-ahead would run past the end of the page.
        """
        # Compiled once here rather than on every line of the page.
        in_directors = re.compile(r'.*?">(.*?)</.*?')
        in_writers = re.compile(r'<a href="/name/.*?>(.*?)<')
        in_genres = re.compile(r'<a href="/Sections/Genres/.*?>(.*?)<')
        in_rating = re.compile(r'<b>(.*?)</b> (.*)')
        in_runtime = re.compile(r'(.*)\n')
        in_cast = re.compile(r'.*?<a href="/name.*?>(.*?)<.*?(?:top|middle)">(.*?)</td>')
        # Text of every link on a line; used for countries and languages.
        in_links = re.compile(r'<a href=.*?>(.*?)</a>')
        # Groups: image URL, height, width.
        cover = re.compile(r'src="(http://.*?m.jpg)" height="(.*?)" width="(.*?)"></a>')

        total = len(page)
        i = 0
        while i < total:
            found = titleReg.match(page[i])
            if found is not None:
                self.Title = found.group(1).strip()

            if directorsReg.match(page[i]):
                # Names are on the line after the heading.
                i += 1
                if i >= total:
                    break
                self.Directors = in_directors.findall(page[i])

            if writersReg.match(page[i]):
                # Names are three lines below the heading.
                i += 3
                if i >= total:
                    break
                if in_writers.search(page[i]) is not None:
                    self.Writers = in_writers.findall(page[i])

            if genresReg.match(page[i]):
                i += 1
                if i >= total:
                    break
                self.Genres = in_genres.findall(page[i])

            found = yearReg.match(page[i])
            if found is not None:
                self.Year = found.group(1).strip()

            found = taglineReg.match(page[i])
            if found is not None:
                self.Tagline = found.group(1).strip()

            found = plotReg.match(page[i])
            if found is not None:
                self.Plot = found.group(1).strip()

            if ratingReg.match(page[i]):
                # The score is four lines below the heading.
                i += 4
                if i >= total:
                    break
                for rate in in_rating.findall(page[i]):
                    self.Rating = '%s %s' % (rate[0], rate[1])

            found = mpaaReg.match(page[i])
            if found is not None:
                self.MPAA = found.group(1).strip()

            if runtimeReg.match(page[i]):
                i += 1
                if i >= total:
                    break
                self.Runtime = in_runtime.findall(page[i])

            if castReg.match(page[i]):
                # The whole cast table is on this one line; split it into
                # rows and take one entry per row.
                self.Cast = {}
                rows = page[i].replace('</tr> <tr', '</tr><tr').split('</tr><tr')
                for row in rows:
                    entry = in_cast.search(row)
                    if entry is not None:
                        self.Cast[entry.group(1).strip()] = entry.group(2).strip()

            # The pattern begins with 'src="', which is not normally at the
            # start of a line, so the line is searched rather than matched
            # from its first character.
            covers = cover.findall(page[i])
            if covers:
                self.CoverSize = {}
                for image_url, height, width in covers:
                    self.CoverURL = image_url
                    self.CoverSize[height] = width

            if countriesReg.match(page[i]):
                i += 1
                if i >= total:
                    break
                self.Countries = in_links.findall(page[i])

            if languagesReg.match(page[i]):
                i += 1
                if i >= total:
                    break
                self.Languages = in_links.findall(page[i])

            i += 1

    def _parse_plot_summary(self, page):
        """Set ``Details`` from ``page``, the plot-summary page as lines."""
        for line in page:
            found = details.search(line)
            if found is not None:
                self.Details = found.group(1).strip()
                return
        if page:
            # Kept for backward compatibility: a page that was read but has
            # no "plotpar" paragraph yields an empty list, not 'N/A'.
            self.Details = []

class IMDbSearch:
    """Run an IMDb title search and collect the matches.

    Attributes:
        SearchResults -- dict mapping each result's display text (link text
                         plus the text that follows it up to "</li>",
                         cleaned by ``fix``) to its title-page URL,
                         "http://www.imdb.com/title/<id>/".  Results with
                         identical display text overwrite one another.
    """

    def __init__(self, find):
        """Search IMDb for ``find`` and fill ``SearchResults``.

        find -- the search phrase.  Punctuation is removed and spaces are
                turned into '+' before it is appended to the query URL.
                The two-argument ``translate`` call needs a Python 2 byte
                string.
        """
        # Same characters as before; the backslash is written escaped so
        # the literal no longer relies on an unrecognised "\:" sequence.
        unwanted = '~!@#$%^&*()_+`-={}|[]\\:";\'<>?,./'
        find = find.strip().translate(string.maketrans('', ''), unwanted)
        find = find.replace(' ', '+')
        self.SearchResults = {}

        sock = urllib.urlopen('http://www.imdb.com/find?s=tt&q=' + find)
        try:
            page = sock.read()
        finally:
            # Close the connection even when the read fails.
            sock.close()

        # Groups: title id, link text, text after the link (captured twice
        # by the nested parentheses).
        search = re.compile('<a href="/title/(.*?)/.*?">(.*?)</a>((.*?))</li>')
        # findall always returns a list, so no None check is needed.
        for hit in search.findall(page):
            label = "%s%s" % (hit[1], hit[2])
            url = "http://www.imdb.com/title/%s/" % (hit[0])
            self.SearchResults[fix(label)] = url


class IMDbGallery:
    """Collect cover and photo-gallery image data from one IMDb page.

    Every attribute starts out as the string 'N/A' and is replaced only
    when the matching markup is present in the page.

    Attributes:
        CoverURL  -- list of cover image URLs.
        CoverSize -- dict mapping each cover's ``height`` to its ``width``.
        Gal1_info -- dict mapping ``url`` + the text that follows
                     "photogallery" in the link target to the thumbnail
                     ``src``.
        Gal1_xy   -- dict mapping thumbnail ``width`` to ``height``.
        Gal2_info -- dict mapping "http://www.imdb.com" + the link target
                     to the thumbnail ``src``.
        Gal2_xy   -- dict mapping thumbnail ``width`` to ``height``.
        Gal3_info -- ``src`` of the last "photosrc" image found (a string).
        Gal3_xy   -- dict mapping that image ``width`` to ``height``.

    The size dicts are keyed by one dimension, so two images sharing that
    dimension collapse into a single entry.
    """

    def __init__(self, url):
        """Download ``url`` and extract the image data from it.

        url -- address of the page to scan; also used as the prefix of the
               ``Gal1_info`` keys.
        """
        sock = urllib.urlopen(url)
        try:
            page = sock.read()
        finally:
            # Close the connection even when the read fails.
            sock.close()

        # The cover attributes get the same default as the gallery ones so
        # they exist even when the page has no cover image.
        self.CoverURL = 'N/A'
        self.CoverSize = 'N/A'
        self.Gal1_info = 'N/A'
        self.Gal1_xy = 'N/A'
        self.Gal2_info = 'N/A'
        self.Gal2_xy = 'N/A'
        self.Gal3_info = 'N/A'
        self.Gal3_xy = 'N/A'

        cover = re.compile('src="(http://.*?m.jpg)" height="(.*?)" width="(.*?)"></a>')
        lvl1 = re.compile('<a href="photogallery(.*?)"><img src="(.*?)" width="(.*?)" height="(.*?)".*')
        lvl2 = re.compile('<td><a href="(.*?)"><img src="(.*?)" width="(.*?)" height="(.*?)"')
        lvl3 = re.compile('class="photosrc"><td><img src="(.*?)" border=".*?" width="(.*?)" height="(.*?)">')

        # findall returns an empty list when nothing matches, so one call
        # per pattern both tests for and collects the matches.
        covers = cover.findall(page)
        if covers:
            self.CoverURL = []
            self.CoverSize = {}
            for image_url, height, width in covers:
                self.CoverURL.append(image_url)
                self.CoverSize[height] = width

        thumbs = lvl1.findall(page)
        if thumbs:
            self.Gal1_info = {}
            self.Gal1_xy = {}
            for suffix, src, width, height in thumbs:
                # NOTE(review): the literal "photogallery" from the link
                # target is not part of the key -- confirm this is intended.
                self.Gal1_info[url + suffix] = src
                self.Gal1_xy[width] = height

        thumbs = lvl2.findall(page)
        if thumbs:
            self.Gal2_info = {}
            self.Gal2_xy = {}
            for href, src, width, height in thumbs:
                self.Gal2_info['http://www.imdb.com' + href] = src
                self.Gal2_xy[width] = height

        photos = lvl3.findall(page)
        if photos:
            self.Gal3_xy = {}
            for src, width, height in photos:
                # Only the last image's source is kept.
                self.Gal3_info = src
                self.Gal3_xy[width] = height

if __name__ == '__main__':
    # Manual smoke test.  Guarded so that importing this library no longer
    # triggers a network request and a print.
    print(IMDbGallery('http://www.imdb.com/title/tt0429589/').CoverSize)
