English > Support

Personal Video Database 1.0.2.7 MOD

<< < (8/73) > >>

Ivek23:
IMDB_ [EN] [HTTPS] script

ParsePage_IMDBSearchTitle

I corrected the part of the code that parses the movie search results, for results where the poster is now missing.

Example for this link:
https://www.imdb.com/search/title?title=Lawrence+Of+Arabia&title_type=feature&release_date=,&view=simple
Poster link:
https://m.media-amazon.com/images/G/01/imdb/images/nopicture/140x209/film-4001654135._CB470041848_.png


--- Quote ---//Helper of ParsePage_IMDBSearchTitle: tries to turn the poster link of one search result into a local image file.
    //Parameters:
    //     ItemValue     = raw poster link read from the result item (the loadlate="..." attribute).
    //     UrlPrefix     = image base URL that ItemValue must contain for this attempt.
    //     ResultsNumber = index of the result (1,2,3...); gives every result its own download file name.
    //     CurrentValue  = value handed back unchanged when ItemValue does not contain UrlPrefix.
    //Returns:
    //     The downloaded file path if one of the two downloads succeeds.
    //     The base poster URL (without extension) if both downloads fail.
    //     '' if UrlPrefix is found but no poster code follows it.
    //     CurrentValue if UrlPrefix is not found in ItemValue.
Function ParsePage_IMDBSearchTitle_Poster(ItemValue,UrlPrefix:String;ResultsNumber:Integer;CurrentValue:String):String; //BlockOpen
  Var
      PosterURL,SizedURL,ImageFile:String;
  Begin
    PosterURL:=CurrentValue;
    If (Pos(UrlPrefix,ItemValue)>0) Then Begin
        PosterURL:=TextBetWeenFirst(ItemValue,UrlPrefix,'.');     //Get poster code. Strings which opens/closes the data. WEB_SPECIFIC
        If (Length(PosterURL)>0) Then Begin
            PosterURL:=UrlPrefix+PosterURL;                        //Base poster URL without '.jpg'. WEB_SPECIFIC
            SizedURL:=PosterURL+'._V1_UY'+IntToStr(MAX_SEARCHLIST_HEIGTH)+'_.jpg';
            ImageFile:=GetAppPath+'Scripts\'+BASE_DOWNLOAD_FILE_IMAGE_NAME+'-SearchResult-'+IntToStr(ResultsNumber)+'-.jpg';
            // Avoid HTTPS redirection: Download https image to file
            If (1=DownloadImage(SizedURL,ImageFile)) Then Begin                    //Download in the selected user max size. WEB_SPECIFIC
                LogMessage('      Parse Results PreviewURL:'+SizedURL+'||');
                PosterURL:=ImageFile;  //Pass the downloaded file to PVD
            End Else If (1=DownloadImage(PosterURL+'.jpg',ImageFile)) Then Begin   //Download in the web base size. WEB_SPECIFIC
                LogMessage('      Parse Results PreviewURL:'+PosterURL+'.jpg'+'||');
                PosterURL:=ImageFile;  //Pass the downloaded file to PVD
            End;
        End;
    End;
    Result:=PosterURL;
  End; //BlockClose

//Parses the IMDB title search page and registers every result found with AddSearchResult.
Function ParsePage_IMDBSearchTitle(HTML:String):Cardinal; //BlockOpen
    //Returns:
    //     Result:=prDownload; (Global var DownloadURL=IMDB page) if there is one results.
    //     Result:=prList; if there are several results. (Don't work with Preferences/Plugings/Silent Mode).
    //     Result:=prError; If not results (page says 'No results' or no result item could be parsed).
  Var
      curPos,ItemPos,ResultsNumber:Integer;
      ItemToken,ItemValue:String;
      Title,Year,MovieURL,PreviewURL:String;
  Begin
    LogMessage('Function ParsePage_IMDBSearchTitle BEGIN======================|');
    LogMessage('Global Var-Mode|'+IntToStr(Mode)+'|');
    LogMessage('Global Var-DownloadURL|'+DownloadURL+'|');
    If (Pos('No results',HTML)<>0) Then Begin                                           //WEB_SPECIFIC
        LogMessage('        No Movies results');
        Result:=prError;
    End Else Begin
        LogMessage('        Parsing search Movies results');
        ItemToken:='<div class="lister-item mode-simple">';                             //String which opens every Web Result item. WEB_SPECIFIC
        curPos:=Pos('<div class="lister-list">',HTML);                                  //String which opens the Web Search List data. WEB_SPECIFIC
        //Get first item
        ResultsNumber:=0;
        curPos:=PosFrom(ItemToken,HTML,curPos);
        While curPos>0 Do Begin
            ItemPos:=curPos;                                                            //Remember where this item starts (used to guarantee progress below).
            ResultsNumber:=ResultsNumber+1;
            LogMessage('    Parsing search movie results in '+IntToStr(curPos)+' ...');
            curPos:=PosFrom('<div class="lister-item-image">',HTML,curPos);             //String which opens the Web URL+Title+URL+PreviewURL data. WEB_SPECIFIC
            If curPos<=0 Then curPos:=ItemPos;                                          //Image block missing: keep parsing from the item start, not from position 0.
            //Get PreviewURL
            PreviewURL:='';                                                             //Reset for every item, otherwise a result without poster shows the previous one.
            If POSTER_IN_SEARCH Then Begin
                ItemValue:=TextBetWeen(HTML,'loadlate="','"',false,curPos);             //Strings which opens/closes the data. WEB_SPECIFIC
                //First the generic 'no picture' image location, then the normal poster location (same order as before).
                PreviewURL:=ParsePage_IMDBSearchTitle_Poster(ItemValue,'https://m.media-amazon.com/images/G/01/imdb/images/',ResultsNumber,PreviewURL);
                PreviewURL:=ParsePage_IMDBSearchTitle_Poster(ItemValue,BASE_URL_IMAGE_PRE_TRUE,ResultsNumber,PreviewURL);
            End;
            //Get MovieURL  (Always must it has)
            MovieURL:=TextBetWeen(HTML,'<a href="/title/','/',false,curPos);            //Strings which opens/closes the data. WEB_SPECIFIC
            MovieURL:=BASE_URL_PRE+MovieURL+BASE_URL_SUF;
            LogMessage('      Parse Results URL:'+MovieURL+'||');
            //If true HTPPS link PVdB gives "Socket Error # 11001"
            //If ancient http, get redirected and give "IOHandler value is not valid
            //If invalid url, "HTTP/1.1 400 Bad Request"
            MovieURL:=CHEAT_PREFIX_URL+MovieURL;                                        //'Cheat' URL for avoid HTTPS issue in GET return.
            LogMessage('      Parse Results CHEAT URL:'+MovieURL+'||');
            //Get Title (Always must it has)
            Title:=TextBetWeen(HTML,'>','<',false,curPos);                              //Strings which opens/closes the data. WEB_SPECIFIC
            LogMessage('      Parse Results Title:'+Title+'||');
            //Get Year
            Year:=TextBetWeen(HTML,'(',')',false,curPos);                               //Strings which opens/closes the data. WEB_SPECIFIC
            //Remove the text "typeof uet == 'function'" from the value (presumably script text caught between the brackets when the item has no year - TODO confirm).
            Year:=StringReplace(Year,'typeof uet == '+Chr(39)+'function'+Chr(39),'',True,False,False);
            LogMessage('    Parse Results in Year:'+Year+'||');
            AddSearchResult(Title,'',Year,MovieURL,PreviewURL);
            //Just to check the website (Only to check the web page) not PVdB valid result.
            If CHECK_WEBSITE Then AddSearchResult('                             '+#8729+' Just to check the website: '+StringReplace(MovieURL,CHEAT_PREFIX_URL,'',True,False,False),'','',StringReplace(MovieURL,CHEAT_PREFIX_URL,'',True,False,False),'');
            //Next item. Never search from before the end of the current item opener, so the loop can not find the same item again.
            If curPos<ItemPos+Length(ItemToken) Then curPos:=ItemPos+Length(ItemToken);
            curPos:=PosFrom(ItemToken,HTML,curPos);                                     //String which opens the Web Result item List data. WEB_SPECIFIC
        End;
        If ResultsNumber=0 Then Begin
            Result:=prError; //Nothing could be parsed (for example the web layout changed): report it instead of an empty list.
            LogMessage('      After parsing search Movies, no result item found');
        End Else If ResultsNumber=1 Then Begin
            DownloadURL:=StringReplace(MovieURL,CHEAT_PREFIX_URL,'',True,False,False);    //Eliminate the CHEAT_PREFIX_URL for get the true movie link
            DownloadURL:=StringReplace(DownloadURL,'http:','https:',True,True,False);     //The direct url needed (no https redirection)
            Result:=prDownload; //One result in the DownloadURL.
            LogMessage('      After parsing search Movies,only one result detected');
        End Else Begin
            Result:=prList; //Don't work with Preferences/Plugings/Silent Mode.
            LogMessage('      After parsing search Movies go to choose List Results');
        End;
    End;
    LogMessage('Function ParsePage_IMDBSearchTitle END=====================||');
End; //BlockClose
--- End quote ---


ParsePage_IMDBMovieMPAA

I corrected parts of the code for ParsePage_IMDBMovieMPAA.


--- Quote ---Function ParsePage_IMDBMovieMPAA(HTML:String):Cardinal; //BlockOpen
    //Excerpt: only the corrected lines are shown, the lines with a single '.' mark code that was left out of the post.
    //Returns:
    //     Result:=prFinished; Script has finished gathering data
    //     Result:=prError;  If any big problem occurs (with exit)
    //Retrieve: ~MPAA~
  Var
    curPos:Integer;
    mpaaValue:String;
  Begin
.
.
    LogMessage('      MPAA.USA.Corrected:'+mpaaValue+'||');
   //Corrected part: remove the text 'update: ' from the value before it is logged and stored.
   //NOTE(review): the meaning of the three boolean flags comes from the host StringReplace function - confirm.
   mpaaValue:=StringReplace(mpaaValue,'update: ','',False,False,True);                //WEB_SPECIFIC
   LogMessage('      MPAA rating: '+mpaaValue+' ||');
    //Not or bad MPAA USA then get Spain certification (if exist) and convert in USA-KODI values
.
.
    End;
 
    //Store the final value in the 'mpaa' field and log it.
    AddFieldValueXML('mpaa',mpaaValue);
    LogMessage('      Get result MPAA='+mpaaValue+'||');
    LogMessage('Function ParsePage_IMDBMovieMPAA END=====================||');
  End; //BlockClose
--- End quote ---


Function ParsePage

I added part of the code for Function ParsePage.


--- Quote ---//USER FUNCTIONS AND PROCEDURES==================================================================================BlockClose
//OBLIGATORY CALLBACK PRINCIPAL FUNCTION=========================================================================BlockOpen
//Main callback. Excerpt: the lines with a single '.' mark code that was left out of the post.
//In mode smNormal it stores the IMDbID custom field, the 'url' field and the 'Updated' custom field and returns prFinished.
//If no mode branch exits first, it returns prError.
Function ParsePage(HTML:String;URL:AnsiString):Cardinal; //BlockOpen
  Var
    MovieID,titleValue,yearValue:String;
    Date:String;
    DateParts: TWideArray;   
  Begin
    LogMessage('Function ParsePage BEGIN======================|');
    LogMessage('Global Var-Mode|'+IntToStr(Mode)+'|');
    LogMessage('Global Var-DownloadURL|'+DownloadURL+'|');
    LogMessage('Local Var-URL|'+URL+'|');
//Parse return in mode 'Search Movie URL'--------------------------------------------------------------------------------         
.
.
//Parse with the Movie URL 'smNormal'------------------------------------------------------------------------------------
    If (Mode=smNormal) Then Begin
        Result:=prFinished;  //It will change to prError if any big problem with exit; 
        //Information in several page. Needs Provider MovieID
        //MovieID = text of DownloadURL between BASE_URL_PRE_TRUE and BASE_URL_SUF.
        MovieID:=TextBetWeenFirst(DownloadURL,BASE_URL_PRE_TRUE,BASE_URL_SUF);
        LogMessage('  ParsePage mode smNormal|'+IntToStr(Mode)+'|. Getting provider data for MovieID|'+ MovieID +'|');
        AddCustomFieldValueByName('IMDbID',MovieID);
        LogMessage('      Get result IMDbID:'+MovieID+'||');
        //Get ~url~ (DownloadURL with BASE_URL_PRE_TRUE replaced by BASE_URL_PRE)
        AddFieldValueXML('url',StringReplace(DownloadURL,BASE_URL_PRE_TRUE,BASE_URL_PRE,True,False,False));
        LogMessage('      Get result url:'+StringReplace(DownloadURL,BASE_URL_PRE_TRUE,BASE_URL_PRE,True,False,False)+'||');
    //Parse principal provider page = BASE_URL_PRE_TRUE------------------------------------------------------------------
.
.         
    //Date ~Updated~ (choose simple or verbose version)
        //Added part: split the current date at '-' and store it with the parts in reverse order, joined by '.',
        //followed by the time, the script name and the script version.
        //NOTE(review): assumes DateToStr returns at least three parts separated by '-';
        //DateParts[2] is read without checking how many parts were found - confirm the date format.
        Date:=DateToStr(CurrentDateTime);
        if Date <> '' then
         begin
            ExplodeString(Date,DateParts,'-');
            Date:=DateParts[2]+'.'+ DateParts[1]+'.'+DateParts[0];   
            AddCustomFieldValueByName('Updated',Date+' at '+TimeToStr(CurrentDateTime)+' • '+SCRIPT_NAME+' '+SCRIPT_VERSION);  // Annoying 
        End;         
        LogMessage('    Provider data info retreived Ok in '+DateToStr(CurrentDateTime)+' '+TimeToStr(CurrentDateTime)+'|');
        LogMessage('Function ParsePage NORMAL END======================|');
        exit;
    End;
//Parse with unknow mode-----------------------------------------------------------------------------------------------   
    Result:=prError;
    exit;
End; //BlockClose
//OBLIGATORY CALLBACK PRINCIPAL FUNCTION=========================================================================BlockClose   
--- End quote ---

Ivek23:
IMDB_ [EN] [HTTPS] script

I added to and corrected part of the code of Function ParsePage_IMDBMovieBASE so that it now works properly.

Part 1:

//SCRIPT INFO


--- Quote ---//SCRIPT INFO================================================================================================== BlockClose
//SCRIPT CONSTANTS============================================================================================= BlockOpen
Const
//Script types-----------------------------------------------------------------------------------------------------------
  stMovies        = 0;
  stPeople        = 1;
  stPoster        = 2;
  stScreenShots   = 3;
  stCover         = 4;
//Script Obligatory Infos----------------------------------------------------------------------------------------------
  SCRIPT_NAME  = 'IMDB [EN][HTTPS]_1'; //The most complete information provider
  SCRIPT_DESC  = 'IMDB download Movie info [EN]_1'; 
  SCRIPT_VERSION  = '1.1.0.0';
  SCRIPT_TYPE  = stMovies;
  SCRIPT_LANG  = $09; //English //$0a es-Spanish   $0c fr-French   $09 en-English
  RATING_NAME    = 'IMDB'; //PVD has ~imdbrating~ for this value
  CODE_PAGE      = 65001; //28591=ISO 8859-1 Latin 1; Western European (ISO). Use: 65001=Unicode (UTF-8) | 0=for Autodetect   Autodetect   
//Script Options-------------------------------------------------------------------------------------------------------
  //Retreive Data Config
  GET_MAINPAGE_ACTORS_CREDIT  = True ;  //Set to True to ensure MAINPAGE_ACTORS_CREDIT info is added. Otherwise: Set to False to ensure MAINPAGE_ACTORS_CREDIT info is not added. MAINPAGE_ACTORS_CREDIT info is in to the principal movie page.
  GET_FULL_ACTORS_CREDIT  = True ;  //Set to True to ensure FULL_ACTORS_CREDIT info is for Full Cast & Crew provider page. Otherwise: Set to False to ensure FULL_ACTORS_CREDIT info is not added.
  GET_FULL_MPAA  = True ;  //Download Certification provider page for retreive the MPAA info. Otherwise only the info of the principal movie page.
  GET_FULL_AKA  = True ;  //Download 'Also Known As' provider page for retreive the info. Otherwise only the info of the principal movie page.
  GET_FULL_CREDIT  = True ;  //Download Cast or Credit provider page for retreive the info. Otherwise only the info of the principal movie page.
  PEOPLE_LIMIT  = 25;    //Limit of number of actors (cast) or of credits (crew) retrieved.
  GET_FULL_AWARDS  = True ;  //Download Awards provider page for retreive the info. Otherwise doesn't do nothingh because no info in the principal movie page.
  EVENTS_LIMIT  = 100;   //Limit of number of events (USA Academy Awards, Golden Globes, etc) to retrive awards.
  GET_FEATURES  = True ; //Retreive the original movie features (Runtime, Sound Mix, Color, Aspect Ratio).
  GET_FULL_FEATURES  = True ; //Download Technical Specs for retreive the original movie features (Runtime, Sound Mix, Color, Aspect Ratio, etc).
  MAX_IMAGE_HEIGTH  = 1200; //Heigth limit of the stored posters.
  //Process Data Config
.
.
//Download methods-------------------------------------------------------------------------------------------------------
  dmGET           = 0;
  dmPOST          = 1;
//Movie fields----------------------------------------------------------------------------------------------------------
   mfURL      = 0;
   mfTitle      = 1;
   mfOrigTitle   = 2;
   mfAka      = 3;
   mfYear      = 4;
   mfGenre      = 5;
   mfCategory   = 6;
   mfCountry   = 7;
   mfStudio      = 8;
   mfMPAA      = 9;
   mfRating      = 10;   //This is 'Additional rating', not 'Rating'
   mfTags      = 11;
   mfTagline   = 12;
   mfDescription   = 13;
   mfDuration   = 14;
   mfFeatures   = 15;
//Credits types----------------------------------------------------------------------------------------------------------
--- End quote ---

Function ParsePage_IMDBMovieBASE


--- Quote ---Function ParsePage_IMDBMovieBASE(HTML:String):Cardinal; //BlockOpen
    //Returns:
    //     Result:=prFinished; Script has finished gathering data
    //     Result:=prError; If ¿any big problem? with exit;
    //Retrieve: ~title~, ~year~, ~origtitle~, ~poster~ / ~imdbrating~, ~IMDB_Votes~ (Custom Field) / ~TOP_250~(Custom Field) /
    //          If Not(GET_FULL_CREDIT): ~crew~ctDirectors,ctWriters,ctComposers,ctProducers(Not in base page), ctActors
    //         ~description~ / ~category~ "keywords" / ~tagline~ / ~genre~
    //         If Not(GET_FULL_MPAA) ~mpaa~
    //         ~country~ / ~rdate~ in contry provider local IP geolocation
    //         If Not(GET_FULL_AKA) ~aka~.
    //         ~budget~ / ~money~ / ~studio~ "Production Co"
   //         If GET_FEATURES ~features~   
    //         //If Not(GET_FULL_FEATURES) ~features~
  Var
      curPos,endPos,index:Integer;
      ItemValue,ItemList,ImageFile:String;
      titleValue:String;
      Name,Role,PersonURL:String;
  Begin
    LogMessage('Function ParsePage_IMDBMovieBASE BEGIN======================|');
    Result:=prFinished;  //It will change to prError if any big problem with exit;
    //Because the script don't retreives the data in the page order a token search for the first curPos position or block select is mandatory
    //Get ~title~, ~year~, ~origtitle~, ~poster~
    //Get all "raw" title summary (in raw because we need the hidden links, we avoid "complete" token in strings which opens/closes)
    //ItemList:=TextBetWeenFirst(HTML,'<div class="title_wrapper"','div class="plot_summary_wrapper">'); //WEB_SPECIFIC.
    ItemList:=TextBetWeenFirst(HTML,'<div class="title_block"','div class="plot_summary_wrapper">'); //WEB_SPECIFIC.   
    If (Length(ItemList)>0) Then Begin           
        //titleValue:=TextBetWeenFirst(ItemList,'<h1 itemprop="name" class="">','<');       //Strings which opens/closes the data. WEB_SPECIFIC
        titleValue:=TextBetWeenFirst(ItemList,'<h1 class="">','<');       //Strings which opens/closes the data. WEB_SPECIFIC      
        AddFieldValueXML('title',titleValue);
        LogMessage('      Get result title:'+titleValue+'||');
        ItemValue:=TextBetWeenFirst(ItemList,'(','–');                                 //Strings which opens/closes the data. WEB_SPECIFIC   
        LogMessage('      Get result year_00:'+TextBetWeenFirst(ItemList,'(','–')+'||');           
        AddFieldValueXML('year',ItemValue);
        LogMessage('      Get result year_0:'+ItemValue+'||');      
        //ItemValue:=TextBetWeenFirst(ItemList,'(',')');                                 //Strings which opens/closes the data. WEB_SPECIFIC
        ItemValue:=TextBetWeenFirst(ItemList,'<span id="titleYear">(',')</span>            </h1>');                                 //Strings which opens/closes the data. WEB_SPECIFIC      
        AddFieldValueXML('year',ItemValue);
        LogMessage('      Get result year:'+ItemValue+'||');
        ItemValue:=TextBetWeenFirst(ItemList,'<div class="originalTitle">','<');          //Strings which opens/closes the data. WEB_SPECIFIC
        If (Length(ItemValue)=0) Then ItemValue:=titleValue;                           //Provider hides the original title if same that title. WEB_SPECIFIC
        AddFieldValueXML('origtitle',ItemValue);
        LogMessage('      Get result origtitle:'+ItemValue+'||');
        ItemValue:=TextBetWeenFirst(ItemList,BASE_URL_IMAGE_PRE_TRUE,'.');                 //Get poster code. Strings which opens/closes the data. WEB_SPECIFIC
        If (Length(ItemValue)>0) then begin
            ItemValue:=BASE_URL_IMAGE_PRE_TRUE + ItemValue;                             //Base poster URL without '.jpg'. WEB_SPECIFIC
            ImageFile:=GetAppPath+'Scripts\'+BASE_DOWNLOAD_FILE_IMAGE_NAME+'-Poster.jpg'
            // Avoid HTTPS redirection: Download https image to file  OutPutFile=
            If (1=DownloadImage(ItemValue + '._V1_UY' + IntToStr(MAX_IMAGE_HEIGTH) + '_.jpg',ImageFile)) then begin  //Dowload in the selected user max size. WEB_SPECIFIC
                AddImageURL(itPoster,ImageFile);    //Get the photo from a file even if the next line we log the true URL.
                LogMessage('      Get result poster:'+ItemValue + '._V1_UY' + IntToStr(MAX_IMAGE_HEIGTH) + '_.jpg'+'||');
            end else if (1=DownloadImage(ItemValue +'.jpg',ImageFile)) then begin  //Dowload in the web base size. WEB_SPECIFIC
                AddImageURL(itPoster,ImageFile);    //Get the photo from a file even if the next line we log the true URL.
                LogMessage('      Get result poster:' + ItemValue + '.jpg'+'||');
            end;       
        end;
    End;
    //Get ~imdbrating~, ~IMDB_Votes~
    //curPos:=Pos('<span itemprop="ratingValue">',HTML);                      //WEB_SPECIFIC.
    curPos:=Pos('<div class="ratingValue">',HTML);                      //WEB_SPECIFIC.
    If 0<curPos Then Begin                         
       //ItemValue:=TextBetWeen(HTML,'<span itemprop="ratingValue">','<',false,curPos);   //Strings which opens/closes the data. WEB_SPECIFIC
       ItemValue:=TextBetWeen(HTML,'<span>','<',false,curPos);   //Strings which opens/closes the data. WEB_SPECIFIC
       ItemValue:=StringReplace(ItemValue,',','.',True,True,False);                           //Decimal comma spanish separator to point english separator.
       AddFieldValueXML('imdbrating',ItemValue);
       LogMessage('      Get result imdbrating:'+ItemValue+'||');
       //ItemValue:=TextBetWeen(HTML,'<span class="small" itemprop="ratingCount">','</span>',false,curPos);   //Strings which opens/closes the data. WEB_SPECIFIC
       ItemValue:=TextBetWeen(HTML,'<span class="small">','</span>',false,curPos);   //Strings which opens/closes the data. WEB_SPECIFIC      
       ItemValue:=StringReplace(ItemValue,'.','',True,True,False);            //Remove thousand point spanish separator.
       ItemValue:=StringReplace(ItemValue,',','',True,True,False);            //Remove thousand comma english separator.
       AddCustomFieldValueByName('IMDB_Votes',ItemValue);
       LogMessage('      Get result IMDB_Votes:'+ItemValue+'||');
    End;
    //Get ~TOP_250~
    curPos:=Pos('Top Rated Movies #',HTML);                                 //WEB_SPECIFIC.
    If 0<curPos Then Begin
       ItemValue:=TextBetWeen(HTML,'Top Rated Movies #','<',false,curPos);   //Strings which opens/closes the data. WEB_SPECIFIC
       AddCustomFieldValueByName('TOP_250',ItemValue);
       LogMessage('      Get result TOP_250:'+ItemValue+'||');
    End;
    //Get ~Bottom 100~
    curPos:=Pos('Bottom Rated Movies #',HTML);                                 //WEB_SPECIFIC.
    If 0<curPos Then Begin
       ItemValue:=TextBetWeen(HTML,'Bottom Rated Movies #','<',false,curPos);   //Strings which opens/closes the data. WEB_SPECIFIC
       AddCustomFieldValueByName('Bottom 100',ItemValue);
       LogMessage('      Get result Bottom_100:'+ItemValue+'||');
    End;
    //~Metascore:~
    curPos := PosFrom('> <div class="metacriticScore score_favorable titleReviewBarSubItem">',HTML,EndPos);
    if curPos>0 then begin
        curPos:=curPos+Length('> <div class="metacriticScore score_favorable titleReviewBarSubItem">');
        EndPos:=PosFrom('</span>',HTML,curPos);
        ItemValue:=RemoveTags(Copy(HTML,curPos,endPos-curPos+2),false);
        ItemValue:=StringReplace(ItemValue,'0</',',0',True,False,True);
        ItemValue:=StringReplace(ItemValue,'1</',',1',True,False,True);
        ItemValue:=StringReplace(ItemValue,'2</',',2',True,False,True);
        ItemValue:=StringReplace(ItemValue,'3</',',3',True,False,True);
        ItemValue:=StringReplace(ItemValue,'4</',',4',True,False,True);
        ItemValue:=StringReplace(ItemValue,'5</',',5',True,False,True);
        ItemValue:=StringReplace(ItemValue,'6</',',6',True,False,True);
        ItemValue:=StringReplace(ItemValue,'7</',',7',True,False,True);
        ItemValue:=StringReplace(ItemValue,'8</',',8',True,False,True);
        ItemValue:=StringReplace(ItemValue,'9</',',9',True,False,True);
            //AddFieldValueXML('orname',RATING_NAME1);
            //AddFieldValueXML('orating',ItemValue);   
        AddCustomFieldValueByName('Metascore',FloatToStr((StrToFloat(ItemValue) * 1)));
    End;
    //~Metascore:~
    curPos:=PosFrom('> <div class="metacriticScore score_unfavorable titleReviewBarSubItem">',HTML,EndPos);
    if curPos>0 then begin
        curPos:=curPos+Length('> <div class="metacriticScore score_unfavorable titleReviewBarSubItem">');
        EndPos:=PosFrom('</span>',HTML,curPos);
        ItemValue:=RemoveTags(Copy(HTML, curPos,endPos-curPos+2),false);
        ItemValue:=StringReplace(ItemValue,'0</',',0',True,False,True);
        ItemValue:=StringReplace(ItemValue,'1</',',1',True,False,True);
        ItemValue:=StringReplace(ItemValue,'2</',',2',True,False,True);
        ItemValue:=StringReplace(ItemValue,'3</',',3',True,False,True);
        ItemValue:=StringReplace(ItemValue,'4</',',4',True,False,True);
        ItemValue:=StringReplace(ItemValue,'5</',',5',True,False,True);
        ItemValue:=StringReplace(ItemValue,'6</',',6',True,False,True);
        ItemValue:=StringReplace(ItemValue,'7</',',7',True,False,True);
        ItemValue:=StringReplace(ItemValue,'8</',',8',True,False,True);
        ItemValue:=StringReplace(ItemValue,'9</',',9',True,False,True);
            //AddFieldValueXML('orname',RATING_NAME1);
            //AddFieldValueXML('orating',ItemValue);   
        AddCustomFieldValueByName('Metascore',FloatToStr((StrToFloat(ItemValue) * 1)));
    End;    
    //Get Cast. GET_FULL_CREDIT = False only the info of the principal movie page.
.
.
    //Get ~plot_summary~
    If Pos('<div class="plot_summary',HTML)>0 then begin
    EndPos:=Pos('<div class="plot_summary',HTML);
    curPos:=Pos('<div class="summary_text">',HTML);                                 //WEB_SPECIFIC.
       If 0<curPos Then Begin
          ItemValue:=TextBetWeen(HTML,'<div class="summary_text">','</div>',false,curPos);   //Strings which opens/closes the data. WEB_SPECIFIC
          ItemValue:=StringReplace(ItemValue,',            ',', ',True,False,True);
          ItemValue:=StringReplace(ItemValue,'            ',' ',True,False,True);
          ItemValue:=StringReplace(ItemValue,'        ',' ',True,False,True);
          ItemValue:=StringReplace(ItemValue,'Add a Plot »','',True,False,True);
          ItemValue:=StringReplace(ItemValue,'See full summary »','',True,False,True);
          curPos:=Pos('var ue_t0=ue_t0||+new Date();var ',ItemValue);
          If 0<curPos then ItemValue:=Copy(ItemValue,0,curPos-1);
          AddFieldValueXML('comment',ItemValue);
          LogMessage('      Get result plot_summary description:'+ItemValue+'||');
       End;
    End;
.
.
 
--- End quote ---

Ivek23:
IMDB_ [EN] [HTTPS] script

Part 2:

Function ParsePage_IMDBMovieBASE


--- Quote ---
    //Get ~plot_summary~
    If Pos('<div class="plot_summary',HTML)>0 then begin
    EndPos:=Pos('<div class="plot_summary',HTML);
    curPos:=Pos('<div class="summary_text">',HTML);                                 //WEB_SPECIFIC.
       If 0<curPos Then Begin
          ItemValue:=TextBetWeen(HTML,'<div class="summary_text">','</div>',false,curPos);   //Strings which opens/closes the data. WEB_SPECIFIC
          ItemValue:=StringReplace(ItemValue,',            ',', ',True,False,True);
          ItemValue:=StringReplace(ItemValue,'            ',' ',True,False,True);
          ItemValue:=StringReplace(ItemValue,'        ',' ',True,False,True);
          ItemValue:=StringReplace(ItemValue,'Add a Plot »','',True,False,True);
          ItemValue:=StringReplace(ItemValue,'See full summary »','',True,False,True);
          curPos:=Pos('var ue_t0=ue_t0||+new Date();var ',ItemValue);
          If 0<curPos then ItemValue:=Copy(ItemValue,0,curPos-1);
          AddFieldValueXML('comment',ItemValue);
          LogMessage('      Get result plot_summary description:'+ItemValue+'||');
       End;
    End;
.
.
    //Get ~category~ "keywords" (field with several values in a comma separated list)
    curPos:=Pos('<h4 class="inline">Plot Keywords:</h4>',HTML);                                      //WEB_SPECIFIC.IC.
    If 0<curPos Then Begin       
       ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Plot Keywords:</h4>','<a href="/title/',false,curPos);  //Strings which opens/closes the data. WEB_SPECIFIC
       ItemValue:=StringReplace(ItemValue,'|',',',True,True,False);                                  //Change the separator to comma
       //AddFieldValueXML('category',ItemValue);
      AddFieldValueXML('tags',ItemValue);       
       LogMessage('      Get results Category:'+ItemValue+'||');
    End;
.
.
    //Get ~mpaa~. GET_FULL_MPAA = False only the info of the principal movie page.
    If Not(GET_FULL_MPAA) Then Begin
       //The text can be "Certificate:" or "Motion Picture Rating" but alway after genres.
       If 0<curPos Then Begin
          ItemValue:=TextBetWeen(HTML,'<span itemprop="contentRating">','</span>',false,curPos);   //Strings which opens/closes the data. WEB_SPECIFIC
          AddFieldValueXML('mpaa',ItemValue);
          LogMessage('      Get result mpaa:'+ItemValue+'||');
       End;
    End;
    //Get ~origlang~ (several values in a comma separated list)
    curPos:=Pos('<h4 class="inline">Language:</h4>',HTML);                                      //WEB_SPECIFIC.
    if 0<curPos then begin       
       ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Language:</h4>','</div>',false,curPos);  //Strings which opens/closes the data. WEB_SPECIFIC
       curPos:=Pos('            |',ItemValue);
       If 0<curPos then ItemValue:=Copy(ItemValue,0,curPos-1);     
       AddFieldValueXML('origlang',ItemValue);
       LogMessage('      Get results Language:'+ItemValue+'||');
    End;
    //Get ~country~ (several values in a comma separated list)
    curPos:=Pos('<h4 class="inline">Country:</h4>',HTML);                                      //WEB_SPECIFIC.
    If 0<curPos Then Begin       
       ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Country:</h4>','</div>',false,curPos);  //Strings which opens/closes the data. WEB_SPECIFIC
       ItemValue:=StringReplace(ItemValue,'|',',',True,True,False);                           //Change the separator to comma
       AddFieldValueXML('country',ItemValue);
       LogMessage('      Get results Country:'+ItemValue+'||');
    End;
    //Get ~rdate~ in contry provider local IP geolocation. See: http://sobizarre-en.blogspot.fr/2014/12/how-to-easily-defeat-imdb-geolocation.html
    curPos:=Pos('<h4 class="inline">Release Date:</h4>',HTML);                                      //WEB_SPECIFIC
    If 0<curPos Then Begin       
        ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Release Date:</h4>','(',false,curPos);  //Strings which opens/closes the data. WEB_SPECIFIC
        ItemValue:=StringReplace(ItemValue,'January','1',true,true,true);     //Format web corrections. WEB_SPECIFIC
        ItemValue:=StringReplace(ItemValue,'February','2',true,true,true);
        ItemValue:=StringReplace(ItemValue,'March','3',true,true,true);
        ItemValue:=StringReplace(ItemValue,'April','4',true,true,true);
        ItemValue:=StringReplace(ItemValue,'May','5',true,true,true);
        ItemValue:=StringReplace(ItemValue,'June','6',true,true,true);
        ItemValue:=StringReplace(ItemValue,'July','7',true,true,true);
        ItemValue:=StringReplace(ItemValue,'August','8',true,true,true);
        ItemValue:=StringReplace(ItemValue,'September','9',true,true,true);
        ItemValue:=StringReplace(ItemValue,' October ',' 10 ',true,true,true);
        ItemValue:=StringReplace(ItemValue,' November ',' 11 ',true,true,true);
        ItemValue:=StringReplace(ItemValue,' December ',' 12 ',true,true,true);
            //ItemValue:=StringReplace(ItemValue,' ','.',True,False,True);
        ItemValue:=StringReplace(ItemValue,' ',#46,True,False,True);
        ItemValue:=StringReplace(ItemValue,'October.','10.',True,True,True);
        ItemValue:=StringReplace(ItemValue,'November.','11.',True,True,True);       
        ItemValue:=StringReplace(ItemValue,'December.','12.',True,True,True);     //Format web corrections. WEB_SPECIFIC
        AddFieldValueXML('rdate',ItemValue);
        LogMessage('      Get results Release Date:'+ItemValue+'||');
    End;       
    //Get ~aka~. GET_FULL_AKA = False only the info of the principal movie page.       
    If Not(GET_FULL_AKA) Then Begin
       curPos:=Pos('<h4 class="inline">Also Known As:</h4>',HTML);                                      //WEB_SPECIFIC
       If 0<curPos Then Begin
          ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Also Known As:</h4>','<',false,curPos);   //Strings which opens/closes the data. WEB_SPECIFIC
          AddFieldValueXML('aka',ItemValue);
          LogMessage('      Get result aka:'+ItemValue+'||');
       End;
    End;
    //Get ~budget~
    curPos:=Pos('<h4 class="inline">Budget:</h4>',HTML);                                      //WEB_SPECIFIC
    If 0<curPos Then Begin       
       ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Budget:</h4>','<',false,curPos);      //Strings which opens/closes the data. WEB_SPECIFIC
       ItemValue:=StringReplace(ItemValue,#36,'',True,True,False);        //Eliminate '$' if exists 
       ItemValue:=StringReplace(ItemValue,'€','',True,True,False);        //Eliminate '€' if exists         
       ItemValue:=StringReplace(ItemValue,',','',True,True,False);        //Eliminate ',' if exists             
       AddFieldValueXML('budget',ItemValue);
       LogMessage('      Get results Budget:'+ItemValue+'||');
    End;       
    //Get ~money~ Box Office worldwide     
.
.
    //Get ~studio~ "Production Co" (several values in a comma separated list)
    curPos:=Pos('<h4 class="inline">Production Co:</h4>',HTML);                                      //WEB_SPECIFIC.IC.
    If 0<curPos Then Begin       
       ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Production Co:</h4>','</span>',false,curPos);  //Strings which opens/closes the data. WEB_SPECIFIC
       ItemValue:=StringReplace(ItemValue,'See more »','',True,False,True);      
       ItemValue:=StringReplace(ItemValue,'See more','',True,True,False);                           //Eliminate 'See more' if exists           
       ItemValue:=StringReplace(ItemValue,', The','',True,False,True);
       ItemValue:=StringReplace(ItemValue,'The, ','',True,False,True);
       //ItemValue:=StringReplace(ItemValue,'The ','',True,False,True);      
       AddFieldValueXML('studio',ItemValue);
       LogMessage('      Get results Studio/Production Co:'+ItemValue+'||');
    End;
    //Get ~features~ (multiline) GET_FULL_FEATURES = False only the info of the principal movie page.
    //If Not(GET_FULL_FEATURES) Then Begin
    If GET_FEATURES Then Begin
        ItemList:='';
        //Get "Runtime" (in minutes)
        curPos:=Pos('<h4 class="inline">Runtime:</h4>',HTML);                                      //WEB_SPECIFIC.
        If 0<curPos Then Begin 
            ItemValue:=TextBetWeen(HTML,'M">',' min</time>',false,curPos);  //Strings which opens/closes the data. WEB_SPECIFIC
            ItemList:=ItemList+'Original Runtime: '+ItemValue+' min.<br>';
            LogMessage('      Get result Original Runtime:'+ItemValue+'||');
            AddFieldValue(mfDuration,ItemValue);
            LogMessage('      Get result lenght:'+ItemValue+'||');
        End;   
.
.
        if (Length(ItemList)>0) then begin
            AddFieldValueXML('features',ItemList);
            LogMessage('      Get result Movie Features:'+ItemList+'||');
        End;
    End;
    //Get ~User Reviews~
    If Pos('<h2>User Reviews</h2>',HTML)>0 then begin
    EndPos:=Pos('<h2>User Reviews</h2>',HTML);
    curPos:=Pos('<h2>User Reviews</h2>',HTML);                                 //WEB_SPECIFIC.
       If 0<curPos Then Begin
        curPos:=PosFrom('<p>',HTML,EndPos)+Length('<p>');
          EndPos:=PosFrom('</span>',HTML,curPos);
          ItemValue:=Copy(HTML,curPos,endPos-curPos);   //Strings which opens/closes the data. WEB_SPECIFIC
        ItemValue:=StringReplace(ItemValue,'</p>','',True,False,True);
        ItemValue:=StringReplace(ItemValue,'</div>','',True,False,True);
        ItemValue:=StringReplace(ItemValue,'<ul><li>','',True,False,True);            
        ItemValue:=StringReplace(ItemValue,'</li></ul>','',True,False,True);
        ItemValue:=StringReplace(ItemValue,':)','',True,False,True);   
        curPos:=Pos('Find showtimes, ',ItemValue);
        If 0<curPos then ItemValue:=Copy(ItemValue,0,curPos-1);
          AddCustomFieldValueByName('User Comments',ItemValue);
        AddCustomFieldValueByName('User Reviews',ItemValue);
          LogMessage('      Get result User Reviews: <br>'+ItemValue+'||');
       End;
    End;      
    LogMessage('Function ParsePage_IMDBMovieBASE END=====================||');
  End; //BlockClose
--- End quote ---

//Get ~category~ "keywords"


--- Quote ---    //Get IMDB "Plot Keywords" (field with several values in a comma separated list); stored in ~tags~, not ~category~
    //Locate the "Plot Keywords" heading; the whole step is skipped when it is not on the page.
    curPos:=Pos('<h4 class="inline">Plot Keywords:</h4>',HTML);                                      //WEB_SPECIFIC
    If 0<curPos Then Begin       
       //NOTE(review): TextBetWeen is defined elsewhere; presumably it returns the text between the two marker strings, searching from curPos - confirm.
       ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Plot Keywords:</h4>','<a href="/title/',false,curPos);  //Strings which open/close the data. WEB_SPECIFIC
       ItemValue:=StringReplace(ItemValue,'|',',',True,True,False);                                  //Change the separator to comma
       //Former target field, kept for reference:
       //AddFieldValueXML('category',ItemValue);
      AddFieldValueXML('tags',ItemValue);       
       //NOTE: the log label below still says "Category" although the value is written to 'tags'.
       LogMessage('      Get results Category:'+ItemValue+'||');
    End;
--- End quote ---

As for ~category~ "keywords": IMDB has never (I repeat, never) had a ~category~ "keywords" in the sense that the AllMovie site has. On IMDB these have always been treated as ~tags~ "keywords", and the information has always been transferred to the Tags field in the database.

For example for 21 movie:

IMDB "keywords"

--- Quote --- Plot Keywords: based on true story | card counting | blackjack | massachusetts institute of technology | gambling | See All (109) »
--- End quote ---
https://www.imdb.com/title/tt0478087/

AllMovie "keywords"
https://www.allmovie.com/movie/21-v386347

--- Quote ---Keywords
betrayal, blackjack [card game], casino, college-student
--- End quote ---
AllMovie "category"

--- Quote ---Themes
Gambling | Schemes and Ruses | Teachers and Students | Betrayal | Crime Gone Awry | Dishonor Among Thieves | Perfect Crime
--- End quote ---
https://www.allmovie.com/movie/21-v386347

It has been used this way since the creation of the PVD program, almost ten years ago; that is how I know it was added to the database.

Ivek23:
IMDB_ [EN] [HTTPS] script

//Get ~features~ (multiline)


--- Quote ---    //Get ~features~ (multiline) GET_FULL_FEATURES = False only the info of the principal movie page.
    //Former guard, replaced by the GET_FEATURES test below:
    //If Not(GET_FULL_FEATURES) Then Begin
    If GET_FEATURES Then Begin
        //ItemList accumulates the feature lines, each terminated by '<br>'.
        ItemList:='';
        //Get "Runtime" (in minutes)
        curPos:=Pos('<h4 class="inline">Runtime:</h4>',HTML);                                      //WEB_SPECIFIC.
        If 0<curPos Then Begin 
            //NOTE(review): TextBetWeen is defined elsewhere; presumably it returns the text between 'M">' and ' min</time>', searching from curPos - confirm.
            ItemValue:=TextBetWeen(HTML,'M">',' min</time>',false,curPos);  //Strings which open/close the data. WEB_SPECIFIC
            ItemList:=ItemList+'Original Runtime: '+ItemValue+' min.<br>';
            LogMessage('      Get result Original Runtime:'+ItemValue+'||');
            //The same value is also stored in the duration field (mfDuration).
            AddFieldValue(mfDuration,ItemValue);
            LogMessage('      Get result lenght:'+ItemValue+'||');
        End;   
.
.
        //Store the collected feature lines only when at least one was found.
        if (Length(ItemList)>0) then begin
            AddFieldValueXML('features',ItemList);
            LogMessage('      Get result Movie Features:'+ItemList+'||');
        End;
    End;
--- End quote ---

A change is needed here because this information is slightly different from the information in the Technical Specifications section.

Get ~features~

I've repaired the parts of the code that transfer the information from the Technical Specifications section.


--- Quote ---Function ParsePage_IMDBMovieTECHNICAL(HTML:String):Cardinal; //BlockOpen
    //Parses the IMDB technical specifications page passed in HTML. Each section
    //shown here (Runtime, Color; others are elided in this excerpt) is appended to
    //ItemList, which is finally stored in the custom field ~ImdbTechSpecs~.
    //Returns:
    //     Result:=prFinished; Script has finished gathering data
    //     Result:=prError; if any serious problem forces an exit
    //Retrieve: ~features~ (the collected text now goes to the custom field ~ImdbTechSpecs~, see the end of the function)
  Var
    //curPos:Integer;
    curPos,endPos:Integer;
    ItemValue,ItemList:String;     
  Begin
.
.
    //Get "Runtime" info
    //NOTE(review): HTMLValues2 is defined elsewhere; presumably it extracts the values of the 'Runtime' table row joined by '<br><br>' - confirm.
    ItemValue:=HTMLValues2(HTML,'Runtime','</tr>','<td>','</tr>','<br><br>',EndPos);
    //Turn the long runs of spaces left in the extracted text into '<br>' and drop the shorter run.
    ItemValue:=StringReplace(ItemValue,'                                ','<br>',True,False,True);
    ItemValue:=StringReplace(ItemValue,'                      ','<br>',True,False,True);
    ItemValue:=StringReplace(ItemValue,'          ','',True,False,True);
    LogMessage('      Get result Runtime:'+ItemValue+'||');
    if ItemValue <> '' then ItemList:=ItemList+'Runtime'+'<br>'+ItemValue+'<br><br>'; 
         //Also fill the duration field from the Runtime row, when that row exists.
         curPos:=Pos('<td class="label"> Runtime </td>',HTML);                                      //WEB_SPECIFIC.
         If 0<curPos Then Begin 
            //NOTE(review): presumably takes the text between ' (' and ' min)' after curPos, i.e. the runtime in minutes - confirm.
            ItemValue:=TextBetWeen(HTML,' (',' min)',false,curPos);  //Strings which open/close the data. WEB_SPECIFIC
            AddFieldValue(mfDuration,ItemValue);
            //AddFieldValueXML('lenght',ItemValue);
            LogMessage('      Get result Original Runtime_1:'+ItemValue+'||');
         End;         
.
.
    //Get "Color" info
    ItemValue:=HTMLValues2(HTML,'Color','</tr>','<td>','</tr>','<br><br>',EndPos);
    //Normalise the '|' separators and surrounding runs of spaces to a single ' • ' separator.
    ItemValue:=StringReplace(ItemValue,'                     |          ',' • ',True,False,True);
    ItemValue:=StringReplace(ItemValue,'             |         ',' • ',True,False,True);
    ItemValue:=StringReplace(ItemValue,'        ',' ',True,False,True);
   ItemValue:=StringReplace(ItemValue,'  • ',' • ',True,False,True);
    LogMessage('      Get result Color:'+ItemValue+'||');
    if ItemValue <> '' then ItemList:=ItemList+'Color'+'<br>'+ItemValue+'<br><br>';
.
.         
    //Store the collected sections only when at least one produced text.
    if (Length(ItemList)>0) then begin
        //AddFieldValueXML('features',ItemList);   //Ivek23 CustomField ~ImdbTechSpecs~
      AddCustomFieldValueByName('ImdbTechSpecs',ItemList);
        LogMessage('      Get result Movie Features:'+ItemList+'||');
    End;
    LogMessage('Function ParsePage_IMDBMovieTECHNICAL END=====================||');
  End; //BlockClose
--- End quote ---

It is recommended to transfer the Technical Specifications section information to the ~ImdbTechSpecs~ custom field.

IMDB_ [EN] [HTTPS] _1 script is attached.

fmb64:
Is it normal that importing from IMDb is very slow and that the screen flickers from quickly disappearing black windows? Also, importing hangs when I run it from C:\Program Files\... instead of a temporary directory.

Navigation

[0] Message Index

[#] Next page

[*] Previous page

Go to full version