English > Support
Personal Video Database 1.0.2.7 MOD
Ivek23:
IMDB_ [EN] [HTTPS] script
ParsePage_IMDBSearchTitle
I corrected the parts of the code that parse the movie search results, so that results whose poster is now missing are handled as well.
Example for this link:
https://www.imdb.com/search/title?title=Lawrence+Of+Arabia&title_type=feature&release_date=,&view=simple
Poster link:
https://m.media-amazon.com/images/G/01/imdb/images/nopicture/140x209/film-4001654135._CB470041848_.png
--- Quote ---//Helper of ParsePage_IMDBSearchTitle: resolves the preview image of one search result.
Function IMDBSearch_FetchPreview(ImageAttr,Prefix:String;ResultIndex:Integer;Current:String):String;
//Parameters:
// ImageAttr: value of the loadlate="..." attribute of the result item.
// Prefix: image base URL to look for inside ImageAttr.
// ResultIndex: 1-based index of the result, used to name the downloaded file.
// Current: preview chosen so far; returned untouched when Prefix does not occur in ImageAttr.
//Returns:
// The local file path when one of the two download attempts succeeds,
// the extension-less remote URL when both attempts fail,
// '' when Prefix occurs but no image code follows it.
Var
  ImageCode,BaseURL,SizedURL,ImageFile:String;
Begin
  Result:=Current;
  If (Pos(Prefix,ImageAttr)>0) Then Begin
    ImageCode:=TextBetWeenFirst(ImageAttr,Prefix,'.'); //Get poster code. Strings which opens/closes the data. WEB_SPECIFIC
    Result:=ImageCode;
    If (Length(ImageCode)>0) Then Begin
      BaseURL:=Prefix+ImageCode; //Base poster URL without '.jpg'. WEB_SPECIFIC
      Result:=BaseURL;
      ImageFile:=GetAppPath+'Scripts\'+BASE_DOWNLOAD_FILE_IMAGE_NAME+'-SearchResult-'+IntToStr(ResultIndex)+'-.jpg';
      //Avoid HTTPS redirection: download the https image to a local file and hand that file to PVD.
      SizedURL:=BaseURL+'._V1_UY'+IntToStr(MAX_SEARCHLIST_HEIGTH)+'_.jpg';
      If (1=DownloadImage(SizedURL,ImageFile)) Then Begin //Download in the selected user max size. WEB_SPECIFIC
        LogMessage(' Parse Results PreviewURL:'+SizedURL+'||');
        Result:=ImageFile; //Pass the downloaded file to PVD
      End Else If (1=DownloadImage(BaseURL+'.jpg',ImageFile)) Then Begin //Download in the web base size. WEB_SPECIFIC
        LogMessage(' Parse Results PreviewURL:'+BaseURL+'.jpg'+'||');
        Result:=ImageFile; //Pass the downloaded file to PVD
      End;
    End;
  End;
End;

Function ParsePage_IMDBSearchTitle(HTML:String):Cardinal; //BlockOpen
//Parses the IMDB title search page and registers every result with AddSearchResult.
//Returns:
// Result:=prDownload; (Global var DownloadURL=IMDB page) if there is exactly one result.
// Result:=prList; if there are several results. (Doesn't work with Preferences/Plugins/Silent Mode).
// Result:=prError; if there are no results (the page says so, or no result item could be parsed).
Var
  curPos,ResultsNumber:Integer;
  ItemValue:String;
  Title,Year,MovieURL,PreviewURL:String;
Begin
  LogMessage('Function ParsePage_IMDBSearchTitle BEGIN======================|');
  LogMessage('Global Var-Mode|'+IntToStr(Mode)+'|');
  LogMessage('Global Var-DownloadURL|'+DownloadURL+'|');
  If (Pos('No results',HTML)<>0) Then Begin //WEB_SPECIFIC
    LogMessage(' No Movies results');
    Result:=prError;
  End Else Begin
    LogMessage(' Parsing search Movies results');
    curPos:=Pos('<div class="lister-list">',HTML); //String which opens the Web Search List data. WEB_SPECIFIC
    //Get first item
    ResultsNumber:=0;
    curPos:=PosFrom('<div class="lister-item mode-simple">',HTML,curPos); //String which opens the Web Result List data. WEB_SPECIFIC
    While curPos>0 Do Begin
      ResultsNumber:=ResultsNumber+1;
      LogMessage(' Parsing search movie results in '+IntToStr(curPos)+' ...');
      curPos:=PosFrom('<div class="lister-item-image">',HTML,curPos); //String which opens the Web URL+Title+URL+PreviewURL data. WEB_SPECIFIC
      //Get PreviewURL. It is reset for every item so that a result without
      //poster does not inherit the image of the previous result.
      PreviewURL:='';
      If POSTER_IN_SEARCH Then Begin
        ItemValue:=TextBetWeen(HTML,'loadlate="','"',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
        //First the generic IMDB image folder (this is where the "no picture" placeholder lives), then the regular poster base URL.
        PreviewURL:=IMDBSearch_FetchPreview(ItemValue,'https://m.media-amazon.com/images/G/01/imdb/images/',ResultsNumber,PreviewURL);
        PreviewURL:=IMDBSearch_FetchPreview(ItemValue,BASE_URL_IMAGE_PRE_TRUE,ResultsNumber,PreviewURL);
      End;
      //Get MovieURL (it must always be present)
      MovieURL:=TextBetWeen(HTML,'<a href="/title/','/',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
      MovieURL:=BASE_URL_PRE+MovieURL+BASE_URL_SUF;
      LogMessage(' Parse Results URL:'+MovieURL+'||');
      //With a true HTTPS link PVdB gives "Socket Error # 11001".
      //With the old http link the request is redirected and gives "IOHandler value is not valid".
      //With an invalid url it gives "HTTP/1.1 400 Bad Request".
      MovieURL:=CHEAT_PREFIX_URL+MovieURL; //'Cheat' URL to avoid the HTTPS issue in the GET return.
      LogMessage(' Parse Results CHEAT URL:'+MovieURL+'||');
      //Get Title (it must always be present)
      Title:=TextBetWeen(HTML,'>','<',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
      LogMessage(' Parse Results Title:'+Title+'||');
      //Get Year
      Year:=TextBetWeen(HTML,'(',')',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
      Year:=StringReplace(Year,'typeof uet == '+Chr(39)+'function'+Chr(39),'',True,False,False); //Drop the script fragment that is captured when the item has no year
      LogMessage(' Parse Results in Year:'+Year+'||');
      AddSearchResult(Title,'',Year,MovieURL,PreviewURL);
      //Just to check the website (only to check the web page), not a valid PVdB result.
      If CHECK_WEBSITE Then AddSearchResult(' '+#8729+' Just to check the website: '+StringReplace(MovieURL,CHEAT_PREFIX_URL,'',True,False,False),'','',StringReplace(MovieURL,CHEAT_PREFIX_URL,'',True,False,False),'');
      curPos:=PosFrom('<div class="lister-item mode-simple">',HTML,curPos); //String which opens the next Web Result item. WEB_SPECIFIC
    End;
    If ResultsNumber=0 Then Begin
      Result:=prError; //Nothing could be parsed: report it as "no results", as documented in the header.
      LogMessage(' After parsing search Movies, no result item was found');
    End Else If ResultsNumber=1 Then Begin
      DownloadURL:=StringReplace(MovieURL,CHEAT_PREFIX_URL,'',True,False,False); //Remove the CHEAT_PREFIX_URL to get the true movie link
      DownloadURL:=StringReplace(DownloadURL,'http:','https:',True,True,False); //The direct url needed (no https redirection)
      Result:=prDownload; //One result in the DownloadURL.
      LogMessage(' After parsing search Movies,only one result detected');
    End Else Begin
      Result:=prList; //Doesn't work with Preferences/Plugins/Silent Mode.
      LogMessage(' After parsing search Movies go to choose List Results');
    End;
  End;
  LogMessage('Function ParsePage_IMDBSearchTitle END=====================||');
End; //BlockClose
--- End quote ---
ParsePage_IMDBMovieMPAA
I corrected parts of the code for ParsePage_IMDBMovieMPAA.
--- Quote ---Function ParsePage_IMDBMovieMPAA(HTML:String):Cardinal; //BlockOpen
//Stores the MPAA rating held in mpaaValue in the ~mpaa~ field.
//This is an excerpt: the lines holding a single '.' mark parts of the function that were left out of the post,
//so how mpaaValue is obtained from HTML is not visible here.
//Returns:
// Result:=prFinished; Script has finished gathering data
// Result:=prError; if there is any big problem, with exit
//Retrieve: ~MPAA~
Var
curPos:Integer;
mpaaValue:String;
Begin
.
.
LogMessage(' MPAA.USA.Corrected:'+mpaaValue+'||');
//Remove the 'update: ' marker from the rating text before it is logged and stored.
mpaaValue:=StringReplace(mpaaValue,'update: ','',False,False,True); //WEB_SPECIFIC
LogMessage(' MPAA rating: '+mpaaValue+' ||');
//Missing or bad USA MPAA value: get the Spanish certification (if it exists) and convert it to USA-KODI values
.
.
End;
//Store the final rating and log it.
AddFieldValueXML('mpaa',mpaaValue);
LogMessage(' Get result MPAA='+mpaaValue+'||');
LogMessage('Function ParsePage_IMDBMovieMPAA END=====================||');
End; //BlockClose
--- End quote ---
Function ParsePage
I added part of the code for Function ParsePage.
--- Quote ---//USER FUNCTIONS AND PROCEDURES==================================================================================BlockClose
//OBLIGATORY CALLBACK PRINCIPAL FUNCTION=========================================================================BlockOpen
Function ParsePage(HTML:String;URL:AnsiString):Cardinal; //BlockOpen
//Main callback of the script. This is an excerpt: the lines holding a single '.' mark omitted parts.
//Visible behaviour: in mode smNormal it stores the IMDbID, ~url~ and 'Updated' fields and returns prFinished;
//when no handled mode applies it returns prError.
Var
MovieID,titleValue,yearValue:String;
Date:String;
DateParts: TWideArray;
Begin
LogMessage('Function ParsePage BEGIN======================|');
LogMessage('Global Var-Mode|'+IntToStr(Mode)+'|');
LogMessage('Global Var-DownloadURL|'+DownloadURL+'|');
LogMessage('Local Var-URL|'+URL+'|');
//Parse return in mode 'Search Movie URL'--------------------------------------------------------------------------------
.
.
//Parse with the Movie URL 'smNormal'------------------------------------------------------------------------------------
If (Mode=smNormal) Then Begin
Result:=prFinished; //It will change to prError if there is any big problem, with exit;
//The information is spread over several pages. The provider MovieID is needed.
MovieID:=TextBetWeenFirst(DownloadURL,BASE_URL_PRE_TRUE,BASE_URL_SUF);
LogMessage(' ParsePage mode smNormal|'+IntToStr(Mode)+'|. Getting provider data for MovieID|'+ MovieID +'|');
AddCustomFieldValueByName('IMDbID',MovieID);
LogMessage(' Get result IMDbID:'+MovieID+'||');
//Get ~url~ (stored with BASE_URL_PRE in place of BASE_URL_PRE_TRUE)
AddFieldValueXML('url',StringReplace(DownloadURL,BASE_URL_PRE_TRUE,BASE_URL_PRE,True,False,False));
LogMessage(' Get result url:'+StringReplace(DownloadURL,BASE_URL_PRE_TRUE,BASE_URL_PRE,True,False,False)+'||');
//Parse principal provider page = BASE_URL_PRE_TRUE------------------------------------------------------------------
.
.
//Date ~Updated~ (choose simple or verbose version)
//The date text is split on '-' and its three parts are written back in reverse order, joined with '.'.
//NOTE(review): this assumes DateToStr returns three '-'-separated parts (e.g. year-month-day);
//with another date format DateParts[1] and DateParts[2] may not exist - confirm.
Date:=DateToStr(CurrentDateTime);
if Date <> '' then
begin
ExplodeString(Date,DateParts,'-');
Date:=DateParts[2]+'.'+ DateParts[1]+'.'+DateParts[0];
AddCustomFieldValueByName('Updated',Date+' at '+TimeToStr(CurrentDateTime)+' • '+SCRIPT_NAME+' '+SCRIPT_VERSION); // Annoying
End;
LogMessage(' Provider data info retreived Ok in '+DateToStr(CurrentDateTime)+' '+TimeToStr(CurrentDateTime)+'|');
LogMessage('Function ParsePage NORMAL END======================|');
exit;
End;
//Parse with unknown mode----------------------------------------------------------------------------------------------
Result:=prError;
exit;
End; //BlockClose
//OBLIGATORY CALLBACK PRINCIPAL FUNCTION=========================================================================BlockClose
--- End quote ---
Ivek23:
IMDB_ [EN] [HTTPS] script
I added to and corrected parts of the code of Function ParsePage_IMDBMovieBASE so that it now works properly.
Part 1:
//SCRIPT INFO
--- Quote ---//SCRIPT INFO================================================================================================== BlockClose
//SCRIPT CONSTANTS============================================================================================= BlockOpen
Const
//Script constants (excerpt: the lines holding a single '.' mark omitted parts).
//Script types-----------------------------------------------------------------------------------------------------------
stMovies = 0;
stPeople = 1;
stPoster = 2;
stScreenShots = 3;
stCover = 4;
//Script Obligatory Infos----------------------------------------------------------------------------------------------
SCRIPT_NAME = 'IMDB [EN][HTTPS]_1'; //The most complete information provider
SCRIPT_DESC = 'IMDB download Movie info [EN]_1';
SCRIPT_VERSION = '1.1.0.0';
SCRIPT_TYPE = stMovies;
SCRIPT_LANG = $09; //English //$0a es-Spanish $0c fr-French $09 en-English
RATING_NAME = 'IMDB'; //PVD has ~imdbrating~ for this value
CODE_PAGE = 65001; //28591=ISO 8859-1 Latin 1; Western European (ISO). Use: 65001=Unicode (UTF-8) | 0=Autodetect
//Script Options-------------------------------------------------------------------------------------------------------
//Retrieve Data Config
GET_MAINPAGE_ACTORS_CREDIT = True ; //True: add the MAINPAGE_ACTORS_CREDIT info (found in the principal movie page). False: do not add it.
GET_FULL_ACTORS_CREDIT = True ; //True: take the FULL_ACTORS_CREDIT info from the Full Cast & Crew provider page. False: do not add it.
GET_FULL_MPAA = True ; //Download the Certification provider page to retrieve the MPAA info. Otherwise only the info of the principal movie page.
GET_FULL_AKA = True ; //Download the 'Also Known As' provider page to retrieve the info. Otherwise only the info of the principal movie page.
GET_FULL_CREDIT = True ; //Download the Cast or Credit provider page to retrieve the info. Otherwise only the info of the principal movie page.
PEOPLE_LIMIT = 25; //Limit of the number of actors (cast) or of credits (crew) retrieved.
GET_FULL_AWARDS = True ; //Download the Awards provider page to retrieve the info. Otherwise nothing is done, because the principal movie page has no such info.
EVENTS_LIMIT = 100; //Limit of the number of events (USA Academy Awards, Golden Globes, etc) used to retrieve awards.
GET_FEATURES = True ; //Retrieve the original movie features (Runtime, Sound Mix, Color, Aspect Ratio).
GET_FULL_FEATURES = True ; //Download the Technical Specs to retrieve the original movie features (Runtime, Sound Mix, Color, Aspect Ratio, etc).
MAX_IMAGE_HEIGTH = 1200; //Height limit of the stored posters.
//Process Data Config
.
.
//Download methods-------------------------------------------------------------------------------------------------------
dmGET = 0;
dmPOST = 1;
//Movie fields----------------------------------------------------------------------------------------------------------
mfURL = 0;
mfTitle = 1;
mfOrigTitle = 2;
mfAka = 3;
mfYear = 4;
mfGenre = 5;
mfCategory = 6;
mfCountry = 7;
mfStudio = 8;
mfMPAA = 9;
mfRating = 10; //This is 'Additional rating', not 'Rating'
mfTags = 11;
mfTagline = 12;
mfDescription = 13;
mfDuration = 14;
mfFeatures = 15;
//Credits types----------------------------------------------------------------------------------------------------------
--- End quote ---
Function ParsePage_IMDBMovieBASE
--- Quote ---Function ParsePage_IMDBMovieBASE(HTML:String):Cardinal; //BlockOpen
//Parses the principal movie page. This is the first part of the function; the lines holding a single '.'
//mark sections that were left out of the post.
//Returns:
// Result:=prFinished; Script has finished gathering data
// Result:=prError; if there is any big problem, with exit;
//Retrieve: ~title~, ~year~, ~origtitle~, ~poster~ / ~imdbrating~, ~IMDB_Votes~ (Custom Field) / ~TOP_250~(Custom Field) /
// If Not(GET_FULL_CREDIT): ~crew~ctDirectors,ctWriters,ctComposers,ctProducers(Not in base page), ctActors
// ~description~ / ~category~ "keywords" / ~tagline~ / ~genre~
// If Not(GET_FULL_MPAA) ~mpaa~
// ~country~ / ~rdate~ in the country given by the provider's local IP geolocation
// If Not(GET_FULL_AKA) ~aka~.
// ~budget~ / ~money~ / ~studio~ "Production Co"
// If GET_FEATURES ~features~
// //If Not(GET_FULL_FEATURES) ~features~
Var
  curPos,endPos,index,scoreKind:Integer;
  ItemValue,ItemList,ImageFile,ScoreTag:String;
  titleValue:String;
  Name,Role,PersonURL:String;
Begin
  LogMessage('Function ParsePage_IMDBMovieBASE BEGIN======================|');
  Result:=prFinished; //It will change to prError if there is any big problem, with exit;
  //Because the script doesn't retrieve the data in page order, a token search for the first curPos position or a block select is mandatory
  //Get ~title~, ~year~, ~origtitle~, ~poster~
  //Get all the "raw" title summary (raw because the hidden links are needed; "complete" tokens are avoided in the strings which open/close)
  //ItemList:=TextBetWeenFirst(HTML,'<div class="title_wrapper"','div class="plot_summary_wrapper">'); //WEB_SPECIFIC.
  ItemList:=TextBetWeenFirst(HTML,'<div class="title_block"','div class="plot_summary_wrapper">'); //WEB_SPECIFIC.
  If (Length(ItemList)>0) Then Begin
    //titleValue:=TextBetWeenFirst(ItemList,'<h1 itemprop="name" class="">','<'); //Strings which opens/closes the data. WEB_SPECIFIC
    titleValue:=TextBetWeenFirst(ItemList,'<h1 class="">','<'); //Strings which opens/closes the data. WEB_SPECIFIC
    AddFieldValueXML('title',titleValue);
    LogMessage(' Get result title:'+titleValue+'||');
    //First year attempt: text between '(' and the en dash.
    //NOTE(review): ~year~ is written twice (here and from the titleYear span below); how AddFieldValueXML
    //treats a second or an empty value is not visible here - confirm.
    ItemValue:=TextBetWeenFirst(ItemList,'(','–'); //Strings which opens/closes the data. WEB_SPECIFIC
    LogMessage(' Get result year_00:'+ItemValue+'||');
    AddFieldValueXML('year',ItemValue);
    LogMessage(' Get result year_0:'+ItemValue+'||');
    //ItemValue:=TextBetWeenFirst(ItemList,'(',')'); //Strings which opens/closes the data. WEB_SPECIFIC
    ItemValue:=TextBetWeenFirst(ItemList,'<span id="titleYear">(',')</span> </h1>'); //Strings which opens/closes the data. WEB_SPECIFIC
    AddFieldValueXML('year',ItemValue);
    LogMessage(' Get result year:'+ItemValue+'||');
    ItemValue:=TextBetWeenFirst(ItemList,'<div class="originalTitle">','<'); //Strings which opens/closes the data. WEB_SPECIFIC
    If (Length(ItemValue)=0) Then ItemValue:=titleValue; //The provider hides the original title when it equals the title. WEB_SPECIFIC
    AddFieldValueXML('origtitle',ItemValue);
    LogMessage(' Get result origtitle:'+ItemValue+'||');
    ItemValue:=TextBetWeenFirst(ItemList,BASE_URL_IMAGE_PRE_TRUE,'.'); //Get poster code. Strings which opens/closes the data. WEB_SPECIFIC
    If (Length(ItemValue)>0) Then Begin
      ItemValue:=BASE_URL_IMAGE_PRE_TRUE + ItemValue; //Base poster URL without '.jpg'. WEB_SPECIFIC
      ImageFile:=GetAppPath+'Scripts\'+BASE_DOWNLOAD_FILE_IMAGE_NAME+'-Poster.jpg';
      //Avoid HTTPS redirection: download the https image to a local file and give that file to PVD.
      If (1=DownloadImage(ItemValue + '._V1_UY' + IntToStr(MAX_IMAGE_HEIGTH) + '_.jpg',ImageFile)) Then Begin //Download in the selected user max size. WEB_SPECIFIC
        AddImageURL(itPoster,ImageFile); //The photo comes from the file even though the next line logs the true URL.
        LogMessage(' Get result poster:'+ItemValue + '._V1_UY' + IntToStr(MAX_IMAGE_HEIGTH) + '_.jpg'+'||');
      End Else If (1=DownloadImage(ItemValue +'.jpg',ImageFile)) Then Begin //Download in the web base size. WEB_SPECIFIC
        AddImageURL(itPoster,ImageFile); //The photo comes from the file even though the next line logs the true URL.
        LogMessage(' Get result poster:' + ItemValue + '.jpg'+'||');
      End;
    End;
  End;
  //Get ~imdbrating~, ~IMDB_Votes~
  //curPos:=Pos('<span itemprop="ratingValue">',HTML); //WEB_SPECIFIC.
  curPos:=Pos('<div class="ratingValue">',HTML); //WEB_SPECIFIC.
  If 0<curPos Then Begin
    //ItemValue:=TextBetWeen(HTML,'<span itemprop="ratingValue">','<',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
    ItemValue:=TextBetWeen(HTML,'<span>','<',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
    ItemValue:=StringReplace(ItemValue,',','.',True,True,False); //Spanish decimal comma to English decimal point.
    AddFieldValueXML('imdbrating',ItemValue);
    LogMessage(' Get result imdbrating:'+ItemValue+'||');
    //ItemValue:=TextBetWeen(HTML,'<span class="small" itemprop="ratingCount">','</span>',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
    ItemValue:=TextBetWeen(HTML,'<span class="small">','</span>',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
    ItemValue:=StringReplace(ItemValue,'.','',True,True,False); //Remove the Spanish thousands point.
    ItemValue:=StringReplace(ItemValue,',','',True,True,False); //Remove the English thousands comma.
    AddCustomFieldValueByName('IMDB_Votes',ItemValue);
    LogMessage(' Get result IMDB_Votes:'+ItemValue+'||');
  End;
  //Get ~TOP_250~
  curPos:=Pos('Top Rated Movies #',HTML); //WEB_SPECIFIC.
  If 0<curPos Then Begin
    ItemValue:=TextBetWeen(HTML,'Top Rated Movies #','<',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
    AddCustomFieldValueByName('TOP_250',ItemValue);
    LogMessage(' Get result TOP_250:'+ItemValue+'||');
  End;
  //Get ~Bottom 100~
  curPos:=Pos('Bottom Rated Movies #',HTML); //WEB_SPECIFIC.
  If 0<curPos Then Begin
    ItemValue:=TextBetWeen(HTML,'Bottom Rated Movies #','<',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
    AddCustomFieldValueByName('Bottom 100',ItemValue);
    LogMessage(' Get result Bottom_100:'+ItemValue+'||');
  End;
  //Get ~Metascore~ (custom field).
  //The score box is looked up by its CSS class: favorable, mixed or unfavorable. WEB_SPECIFIC
  //The whole page is searched with Pos, because no valid start offset exists at this point.
  For scoreKind:=0 To 2 Do Begin
    If scoreKind=0 Then ScoreTag:='score_favorable'
    Else If scoreKind=1 Then ScoreTag:='score_mixed'
    Else ScoreTag:='score_unfavorable';
    ScoreTag:='> <div class="metacriticScore '+ScoreTag+' titleReviewBarSubItem">';
    curPos:=Pos(ScoreTag,HTML);
    If curPos>0 Then Begin
      curPos:=curPos+Length(ScoreTag);
      endPos:=PosFrom('</span>',HTML,curPos);
      //The copy deliberately keeps the '</' that follows the number: it marks the last digit.
      ItemValue:=RemoveTags(Copy(HTML,curPos,endPos-curPos+2),false);
      //Put a comma before the last digit and drop the '</' marker, e.g. '75</' becomes '7,5'.
      //NOTE(review): StrToFloat must then accept ',' as the decimal separator - confirm for the target locale.
      For index:=0 To 9 Do
        ItemValue:=StringReplace(ItemValue,IntToStr(index)+'</',','+IntToStr(index),True,False,True);
      //AddFieldValueXML('orname',RATING_NAME1);
      //AddFieldValueXML('orating',ItemValue);
      //Nothing to convert when no score text was found (e.g. closing tag missing).
      If (Length(ItemValue)>0) Then
        AddCustomFieldValueByName('Metascore',FloatToStr((StrToFloat(ItemValue) * 1)));
    End;
  End;
  //Get Cast. GET_FULL_CREDIT = False only the info of the principal movie page.
.
.
  //Get ~plot_summary~
  If Pos('<div class="plot_summary',HTML)>0 Then Begin
    endPos:=Pos('<div class="plot_summary',HTML);
    curPos:=Pos('<div class="summary_text">',HTML); //WEB_SPECIFIC.
    If 0<curPos Then Begin
      ItemValue:=TextBetWeen(HTML,'<div class="summary_text">','</div>',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
      ItemValue:=StringReplace(ItemValue,', ',', ',True,False,True);
      ItemValue:=StringReplace(ItemValue,' ',' ',True,False,True);
      ItemValue:=StringReplace(ItemValue,' ',' ',True,False,True);
      ItemValue:=StringReplace(ItemValue,'Add a Plot »','',True,False,True);
      ItemValue:=StringReplace(ItemValue,'See full summary »','',True,False,True);
      //Cut the text at the inline script that may follow the summary.
      curPos:=Pos('var ue_t0=ue_t0||+new Date();var ',ItemValue);
      If 0<curPos Then ItemValue:=Copy(ItemValue,0,curPos-1);
      AddFieldValueXML('comment',ItemValue);
      LogMessage(' Get result plot_summary description:'+ItemValue+'||');
    End;
  End;
.
.
--- End quote ---
Ivek23:
IMDB_ [EN] [HTTPS] script
Part 2:
Function ParsePage_IMDBMovieBASE
--- Quote ---
//Second part of ParsePage_IMDBMovieBASE (the lines holding a single '.' mark omitted sections).
//Get ~plot_summary~: the summary text is cleaned up and stored in the 'comment' field.
If Pos('<div class="plot_summary',HTML)>0 then begin
EndPos:=Pos('<div class="plot_summary',HTML);
curPos:=Pos('<div class="summary_text">',HTML); //WEB_SPECIFIC.
If 0<curPos Then Begin
ItemValue:=TextBetWeen(HTML,'<div class="summary_text">','</div>',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
ItemValue:=StringReplace(ItemValue,', ',', ',True,False,True);
ItemValue:=StringReplace(ItemValue,' ',' ',True,False,True);
ItemValue:=StringReplace(ItemValue,' ',' ',True,False,True);
ItemValue:=StringReplace(ItemValue,'Add a Plot »','',True,False,True);
ItemValue:=StringReplace(ItemValue,'See full summary »','',True,False,True);
//Cut the text at the inline script that may follow the summary.
curPos:=Pos('var ue_t0=ue_t0||+new Date();var ',ItemValue);
If 0<curPos then ItemValue:=Copy(ItemValue,0,curPos-1);
AddFieldValueXML('comment',ItemValue);
LogMessage(' Get result plot_summary description:'+ItemValue+'||');
End;
End;
.
.
//Get "keywords" (several values in a comma separated list). They are stored in the 'tags' field, not in 'category'.
curPos:=Pos('<h4 class="inline">Plot Keywords:</h4>',HTML); //WEB_SPECIFIC
If 0<curPos Then Begin
ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Plot Keywords:</h4>','<a href="/title/',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
ItemValue:=StringReplace(ItemValue,'|',',',True,True,False); //Change the separator to comma
//AddFieldValueXML('category',ItemValue);
AddFieldValueXML('tags',ItemValue);
LogMessage(' Get results Category:'+ItemValue+'||');
End;
.
.
//Get ~mpaa~. GET_FULL_MPAA = False only the info of the principal movie page.
If Not(GET_FULL_MPAA) Then Begin
//The text can be "Certificate:" or "Motion Picture Rating" but always after genres.
//curPos is not set in this section: it keeps the value left by the preceding (omitted) code.
If 0<curPos Then Begin
ItemValue:=TextBetWeen(HTML,'<span itemprop="contentRating">','</span>',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
AddFieldValueXML('mpaa',ItemValue);
LogMessage(' Get result mpaa:'+ItemValue+'||');
End;
End;
//Get ~origlang~ (several values in a comma separated list)
curPos:=Pos('<h4 class="inline">Language:</h4>',HTML); //WEB_SPECIFIC.
if 0<curPos then begin
ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Language:</h4>','</div>',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
//Keep only the text before the first ' |'.
curPos:=Pos(' |',ItemValue);
If 0<curPos then ItemValue:=Copy(ItemValue,0,curPos-1);
AddFieldValueXML('origlang',ItemValue);
LogMessage(' Get results Language:'+ItemValue+'||');
End;
//Get ~country~ (several values in a comma separated list)
curPos:=Pos('<h4 class="inline">Country:</h4>',HTML); //WEB_SPECIFIC.
If 0<curPos Then Begin
ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Country:</h4>','</div>',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
ItemValue:=StringReplace(ItemValue,'|',',',True,True,False); //Change the separator to comma
AddFieldValueXML('country',ItemValue);
LogMessage(' Get results Country:'+ItemValue+'||');
End;
//Get ~rdate~ in the country given by the provider's local IP geolocation. See: http://sobizarre-en.blogspot.fr/2014/12/how-to-easily-defeat-imdb-geolocation.html
curPos:=Pos('<h4 class="inline">Release Date:</h4>',HTML); //WEB_SPECIFIC
If 0<curPos Then Begin
ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Release Date:</h4>','(',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
//Month names are replaced by their numbers, then the spaces are replaced by '.' (#46).
ItemValue:=StringReplace(ItemValue,'January','1',true,true,true); //Format web corrections. WEB_SPECIFIC
ItemValue:=StringReplace(ItemValue,'February','2',true,true,true);
ItemValue:=StringReplace(ItemValue,'March','3',true,true,true);
ItemValue:=StringReplace(ItemValue,'April','4',true,true,true);
ItemValue:=StringReplace(ItemValue,'May','5',true,true,true);
ItemValue:=StringReplace(ItemValue,'June','6',true,true,true);
ItemValue:=StringReplace(ItemValue,'July','7',true,true,true);
ItemValue:=StringReplace(ItemValue,'August','8',true,true,true);
ItemValue:=StringReplace(ItemValue,'September','9',true,true,true);
ItemValue:=StringReplace(ItemValue,' October ',' 10 ',true,true,true);
ItemValue:=StringReplace(ItemValue,' November ',' 11 ',true,true,true);
ItemValue:=StringReplace(ItemValue,' December ',' 12 ',true,true,true);
//ItemValue:=StringReplace(ItemValue,' ','.',True,False,True);
ItemValue:=StringReplace(ItemValue,' ',#46,True,False,True);
//Second pass for October to December, applied after the spaces became '.'.
ItemValue:=StringReplace(ItemValue,'October.','10.',True,True,True);
ItemValue:=StringReplace(ItemValue,'November.','11.',True,True,True);
ItemValue:=StringReplace(ItemValue,'December.','12.',True,True,True); //Format web corrections. WEB_SPECIFIC
AddFieldValueXML('rdate',ItemValue);
LogMessage(' Get results Release Date:'+ItemValue+'||');
End;
//Get ~aka~. GET_FULL_AKA = False only the info of the principal movie page.
If Not(GET_FULL_AKA) Then Begin
curPos:=Pos('<h4 class="inline">Also Known As:</h4>',HTML); //WEB_SPECIFIC
If 0<curPos Then Begin
ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Also Known As:</h4>','<',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
AddFieldValueXML('aka',ItemValue);
LogMessage(' Get result aka:'+ItemValue+'||');
End;
End;
//Get ~budget~
curPos:=Pos('<h4 class="inline">Budget:</h4>',HTML); //WEB_SPECIFIC
If 0<curPos Then Begin
ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Budget:</h4>','<',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
ItemValue:=StringReplace(ItemValue,#36,'',True,True,False); //Eliminate '$' if exists
ItemValue:=StringReplace(ItemValue,'€','',True,True,False); //Eliminate '€' if exists
ItemValue:=StringReplace(ItemValue,',','',True,True,False); //Eliminate ',' if exists
AddFieldValueXML('budget',ItemValue);
LogMessage(' Get results Budget:'+ItemValue+'||');
End;
//Get ~money~ Box Office worldwide
.
.
//Get ~studio~ "Production Co" (several values in a comma separated list)
curPos:=Pos('<h4 class="inline">Production Co:</h4>',HTML); //WEB_SPECIFIC
If 0<curPos Then Begin
ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Production Co:</h4>','</span>',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
ItemValue:=StringReplace(ItemValue,'See more »','',True,False,True);
ItemValue:=StringReplace(ItemValue,'See more','',True,True,False); //Eliminate 'See more' if exists
ItemValue:=StringReplace(ItemValue,', The','',True,False,True);
ItemValue:=StringReplace(ItemValue,'The, ','',True,False,True);
//ItemValue:=StringReplace(ItemValue,'The ','',True,False,True);
AddFieldValueXML('studio',ItemValue);
LogMessage(' Get results Studio/Production Co:'+ItemValue+'||');
End;
//Get ~features~ (multiline) GET_FULL_FEATURES = False only the info of the principal movie page.
//If Not(GET_FULL_FEATURES) Then Begin
If GET_FEATURES Then Begin
ItemList:='';
//Get "Runtime" (in minutes). The value is added to the features text and also stored as the duration field.
curPos:=Pos('<h4 class="inline">Runtime:</h4>',HTML); //WEB_SPECIFIC.
If 0<curPos Then Begin
ItemValue:=TextBetWeen(HTML,'M">',' min</time>',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
ItemList:=ItemList+'Original Runtime: '+ItemValue+' min.<br>';
LogMessage(' Get result Original Runtime:'+ItemValue+'||');
AddFieldValue(mfDuration,ItemValue);
LogMessage(' Get result lenght:'+ItemValue+'||');
End;
.
.
if (Length(ItemList)>0) then begin
AddFieldValueXML('features',ItemList);
LogMessage(' Get result Movie Features:'+ItemList+'||');
End;
End;
//Get ~User Reviews~: the text between the first '<p>' after the heading and the next '</span>'
//is stored in both the 'User Comments' and the 'User Reviews' custom fields.
If Pos('<h2>User Reviews</h2>',HTML)>0 then begin
EndPos:=Pos('<h2>User Reviews</h2>',HTML);
curPos:=Pos('<h2>User Reviews</h2>',HTML); //WEB_SPECIFIC.
If 0<curPos Then Begin
curPos:=PosFrom('<p>',HTML,EndPos)+Length('<p>');
EndPos:=PosFrom('</span>',HTML,curPos);
ItemValue:=Copy(HTML,curPos,endPos-curPos); //Strings which opens/closes the data. WEB_SPECIFIC
ItemValue:=StringReplace(ItemValue,'</p>','',True,False,True);
ItemValue:=StringReplace(ItemValue,'</div>','',True,False,True);
ItemValue:=StringReplace(ItemValue,'<ul><li>','',True,False,True);
ItemValue:=StringReplace(ItemValue,'</li></ul>','',True,False,True);
ItemValue:=StringReplace(ItemValue,':)','',True,False,True);
//Cut the text at 'Find showtimes, ' when that text is present.
curPos:=Pos('Find showtimes, ',ItemValue);
If 0<curPos then ItemValue:=Copy(ItemValue,0,curPos-1);
AddCustomFieldValueByName('User Comments',ItemValue);
AddCustomFieldValueByName('User Reviews',ItemValue);
LogMessage(' Get result User Reviews: <br>'+ItemValue+'||');
End;
End;
LogMessage('Function ParsePage_IMDBMovieBASE END=====================||');
End; //BlockClose
--- End quote ---
//Get ~category~ "keywords"
--- Quote --- //Get ~category~ "keywords" (field with several values in a comma separated list)
//Locate the "Plot Keywords:" heading; when it is found, the keyword list is stored in the 'tags' field.
curPos:=Pos('<h4 class="inline">Plot Keywords:</h4>',HTML); //WEB_SPECIFIC
If 0<curPos Then Begin
ItemValue:=TextBetWeen(HTML,'<h4 class="inline">Plot Keywords:</h4>','<a href="/title/',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
ItemValue:=StringReplace(ItemValue,'|',',',True,True,False); //Change the separator to comma
//AddFieldValueXML('category',ItemValue);
AddFieldValueXML('tags',ItemValue);
LogMessage(' Get results Category:'+ItemValue+'||');
End;
--- End quote ---
As for ~category~ "keywords": IMDB has never had them, and I repeat, never. I have never known IMDB "keywords" to be a ~category~ of the kind the AllMovie site has. On IMDB they have always been taken as ~tags~ "keywords", and this information has always been transferred to the Tags field in the database.
For example, for the movie 21:
IMDB "keywords"
--- Quote --- Plot Keywords: based on true story | card counting | blackjack | massachusetts institute of technology | gambling | See All (109) »
--- End quote ---
https://www.imdb.com/title/tt0478087/
AllMovie "keywords"
https://www.allmovie.com/movie/21-v386347
--- Quote ---Keywords
betrayal, blackjack [card game], casino, college-student
--- End quote ---
AllMovie "category"
--- Quote ---Themes
Gambling | Schemes and Ruses | Teachers and Students | Betrayal | Crime Gone Awry | Dishonor Among Thieves | Perfect Crime
--- End quote ---
https://www.allmovie.com/movie/21-v386347
It has been used this way since the creation of the PVD program, which is almost ten years ago; that is how I know that it was added to the database this way.
Ivek23:
IMDB_ [EN] [HTTPS] script
//Get ~features~ (multiline)
--- Quote --- //Get ~features~ (multiline) GET_FULL_FEATURES = False only the info of the principal movie page.
//If Not(GET_FULL_FEATURES) Then Begin
If GET_FEATURES Then Begin
ItemList:='';
//Get "Runtime" (in minutes)
curPos:=Pos('<h4 class="inline">Runtime:</h4>',HTML); //WEB_SPECIFIC.
If 0<curPos Then Begin
ItemValue:=TextBetWeen(HTML,'M">',' min</time>',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
ItemList:=ItemList+'Original Runtime: '+ItemValue+' min.<br>';
LogMessage(' Get result Original Runtime:'+ItemValue+'||');
AddFieldValue(mfDuration,ItemValue);
LogMessage(' Get result lenght:'+ItemValue+'||');
End;
.
.
if (Length(ItemList)>0) then begin
AddFieldValueXML('features',ItemList);
LogMessage(' Get result Movie Features:'+ItemList+'||');
End;
End;
--- End quote ---
There is a change here because this information is slightly different from the information in the Technical Specifications section.
Get ~features~
I've repaired the parts of the code that transfer the information from the Technical Specifications section.
--- Quote ---Function ParsePage_IMDBMovieTECHNICAL(HTML:String):Cardinal; //BlockOpen
//Returns:
// Result:=prFinished; Script has finished gathering data
// Result:=prError; If ¿any big problem? with exit
//Retrieve: ~features~
Var
//curPos:Integer;
curPos,endPos:Integer;
ItemValue,ItemList:String;
Begin
.
.
//Get "Runtime" info
ItemValue:=HTMLValues2(HTML,'Runtime','</tr>','<td>','</tr>','<br><br>',EndPos);
ItemValue:=StringReplace(ItemValue,' ','<br>',True,False,True);
ItemValue:=StringReplace(ItemValue,' ','<br>',True,False,True);
ItemValue:=StringReplace(ItemValue,' ','',True,False,True);
LogMessage(' Get result Runtime:'+ItemValue+'||');
if ItemValue <> '' then ItemList:=ItemList+'Runtime'+'<br>'+ItemValue+'<br><br>';
curPos:=Pos('<td class="label"> Runtime </td>',HTML); //WEB_SPECIFIC.
If 0<curPos Then Begin
ItemValue:=TextBetWeen(HTML,' (',' min)',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
AddFieldValue(mfDuration,ItemValue);
//AddFieldValueXML('lenght',ItemValue);
LogMessage(' Get result Original Runtime_1:'+ItemValue+'||');
End;
.
.
//Get "Color" info
ItemValue:=HTMLValues2(HTML,'Color','</tr>','<td>','</tr>','<br><br>',EndPos);
ItemValue:=StringReplace(ItemValue,' | ',' • ',True,False,True);
ItemValue:=StringReplace(ItemValue,' | ',' • ',True,False,True);
ItemValue:=StringReplace(ItemValue,' ',' ',True,False,True);
ItemValue:=StringReplace(ItemValue,' • ',' • ',True,False,True);
LogMessage(' Get result Color:'+ItemValue+'||');
if ItemValue <> '' then ItemList:=ItemList+'Color'+'<br>'+ItemValue+'<br><br>';
.
.
if (Length(ItemList)>0) then begin
//AddFieldValueXML('features',ItemList); //Ivek23 CustomField ~ImdbTechSpecs~
AddCustomFieldValueByName('ImdbTechSpecs',ItemList);
LogMessage(' Get result Movie Features:'+ItemList+'||');
End;
LogMessage('Function ParsePage_IMDBMovieTECHNICAL END=====================||');
End; //BlockClose
--- End quote ---
It is recommended to transfer the Technical Specifications section information to the ~ImdbTechSpecs~ custom field.
IMDB_ [EN] [HTTPS] _1 script is attached.
fmb64:
Is it normal that importing from IMDb is very slow and that the screen flickers with quickly disappearing black windows? Also, importing hangs when I run it from C:\Program Files\... instead of from a temporary directory.
Navigation
[0] Message Index
[#] Next page
[*] Previous page
Go to full version