English > PVD Python Scripts
PVD Selenium MOD v4 IMDb Movie Script Confusion
Ivek23:
Function ParsePage_IMDBMovieBASE
--- Quote ---// Get ~User Reviews~
--- End quote ---
The following line has been added to this part of the code:
--- Quote ---FullReview := StringReplace(FullReview, '; - )', '', True, False, True);
--- End quote ---
The whole block now looks like this:
--- Quote --- // Get ~User Reviews~
// Extracts a user review (title plus body text) from the IMDb "UserReviews" section of HTML
// and stores the combined text in the 'User Reviews' and 'User Comments' custom fields.
// NOTE(review): the "PosFrom(...) + Length(...)" assignments below are not checked for a
// not-found result. The "0 < curPos" tests suggest PosFrom yields 0 in that case, which would
// leave curPos at an unrelated offset - confirm and guard if needed.
curPos := Pos('<section data-testid="UserReviews"', HTML); // Start of the User Reviews section
If 0 < curPos Then Begin
// Loop through multiple reviews if needed
While 0 < curPos Do Begin
// Find the review subject start
curPos := PosFrom('data-testid="review-summary"><a href="', HTML, curPos);
If 0 < curPos Then Begin
// Skip past the title link wrapper so curPos sits on the first character of the title text
curPos := PosFrom(' class="ipc-title-link-wrapper" tabindex="0"><h3 class="ipc-title__text">', HTML, curPos) + Length(' class="ipc-title-link-wrapper" tabindex="0"><h3 class="ipc-title__text">');
// The title text is taken to end where the next '<svg' begins
EndPos := PosFrom('<svg', HTML, curPos);
ItemValue := Copy(HTML, curPos, EndPos - curPos);
ItemValue := Trim(ItemValue); // Clean up leading/trailing whitespace
curPos := EndPos;
// Find the review content start
curPos := PosFrom('<div class="ipc-overflowText--children"><div class="ipc-html-content ipc-html-content--base" role="presentation"><div class="ipc-html-content-inner-div" role="presentation">', HTML, curPos) + Length('<div class="ipc-overflowText--children"><div class="ipc-html-content ipc-html-content--base" role="presentation"><div class="ipc-html-content-inner-div" role="presentation">');
// The review body ends at the closing divs that precede the card's action bar
EndPos := PosFrom('</div></div></div></div></div><div class="ipc-list-card__actions">', HTML, curPos);
ReviewContent := Copy(HTML, curPos, EndPos - curPos);
ReviewContent := Trim(ReviewContent); // Clean up leading/trailing whitespace
// Combine review subject and content
FullReview := ItemValue + #13#10 + '-------------------------------------------' + #13#10 + ReviewContent; // Add new line between subject and content
// Clean up unnecessary parts of the review content
// (removes three text emoticon patterns; the meaning of the three trailing Boolean
// arguments of this StringReplace is not visible here - see its declaration)
FullReview := StringReplace(FullReview, '; - )', '', True, False, True);
FullReview := StringReplace(FullReview, ':)', '', True, False, True);
FullReview := StringReplace(FullReview, '=)', '', True, False, True);
// Add the review to custom fields
AddCustomFieldValueByName('User Reviews', FullReview);
AddCustomFieldValueByName('User Comments', FullReview);
// Log the review
// NOTE(review): FullReview always contains at least the separator line built above,
// so this emptiness test is not expected to be false.
If FullReview <> '' Then LogMessage('Function ParsePage_IMDBMovieBASE - Get result User Reviews (CF~User Reviews~) (CF~User Comments~): <br>' + FullReview + ' | |');
// Move to the next review
// NOTE(review): this searches again for the section's opening tag, starting after the review
// just parsed. If the page holds a single "UserReviews" section the search fails and the
// While loop ends after the first review - confirm that this is intended.
curPos := PosFrom('<section data-testid="UserReviews"', HTML, curPos);
End Else Break;
End;
End;
--- End quote ---
That should be everything for Function ParsePage_IMDBMovieBASE.
Ivek23:
Function ParsePage_IMDBMovieMPAA
Fixed part of the code:
--- Quote --- mpaaValue := '';
// Get (CF~IMDbmpaaSummary~)
// Builds a line of <link> entries from the <option> items of the "jump-to" <select> element
// and stores it in the 'IMDbmpaaSummary' custom field. Sets Result to prError when the
// <select> element is not present in HTML.
curPos := Pos('<select id="jump-to"', HTML);
If curPos > 0 Then
Begin
// Extract the relevant section for categories
curPos := PosFrom('<option', HTML, curPos);
endPos := PosFrom('</select>', HTML, curPos);
mpaaSection := Copy(HTML, curPos, endPos - curPos);
//LogMessage('Function ParsePage_IMDBMovieMPAA - Extracted Category section for (CF~IMDbmpaaSummary~): ' + mpaaSection);
// Parse the options and category names
// From here on curPos is an offset into mpaaSection, not into HTML
curPos := 1;
mpaaContent := '';
While PosFrom('<option', mpaaSection, curPos) > 0 Do
Begin
curPos := PosFrom('<option', mpaaSection, curPos) + Length('<option');
optionValue := TextBetween(mpaaSection, 'value="', '">', False, curPos);
categoryName := TextBetween(mpaaSection, '">', '</option>', False, curPos);
// Format the category link
// The link target is MovieURL with the option's value appended
mpaaContent := mpaaContent + '<link url="' + MovieURL + optionValue + '">' + categoryName + '</link> ';
// Move to the next position
curPos := PosFrom('</option>', mpaaSection, curPos) + Length('</option>');
End;
// Remove the trailing " • " if it exists and keep the last </link> intact
// NOTE(review): as shown here a 7-character slice is compared with a shorter literal, and
// the loop above ends every entry with '</link> ' rather than a bullet, so this branch looks
// unreachable. The literal may have lost characters in this copy - verify against the script file.
If Copy(mpaaContent, Length(mpaaContent) - 13, 7) = ' • ' Then
Begin
mpaaContent := Copy(mpaaContent, 1, Length(mpaaContent) - 14) + '</link>';
End;
// Combine and format the final result
mpaaContent := '<link url="' + MovieURL + '#contentRating' + '">Content Ratings Summary:</link> • ' + mpaaContent;
// Store the result in the custom field
AddCustomFieldValueByName('IMDbmpaaSummary', mpaaContent);
LogMessage('Function ParsePage_IMDBMovieMPAA - Stored result for (CF~IMDbmpaaSummary~)');
End
Else
Begin
LogMessage('Function ParsePage_IMDBMovieMPAA - Content Rating section for (CF~IMDbmpaaSummary~) not found');
Result := prError; // Set to error if content rating section is not found
End;
--- End quote ---
Custom field to remove (the IMDB_MPAA line, commented out below):
--- Quote --- AddFieldValueXML('mpaa', mpaaValue);
//AddCustomFieldValueByName('IMDB_MPAA', mpaaValue);
LogMessage('Function ParsePage_IMDBMovieMPAA - Final ~mpaa~ Result: ' + mpaaValue + ' | |');
// Get "Certification" (CF~Certification~) info
--- End quote ---
Ivek23:
Function ParsePage_IMDBMovieBASE
Minor code correction.
--- Quote --- If Not (ReferencePageDownloaded) Then Begin
// Budget extraction. Skipped when USE_SAVED_PVDCONFIG is set and ConfigOptions[19] is '0'.
// Writes the budget text (still carrying any currency marker) to the 'ProductionBudget'
// custom field and the stripped value to the 'budget' field.
If Not (USE_SAVED_PVDCONFIG And (ConfigOptions[19] = '0')) Then Begin
// Get ~budget~
//curPos := Pos('<span class="ipc-metadata-list-item__label" aria-disabled="false">Budget</span>', HTML); // WEB_SPECIFIC
curPos := Pos('<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false">Budget</span>', HTML); // WEB_SPECIFIC
If 0 < curPos Then Begin
ItemValue := TextBetWeen(HTML, '<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false">Budget</span><div class="ipc-metadata-list-item__content-container"><ul class="ipc-inline-list ipc-inline-list--show-dividers ipc-inline-list--inline ipc-metadata-list-item__list-content base" role="presentation"><li role="presentation" class="ipc-inline-list__item"><span class="ipc-metadata-list-item__list-content-item ipc-btn--not-interactable" aria-disabled="false">', '</span></li></ul>', false, curPos); // Strings which opens/closes the data. WEB_SPECIFIC
ItemValue := StringReplace(ItemValue, ' (estimated)', '', True, True, False); // Eliminate '(estimated)' if exists
// The custom field is written before the currency markers and commas are stripped
if ItemValue <> '' then AddCustomFieldValueByName('ProductionBudget', ItemValue);
ItemValue := StringReplace(ItemValue, 'EUR', '', True, True, False); // Eliminate 'EUR' if exists
ItemValue := StringReplace(ItemValue, #36, '', True, True, False); // Eliminate '$' if exists
ItemValue := StringReplace(ItemValue, '€', '', True, True, False); // Eliminate '€' if exists
ItemValue := StringReplace(ItemValue, ',', '', True, True, False); // Eliminate ',' if exists
// NOTE(review): unlike the custom field above, this call is made even when ItemValue is empty
AddFieldValueXML('budget', ItemValue);
if ItemValue <> '' then LogMessage('Function ParsePage_IMDBMovieBASE - ParsePage_IMDBMovieBASE - Get results ~budget~: ' + ItemValue + ' | |');
End;
End;
End; //If Not (ReferencePageDownloaded) Then Begin should end here
//Get (CF~Domestic~) Gross US & Canada
// Stores the "Gross US & Canada" text in the 'Domestic' custom field.
// NOTE(review): the opening marker passed to TextBetWeen here is only the label span, whereas
// the budget and worldwide blocks pass the full markup up to the value span. The extracted text
// may therefore include the markup between label and value - verify what TextBetWeen returns.
//curPos:=Pos('<span class="ipc-metadata-list-item__label" aria-disabled="false">Gross US & Canada</span>',HTML); //WEB_SPECIFIC
curPos:=Pos('<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false">Gross US & Canada</span>',HTML); //WEB_SPECIFIC
If 0<curPos Then Begin
ItemValue:=TextBetWeen(HTML,'<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false">Gross US & Canada</span>','</li></ul>',false,curPos); //Strings which opens/closes the data. WEB_SPECIFIC
//LogMessage('Function ParsePage_IMDBMovieBASE - Get results BoxOffice Gross US & Canada:'+ItemValue+'||');
if ItemValue <> '' then AddCustomFieldValueByName('Domestic',ItemValue);
// The stripped value below is only used in the log message; no field is written from it
ItemValue:=StringReplace(ItemValue,#36,'',True,True,False); //Eliminate '$' if exists
ItemValue:=StringReplace(ItemValue,'€','',True,True,False); //Eliminate '€' if exists
ItemValue:=StringReplace(ItemValue,',','',True,True,False); //Eliminate ',' if exists
if ItemValue <> '' then LogMessage('Function ParsePage_IMDBMovieBASE - Get results Box Office Gross US & Canada (CF~Domestic~):'+ItemValue+'||');
End;
// Worldwide gross extraction. Skipped when USE_SAVED_PVDCONFIG is set and ConfigOptions[20] is '0'.
// Writes the stripped amount to the 'money' field and the unmodified text (with currency
// symbol) to the 'Worldwide' custom field.
If Not (USE_SAVED_PVDCONFIG And (ConfigOptions[20] = '0')) Then Begin
// Get ~money~ + (CF~Worldwide~) Box Office worldwide
//curPos := Pos('<span class="ipc-metadata-list-item__label" aria-disabled="false">Gross worldwide</span>', HTML); // WEB_SPECIFIC
curPos := Pos('<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false">Gross worldwide</span>', HTML); // WEB_SPECIFIC
If 0 < curPos Then Begin
ItemValue := TextBetWeen(HTML, '<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false">Gross worldwide</span><div class="ipc-metadata-list-item__content-container"><ul class="ipc-inline-list ipc-inline-list--show-dividers ipc-inline-list--inline ipc-metadata-list-item__list-content base" role="presentation"><li role="presentation" class="ipc-inline-list__item"><span class="ipc-metadata-list-item__list-content-item ipc-btn--not-interactable" aria-disabled="false">', '</span></li></ul>', false, curPos); // Strings which opens/closes the data. WEB_SPECIFIC
// This log call and the one after the stripping are unconditional (made even for an empty value)
LogMessage('Function ParsePage_IMDBMovieBASE - Get results BoxOffice worldwide: ' + ItemValue + ' | |');
// Process the monetary value for XML
// MoneyValue is a working copy, so ItemValue keeps its currency symbol for the custom field
MoneyValue := ItemValue;
MoneyValue := StringReplace(MoneyValue, #36, '', True, True, False); // Eliminate '$' if exists
MoneyValue := StringReplace(MoneyValue, '€', '', True, True, False); // Eliminate '€' if exists
MoneyValue := StringReplace(MoneyValue, ',', '', True, True, False); // Eliminate ',' if exists
LogMessage('Function ParsePage_IMDBMovieBASE - Processed Box Office value for "money" PVD field: ' + MoneyValue);
// NOTE(review): written even when MoneyValue is empty
AddFieldValueXML('money', MoneyValue);
// Add the Worldwide value with currency symbol
If ItemValue <> '' Then AddCustomFieldValueByName('Worldwide', ItemValue);
If MoneyValue <> '' Then LogMessage('Function ParsePage_IMDBMovieBASE - Get results Box Office worldwide ~money~ + (CF~Worldwide~): ' + MoneyValue + ' | |');
End;
End;
--- End quote ---
Ivek23:
Function ParsePage_IMDBMovieAKA
Minor code correction.
--- Quote ---//(*
//Get ~aka~ (multiple values in a line break separated list) Be careful don't repeat #13
// Collects the AKA titles of the "Also Known As (AKA)" section and writes the de-duplicated,
// '<br>'-separated list to the 'aka' field and the 'Imdb Aka' custom field.
// Note that "For" loop iterates through each element in the ItemArray and checks if it is already present in the ItemValue string. If it's not present, it adds it to ItemValue. only unique AKA values are stored and logged!
//if (Pos('It looks like we don' + Chr(39) + 't have any AKAs for this title yet.', HTML)>0) then Exit;
//Go to "Also Known As"
curPos := Pos('<h3 class="ipc-title__text"><span id="akas">Also Known As (AKA)</span>', HTML); //Strings start which opens the block content data. WEB_SPECIFIC
If 0<curPos then begin
EndPos := curPos;
//ItemList := HTMLValues2(HTML, '<h3 class="ipc-title__text"><span id="akas">Also Known As (AKA)</span>', '</section>', '<label class="', '</label>', '*', EndPos);
// The values are requested with '*' as the separator and split on it again below
ItemList := HTMLValues2(HTML, '<h3 class="ipc-title__text"><span id="akas">Also Known As (AKA)</span>', '</section>', '<span class="ipc-metadata-list-item__list-content-item ipc-btn--not-interactable"', '</span>', '*', EndPos);
//LogMessage('Function ParsePage_IMDBMovieAKA - Get result ItemList: ' + ItemList);
// NOTE(review): ItemValue1 is assigned here but not read again in this block
ItemValue1 := ItemList;
ExplodeString(ItemList, ItemArray, '*');
ItemValue := '';
For index := Low(ItemArray) To High(ItemArray) Do Begin //Remember index begins at 0
//LogMessage('Function ParsePage_IMDBMovieAKA - Get result ItemArray[index]*: ' + ItemArray[index]);
// The duplicate test is a substring search, so a title contained in an already collected
// longer title is also skipped. Every kept title is prefixed with '<br>', including the first.
If (Pos(ItemArray[index], ItemValue)=0) Then ItemValue := ItemValue + '<br>' + ItemArray[index];
End;
//ItemValue := StringReplace(ItemValue, '', '', True, True, False);
AddFieldValueXML('aka', ItemValue);
AddCustomFieldValueByName('Imdb Aka', ItemValue);
if ItemValue <> '' then LogMessage('Function ParsePage_IMDBMovieAKA - Get results ~aka~ & (CF~Imdb Aka~) : ' + ItemValue);
End;
//*)
//(*
// Builds one "country - title [subText]" line per list item of the "Also Known As (AKA)"
// section and stores all lines (CR/LF separated) in the 'IMDbCountryAKA' custom field.
// NOTE(review): the "PosFrom(...) + Length(...)" results inside the loop are not checked for a
// not-found result, and the searches are not limited to the current list item - confirm that
// every list item always contains both the label span and the content span.
// Initialize the string to collect all entries
allItemValues := '';
// Get "Also Known As (AKA)" information with countries
curPos := Pos('>Also Known As (AKA)<', HTML);
If 0 < curPos Then Begin
// The section is taken to end at the first '</section>' after the heading
EndPos := PosFrom('</section>', HTML, curPos);
While curPos < EndPos Do Begin
// Find each list item
curPos := PosFrom('<li role="presentation" class="ipc-metadata-list__item', HTML, curPos);
If (curPos = 0) Or (curPos > EndPos) Then Break;
// Extract the country or label
// The search uses the tag prefix only, while the skipped length is that of the full tag
// including aria-disabled="false"> - this assumes the tag has exactly that form.
countryStart := PosFrom('<span class="ipc-metadata-list-item__label ipc-btn--not-interactable"', HTML, curPos) + Length('<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false">');
countryEnd := PosFrom('</span>', HTML, countryStart);
countryValue := Copy(HTML, countryStart, countryEnd - countryStart);
// Extract the AKA title
// Same prefix-search / full-length-skip assumption as for the country span above
akaStart := PosFrom('<span class="ipc-metadata-list-item__list-content-item ipc-btn--not-interactable"', HTML, countryEnd) + Length('<span class="ipc-metadata-list-item__list-content-item ipc-btn--not-interactable" aria-disabled="false">');
akaEnd := PosFrom('</span>', HTML, akaStart);
akaValue := Copy(HTML, akaStart, akaEnd - akaStart);
// Move curPos past the end of the AKA title span
curPos := akaEnd + Length('</span>');
// Extract any subText (like (English), (Japanese), etc.)
subTextStart := PosFrom('<span class="ipc-metadata-list-item__list-content-item--subText"', HTML, akaEnd);
// Only accept a subText span that starts before the closing </li> of the current item
If (subTextStart > 0) And (subTextStart < PosFrom('</li>', HTML, akaStart)) Then Begin
subTextStart := subTextStart + Length('<span class="ipc-metadata-list-item__list-content-item--subText">');
subTextEnd := PosFrom('</span>', HTML, subTextStart);
subText := Copy(HTML, subTextStart, subTextEnd - subTextStart);
akaValue := akaValue + ' ' + subText;
// Move curPos past the end of the subText span
curPos := subTextEnd + Length('</span>');
End Else
subText := '';
// Combine country and AKA for the custom field
ItemValue := countryValue + ' - ' + akaValue;
// Collect the result
allItemValues := allItemValues + ItemValue + #13#10;
// Move to the next list item
curPos := PosFrom('</li>', HTML, curPos) + Length('</li>');
End;
// Add the collected results to the custom field
// (written and logged even when no list item was found and allItemValues is empty)
AddCustomFieldValueByName('IMDbCountryAKA', allItemValues);
LogMessage('Function ParsePage_IMDBMovieAKA - Get result IMDbCountryAKA: ' + allItemValues);
End;
//*)
//(*
// Get (CF~IMDbPremiereDates~) information with countries and dates
// Builds one "country - date [subText]" line per list item of the "Release Date" section and
// stores all lines (CR/LF separated) in the 'IMDbPremiereDates' custom field.
// NOTE(review): the "PosFrom(...) + Length(...)" results inside the loop are not checked for a
// not-found result, and the searches are not limited to the current list item - confirm that
// every list item always contains an aria-label attribute and a content span.
// Initialize the string to collect all entries
allItemValues := '';
curPos := Pos('<h3 class="ipc-title__text"><span id="releases">Release Date</span>', HTML);
// This position log is active (not commented out like the other diagnostic logs below)
LogMessage('Function ParsePage_IMDBMovieAKA - CurPos initial: ' + IntToStr(curPos));
If 0 < curPos Then Begin
// The section is taken to end at the first '</section>' after the heading
EndPos := PosFrom('</section>', HTML, curPos);
//LogMessage('Function ParsePage_IMDBMovieAKA - EndPos: ' + IntToStr(EndPos));
While curPos < EndPos Do Begin
// Find each list item
curPos := PosFrom('<li role="presentation" class="ipc-metadata-list__item ipc-metadata-list', HTML, curPos);
//LogMessage('Function ParsePage_IMDBMovieAKA - CurPos list item: ' + IntToStr(curPos));
If (curPos = 0) Or (curPos > EndPos) Then Break;
// Extract the country or label
// The country is read from the first aria-label attribute value after the list item start
countryStart := PosFrom('aria-label="', HTML, curPos) + Length('aria-label="');
countryEnd := PosFrom('"', HTML, countryStart);
countryValue := Copy(HTML, countryStart, countryEnd - countryStart);
//LogMessage('Function ParsePage_IMDBMovieAKA - CountryValue for (CF~IMDbPremiereDates~): ' + countryValue);
// Extract the release date
dateStart := PosFrom('<span class="ipc-metadata-list-item__list-content-item ipc-btn--not-interactable" aria-disabled="false">', HTML, countryEnd) + Length('<span class="ipc-metadata-list-item__list-content-item ipc-btn--not-interactable" aria-disabled="false">');
dateEnd := PosFrom('</span>', HTML, dateStart);
dateValue := Copy(HTML, dateStart, dateEnd - dateStart);
//LogMessage('Function ParsePage_IMDBMovieAKA - DateValue for (CF~IMDbPremiereDates~): ' + dateValue);
// Move curPos past the end of the date span
curPos := dateEnd + Length('</span>');
//LogMessage('Function ParsePage_IMDBMovieAKA - CurPos after date for (CF~IMDbPremiereDates~): ' + IntToStr(curPos));
// Extract any subText (like (limited), (Fantasia International Film Festival), etc.)
subTextStart := PosFrom('<span class="ipc-metadata-list-item__list-content-item--subText"', HTML, dateEnd);
//LogMessage('Function ParsePage_IMDBMovieAKA - SubTextStart for (CF~IMDbPremiereDates~): ' + IntToStr(subTextStart));
// Only accept a subText span that starts before the closing </li> of the current item
If (subTextStart > 0) And (subTextStart < PosFrom('</li>', HTML, dateStart)) Then Begin
subTextStart := subTextStart + Length('<span class="ipc-metadata-list-item__list-content-item--subText">');
subTextEnd := PosFrom('</span>', HTML, subTextStart);
subText := Copy(HTML, subTextStart, subTextEnd - subTextStart);
dateValue := dateValue + ' ' + subText;
// Move curPos past the end of the subText span
curPos := subTextEnd + Length('</span>');
//LogMessage('Function ParsePage_IMDBMovieAKA - SubText for (CF~IMDbPremiereDates~): ' + subText);
//LogMessage('Function ParsePage_IMDBMovieAKA - CurPos after subText for (CF~IMDbPremiereDates~): ' + IntToStr(curPos));
End Else
subText := '';
// Combine country, date, and subText for the custom field
ItemValue := countryValue + ' - ' + dateValue;
//LogMessage('Function ParsePage_IMDBMovieAKA - ItemValue for (CF~IMDbPremiereDates~): ' + ItemValue);
// Collect the result
allItemValues := allItemValues + ItemValue + #13#10;
// Move to the next list item
curPos := PosFrom('</li>', HTML, curPos) + Length('</li>');
//LogMessage('Function ParsePage_IMDBMovieAKA - CurPos after </li>: ' + IntToStr(curPos));
End;
// Add the collected results to the custom field
// (written and logged even when no list item was found and allItemValues is empty)
AddCustomFieldValueByName('IMDbPremiereDates', allItemValues);
LogMessage('Function ParsePage_IMDBMovieAKA - Get result (CF~IMDbPremiereDates~): ' + allItemValues);
End;
//*)
LogMessage('Function ParsePage_IMDBMovieAKA END=====================| |');
//LogMessage('Function ParsePage_IMDBMovieAKA: Ending processing.')
End; //BlockClose
--- End quote ---
Ivek23:
Function ParsePage_IMDBMovieMPAA
Minor code correction.
--- Quote ---//(*
// Get "Certification" (CF~Certification~) info
// Builds a '<br>'-separated list of " • label: value" entries from the "Certifications"
// section and stores it, prefixed with 'Certification<br>', in the 'Certification' custom field.
// Sets Result to prError when the section heading is not present in HTML.
If Pos('<span id="certificates">Certifications</span>', HTML) > 0 Then
Begin
// NOTE(review): same search as in the If condition above, repeated to obtain the position
curPos := Pos('<span id="certificates">Certifications</span>', HTML);
EndPos := PosFrom('</section>', HTML, curPos);
// Extract the relevant section
Section := Copy(HTML, curPos, EndPos - curPos);
//LogMessage('Function ParsePage_IMDBMovieMPAA - Extract the relevant Section: ' + Section + ' | |');
// From here on curPos is an offset into Section, not into HTML
curPos := 1;
ItemValue1 := '';
// Parse the certification items
While PosFrom('"certificates-item">', Section, curPos) > 0 Do
Begin
// Move to the label
// NOTE(review): the PosFrom results in this loop body are not checked for a not-found result
curPos := PosFrom('<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false"', Section, curPos) + Length('<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false"');
categoryName := TextBetween(Section, '>', '</span>', False, curPos);
//LogMessage('Function ParsePage_IMDBMovieMPAA - categoryName after TextBetween: ' + categoryName + ' | |');
// Move to the value
curPos := PosFrom('href="', Section, curPos) + Length('href="');
categoryValue := TextBetween(Section, '">', '</a>', False, curPos);
//LogMessage('Function ParsePage_IMDBMovieMPAA - categoryValue after TextBetween: ' + categoryValue + ' | |');
// Format the category link with proper spaces
ItemValue1 := ItemValue1 + ' • ' + categoryName + ': ' + categoryValue + '<br>';
// Move to the next item
curPos := PosFrom('</li>', Section, curPos) + Length('</li>');
End;
// Remove the trailing " • " if it exists and keep the last <br> intact
// NOTE(review): as shown here a 14-character slice is compared with a shorter literal, and
// every entry built above ends with '<br>' rather than a bullet, so this branch looks
// unreachable. The literal may have lost characters in this copy - verify against the script file.
If Copy(ItemValue1, Length(ItemValue1) - 14, 14) = '<br> • ' Then
Begin
ItemValue1 := Copy(ItemValue1, 1, Length(ItemValue1) - 14);
End;
// NOTE(review): the custom field is written even when ItemValue1 is empty; only the log is guarded
AddCustomFieldValueByName('Certification', 'Certification<br>' + ItemValue1);
If ItemValue1 <> '' Then
LogMessage('Function ParsePage_IMDBMovieMPAA - Certification:<br>' + ItemValue1 + ' | |');
End
Else
Begin
LogMessage('Function ParsePage_IMDBMovieMPAA - Certification section not found');
Result := prError; // Set to error if Certification section is not found
End;
//*)
LogMessage('Function ParsePage_IMDBMovieMPAA END=====================| |');
End; //BlockClose
--- End quote ---
Navigation
[0] Message Index
[#] Next page
[*] Previous page
Go to full version