-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #11 from YasogaN/dev
+ Code optimization to reduce package size + Bump Package Version
- Loading branch information
Showing
6 changed files
with
97 additions
and
108 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,18 @@ | ||
import { hexToDec } from "hex2dec"; | ||
|
||
/**
 * Converts a Google Maps place URL to a `listentitiesreviews` endpoint URL.
 *
 * The place URL is expected to contain a feature id of the form
 * `!1s<hex>:<hex>!`. Both hex parts are converted to decimal with `hexToDec`
 * and inserted into the `pb` query parameter of the returned URL.
 *
 * @param {string} url Google Maps place URL.
 * @param {string} [p=""] Base64 encoding of the page number. When empty, no
 *   page token is added to the request.
 * @returns {string} URL to fetch reviews.
 * @throws {Error} "Invalid URL" if `url` is not a string, contains no
 *   `!1s...!` feature id, or the id does not yield two decimal parts.
 */
export default function parseReviewURL(url, p = "") {
  // Guard the type first so a missing/non-string argument reports the same
  // "Invalid URL" error instead of a TypeError from `.match`.
  const m = typeof url === "string" ? url.match(/!1s([a-zA-Z0-9_:]+)!/) : null;
  if (!m) {
    throw new Error("Invalid URL");
  }

  // Call the converter with exactly one argument; passing it straight to
  // `map` would also hand it the index and the array.
  const [h1, h2] = m[1].split(":").map((part) => hexToDec(part));

  // A feature id without a colon (or with a part the converter rejects) would
  // otherwise put "undefined"/"null" into the request URL.
  if (h1 == null || h2 == null) {
    throw new Error("Invalid URL");
  }

  const pS = p ? `!2m2!2i10!3s${p}` : "!2m1!2i10";
  return `https://www.google.com/maps/preview/review/listentitiesreviews?authuser=0&hl=en&gl=in&pb=!1m2!1y${h1}!2y${h2}${pS}!3e1!4m5!3b1!4b1!5b1!6b1!7b1!5m2!1sdzvaXrvAMImImAXHsLPICA!7e81`;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,17 @@ | ||
/**
 * Converts a Google Maps place URL to a `listugcposts` endpoint URL.
 *
 * The feature id found between `!1s` and the next `!` in the place URL is
 * copied into the `pb` query parameter together with the sort order, page
 * token and search query.
 *
 * @param {string} url Google Maps place URL.
 * @param {1|2|3|4} so Sorting order (1: Most Relevant, 2: Newest, 3: Highest Rating, 4: Lowest Rating).
 * @param {string} [pg=""] Base64 encoding of the page number; inserted verbatim.
 * @param {string} [sq=""] Search query for filtering reviews; percent-encoded
 *   before insertion.
 * @returns {string} URL to fetch reviews.
 * @throws {Error} "Invalid URL" if `url` is not a string or contains no
 *   `!1s...!` feature id.
 */
export default function listugcposts(url, so, pg = "", sq = "") {
  // Guard the type first so a missing/non-string argument reports the same
  // "Invalid URL" error instead of a TypeError from `.match`.
  const m = typeof url === "string" ? url.match(/!1s([a-zA-Z0-9_:]+)!/) : null;
  if (!m) {
    throw new Error("Invalid URL");
  }

  // The search query is free text: an unescaped `&` or `#` would end the `pb`
  // parameter early, so it is percent-encoded before being interpolated.
  const query = encodeURIComponent(sq);

  return `https://www.google.com/maps/rpc/listugcposts?authuser=0&hl=en&gl=in&pb=!1m7!1s${m[1]}!3s${query}!6m4!4m1!1e1!4m1!1e3!2m2!1i10!2s${pg}!5m2!1sBnOwZvzePPfF4-EPy7LK0Ak!7e81!8m5!1b1!2b1!3b1!5b1!7b1!11m6!1e3!2e1!3sen!4slk!6m1!1i2!13m1!1e${so}`;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,41 +1,43 @@ | ||
/**
 * Maps one raw image entry of a review to a plain object.
 *
 * @param {Array} image Raw image entry; index 0 is read as the id and index 1
 *   as the nested detail array.
 * @returns {Object} Image with `id`, `url`, `size`, `location` and `caption`.
 */
function parseImage(image) {
  const details = image[1];
  const info = details[21][3];
  return {
    id: image[0],
    url: details[6][0],
    size: {
      width: details[6][2][0],
      height: details[6][2][1],
    },
    location: {
      friendly: info[7]?.[0],
      lat: details[8][0][2],
      long: details[8][0][1],
    },
    caption: info[5]?.[0] ?? null,
  };
}

/**
 * Parses an array of raw reviews and returns them as a JSON string
 * pretty-printed with a 2-space indent.
 *
 * @param {Array} reviews Raw reviews. Each entry is an array whose first
 *   element is the nested review structure. `null`/`undefined` is treated as
 *   an empty list.
 * @returns {Promise<string>} A promise that resolves to the JSON string of the
 *   parsed reviews.
 */
export default async function parseReviews(reviews) {
  const parsedReviews = (reviews ?? []).map(([review]) => {
    const meta = review[1];
    const author = meta[4][5];
    const body = review[2];
    return {
      review_id: review[0],
      time: {
        published: meta[2],
        last_edited: meta[3],
      },
      author: {
        name: author[0],
        profile_url: author[1],
        url: author[2][0],
        id: author[3],
      },
      review: {
        rating: body[0][0],
        // `??` keeps an empty-string text/language instead of turning it into null.
        text: body[15]?.[0]?.[0] ?? null,
        language: body[14]?.[0] ?? null,
      },
      // Reviews without photos have no image list; report that as null.
      images: body[2]?.map(parseImage) ?? null,
      source: meta[13][0],
    };
  });

  return JSON.stringify(parsedReviews, null, 2);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,80 +1,77 @@ | ||
import listugcposts from "./listugcposts.js"; | ||
import axios from "axios"; | ||
import { SortEnum } from "./types.js"; | ||
import { URL } from 'url'; | ||
import { URL } from "url"; | ||
import parser from "./parser.js"; | ||
|
||
/**
 * Validates parameters for the Google Maps review scraper.
 *
 * Checks run in order (URL, sort type, pages, clean) and the first failure is
 * thrown.
 *
 * @param {string} url - Must parse as a URL with host "www.google.com" and a
 *   path starting with "/maps/place/".
 * @param {string} sort_type - Must be a key of SortEnum with a truthy value.
 * @param {string|number} pages - "max", a number, or a numeric string.
 * @param {boolean} clean - Must be a boolean.
 * @throws {Error} If any parameter is invalid.
 */
export function validateParams(url, sort_type, pages, clean) {
  let parsedUrl;
  try {
    parsedUrl = new URL(url);
  } catch (err) {
    // Report unparseable input with the same message as a wrong host/path
    // instead of leaking the URL constructor's bare TypeError.
    throw new Error(`Invalid URL: ${url}`, { cause: err });
  }
  if (parsedUrl.host !== "www.google.com" || !parsedUrl.pathname.startsWith("/maps/place/")) {
    throw new Error(`Invalid URL: ${url}`);
  }

  if (!SortEnum[sort_type]) {
    throw new Error(`Invalid sort type: ${sort_type}`);
  }

  // The coercing global isNaN() accepts null, "", booleans and arrays because
  // they convert to a number; only real numbers and non-blank numeric strings
  // are meaningful page counts.
  const isNumericPages =
    (typeof pages === "number" || (typeof pages === "string" && pages.trim() !== "")) &&
    !Number.isNaN(Number(pages));
  if (pages !== "max" && !isNumericPages) {
    throw new Error(`Invalid pages value: ${pages}`);
  }

  if (typeof clean !== "boolean") {
    throw new Error(`Invalid value for 'clean': ${clean}`);
  }
}
|
||
/**
 * Fetches one page of reviews for a place URL with sorting and pagination
 * options.
 *
 * The request URL is built by `listugcposts` and fetched with axios. The
 * response body is expected to be JSON preceded by the `)]}'` guard prefix,
 * which is stripped before parsing.
 *
 * @param {string} url - The URL to fetch reviews from.
 * @param {string} sort - The sorting option for the reviews.
 * @param {string} [nextPage=""] - Token for the next page, if any.
 * @param {string} [search_query=""] - Search query to filter reviews, if any.
 * @returns {Promise<Object>} Parsed JSON data of reviews.
 * @throws {Error} If the request fails, the status is not 200, or the body
 *   cannot be parsed as JSON.
 */
export async function fetchReviews(url, sort, nextPage = "", search_query = "") {
  // `await` is kept so this works whether the URL builder returns a string or a promise.
  const apiUrl = await listugcposts(url, sort, nextPage, search_query);
  const response = await axios.get(apiUrl);
  if (response.status !== 200) {
    throw new Error(`Failed to fetch reviews: ${response.status}`);
  }

  const payload = response.data;
  // If the HTTP client already decoded the body there is nothing left to strip or parse.
  if (typeof payload !== "string") {
    return payload;
  }

  // Drop everything up to and including the first guard prefix only. Splitting
  // on the marker would also cut the JSON at any later occurrence of it, and
  // would yield `undefined` when the prefix is absent.
  const prefix = ")]}'";
  const prefixAt = payload.indexOf(prefix);
  const rawData = prefixAt === -1 ? payload : payload.slice(prefixAt + prefix.length);

  try {
    return JSON.parse(rawData);
  } catch (err) {
    throw new Error("Failed to parse reviews response", { cause: err });
  }
}
|
||
/**
 * Paginates through reviews from a given URL, starting after an already
 * fetched first page.
 *
 * @param {string} url - The URL to fetch reviews from.
 * @param {string} sort - Sorting parameter for reviews.
 * @param {string|number} pages - Last page number to fetch, or "max" to
 *   continue until no next-page token is returned.
 * @param {string} search_query - Search query to filter reviews.
 * @param {boolean} clean - Whether to pass the collected reviews through the parser.
 * @param {Array} initialData - First page response: index 1 is read as the
 *   next-page token and index 2 as the list of reviews.
 * @returns {Promise<Array|string>} The collected raw reviews, or the parser's
 *   output when `clean` is true.
 */
export async function paginateReviews(url, sort, pages, search_query, clean, initialData) {
  // Copy so the caller's array is never mutated; a page without reviews counts as empty.
  const reviews = [...(initialData[2] ?? [])];
  let nextPage = initialData[1]?.replace(/"/g, "");
  let currentPage = 2;
  // A non-numeric `pages` becomes NaN, which makes every comparison false and skips the loop.
  const lastPage = pages === "max" ? Infinity : Number(pages);

  while (nextPage && currentPage <= lastPage) {
    console.log(`Scraping page ${currentPage}...`);
    const data = await fetchReviews(url, sort, nextPage, search_query);
    reviews.push(...(data[2] ?? []));
    nextPage = data[1]?.replace(/"/g, "");
    currentPage++;
    // Pause only when another request will follow.
    if (nextPage && currentPage <= lastPage) {
      await new Promise((resolve) => setTimeout(resolve, 1000)); // Avoid rate-limiting
    }
  }

  return clean ? await parser(reviews) : reviews;
}