Skip to content

Commit

Permalink
jav321: 清洗网站数据里URL中的连续//
Browse files Browse the repository at this point in the history
  • Loading branch information
Yuukiy committed Sep 22, 2024
1 parent a39948f commit f301bf8
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 38 deletions.
74 changes: 37 additions & 37 deletions unittest/data/DCV-137 (jav321).json
Original file line number Diff line number Diff line change
Expand Up @@ -36,43 +36,43 @@
"uncensored": null,
"publish_date": "2019-05-17",
"preview_pics": [
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_0_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_1_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_2_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_3_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_4_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_5_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_6_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_7_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_8_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_9_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_10_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_11_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_12_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_13_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_14_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_15_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_16_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_17_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_18_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_19_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_20_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_21_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_22_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_23_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_24_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_25_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_26_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_27_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_28_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_29_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_30_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_31_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_32_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_33_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_34_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_35_277dcv-137.jpg",
"https://www.jav321.com//images/documentv/277dcv/137/cap_e_36_277dcv-137.jpg"
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_0_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_1_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_2_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_3_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_4_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_5_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_6_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_7_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_8_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_9_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_10_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_11_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_12_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_13_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_14_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_15_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_16_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_17_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_18_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_19_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_20_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_21_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_22_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_23_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_24_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_25_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_26_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_27_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_28_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_29_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_30_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_31_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_32_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_33_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_34_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_35_277dcv-137.jpg",
"https://www.jav321.com/images/documentv/277dcv/137/cap_e_36_277dcv-137.jpg"
],
"preview_video": "https://sample.mgstage.com/sample/documentv/277dcv/137/277DCV-137_sample.mp4"
}
2 changes: 1 addition & 1 deletion unittest/data/IPX-177 (jav321).json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"cid": "ipx00177",
"url": "https://en.jav321.com/video/ipx00177",
"plot": "ミニスカートとニーハイから覗くきれいな素肌、ニーソの太ももへの食い込み…全てを兼ね備えた「絶対領域」!ピチピチの肌感、ぷにぷにハミもも思わず触りたくなること間違いなし!さらに絶対領域を眺めながら足コキ、股コキ、尻コキ!完全着衣で堪能!!ニーハイを履いた小悪魔に罵られながらイク!!「マヂきもいんだけど!触り方が!!」「完璧すぎる、、絶対領域はぁはぁ」",
"cover": "http://pics.dmm.co.jp//digital/video/ipx00177/ipx00177pl.jpg",
"cover": "http://pics.dmm.co.jp/digital/video/ipx00177/ipx00177pl.jpg",
"big_cover": null,
"genre": [
"デジモ",
Expand Down
2 changes: 2 additions & 0 deletions web/jav321.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ def parse_data(movie: MovieInfo):
if len(preview_pics) == 0:
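# Try to find the cover under the alternate page layout; filter on the onerror attribute to exclude the default URL the site inserts when there is actually no cover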
preview_pics = html.xpath("//div/div/div[@class='col-md-3']/img[@onerror and @class='img-responsive']/@src")
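# Some image URLs contain repeated '//' (the site's data quality is questionable...); collapse them, skipping the first 8 characters so the scheme's '://' is left intact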
preview_pics = [i[:8] + i[8:].replace('//', '/') for i in preview_pics]
# Magnet and ed2k links are loaded by a JS script, so they cannot be parsed from the static page

movie.url = page_url
Expand Down

0 comments on commit f301bf8

Please sign in to comment.